Diffstat (limited to 'compiler')
346 files changed, 20229 insertions, 32072 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index 32e42bc02a..52bd89fb7d 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -26,20 +26,12 @@ art_cc_defaults { srcs: [ "compiled_method.cc", "debug/elf_debug_writer.cc", - "dex/dex_to_dex_compiler.cc", "dex/inline_method_analyser.cc", "dex/verified_method.cc", "dex/verification_results.cc", - "dex/quick_compiler_callbacks.cc", "driver/compiled_method_storage.cc", - "driver/compiler_driver.cc", "driver/compiler_options.cc", "driver/dex_compilation_unit.cc", - "linker/buffered_output_stream.cc", - "linker/file_output_stream.cc", - "linker/output_stream.cc", - "linker/vector_output_stream.cc", - "linker/relative_patcher.cc", "jit/jit_compiler.cc", "jit/jit_logger.cc", "jni/quick/calling_convention.cc", @@ -64,12 +56,14 @@ art_cc_defaults { "optimizing/inliner.cc", "optimizing/instruction_builder.cc", "optimizing/instruction_simplifier.cc", + "optimizing/intrinsic_objects.cc", "optimizing/intrinsics.cc", "optimizing/licm.cc", "optimizing/linear_order.cc", "optimizing/load_store_analysis.cc", "optimizing/load_store_elimination.cc", "optimizing/locations.cc", + "optimizing/loop_analysis.cc", "optimizing/loop_optimization.cc", "optimizing/nodes.cc", "optimizing/optimization.cc", @@ -101,8 +95,6 @@ art_cc_defaults { arm: { srcs: [ "jni/quick/arm/calling_convention_arm.cc", - "linker/arm/relative_patcher_arm_base.cc", - "linker/arm/relative_patcher_thumb2.cc", "optimizing/code_generator_arm_vixl.cc", "optimizing/code_generator_vector_arm_vixl.cc", "optimizing/instruction_simplifier_arm.cc", @@ -119,7 +111,6 @@ art_cc_defaults { arm64: { srcs: [ "jni/quick/arm64/calling_convention_arm64.cc", - "linker/arm64/relative_patcher_arm64.cc", "optimizing/code_generator_arm64.cc", "optimizing/code_generator_vector_arm64.cc", "optimizing/scheduler_arm64.cc", @@ -133,7 +124,6 @@ art_cc_defaults { mips: { srcs: [ "jni/quick/mips/calling_convention_mips.cc", - "linker/mips/relative_patcher_mips.cc", "optimizing/code_generator_mips.cc", "optimizing/code_generator_vector_mips.cc", "optimizing/instruction_simplifier_mips.cc", @@ -146,7 +136,6 @@ art_cc_defaults { mips64: { srcs: [ "jni/quick/mips64/calling_convention_mips64.cc", - "linker/mips64/relative_patcher_mips64.cc", "optimizing/code_generator_mips64.cc", "optimizing/code_generator_vector_mips64.cc", "optimizing/intrinsics_mips64.cc", @@ -157,11 +146,11 @@ art_cc_defaults { x86: { srcs: [ "jni/quick/x86/calling_convention_x86.cc", - "linker/x86/relative_patcher_x86.cc", - "linker/x86/relative_patcher_x86_base.cc", "optimizing/code_generator_x86.cc", "optimizing/code_generator_vector_x86.cc", "optimizing/intrinsics_x86.cc", + "optimizing/instruction_simplifier_x86_shared.cc", + "optimizing/instruction_simplifier_x86.cc", "optimizing/pc_relative_fixups_x86.cc", "optimizing/x86_memory_gen.cc", "utils/x86/assembler_x86.cc", @@ -172,8 +161,8 @@ art_cc_defaults { x86_64: { srcs: [ "jni/quick/x86_64/calling_convention_x86_64.cc", - "linker/x86_64/relative_patcher_x86_64.cc", "optimizing/intrinsics_x86_64.cc", + "optimizing/instruction_simplifier_x86_64.cc", "optimizing/code_generator_x86_64.cc", "optimizing/code_generator_vector_x86_64.cc", "utils/x86_64/assembler_x86_64.cc", @@ -185,8 +174,6 @@ art_cc_defaults { generated_sources: ["art_compiler_operator_srcs"], shared_libs: [ "libbase", - "libcutils", // for atrace. 
- "liblzma", ], include_dirs: ["art/disassembler"], header_libs: [ @@ -197,13 +184,18 @@ art_cc_defaults { export_include_dirs: ["."], } +cc_defaults { + name: "libart-compiler_static_base_defaults", + static_libs: [ + "libbase", + ], +} + gensrcs { name: "art_compiler_operator_srcs", cmd: "$(location generate_operator_out) art/compiler $(in) > $(out)", tools: ["generate_operator_out"], srcs: [ - "dex/dex_to_dex_compiler.h", - "driver/compiler_driver.h", "driver/compiler_options.h", "linker/linker_patch.h", "optimizing/locations.h", @@ -227,12 +219,12 @@ art_cc_library { // VIXL assembly support for ARM targets. static: { whole_static_libs: [ - "libvixl-arm", + "libvixl", ], }, shared: { shared_libs: [ - "libvixl-arm", + "libvixl", ], }, }, @@ -240,20 +232,24 @@ art_cc_library { // VIXL assembly support for ARM64 targets. static: { whole_static_libs: [ - "libvixl-arm64", + "libvixl", ], }, shared: { shared_libs: [ - "libvixl-arm64", + "libvixl", ], }, }, }, shared_libs: [ "libart", + "libartbase", + "libartpalette", + "libprofile", "libdexfile", ], + whole_static_libs: ["libelffile"], target: { android: { @@ -264,6 +260,18 @@ art_cc_library { }, } +cc_defaults { + name: "libart-compiler_static_defaults", + defaults: [ + "libart-compiler_static_base_defaults", + "libart_static_defaults", + "libartbase_static_defaults", + "libdexfile_static_defaults", + "libprofile_static_defaults", + ], + static_libs: ["libart-compiler"], +} + art_cc_library { name: "libartd-compiler", defaults: [ @@ -275,12 +283,12 @@ art_cc_library { // VIXL assembly support for ARM targets. static: { whole_static_libs: [ - "libvixld-arm", + "libvixld", ], }, shared: { shared_libs: [ - "libvixld-arm", + "libvixld", ], }, }, @@ -288,29 +296,48 @@ art_cc_library { // VIXL assembly support for ARM64 targets. 
static: { whole_static_libs: [ - "libvixld-arm64", + "libvixld", ], }, shared: { shared_libs: [ - "libvixld-arm64", + "libvixld", ], }, }, }, shared_libs: [ + "libartbased", "libartd", + "libartpalette", + "libprofiled", "libdexfiled", ], + whole_static_libs: ["libelffiled"], +} + +cc_defaults { + name: "libartd-compiler_static_defaults", + defaults: [ + "libart-compiler_static_base_defaults", + "libartd_static_defaults", + "libartbased_static_defaults", + "libdexfiled_static_defaults", + "libprofiled_static_defaults", + ], + static_libs: ["libartd-compiler"], } art_cc_library { name: "libart-compiler-gtest", defaults: ["libart-gtest-defaults"], - srcs: ["common_compiler_test.cc"], + srcs: [ + "common_compiler_test.cc", + ], shared_libs: [ "libartd-compiler", "libartd-disassembler", + "libartbase-art-gtest", "libart-runtime-gtest", "libbase", ], @@ -324,9 +351,7 @@ art_cc_test { srcs: [ "debug/dwarf/dwarf_test.cc", "debug/src_map_elem_test.cc", - "dex/dex_to_dex_decompiler_test.cc", "driver/compiled_method_storage_test.cc", - "driver/compiler_driver_test.cc", "exception_test.cc", "jni/jni_compiler_test.cc", "linker/linker_patch_test.cc", @@ -349,6 +374,7 @@ art_cc_test { "optimizing/parallel_move_test.cc", "optimizing/pretty_printer_test.cc", "optimizing/reference_type_propagation_test.cc", + "optimizing/select_generator_test.cc", "optimizing/side_effects_test.cc", "optimizing/ssa_liveness_analysis_test.cc", "optimizing/ssa_test.cc", @@ -358,7 +384,6 @@ art_cc_test { "utils/dedupe_set_test.cc", "utils/intrusive_forward_list_test.cc", "utils/swap_space_test.cc", - "verifier_deps_test.cc", "jni/jni_cfi_test.cc", "optimizing/codegen_test.cc", @@ -370,31 +395,25 @@ art_cc_test { codegen: { arm: { srcs: [ - "linker/arm/relative_patcher_thumb2_test.cc", "utils/arm/managed_register_arm_test.cc", ], }, arm64: { srcs: [ - "linker/arm64/relative_patcher_arm64_test.cc", "utils/arm64/managed_register_arm64_test.cc", ], }, mips: { srcs: [ - "linker/mips/relative_patcher_mips_test.cc", - "linker/mips/relative_patcher_mips32r6_test.cc", ], }, mips64: { srcs: [ - "linker/mips64/relative_patcher_mips64_test.cc", "utils/mips64/managed_register_mips64_test.cc", ], }, x86: { srcs: [ - "linker/x86/relative_patcher_x86_test.cc", "utils/x86/managed_register_x86_test.cc", // These tests are testing architecture-independent @@ -410,7 +429,8 @@ art_cc_test { }, x86_64: { srcs: [ - "linker/x86_64/relative_patcher_x86_64_test.cc", + // Is this test a bit-rotten copy of the x86 test? 
b/77951326 + // "utils/x86_64/managed_register_x86_64_test.cc", ], }, }, @@ -421,10 +441,10 @@ art_cc_test { ], shared_libs: [ + "libprofiled", "libartd-compiler", "libartd-simulator-container", - "libvixld-arm", - "libvixld-arm64", + "libvixld", "libbacktrace", "libnativeloader", @@ -481,7 +501,6 @@ art_cc_test { }, shared_libs: [ "libartd-compiler", - "libvixld-arm", - "libvixld-arm64", + "libvixld", ], } diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index 29ff235cea..9755ef12d0 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -23,22 +23,20 @@ #include "arch/instruction_set.h" #include "base/enums.h" -#include "debug/dwarf/dwarf_constants.h" #include "debug/dwarf/dwarf_test.h" -#include "debug/dwarf/headers.h" #include "disassembler.h" +#include "dwarf/dwarf_constants.h" +#include "dwarf/headers.h" #include "gtest/gtest.h" #include "thread.h" namespace art { -constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT; - class CFITest : public dwarf::DwarfTest { public: void GenerateExpected(FILE* f, InstructionSet isa, const char* isa_str, - const std::vector<uint8_t>& actual_asm, - const std::vector<uint8_t>& actual_cfi) { + ArrayRef<const uint8_t> actual_asm, + ArrayRef<const uint8_t> actual_cfi) { std::vector<std::string> lines; // Print the raw bytes. fprintf(f, "static constexpr uint8_t expected_asm_%s[] = {", isa_str); @@ -50,11 +48,14 @@ class CFITest : public dwarf::DwarfTest { // Pretty-print CFI opcodes. constexpr bool is64bit = false; dwarf::DebugFrameOpCodeWriter<> initial_opcodes; - dwarf::WriteCIE(is64bit, dwarf::Reg(8), - initial_opcodes, kCFIFormat, &debug_frame_data_); + dwarf::WriteCIE(is64bit, dwarf::Reg(8), initial_opcodes, &debug_frame_data_); std::vector<uintptr_t> debug_frame_patches; - dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi), - kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches); + dwarf::WriteFDE(is64bit, + /* cie_pointer= */ 0, + /* code_address= */ 0, + actual_asm.size(), + actual_cfi, + &debug_frame_data_); ReformatCfi(Objdump(false, "-W"), &lines); // Pretty-print assembly. const uint8_t* asm_base = actual_asm.data(); @@ -142,7 +143,7 @@ class CFITest : public dwarf::DwarfTest { } // Pretty-print byte array. 12 bytes per line. - static void HexDump(FILE* f, const std::vector<uint8_t>& data) { + static void HexDump(FILE* f, ArrayRef<const uint8_t> data) { for (size_t i = 0; i < data.size(); i++) { fprintf(f, i % 12 == 0 ? "\n " : " "); // Whitespace. 
fprintf(f, "0x%02X,", data[i]); diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index d3e3a51f7a..a44b9ae523 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -16,19 +16,22 @@ #include "common_compiler_test.h" +#include <type_traits> + #include "arch/instruction_set_features.h" #include "art_field-inl.h" #include "art_method-inl.h" #include "base/callee_save_type.h" +#include "base/casts.h" #include "base/enums.h" #include "base/utils.h" #include "class_linker.h" #include "compiled_method-inl.h" #include "dex/descriptors_names.h" -#include "dex/quick_compiler_callbacks.h" #include "dex/verification_results.h" -#include "driver/compiler_driver.h" +#include "driver/compiled_method_storage.h" #include "driver/compiler_options.h" +#include "jni/java_vm_ext.h" #include "interpreter/interpreter.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" @@ -37,23 +40,15 @@ #include "oat_quick_method_header.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" +#include "utils/atomic_dex_ref_map-inl.h" namespace art { CommonCompilerTest::CommonCompilerTest() {} CommonCompilerTest::~CommonCompilerTest() {} -void CommonCompilerTest::MakeExecutable(ArtMethod* method) { +void CommonCompilerTest::MakeExecutable(ArtMethod* method, const CompiledMethod* compiled_method) { CHECK(method != nullptr); - - const CompiledMethod* compiled_method = nullptr; - if (!method->IsAbstract()) { - mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache(); - const DexFile& dex_file = *dex_cache->GetDexFile(); - compiled_method = - compiler_driver_->GetCompiledMethod(MethodReference(&dex_file, - method->GetDexMethodIndex())); - } // If the code size is 0 it means the method was skipped due to profile guided compilation. if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) { ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); @@ -61,27 +56,17 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) { ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable(); const uint32_t vmap_table_offset = vmap_table.empty() ? 0u : sizeof(OatQuickMethodHeader) + vmap_table.size(); - // The method info is directly before the vmap table. - ArrayRef<const uint8_t> method_info = compiled_method->GetMethodInfo(); - const uint32_t method_info_offset = method_info.empty() ? 
0u - : vmap_table_offset + method_info.size(); - - OatQuickMethodHeader method_header(vmap_table_offset, - method_info_offset, - compiled_method->GetFrameSizeInBytes(), - compiled_method->GetCoreSpillMask(), - compiled_method->GetFpSpillMask(), - code_size); + OatQuickMethodHeader method_header(vmap_table_offset, code_size); header_code_and_maps_chunks_.push_back(std::vector<uint8_t>()); std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back(); const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet()); - const size_t size = method_info.size() + vmap_table.size() + sizeof(method_header) + code_size; + const size_t size = vmap_table.size() + sizeof(method_header) + code_size; chunk->reserve(size + max_padding); chunk->resize(sizeof(method_header)); + static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); memcpy(&(*chunk)[0], &method_header, sizeof(method_header)); chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end()); - chunk->insert(chunk->begin(), method_info.begin(), method_info.end()); chunk->insert(chunk->end(), code.begin(), code.end()); CHECK_EQ(chunk->size(), size); const void* unaligned_code_ptr = chunk->data() + (size - code_size); @@ -114,48 +99,7 @@ void CommonCompilerTest::MakeExecutable(const void* code_start, size_t code_leng int result = mprotect(reinterpret_cast<void*>(base), len, PROT_READ | PROT_WRITE | PROT_EXEC); CHECK_EQ(result, 0); - FlushInstructionCache(reinterpret_cast<char*>(base), reinterpret_cast<char*>(base + len)); -} - -void CommonCompilerTest::MakeExecutable(ObjPtr<mirror::ClassLoader> class_loader, - const char* class_name) { - std::string class_descriptor(DotToDescriptor(class_name)); - Thread* self = Thread::Current(); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader)); - mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader); - CHECK(klass != nullptr) << "Class not found " << class_name; - PointerSize pointer_size = class_linker_->GetImagePointerSize(); - for (auto& m : klass->GetMethods(pointer_size)) { - MakeExecutable(&m); - } -} - -// Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetImageClasses() { - // Empty set: by default no classes are retained in the image. - return new std::unordered_set<std::string>(); -} - -// Get the set of compiled classes given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetCompiledClasses() { - // Null, no selection of compiled-classes. - return nullptr; -} - -// Get the set of compiled methods given to the compiler-driver in SetUp. Note: the compiler -// driver assumes ownership of the set, so the test should properly release the set. -std::unordered_set<std::string>* CommonCompilerTest::GetCompiledMethods() { - // Null, no selection of compiled-methods. - return nullptr; -} - -// Get ProfileCompilationInfo that should be passed to the driver. -ProfileCompilationInfo* CommonCompilerTest::GetProfileCompilationInfo() { - // Null, profile information will not be taken into account. 
- return nullptr; + FlushInstructionCache(reinterpret_cast<void*>(base), reinterpret_cast<void*>(base + len)); } void CommonCompilerTest::SetUp() { @@ -163,40 +107,41 @@ void CommonCompilerTest::SetUp() { { ScopedObjectAccess soa(Thread::Current()); - const InstructionSet instruction_set = kRuntimeISA; - // Take the default set of instruction features from the build. - instruction_set_features_ = InstructionSetFeatures::FromCppDefines(); - - runtime_->SetInstructionSet(instruction_set); + runtime_->SetInstructionSet(instruction_set_); for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); ++i) { CalleeSaveType type = CalleeSaveType(i); if (!runtime_->HasCalleeSaveMethod(type)) { runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(), type); } } + } +} - CreateCompilerDriver(compiler_kind_, instruction_set); +void CommonCompilerTest::ApplyInstructionSet() { + // Copy local instruction_set_ and instruction_set_features_ to *compiler_options_; + CHECK(instruction_set_features_ != nullptr); + if (instruction_set_ == InstructionSet::kThumb2) { + CHECK_EQ(InstructionSet::kArm, instruction_set_features_->GetInstructionSet()); + } else { + CHECK_EQ(instruction_set_, instruction_set_features_->GetInstructionSet()); } + compiler_options_->instruction_set_ = instruction_set_; + compiler_options_->instruction_set_features_ = + InstructionSetFeatures::FromBitmap(instruction_set_, instruction_set_features_->AsBitmap()); + CHECK(compiler_options_->instruction_set_features_->Equals(instruction_set_features_.get())); } -void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, - InstructionSet isa, - size_t number_of_threads) { - compiler_options_->boot_image_ = true; - compiler_options_->SetCompilerFilter(GetCompilerFilter()); - compiler_driver_.reset(new CompilerDriver(compiler_options_.get(), - verification_results_.get(), - kind, - isa, - instruction_set_features_.get(), - GetImageClasses(), - GetCompiledClasses(), - GetCompiledMethods(), - number_of_threads, - /* swap_fd */ -1, - GetProfileCompilationInfo())); - // We typically don't generate an image in unit tests, disable this optimization by default. 
- compiler_driver_->SetSupportBootImageFixup(false); +void CommonCompilerTest::OverrideInstructionSetFeatures(InstructionSet instruction_set, + const std::string& variant) { + instruction_set_ = instruction_set; + std::string error_msg; + instruction_set_features_ = + InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg); + CHECK(instruction_set_features_ != nullptr) << error_msg; + + if (compiler_options_ != nullptr) { + ApplyInstructionSet(); + } } void CommonCompilerTest::SetUpRuntimeOptions(RuntimeOptions* options) { @@ -204,10 +149,8 @@ void CommonCompilerTest::SetUpRuntimeOptions(RuntimeOptions* options) { compiler_options_.reset(new CompilerOptions); verification_results_.reset(new VerificationResults(compiler_options_.get())); - QuickCompilerCallbacks* callbacks = - new QuickCompilerCallbacks(CompilerCallbacks::CallbackMode::kCompileApp); - callbacks->SetVerificationResults(verification_results_.get()); - callbacks_.reset(callbacks); + + ApplyInstructionSet(); } Compiler::Kind CommonCompilerTest::GetCompilerKind() const { @@ -218,41 +161,55 @@ void CommonCompilerTest::SetCompilerKind(Compiler::Kind compiler_kind) { compiler_kind_ = compiler_kind; } -InstructionSet CommonCompilerTest::GetInstructionSet() const { - DCHECK(compiler_driver_.get() != nullptr); - return compiler_driver_->GetInstructionSet(); -} - void CommonCompilerTest::TearDown() { - compiler_driver_.reset(); - callbacks_.reset(); verification_results_.reset(); compiler_options_.reset(); - image_reservation_.reset(); CommonRuntimeTest::TearDown(); } -void CommonCompilerTest::CompileClass(mirror::ClassLoader* class_loader, const char* class_name) { - std::string class_descriptor(DotToDescriptor(class_name)); - Thread* self = Thread::Current(); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> loader(hs.NewHandle(class_loader)); - mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), loader); - CHECK(klass != nullptr) << "Class not found " << class_name; - auto pointer_size = class_linker_->GetImagePointerSize(); - for (auto& m : klass->GetMethods(pointer_size)) { - CompileMethod(&m); - } -} - void CommonCompilerTest::CompileMethod(ArtMethod* method) { CHECK(method != nullptr); - TimingLogger timings("CommonTest::CompileMethod", false, false); + TimingLogger timings("CommonCompilerTest::CompileMethod", false, false); TimingLogger::ScopedTiming t(__FUNCTION__, &timings); - compiler_driver_->CompileOne(Thread::Current(), method, &timings); - TimingLogger::ScopedTiming t2("MakeExecutable", &timings); - MakeExecutable(method); + CompiledMethodStorage storage(/*swap_fd=*/ -1); + CompiledMethod* compiled_method = nullptr; + { + DCHECK(!Runtime::Current()->IsStarted()); + Thread* self = Thread::Current(); + StackHandleScope<2> hs(self); + std::unique_ptr<Compiler> compiler( + Compiler::Create(*compiler_options_, &storage, compiler_kind_)); + const DexFile& dex_file = *method->GetDexFile(); + Handle<mirror::DexCache> dex_cache = hs.NewHandle(class_linker_->FindDexCache(self, dex_file)); + Handle<mirror::ClassLoader> class_loader = hs.NewHandle(method->GetClassLoader()); + compiler_options_->verification_results_ = verification_results_.get(); + if (method->IsNative()) { + compiled_method = compiler->JniCompile(method->GetAccessFlags(), + method->GetDexMethodIndex(), + dex_file, + dex_cache); + } else { + verification_results_->AddDexFile(&dex_file); + verification_results_->CreateVerifiedMethodFor( + MethodReference(&dex_file, method->GetDexMethodIndex())); + 
compiled_method = compiler->Compile(method->GetCodeItem(), + method->GetAccessFlags(), + method->GetInvokeType(), + method->GetClassDefIndex(), + method->GetDexMethodIndex(), + class_loader, + dex_file, + dex_cache); + } + compiler_options_->verification_results_ = nullptr; + } + CHECK(method != nullptr); + { + TimingLogger::ScopedTiming t2("MakeExecutable", &timings); + MakeExecutable(method, compiled_method); + } + CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&storage, compiled_method); } void CommonCompilerTest::CompileDirectMethod(Handle<mirror::ClassLoader> class_loader, @@ -260,7 +217,8 @@ void CommonCompilerTest::CompileDirectMethod(Handle<mirror::ClassLoader> class_l const char* signature) { std::string class_descriptor(DotToDescriptor(class_name)); Thread* self = Thread::Current(); - mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), class_loader); + ObjPtr<mirror::Class> klass = + class_linker_->FindClass(self, class_descriptor.c_str(), class_loader); CHECK(klass != nullptr) << "Class not found " << class_name; auto pointer_size = class_linker_->GetImagePointerSize(); ArtMethod* method = klass->FindClassMethod(method_name, signature, pointer_size); @@ -274,7 +232,8 @@ void CommonCompilerTest::CompileVirtualMethod(Handle<mirror::ClassLoader> class_ const char* signature) { std::string class_descriptor(DotToDescriptor(class_name)); Thread* self = Thread::Current(); - mirror::Class* klass = class_linker_->FindClass(self, class_descriptor.c_str(), class_loader); + ObjPtr<mirror::Class> klass = + class_linker_->FindClass(self, class_descriptor.c_str(), class_loader); CHECK(klass != nullptr) << "Class not found " << class_name; auto pointer_size = class_linker_->GetImagePointerSize(); ArtMethod* method = klass->FindClassMethod(method_name, signature, pointer_size); @@ -283,23 +242,8 @@ void CommonCompilerTest::CompileVirtualMethod(Handle<mirror::ClassLoader> class_ CompileMethod(method); } -void CommonCompilerTest::ReserveImageSpace() { - // Reserve where the image will be loaded up front so that other parts of test set up don't - // accidentally end up colliding with the fixed memory address when we need to load the image. 
- std::string error_msg; - MemMap::Init(); - image_reservation_.reset(MemMap::MapAnonymous("image reservation", - reinterpret_cast<uint8_t*>(ART_BASE_ADDRESS), - (size_t)120 * 1024 * 1024, // 120MB - PROT_NONE, - false /* no need for 4gb flag with fixed mmap*/, - false /* not reusing existing reservation */, - &error_msg)); - CHECK(image_reservation_.get() != nullptr) << error_msg; -} - -void CommonCompilerTest::UnreserveImageSpace() { - image_reservation_.reset(); +void CommonCompilerTest::ClearBootImageOption() { + compiler_options_->image_type_ = CompilerOptions::ImageType::kNone; } } // namespace art diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 8af29d44f0..4f4e49a720 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -18,9 +18,12 @@ #define ART_COMPILER_COMMON_COMPILER_TEST_H_ #include <list> -#include <unordered_set> #include <vector> +#include <jni.h> + +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "common_runtime_test.h" #include "compiler.h" #include "oat_file.h" @@ -30,10 +33,11 @@ namespace mirror { class ClassLoader; } // namespace mirror -class CompilerDriver; +class CompiledMethod; class CompilerOptions; class CumulativeLogger; -class ProfileCompilationInfo; +class DexFile; +class TimingLogger; class VerificationResults; template<class T> class Handle; @@ -43,48 +47,24 @@ class CommonCompilerTest : public CommonRuntimeTest { CommonCompilerTest(); ~CommonCompilerTest(); - // Create an OatMethod based on pointers (for unit tests). - OatFile::OatMethod CreateOatMethod(const void* code); - - void MakeExecutable(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_); + void MakeExecutable(ArtMethod* method, const CompiledMethod* compiled_method) + REQUIRES_SHARED(Locks::mutator_lock_); static void MakeExecutable(const void* code_start, size_t code_length); - void MakeExecutable(ObjPtr<mirror::ClassLoader> class_loader, const char* class_name) - REQUIRES_SHARED(Locks::mutator_lock_); - protected: - virtual void SetUp(); + void SetUp() override; - virtual void SetUpRuntimeOptions(RuntimeOptions* options); + void SetUpRuntimeOptions(RuntimeOptions* options) override; Compiler::Kind GetCompilerKind() const; void SetCompilerKind(Compiler::Kind compiler_kind); - InstructionSet GetInstructionSet() const; - - // Get the set of image classes given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetImageClasses(); - - // Get the set of compiled classes given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. - virtual std::unordered_set<std::string>* GetCompiledClasses(); - - // Get the set of compiled methods given to the compiler-driver in SetUp. Note: the compiler - // driver assumes ownership of the set, so the test should properly release the set. 
- virtual std::unordered_set<std::string>* GetCompiledMethods(); - - virtual ProfileCompilationInfo* GetProfileCompilationInfo(); - virtual CompilerFilter::Filter GetCompilerFilter() const { return CompilerFilter::kDefaultCompilerFilter; } - virtual void TearDown(); - - void CompileClass(mirror::ClassLoader* class_loader, const char* class_name) - REQUIRES_SHARED(Locks::mutator_lock_); + void TearDown() override; void CompileMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_); @@ -96,22 +76,23 @@ class CommonCompilerTest : public CommonRuntimeTest { const char* method_name, const char* signature) REQUIRES_SHARED(Locks::mutator_lock_); - void CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa, size_t number_of_threads = 2U); - - void ReserveImageSpace(); + void ApplyInstructionSet(); + void OverrideInstructionSetFeatures(InstructionSet instruction_set, const std::string& variant); - void UnreserveImageSpace(); + void ClearBootImageOption(); Compiler::Kind compiler_kind_ = Compiler::kOptimizing; + + InstructionSet instruction_set_ = + (kRuntimeISA == InstructionSet::kArm) ? InstructionSet::kThumb2 : kRuntimeISA; + // Take the default set of instruction features from the build. + std::unique_ptr<const InstructionSetFeatures> instruction_set_features_ + = InstructionSetFeatures::FromCppDefines(); + std::unique_ptr<CompilerOptions> compiler_options_; std::unique_ptr<VerificationResults> verification_results_; - std::unique_ptr<CompilerDriver> compiler_driver_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; - private: - std::unique_ptr<MemMap> image_reservation_; - // Chunks must not move their storage after being created - use the node-based std::list. std::list<std::vector<uint8_t>> header_code_and_maps_chunks_; }; diff --git a/compiler/compiled_method-inl.h b/compiler/compiled_method-inl.h index c43274782e..e60b30fed2 100644 --- a/compiler/compiled_method-inl.h +++ b/compiler/compiled_method-inl.h @@ -38,10 +38,6 @@ inline ArrayRef<const T> CompiledCode::GetArray(const LengthPrefixedArray<T>* ar return ArrayRef<const T>(&array->At(0), array->size()); } -inline ArrayRef<const uint8_t> CompiledMethod::GetMethodInfo() const { - return GetArray(method_info_); -} - inline ArrayRef<const uint8_t> CompiledMethod::GetVmapTable() const { return GetArray(vmap_table_); } diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc index e41371855d..58f7e4f227 100644 --- a/compiler/compiled_method.cc +++ b/compiler/compiled_method.cc @@ -17,21 +17,20 @@ #include "compiled_method.h" #include "driver/compiled_method_storage.h" -#include "driver/compiler_driver.h" #include "utils/swap_space.h" namespace art { -CompiledCode::CompiledCode(CompilerDriver* compiler_driver, +CompiledCode::CompiledCode(CompiledMethodStorage* storage, InstructionSet instruction_set, const ArrayRef<const uint8_t>& quick_code) - : compiler_driver_(compiler_driver), - quick_code_(compiler_driver_->GetCompiledMethodStorage()->DeduplicateCode(quick_code)), + : storage_(storage), + quick_code_(storage->DeduplicateCode(quick_code)), packed_fields_(InstructionSetField::Encode(instruction_set)) { } CompiledCode::~CompiledCode() { - compiler_driver_->GetCompiledMethodStorage()->ReleaseCode(quick_code_); + GetStorage()->ReleaseCode(quick_code_); } bool CompiledCode::operator==(const CompiledCode& rhs) const { @@ -74,7 +73,7 @@ size_t CompiledCode::CodeDelta(InstructionSet instruction_set) { } default: LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; - return 0; + 
UNREACHABLE(); } } @@ -95,68 +94,52 @@ const void* CompiledCode::CodePointer(const void* code_pointer, InstructionSet i } default: LOG(FATAL) << "Unknown InstructionSet: " << instruction_set; - return nullptr; + UNREACHABLE(); } } -CompiledMethod::CompiledMethod(CompilerDriver* driver, +CompiledMethod::CompiledMethod(CompiledMethodStorage* storage, InstructionSet instruction_set, const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& method_info, const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& cfi_info, const ArrayRef<const linker::LinkerPatch>& patches) - : CompiledCode(driver, instruction_set, quick_code), - frame_size_in_bytes_(frame_size_in_bytes), - core_spill_mask_(core_spill_mask), - fp_spill_mask_(fp_spill_mask), - method_info_(driver->GetCompiledMethodStorage()->DeduplicateMethodInfo(method_info)), - vmap_table_(driver->GetCompiledMethodStorage()->DeduplicateVMapTable(vmap_table)), - cfi_info_(driver->GetCompiledMethodStorage()->DeduplicateCFIInfo(cfi_info)), - patches_(driver->GetCompiledMethodStorage()->DeduplicateLinkerPatches(patches)) { + : CompiledCode(storage, instruction_set, quick_code), + vmap_table_(storage->DeduplicateVMapTable(vmap_table)), + cfi_info_(storage->DeduplicateCFIInfo(cfi_info)), + patches_(storage->DeduplicateLinkerPatches(patches)) { } CompiledMethod* CompiledMethod::SwapAllocCompiledMethod( - CompilerDriver* driver, + CompiledMethodStorage* storage, InstructionSet instruction_set, const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& method_info, const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& cfi_info, const ArrayRef<const linker::LinkerPatch>& patches) { - SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator()); + SwapAllocator<CompiledMethod> alloc(storage->GetSwapSpaceAllocator()); CompiledMethod* ret = alloc.allocate(1); alloc.construct(ret, - driver, + storage, instruction_set, quick_code, - frame_size_in_bytes, - core_spill_mask, - fp_spill_mask, - method_info, vmap_table, cfi_info, patches); return ret; } -void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m) { - SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator()); +void CompiledMethod::ReleaseSwapAllocatedCompiledMethod(CompiledMethodStorage* storage, + CompiledMethod* m) { + SwapAllocator<CompiledMethod> alloc(storage->GetSwapSpaceAllocator()); alloc.destroy(m); alloc.deallocate(m, 1); } CompiledMethod::~CompiledMethod() { - CompiledMethodStorage* storage = GetCompilerDriver()->GetCompiledMethodStorage(); + CompiledMethodStorage* storage = GetStorage(); storage->ReleaseLinkerPatches(patches_); storage->ReleaseCFIInfo(cfi_info_); storage->ReleaseVMapTable(vmap_table_); - storage->ReleaseMethodInfo(method_info_); } } // namespace art diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index acdce260e5..e92777ff12 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -28,7 +28,6 @@ namespace art { template <typename T> class ArrayRef; -class CompilerDriver; class CompiledMethodStorage; template<typename T> class LengthPrefixedArray; @@ -39,7 +38,7 @@ class LinkerPatch; class CompiledCode { public: // For Quick to supply an code blob - 
CompiledCode(CompilerDriver* compiler_driver, + CompiledCode(CompiledMethodStorage* storage, InstructionSet instruction_set, const ArrayRef<const uint8_t>& quick_code); @@ -78,8 +77,8 @@ class CompiledCode { template <typename T> static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array); - CompilerDriver* GetCompilerDriver() { - return compiler_driver_; + CompiledMethodStorage* GetStorage() { + return storage_; } template <typename BitFieldType> @@ -96,7 +95,7 @@ class CompiledCode { private: using InstructionSetField = BitField<InstructionSet, 0u, kInstructionSetFieldSize>; - CompilerDriver* const compiler_driver_; + CompiledMethodStorage* const storage_; // Used to store the compiled code. const LengthPrefixedArray<uint8_t>* const quick_code_; @@ -104,18 +103,14 @@ class CompiledCode { uint32_t packed_fields_; }; -class CompiledMethod FINAL : public CompiledCode { +class CompiledMethod final : public CompiledCode { public: // Constructs a CompiledMethod. // Note: Consider using the static allocation methods below that will allocate the CompiledMethod // in the swap space. - CompiledMethod(CompilerDriver* driver, + CompiledMethod(CompiledMethodStorage* storage, InstructionSet instruction_set, const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& method_info, const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& cfi_info, const ArrayRef<const linker::LinkerPatch>& patches); @@ -123,18 +118,14 @@ class CompiledMethod FINAL : public CompiledCode { virtual ~CompiledMethod(); static CompiledMethod* SwapAllocCompiledMethod( - CompilerDriver* driver, + CompiledMethodStorage* storage, InstructionSet instruction_set, const ArrayRef<const uint8_t>& quick_code, - const size_t frame_size_in_bytes, - const uint32_t core_spill_mask, - const uint32_t fp_spill_mask, - const ArrayRef<const uint8_t>& method_info, const ArrayRef<const uint8_t>& vmap_table, const ArrayRef<const uint8_t>& cfi_info, const ArrayRef<const linker::LinkerPatch>& patches); - static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m); + static void ReleaseSwapAllocatedCompiledMethod(CompiledMethodStorage* storage, CompiledMethod* m); bool IsIntrinsic() const { return GetPackedField<IsIntrinsicField>(); @@ -145,23 +136,9 @@ class CompiledMethod FINAL : public CompiledCode { // This affects debug information generated at link time. void MarkAsIntrinsic() { DCHECK(!IsIntrinsic()); - SetPackedField<IsIntrinsicField>(/* value */ true); + SetPackedField<IsIntrinsicField>(/* value= */ true); } - size_t GetFrameSizeInBytes() const { - return frame_size_in_bytes_; - } - - uint32_t GetCoreSpillMask() const { - return core_spill_mask_; - } - - uint32_t GetFpSpillMask() const { - return fp_spill_mask_; - } - - ArrayRef<const uint8_t> GetMethodInfo() const; - ArrayRef<const uint8_t> GetVmapTable() const; ArrayRef<const uint8_t> GetCFIInfo() const; @@ -177,14 +154,6 @@ class CompiledMethod FINAL : public CompiledCode { using IsIntrinsicField = BitField<bool, kIsIntrinsicLsb, kIsIntrinsicSize>; - // For quick code, the size of the activation used by the code. - const size_t frame_size_in_bytes_; - // For quick code, a bit mask describing spilled GPR callee-save registers. - const uint32_t core_spill_mask_; - // For quick code, a bit mask describing spilled FPR callee-save registers. 
- const uint32_t fp_spill_mask_; - // For quick code, method specific information that is not very dedupe friendly (method indices). - const LengthPrefixedArray<uint8_t>* const method_info_; // For quick code, holds code infos which contain stack maps, inline information, and etc. const LengthPrefixedArray<uint8_t>* const vmap_table_; // For quick code, a FDE entry for the debug_frame section. diff --git a/compiler/compiler.cc b/compiler/compiler.cc index 646040fd9d..98d73396bc 100644 --- a/compiler/compiler.cc +++ b/compiler/compiler.cc @@ -21,17 +21,23 @@ #include "base/macros.h" #include "base/utils.h" #include "dex/code_item_accessors-inl.h" -#include "driver/compiler_driver.h" +#include "dex/dex_file.h" +#include "oat.h" #include "optimizing/optimizing_compiler.h" namespace art { -Compiler* Compiler::Create(CompilerDriver* driver, Compiler::Kind kind) { +Compiler* Compiler::Create(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage, + Compiler::Kind kind) { + // Check that oat version when runtime was compiled matches the oat version of the compiler. + constexpr std::array<uint8_t, 4> compiler_oat_version = OatHeader::kOatVersion; + OatHeader::CheckOatVersion(compiler_oat_version); switch (kind) { case kQuick: // TODO: Remove Quick in options. case kOptimizing: - return CreateOptimizingCompiler(driver); + return CreateOptimizingCompiler(compiler_options, storage); default: LOG(FATAL) << "UNREACHABLE"; @@ -39,7 +45,7 @@ Compiler* Compiler::Create(CompilerDriver* driver, Compiler::Kind kind) { } } -bool Compiler::IsPathologicalCase(const DexFile::CodeItem& code_item, +bool Compiler::IsPathologicalCase(const dex::CodeItem& code_item, uint32_t method_idx, const DexFile& dex_file) { /* diff --git a/compiler/compiler.h b/compiler/compiler.h index f2ec3a9fa3..a496c6ced5 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -19,10 +19,13 @@ #include "base/mutex.h" #include "base/os.h" -#include "dex/dex_file.h" +#include "dex/invoke_type.h" namespace art { +namespace dex { +struct CodeItem; +} // namespace dex namespace jit { class JitCodeCache; class JitLogger; @@ -33,18 +36,14 @@ class DexCache; } // namespace mirror class ArtMethod; -class CompilerDriver; class CompiledMethod; +class CompiledMethodStorage; +class CompilerOptions; +class DexFile; template<class T> class Handle; class OatWriter; class Thread; -enum class CopyOption { - kNever, - kAlways, - kOnlyIfCompressed -}; - class Compiler { public: enum Kind { @@ -52,15 +51,13 @@ class Compiler { kOptimizing }; - static Compiler* Create(CompilerDriver* driver, Kind kind); - - virtual void Init() = 0; - - virtual void UnInit() const = 0; + static Compiler* Create(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage, + Kind kind); virtual bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const = 0; - virtual CompiledMethod* Compile(const DexFile::CodeItem* code_item, + virtual CompiledMethod* Compile(const dex::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, uint16_t class_def_idx, @@ -77,6 +74,7 @@ class Compiler { virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED, jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED, + bool baseline ATTRIBUTE_UNUSED, bool osr ATTRIBUTE_UNUSED, jit::JitLogger* jit_logger ATTRIBUTE_UNUSED) REQUIRES_SHARED(Locks::mutator_lock_) { @@ -92,36 +90,32 @@ class Compiler { virtual ~Compiler() {} - /* - * @brief Generate and return Dwarf CFI initialization, if supported by the - * backend. 
- * @param driver CompilerDriver for this compile. - * @returns nullptr if not supported by backend or a vector of bytes for CFI DWARF - * information. - * @note This is used for backtrace information in generated code. - */ - virtual std::vector<uint8_t>* GetCallFrameInformationInitialization( - const CompilerDriver& driver ATTRIBUTE_UNUSED) const { - return nullptr; - } - // Returns whether the method to compile is such a pathological case that // it's not worth compiling. - static bool IsPathologicalCase(const DexFile::CodeItem& code_item, + static bool IsPathologicalCase(const dex::CodeItem& code_item, uint32_t method_idx, const DexFile& dex_file); protected: - Compiler(CompilerDriver* driver, uint64_t warning) : - driver_(driver), maximum_compilation_time_before_warning_(warning) { + Compiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage, + uint64_t warning) : + compiler_options_(compiler_options), + storage_(storage), + maximum_compilation_time_before_warning_(warning) { + } + + const CompilerOptions& GetCompilerOptions() const { + return compiler_options_; } - CompilerDriver* GetCompilerDriver() const { - return driver_; + CompiledMethodStorage* GetCompiledMethodStorage() const { + return storage_; } private: - CompilerDriver* const driver_; + const CompilerOptions& compiler_options_; + CompiledMethodStorage* const storage_; const uint64_t maximum_compilation_time_before_warning_; DISALLOW_COPY_AND_ASSIGN(Compiler); diff --git a/compiler/debug/dwarf/debug_abbrev_writer.h b/compiler/debug/dwarf/debug_abbrev_writer.h deleted file mode 100644 index cccca255c1..0000000000 --- a/compiler/debug/dwarf/debug_abbrev_writer.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_ABBREV_WRITER_H_ -#define ART_COMPILER_DEBUG_DWARF_DEBUG_ABBREV_WRITER_H_ - -#include <cstdint> -#include <type_traits> -#include <unordered_map> - -#include "base/casts.h" -#include "base/leb128.h" -#include "base/stl_util.h" -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/writer.h" - -namespace art { -namespace dwarf { - -// Writer for the .debug_abbrev. -// -// Abbreviations specify the format of entries in .debug_info. -// Each entry specifies abbreviation code, which in turns -// determines all the attributes and their format. -// It is possible to think of them as type definitions. -template <typename Vector = std::vector<uint8_t>> -class DebugAbbrevWriter FINAL : private Writer<Vector> { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - public: - explicit DebugAbbrevWriter(Vector* buffer) - : Writer<Vector>(buffer), - current_abbrev_(buffer->get_allocator()) { - this->PushUint8(0); // Add abbrev table terminator. - } - - // Start abbreviation declaration. 
- void StartAbbrev(Tag tag) { - DCHECK(current_abbrev_.empty()); - EncodeUnsignedLeb128(&current_abbrev_, tag); - has_children_offset_ = current_abbrev_.size(); - current_abbrev_.push_back(0); // Place-holder for DW_CHILDREN. - } - - // Add attribute specification. - void AddAbbrevAttribute(Attribute name, Form type) { - EncodeUnsignedLeb128(&current_abbrev_, name); - EncodeUnsignedLeb128(&current_abbrev_, type); - } - - // End abbreviation declaration and return its code. - // This will deduplicate abbreviations. - uint32_t EndAbbrev(Children has_children) { - DCHECK(!current_abbrev_.empty()); - current_abbrev_[has_children_offset_] = has_children; - auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_), NextAbbrevCode())); - uint32_t abbrev_code = it.first->second; - if (UNLIKELY(it.second)) { // Inserted new entry. - const Vector& abbrev = it.first->first; - this->Pop(); // Remove abbrev table terminator. - this->PushUleb128(abbrev_code); - this->PushData(abbrev.data(), abbrev.size()); - this->PushUint8(0); // Attribute list end. - this->PushUint8(0); // Attribute list end. - this->PushUint8(0); // Add abbrev table terminator. - } - current_abbrev_.clear(); - return abbrev_code; - } - - // Get the next free abbrev code. - uint32_t NextAbbrevCode() { - return dchecked_integral_cast<uint32_t>(1 + abbrev_codes_.size()); - } - - private: - Vector current_abbrev_; - size_t has_children_offset_ = 0; - std::unordered_map<Vector, uint32_t, FNVHash<Vector> > abbrev_codes_; -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_DEBUG_ABBREV_WRITER_H_ diff --git a/compiler/debug/dwarf/debug_frame_opcode_writer.h b/compiler/debug/dwarf/debug_frame_opcode_writer.h deleted file mode 100644 index 7c75c9bf37..0000000000 --- a/compiler/debug/dwarf/debug_frame_opcode_writer.h +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ -#define ART_COMPILER_DEBUG_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ - -#include "base/bit_utils.h" -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/register.h" -#include "debug/dwarf/writer.h" - -namespace art { -namespace dwarf { - -// Writer for .debug_frame opcodes (DWARF-3). -// See the DWARF specification for the precise meaning of the opcodes. -// The writer is very light-weight, however it will do the following for you: -// * Choose the most compact encoding of a given opcode. -// * Keep track of current state and convert absolute values to deltas. -// * Divide by header-defined factors as appropriate. -template<typename Vector = std::vector<uint8_t> > -class DebugFrameOpCodeWriter : private Writer<Vector> { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - public: - // To save space, DWARF divides most offsets by header-defined factors. - // They are used in integer divisions, so we make them constants. 
- // We usually subtract from stack base pointer, so making the factor - // negative makes the encoded values positive and thus easier to encode. - static constexpr int kDataAlignmentFactor = -4; - static constexpr int kCodeAlignmentFactor = 1; - - // Explicitely advance the program counter to given location. - void ALWAYS_INLINE AdvancePC(int absolute_pc) { - DCHECK_GE(absolute_pc, current_pc_); - if (UNLIKELY(enabled_)) { - int delta = FactorCodeOffset(absolute_pc - current_pc_); - if (delta != 0) { - if (delta <= 0x3F) { - this->PushUint8(DW_CFA_advance_loc | delta); - } else if (delta <= UINT8_MAX) { - this->PushUint8(DW_CFA_advance_loc1); - this->PushUint8(delta); - } else if (delta <= UINT16_MAX) { - this->PushUint8(DW_CFA_advance_loc2); - this->PushUint16(delta); - } else { - this->PushUint8(DW_CFA_advance_loc4); - this->PushUint32(delta); - } - } - current_pc_ = absolute_pc; - } - } - - // Override this method to automatically advance the PC before each opcode. - virtual void ImplicitlyAdvancePC() { } - - // Common alias in assemblers - spill relative to current stack pointer. - void ALWAYS_INLINE RelOffset(Reg reg, int offset) { - Offset(reg, offset - current_cfa_offset_); - } - - // Common alias in assemblers - increase stack frame size. - void ALWAYS_INLINE AdjustCFAOffset(int delta) { - DefCFAOffset(current_cfa_offset_ + delta); - } - - // Custom alias - spill many registers based on bitmask. - void ALWAYS_INLINE RelOffsetForMany(Reg reg_base, int offset, - uint32_t reg_mask, int reg_size) { - DCHECK(reg_size == 4 || reg_size == 8); - if (UNLIKELY(enabled_)) { - for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { - // Skip zero bits and go to the set bit. - int num_zeros = CTZ(reg_mask); - i += num_zeros; - reg_mask >>= num_zeros; - RelOffset(Reg(reg_base.num() + i), offset); - offset += reg_size; - } - } - } - - // Custom alias - unspill many registers based on bitmask. - void ALWAYS_INLINE RestoreMany(Reg reg_base, uint32_t reg_mask) { - if (UNLIKELY(enabled_)) { - for (int i = 0; reg_mask != 0u; reg_mask >>= 1, i++) { - // Skip zero bits and go to the set bit. - int num_zeros = CTZ(reg_mask); - i += num_zeros; - reg_mask >>= num_zeros; - Restore(Reg(reg_base.num() + i)); - } - } - } - - void ALWAYS_INLINE Nop() { - if (UNLIKELY(enabled_)) { - this->PushUint8(DW_CFA_nop); - } - } - - void ALWAYS_INLINE Offset(Reg reg, int offset) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - int factored_offset = FactorDataOffset(offset); // May change sign. 
- if (factored_offset >= 0) { - if (0 <= reg.num() && reg.num() <= 0x3F) { - this->PushUint8(DW_CFA_offset | reg.num()); - this->PushUleb128(factored_offset); - } else { - this->PushUint8(DW_CFA_offset_extended); - this->PushUleb128(reg.num()); - this->PushUleb128(factored_offset); - } - } else { - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_offset_extended_sf); - this->PushUleb128(reg.num()); - this->PushSleb128(factored_offset); - } - } - } - - void ALWAYS_INLINE Restore(Reg reg) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - if (0 <= reg.num() && reg.num() <= 0x3F) { - this->PushUint8(DW_CFA_restore | reg.num()); - } else { - this->PushUint8(DW_CFA_restore_extended); - this->PushUleb128(reg.num()); - } - } - } - - void ALWAYS_INLINE Undefined(Reg reg) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_undefined); - this->PushUleb128(reg.num()); - } - } - - void ALWAYS_INLINE SameValue(Reg reg) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_same_value); - this->PushUleb128(reg.num()); - } - } - - // The previous value of "reg" is stored in register "new_reg". - void ALWAYS_INLINE Register(Reg reg, Reg new_reg) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_register); - this->PushUleb128(reg.num()); - this->PushUleb128(new_reg.num()); - } - } - - void ALWAYS_INLINE RememberState() { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_remember_state); - } - } - - void ALWAYS_INLINE RestoreState() { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_restore_state); - } - } - - void ALWAYS_INLINE DefCFA(Reg reg, int offset) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - if (offset >= 0) { - this->PushUint8(DW_CFA_def_cfa); - this->PushUleb128(reg.num()); - this->PushUleb128(offset); // Non-factored. - } else { - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_def_cfa_sf); - this->PushUleb128(reg.num()); - this->PushSleb128(FactorDataOffset(offset)); - } - } - current_cfa_offset_ = offset; - } - - void ALWAYS_INLINE DefCFARegister(Reg reg) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - this->PushUint8(DW_CFA_def_cfa_register); - this->PushUleb128(reg.num()); - } - } - - void ALWAYS_INLINE DefCFAOffset(int offset) { - if (UNLIKELY(enabled_)) { - if (current_cfa_offset_ != offset) { - ImplicitlyAdvancePC(); - if (offset >= 0) { - this->PushUint8(DW_CFA_def_cfa_offset); - this->PushUleb128(offset); // Non-factored. - } else { - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_def_cfa_offset_sf); - this->PushSleb128(FactorDataOffset(offset)); - } - } - } - // Uncoditional so that the user can still get and check the value. - current_cfa_offset_ = offset; - } - - void ALWAYS_INLINE ValOffset(Reg reg, int offset) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - int factored_offset = FactorDataOffset(offset); // May change sign. 
- if (factored_offset >= 0) { - this->PushUint8(DW_CFA_val_offset); - this->PushUleb128(reg.num()); - this->PushUleb128(factored_offset); - } else { - this->PushUint8(DW_CFA_val_offset_sf); - this->PushUleb128(reg.num()); - this->PushSleb128(factored_offset); - } - } - } - - void ALWAYS_INLINE DefCFAExpression(uint8_t* expr, int expr_size) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_def_cfa_expression); - this->PushUleb128(expr_size); - this->PushData(expr, expr_size); - } - } - - void ALWAYS_INLINE Expression(Reg reg, uint8_t* expr, int expr_size) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_expression); - this->PushUleb128(reg.num()); - this->PushUleb128(expr_size); - this->PushData(expr, expr_size); - } - } - - void ALWAYS_INLINE ValExpression(Reg reg, uint8_t* expr, int expr_size) { - if (UNLIKELY(enabled_)) { - ImplicitlyAdvancePC(); - uses_dwarf3_features_ = true; - this->PushUint8(DW_CFA_val_expression); - this->PushUleb128(reg.num()); - this->PushUleb128(expr_size); - this->PushData(expr, expr_size); - } - } - - bool IsEnabled() const { return enabled_; } - - void SetEnabled(bool value) { - enabled_ = value; - if (enabled_ && opcodes_.capacity() == 0u) { - opcodes_.reserve(kDefaultCapacity); - } - } - - int GetCurrentPC() const { return current_pc_; } - - int GetCurrentCFAOffset() const { return current_cfa_offset_; } - - void SetCurrentCFAOffset(int offset) { current_cfa_offset_ = offset; } - - using Writer<Vector>::data; - - explicit DebugFrameOpCodeWriter(bool enabled = true, - const typename Vector::allocator_type& alloc = - typename Vector::allocator_type()) - : Writer<Vector>(&opcodes_), - enabled_(false), - opcodes_(alloc), - current_cfa_offset_(0), - current_pc_(0), - uses_dwarf3_features_(false) { - SetEnabled(enabled); - } - - virtual ~DebugFrameOpCodeWriter() { } - - protected: - // Best guess based on couple of observed outputs. - static constexpr size_t kDefaultCapacity = 32u; - - int FactorDataOffset(int offset) const { - DCHECK_EQ(offset % kDataAlignmentFactor, 0); - return offset / kDataAlignmentFactor; - } - - int FactorCodeOffset(int offset) const { - DCHECK_EQ(offset % kCodeAlignmentFactor, 0); - return offset / kCodeAlignmentFactor; - } - - bool enabled_; // If disabled all writes are no-ops. - Vector opcodes_; - int current_cfa_offset_; - int current_pc_; - bool uses_dwarf3_features_; - - private: - DISALLOW_COPY_AND_ASSIGN(DebugFrameOpCodeWriter); -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_DEBUG_FRAME_OPCODE_WRITER_H_ diff --git a/compiler/debug/dwarf/debug_info_entry_writer.h b/compiler/debug/dwarf/debug_info_entry_writer.h deleted file mode 100644 index 89d16f2b2a..0000000000 --- a/compiler/debug/dwarf/debug_info_entry_writer.h +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ -#define ART_COMPILER_DEBUG_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ - -#include <cstdint> -#include <unordered_map> - -#include "base/casts.h" -#include "base/leb128.h" -#include "debug/dwarf/debug_abbrev_writer.h" -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/expression.h" -#include "debug/dwarf/writer.h" - -namespace art { -namespace dwarf { - -/* - * Writer for debug information entries (DIE). - * - * Usage: - * StartTag(DW_TAG_compile_unit); - * WriteStrp(DW_AT_producer, "Compiler name", debug_str); - * StartTag(DW_TAG_subprogram); - * WriteStrp(DW_AT_name, "Foo", debug_str); - * EndTag(); - * EndTag(); - */ -template <typename Vector = std::vector<uint8_t>> -class DebugInfoEntryWriter FINAL : private Writer<Vector> { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - public: - static constexpr size_t kCompilationUnitHeaderSize = 11; - - // Start debugging information entry. - // Returns offset of the entry in compilation unit. - size_t StartTag(Tag tag) { - if (inside_entry_) { - // Write abbrev code for the previous entry. - // Parent entry is finalized before any children are written. - this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_yes)); - inside_entry_ = false; - } - debug_abbrev_->StartAbbrev(tag); - // Abbrev code placeholder of sufficient size. - abbrev_code_offset_ = this->data()->size(); - this->PushUleb128(debug_abbrev_->NextAbbrevCode()); - depth_++; - inside_entry_ = true; - return abbrev_code_offset_ + kCompilationUnitHeaderSize; - } - - // End debugging information entry. - void EndTag() { - DCHECK_GT(depth_, 0); - if (inside_entry_) { - // Write abbrev code for this entry. - this->UpdateUleb128(abbrev_code_offset_, debug_abbrev_->EndAbbrev(DW_CHILDREN_no)); - inside_entry_ = false; - // This entry has no children and so there is no terminator. - } else { - // The entry has been already finalized so it must be parent entry - // and we need to write the terminator required by DW_CHILDREN_yes. 
- this->PushUint8(0); - } - depth_--; - } - - void WriteAddr(Attribute attrib, uint64_t value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_addr); - patch_locations_.push_back(this->data()->size()); - if (is64bit_) { - this->PushUint64(value); - } else { - this->PushUint32(value); - } - } - - void WriteBlock(Attribute attrib, const uint8_t* ptr, size_t num_bytes) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_block); - this->PushUleb128(num_bytes); - this->PushData(ptr, num_bytes); - } - - void WriteExprLoc(Attribute attrib, const Expression& expr) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_exprloc); - this->PushUleb128(dchecked_integral_cast<uint32_t>(expr.size())); - this->PushData(expr.data()); - } - - void WriteData1(Attribute attrib, uint8_t value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data1); - this->PushUint8(value); - } - - void WriteData2(Attribute attrib, uint16_t value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data2); - this->PushUint16(value); - } - - void WriteData4(Attribute attrib, uint32_t value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data4); - this->PushUint32(value); - } - - void WriteData8(Attribute attrib, uint64_t value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_data8); - this->PushUint64(value); - } - - void WriteSecOffset(Attribute attrib, uint32_t offset) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sec_offset); - this->PushUint32(offset); - } - - void WriteSdata(Attribute attrib, int value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_sdata); - this->PushSleb128(value); - } - - void WriteUdata(Attribute attrib, int value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata); - this->PushUleb128(value); - } - - void WriteUdata(Attribute attrib, uint32_t value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_udata); - this->PushUleb128(value); - } - - void WriteFlag(Attribute attrib, bool value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag); - this->PushUint8(value ? 
1 : 0); - } - - void WriteFlagPresent(Attribute attrib) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_flag_present); - } - - void WriteRef4(Attribute attrib, uint32_t cu_offset) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref4); - this->PushUint32(cu_offset); - } - - void WriteRef(Attribute attrib, uint32_t cu_offset) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_ref_udata); - this->PushUleb128(cu_offset); - } - - void WriteString(Attribute attrib, const char* value) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_string); - this->PushString(value); - } - - void WriteStrp(Attribute attrib, size_t debug_str_offset) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp); - this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset)); - } - - void WriteStrp(Attribute attrib, const char* str, size_t len, - std::vector<uint8_t>* debug_str) { - debug_abbrev_->AddAbbrevAttribute(attrib, DW_FORM_strp); - this->PushUint32(debug_str->size()); - debug_str->insert(debug_str->end(), str, str + len); - debug_str->push_back(0); - } - - void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) { - WriteStrp(attrib, str, strlen(str), debug_str); - } - - bool Is64bit() const { return is64bit_; } - - const std::vector<uintptr_t>& GetPatchLocations() const { - return patch_locations_; - } - - int Depth() const { return depth_; } - - using Writer<Vector>::data; - using Writer<Vector>::size; - using Writer<Vector>::UpdateUint32; - - DebugInfoEntryWriter(bool is64bitArch, - DebugAbbrevWriter<Vector>* debug_abbrev, - const typename Vector::allocator_type& alloc = - typename Vector::allocator_type()) - : Writer<Vector>(&entries_), - debug_abbrev_(debug_abbrev), - entries_(alloc), - is64bit_(is64bitArch) { - } - - ~DebugInfoEntryWriter() { - DCHECK(!inside_entry_); - DCHECK_EQ(depth_, 0); - } - - private: - DebugAbbrevWriter<Vector>* debug_abbrev_; - Vector entries_; - bool is64bit_; - int depth_ = 0; - size_t abbrev_code_offset_ = 0; // Location to patch once we know the code. - bool inside_entry_ = false; // Entry ends at first child (if any). - std::vector<uintptr_t> patch_locations_; -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_DEBUG_INFO_ENTRY_WRITER_H_ diff --git a/compiler/debug/dwarf/debug_line_opcode_writer.h b/compiler/debug/dwarf/debug_line_opcode_writer.h deleted file mode 100644 index b4a4d63f01..0000000000 --- a/compiler/debug/dwarf/debug_line_opcode_writer.h +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ -#define ART_COMPILER_DEBUG_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ - -#include <cstdint> - -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/writer.h" - -namespace art { -namespace dwarf { - -// Writer for the .debug_line opcodes (DWARF-3). 
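The WriteStrp() overloads above show how strings are kept out of .debug_info: the attribute itself is written as a 4-byte offset (DW_FORM_strp) and the characters, plus a NUL terminator, are appended to the .debug_str buffer. A minimal standalone sketch of that indirection (illustrative helper name, and no deduplication, just like the deleted code):

#include <cstdint>
#include <cstring>
#include <vector>

// Appends `str` and its NUL terminator to .debug_str and returns the offset
// that the DIE writer would emit as the DW_FORM_strp value.
uint32_t AddToDebugStr(std::vector<uint8_t>* debug_str, const char* str) {
  uint32_t offset = static_cast<uint32_t>(debug_str->size());
  debug_str->insert(debug_str->end(), str, str + std::strlen(str) + 1);
  return offset;
}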
-// The writer is very light-weight, however it will do the following for you: -// * Choose the most compact encoding of a given opcode. -// * Keep track of current state and convert absolute values to deltas. -// * Divide by header-defined factors as appropriate. -template<typename Vector = std::vector<uint8_t>> -class DebugLineOpCodeWriter FINAL : private Writer<Vector> { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - public: - static constexpr int kOpcodeBase = 13; - static constexpr bool kDefaultIsStmt = false; - static constexpr int kLineBase = -5; - static constexpr int kLineRange = 14; - - void AddRow() { - this->PushUint8(DW_LNS_copy); - } - - void AdvancePC(uint64_t absolute_address) { - DCHECK_NE(current_address_, 0u); // Use SetAddress for the first advance. - DCHECK_GE(absolute_address, current_address_); - if (absolute_address != current_address_) { - uint64_t delta = FactorCodeOffset(absolute_address - current_address_); - if (delta <= INT32_MAX) { - this->PushUint8(DW_LNS_advance_pc); - this->PushUleb128(static_cast<int>(delta)); - current_address_ = absolute_address; - } else { - SetAddress(absolute_address); - } - } - } - - void AdvanceLine(int absolute_line) { - int delta = absolute_line - current_line_; - if (delta != 0) { - this->PushUint8(DW_LNS_advance_line); - this->PushSleb128(delta); - current_line_ = absolute_line; - } - } - - void SetFile(int file) { - if (current_file_ != file) { - this->PushUint8(DW_LNS_set_file); - this->PushUleb128(file); - current_file_ = file; - } - } - - void SetColumn(int column) { - this->PushUint8(DW_LNS_set_column); - this->PushUleb128(column); - } - - void SetIsStmt(bool is_stmt) { - if (is_stmt_ != is_stmt) { - this->PushUint8(DW_LNS_negate_stmt); - is_stmt_ = is_stmt; - } - } - - void SetBasicBlock() { - this->PushUint8(DW_LNS_set_basic_block); - } - - void SetPrologueEnd() { - uses_dwarf3_features_ = true; - this->PushUint8(DW_LNS_set_prologue_end); - } - - void SetEpilogueBegin() { - uses_dwarf3_features_ = true; - this->PushUint8(DW_LNS_set_epilogue_begin); - } - - void SetISA(int isa) { - uses_dwarf3_features_ = true; - this->PushUint8(DW_LNS_set_isa); - this->PushUleb128(isa); - } - - void EndSequence() { - this->PushUint8(0); - this->PushUleb128(1); - this->PushUint8(DW_LNE_end_sequence); - current_address_ = 0; - current_file_ = 1; - current_line_ = 1; - is_stmt_ = kDefaultIsStmt; - } - - // Uncoditionally set address using the long encoding. - // This gives the linker opportunity to relocate the address. - void SetAddress(uint64_t absolute_address) { - DCHECK_GE(absolute_address, current_address_); - FactorCodeOffset(absolute_address); // Check if it is factorable. 
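EndSequence() above, like the SetAddress() implementation that continues below, uses the extended-opcode escape of the line number program: a zero byte, a ULEB128 payload length, the DW_LNE_* opcode, then any operands. The end-of-sequence marker is therefore exactly three bytes; the opcode value matches the LineNumberExtendedOpcode enum removed later in this diff. A small sketch:

#include <cstdint>
#include <vector>

constexpr uint8_t DW_LNE_end_sequence = 0x01;

std::vector<uint8_t> EndSequenceBytes() {
  // 0x00 escapes into the extended opcode space; 0x01 is the payload length (opcode only).
  return {0x00, 0x01, DW_LNE_end_sequence};
}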
- this->PushUint8(0); - if (use_64bit_address_) { - this->PushUleb128(1 + 8); - this->PushUint8(DW_LNE_set_address); - patch_locations_.push_back(this->data()->size()); - this->PushUint64(absolute_address); - } else { - this->PushUleb128(1 + 4); - this->PushUint8(DW_LNE_set_address); - patch_locations_.push_back(this->data()->size()); - this->PushUint32(absolute_address); - } - current_address_ = absolute_address; - } - - void DefineFile(const char* filename, - int directory_index, - int modification_time, - int file_size) { - int size = 1 + - strlen(filename) + 1 + - UnsignedLeb128Size(directory_index) + - UnsignedLeb128Size(modification_time) + - UnsignedLeb128Size(file_size); - this->PushUint8(0); - this->PushUleb128(size); - size_t start = data()->size(); - this->PushUint8(DW_LNE_define_file); - this->PushString(filename); - this->PushUleb128(directory_index); - this->PushUleb128(modification_time); - this->PushUleb128(file_size); - DCHECK_EQ(start + size, data()->size()); - } - - // Compact address and line opcode. - void AddRow(uint64_t absolute_address, int absolute_line) { - DCHECK_GE(absolute_address, current_address_); - - // If the address is definitely too far, use the long encoding. - uint64_t delta_address = FactorCodeOffset(absolute_address - current_address_); - if (delta_address > UINT8_MAX) { - AdvancePC(absolute_address); - delta_address = 0; - } - - // If the line is definitely too far, use the long encoding. - int delta_line = absolute_line - current_line_; - if (!(kLineBase <= delta_line && delta_line < kLineBase + kLineRange)) { - AdvanceLine(absolute_line); - delta_line = 0; - } - - // Both address and line should be reasonable now. Use the short encoding. - int opcode = kOpcodeBase + (delta_line - kLineBase) + - (static_cast<int>(delta_address) * kLineRange); - if (opcode > UINT8_MAX) { - // If the address is still too far, try to increment it by const amount. - int const_advance = (0xFF - kOpcodeBase) / kLineRange; - opcode -= (kLineRange * const_advance); - if (opcode <= UINT8_MAX) { - this->PushUint8(DW_LNS_const_add_pc); - } else { - // Give up and use long encoding for address. - AdvancePC(absolute_address); - // Still use the opcode to do line advance and copy. - opcode = kOpcodeBase + (delta_line - kLineBase); - } - } - DCHECK(kOpcodeBase <= opcode && opcode <= 0xFF); - this->PushUint8(opcode); // Special opcode. 
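The special-opcode arithmetic in the two-argument AddRow() above can be checked by hand with the constants declared at the top of this class (kOpcodeBase = 13, kLineBase = -5, kLineRange = 14): a factored address delta of 2 and a line delta of +1 become the single opcode 13 + (1 - (-5)) + 2 * 14 = 47. A standalone sketch of the same computation, returning -1 where the deleted code falls back to DW_LNS_const_add_pc or DW_LNS_advance_pc:

#include <cassert>

constexpr int kOpcodeBase = 13;
constexpr int kLineBase = -5;
constexpr int kLineRange = 14;

// Returns the special opcode for a factored address delta and a line delta,
// or -1 when the pair does not fit into one byte.
int SpecialOpcode(int delta_address, int delta_line) {
  if (delta_line < kLineBase || delta_line >= kLineBase + kLineRange) {
    return -1;
  }
  int opcode = kOpcodeBase + (delta_line - kLineBase) + delta_address * kLineRange;
  return (opcode <= 0xFF) ? opcode : -1;
}

int main() {
  assert(SpecialOpcode(2, 1) == 47);
  assert(SpecialOpcode(20, 0) == -1);  // 298 > 0xFF, too far for a single special opcode
}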
- current_line_ = absolute_line; - current_address_ = absolute_address; - } - - int GetCodeFactorBits() const { - return code_factor_bits_; - } - - uint64_t CurrentAddress() const { - return current_address_; - } - - int CurrentFile() const { - return current_file_; - } - - int CurrentLine() const { - return current_line_; - } - - const std::vector<uintptr_t>& GetPatchLocations() const { - return patch_locations_; - } - - using Writer<Vector>::data; - - DebugLineOpCodeWriter(bool use64bitAddress, - int codeFactorBits, - const typename Vector::allocator_type& alloc = - typename Vector::allocator_type()) - : Writer<Vector>(&opcodes_), - opcodes_(alloc), - uses_dwarf3_features_(false), - use_64bit_address_(use64bitAddress), - code_factor_bits_(codeFactorBits), - current_address_(0), - current_file_(1), - current_line_(1), - is_stmt_(kDefaultIsStmt) { - } - - private: - uint64_t FactorCodeOffset(uint64_t offset) const { - DCHECK_GE(code_factor_bits_, 0); - DCHECK_EQ((offset >> code_factor_bits_) << code_factor_bits_, offset); - return offset >> code_factor_bits_; - } - - Vector opcodes_; - bool uses_dwarf3_features_; - bool use_64bit_address_; - int code_factor_bits_; - uint64_t current_address_; - int current_file_; - int current_line_; - bool is_stmt_; - std::vector<uintptr_t> patch_locations_; - - DISALLOW_COPY_AND_ASSIGN(DebugLineOpCodeWriter); -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_DEBUG_LINE_OPCODE_WRITER_H_ diff --git a/compiler/debug/dwarf/dwarf_constants.h b/compiler/debug/dwarf/dwarf_constants.h deleted file mode 100644 index 96f805e85f..0000000000 --- a/compiler/debug/dwarf/dwarf_constants.h +++ /dev/null @@ -1,694 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_DWARF_CONSTANTS_H_ -#define ART_COMPILER_DEBUG_DWARF_DWARF_CONSTANTS_H_ - -namespace art { -namespace dwarf { - -// Based on the Dwarf 4 specification at dwarfstd.com and issues marked -// for inclusion in Dwarf 5 on same. Values not specified in the Dwarf 4 -// standard might change or be removed in the future and may be different -// than the values used currently by other implementations for the same trait, -// use at your own risk. 
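FactorCodeOffset() above divides every address delta by the minimum instruction length advertised in the line table header (1 << code_factor_bits); the dwarf_test.cc changes later in this diff construct the writer with code_factor_bits = 1. A standalone sketch with the same alignment check:

#include <cassert>
#include <cstdint>

uint64_t FactorCodeOffset(uint64_t offset, int code_factor_bits) {
  // The offset must be a multiple of the minimum instruction length.
  assert((offset & ((uint64_t{1} << code_factor_bits) - 1)) == 0);
  return offset >> code_factor_bits;
}

int main() {
  assert(FactorCodeOffset(0x40, /*code_factor_bits=*/ 1) == 0x20);
}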
- -enum Tag { - DW_TAG_array_type = 0x01, - DW_TAG_class_type = 0x02, - DW_TAG_entry_point = 0x03, - DW_TAG_enumeration_type = 0x04, - DW_TAG_formal_parameter = 0x05, - DW_TAG_imported_declaration = 0x08, - DW_TAG_label = 0x0a, - DW_TAG_lexical_block = 0x0b, - DW_TAG_member = 0x0d, - DW_TAG_pointer_type = 0x0f, - DW_TAG_reference_type = 0x10, - DW_TAG_compile_unit = 0x11, - DW_TAG_string_type = 0x12, - DW_TAG_structure_type = 0x13, - DW_TAG_subroutine_type = 0x15, - DW_TAG_typedef = 0x16, - DW_TAG_union_type = 0x17, - DW_TAG_unspecified_parameters = 0x18, - DW_TAG_variant = 0x19, - DW_TAG_common_block = 0x1a, - DW_TAG_common_inclusion = 0x1b, - DW_TAG_inheritance = 0x1c, - DW_TAG_inlined_subroutine = 0x1d, - DW_TAG_module = 0x1e, - DW_TAG_ptr_to_member_type = 0x1f, - DW_TAG_set_type = 0x20, - DW_TAG_subrange_type = 0x21, - DW_TAG_with_stmt = 0x22, - DW_TAG_access_declaration = 0x23, - DW_TAG_base_type = 0x24, - DW_TAG_catch_block = 0x25, - DW_TAG_const_type = 0x26, - DW_TAG_constant = 0x27, - DW_TAG_enumerator = 0x28, - DW_TAG_file_type = 0x29, - DW_TAG_friend = 0x2a, - DW_TAG_namelist = 0x2b, - DW_TAG_namelist_item = 0x2c, - DW_TAG_packed_type = 0x2d, - DW_TAG_subprogram = 0x2e, - DW_TAG_template_type_parameter = 0x2f, - DW_TAG_template_value_parameter = 0x30, - DW_TAG_thrown_type = 0x31, - DW_TAG_try_block = 0x32, - DW_TAG_variant_part = 0x33, - DW_TAG_variable = 0x34, - DW_TAG_volatile_type = 0x35, - DW_TAG_dwarf_procedure = 0x36, - DW_TAG_restrict_type = 0x37, - DW_TAG_interface_type = 0x38, - DW_TAG_namespace = 0x39, - DW_TAG_imported_module = 0x3a, - DW_TAG_unspecified_type = 0x3b, - DW_TAG_partial_unit = 0x3c, - DW_TAG_imported_unit = 0x3d, - DW_TAG_condition = 0x3f, - DW_TAG_shared_type = 0x40, - DW_TAG_type_unit = 0x41, - DW_TAG_rvalue_reference_type = 0x42, - DW_TAG_template_alias = 0x43, -#ifdef INCLUDE_DWARF5_VALUES - // Values to be added in Dwarf 5. Final value not yet specified. Values listed - // may be different than other implementations. Use with caution. - // TODO Update these values when Dwarf 5 is released. 
- DW_TAG_coarray_type = 0x44, - DW_TAG_call_site = 0x45, - DW_TAG_call_site_parameter = 0x46, - DW_TAG_generic_subrange = 0x47, - DW_TAG_atomic_type = 0x48, - DW_TAG_dynamic_type = 0x49, - DW_TAG_aligned_type = 0x50, -#endif - DW_TAG_lo_user = 0x4080, - DW_TAG_hi_user = 0xffff -}; - -enum Children : uint8_t { - DW_CHILDREN_no = 0x00, - DW_CHILDREN_yes = 0x01 -}; - -enum Attribute { - DW_AT_sibling = 0x01, - DW_AT_location = 0x02, - DW_AT_name = 0x03, - DW_AT_ordering = 0x09, - DW_AT_byte_size = 0x0b, - DW_AT_bit_offset = 0x0c, - DW_AT_bit_size = 0x0d, - DW_AT_stmt_list = 0x10, - DW_AT_low_pc = 0x11, - DW_AT_high_pc = 0x12, - DW_AT_language = 0x13, - DW_AT_discr = 0x15, - DW_AT_discr_value = 0x16, - DW_AT_visibility = 0x17, - DW_AT_import = 0x18, - DW_AT_string_length = 0x19, - DW_AT_common_reference = 0x1a, - DW_AT_comp_dir = 0x1b, - DW_AT_const_value = 0x1c, - DW_AT_containing_type = 0x1d, - DW_AT_default_value = 0x1e, - DW_AT_inline = 0x20, - DW_AT_is_optional = 0x21, - DW_AT_lower_bound = 0x22, - DW_AT_producer = 0x25, - DW_AT_prototyped = 0x27, - DW_AT_return_addr = 0x2a, - DW_AT_start_scope = 0x2c, - DW_AT_bit_stride = 0x2e, - DW_AT_upper_bound = 0x2f, - DW_AT_abstract_origin = 0x31, - DW_AT_accessibility = 0x32, - DW_AT_address_class = 0x33, - DW_AT_artificial = 0x34, - DW_AT_base_types = 0x35, - DW_AT_calling_convention = 0x36, - DW_AT_count = 0x37, - DW_AT_data_member_location = 0x38, - DW_AT_decl_column = 0x39, - DW_AT_decl_file = 0x3a, - DW_AT_decl_line = 0x3b, - DW_AT_declaration = 0x3c, - DW_AT_discr_list = 0x3d, - DW_AT_encoding = 0x3e, - DW_AT_external = 0x3f, - DW_AT_frame_base = 0x40, - DW_AT_friend = 0x41, - DW_AT_identifier_case = 0x42, - DW_AT_macro_info = 0x43, - DW_AT_namelist_item = 0x44, - DW_AT_priority = 0x45, - DW_AT_segment = 0x46, - DW_AT_specification = 0x47, - DW_AT_static_link = 0x48, - DW_AT_type = 0x49, - DW_AT_use_location = 0x4a, - DW_AT_variable_parameter = 0x4b, - DW_AT_virtuality = 0x4c, - DW_AT_vtable_elem_location = 0x4d, - DW_AT_allocated = 0x4e, - DW_AT_associated = 0x4f, - DW_AT_data_location = 0x50, - DW_AT_byte_stride = 0x51, - DW_AT_entry_pc = 0x52, - DW_AT_use_UTF8 = 0x53, - DW_AT_extension = 0x54, - DW_AT_ranges = 0x55, - DW_AT_trampoline = 0x56, - DW_AT_call_column = 0x57, - DW_AT_call_file = 0x58, - DW_AT_call_line = 0x59, - DW_AT_description = 0x5a, - DW_AT_binary_scale = 0x5b, - DW_AT_decimal_scale = 0x5c, - DW_AT_small = 0x5d, - DW_AT_decimal_sign = 0x5e, - DW_AT_digit_count = 0x5f, - DW_AT_picture_string = 0x60, - DW_AT_mutable = 0x61, - DW_AT_threads_scaled = 0x62, - DW_AT_explicit = 0x63, - DW_AT_object_pointer = 0x64, - DW_AT_endianity = 0x65, - DW_AT_elemental = 0x66, - DW_AT_pure = 0x67, - DW_AT_recursive = 0x68, - DW_AT_signature = 0x69, - DW_AT_main_subprogram = 0x6a, - DW_AT_data_bit_offset = 0x6b, - DW_AT_const_expr = 0x6c, - DW_AT_enum_class = 0x6d, - DW_AT_linkage_name = 0x6e, -#ifdef INCLUDE_DWARF5_VALUES - // Values to be added in Dwarf 5. Final value not yet specified. Values listed - // may be different than other implementations. Use with caution. - // TODO Update these values when Dwarf 5 is released. 
- DW_AT_call_site_value = 0x6f, - DW_AT_call_site_data_value = 0x70, - DW_AT_call_site_target = 0x71, - DW_AT_call_site_target_clobbered = 0x72, - DW_AT_tail_call = 0x73, - DW_AT_all_tail_call_sites = 0x74, - DW_AT_all_call_sites = 0x75, - DW_AT_all_source_call_sites = 0x76, - DW_AT_call_site_parameter = 0x77, - DW_AT_tail_call = 0x78, - DW_AT_all_tail_call_sites = 0x79, - DW_AT_all_call_sites = 0x7a, - DW_AT_all_source_call_sites = 0x7b, - DW_AT_rank = 0x7c, - DW_AT_string_bitsize = 0x7d, - DW_AT_string_byte_size = 0x7e, - DW_AT_reference = 0x7f, - DW_AT_rvalue_reference = 0x80, - DW_AT_noreturn = 0x81, - DW_AT_alignment = 0x82, -#endif - DW_AT_lo_user = 0x2000, - DW_AT_hi_user = 0xffff -}; - -enum Form : uint8_t { - DW_FORM_addr = 0x01, - DW_FORM_block2 = 0x03, - DW_FORM_block4 = 0x04, - DW_FORM_data2 = 0x05, - DW_FORM_data4 = 0x06, - DW_FORM_data8 = 0x07, - DW_FORM_string = 0x08, - DW_FORM_block = 0x09, - DW_FORM_block1 = 0x0a, - DW_FORM_data1 = 0x0b, - DW_FORM_flag = 0x0c, - DW_FORM_sdata = 0x0d, - DW_FORM_strp = 0x0e, - DW_FORM_udata = 0x0f, - DW_FORM_ref_addr = 0x10, - DW_FORM_ref1 = 0x11, - DW_FORM_ref2 = 0x12, - DW_FORM_ref4 = 0x13, - DW_FORM_ref8 = 0x14, - DW_FORM_ref_udata = 0x15, - DW_FORM_indirect = 0x16, - DW_FORM_sec_offset = 0x17, - DW_FORM_exprloc = 0x18, - DW_FORM_flag_present = 0x19, - DW_FORM_ref_sig8 = 0x20 -}; - -enum Operation : uint16_t { - DW_OP_addr = 0x03, - DW_OP_deref = 0x06, - DW_OP_const1u = 0x08, - DW_OP_const1s = 0x09, - DW_OP_const2u = 0x0a, - DW_OP_const2s = 0x0b, - DW_OP_const4u = 0x0c, - DW_OP_const4s = 0x0d, - DW_OP_const8u = 0x0e, - DW_OP_const8s = 0x0f, - DW_OP_constu = 0x10, - DW_OP_consts = 0x11, - DW_OP_dup = 0x12, - DW_OP_drop = 0x13, - DW_OP_over = 0x14, - DW_OP_pick = 0x15, - DW_OP_swap = 0x16, - DW_OP_rot = 0x17, - DW_OP_xderef = 0x18, - DW_OP_abs = 0x19, - DW_OP_and = 0x1a, - DW_OP_div = 0x1b, - DW_OP_minus = 0x1c, - DW_OP_mod = 0x1d, - DW_OP_mul = 0x1e, - DW_OP_neg = 0x1f, - DW_OP_not = 0x20, - DW_OP_or = 0x21, - DW_OP_plus = 0x22, - DW_OP_plus_uconst = 0x23, - DW_OP_shl = 0x24, - DW_OP_shr = 0x25, - DW_OP_shra = 0x26, - DW_OP_xor = 0x27, - DW_OP_skip = 0x2f, - DW_OP_bra = 0x28, - DW_OP_eq = 0x29, - DW_OP_ge = 0x2a, - DW_OP_gt = 0x2b, - DW_OP_le = 0x2c, - DW_OP_lt = 0x2d, - DW_OP_ne = 0x2e, - DW_OP_lit0 = 0x30, - DW_OP_lit1 = 0x31, - DW_OP_lit2 = 0x32, - DW_OP_lit3 = 0x33, - DW_OP_lit4 = 0x34, - DW_OP_lit5 = 0x35, - DW_OP_lit6 = 0x36, - DW_OP_lit7 = 0x37, - DW_OP_lit8 = 0x38, - DW_OP_lit9 = 0x39, - DW_OP_lit10 = 0x3a, - DW_OP_lit11 = 0x3b, - DW_OP_lit12 = 0x3c, - DW_OP_lit13 = 0x3d, - DW_OP_lit14 = 0x3e, - DW_OP_lit15 = 0x3f, - DW_OP_lit16 = 0x40, - DW_OP_lit17 = 0x41, - DW_OP_lit18 = 0x42, - DW_OP_lit19 = 0x43, - DW_OP_lit20 = 0x44, - DW_OP_lit21 = 0x45, - DW_OP_lit22 = 0x46, - DW_OP_lit23 = 0x47, - DW_OP_lit24 = 0x48, - DW_OP_lit25 = 0x49, - DW_OP_lit26 = 0x4a, - DW_OP_lit27 = 0x4b, - DW_OP_lit28 = 0x4c, - DW_OP_lit29 = 0x4d, - DW_OP_lit30 = 0x4e, - DW_OP_lit31 = 0x4f, - DW_OP_reg0 = 0x50, - DW_OP_reg1 = 0x51, - DW_OP_reg2 = 0x52, - DW_OP_reg3 = 0x53, - DW_OP_reg4 = 0x54, - DW_OP_reg5 = 0x55, - DW_OP_reg6 = 0x56, - DW_OP_reg7 = 0x57, - DW_OP_reg8 = 0x58, - DW_OP_reg9 = 0x59, - DW_OP_reg10 = 0x5a, - DW_OP_reg11 = 0x5b, - DW_OP_reg12 = 0x5c, - DW_OP_reg13 = 0x5d, - DW_OP_reg14 = 0x5e, - DW_OP_reg15 = 0x5f, - DW_OP_reg16 = 0x60, - DW_OP_reg17 = 0x61, - DW_OP_reg18 = 0x62, - DW_OP_reg19 = 0x63, - DW_OP_reg20 = 0x64, - DW_OP_reg21 = 0x65, - DW_OP_reg22 = 0x66, - DW_OP_reg23 = 0x67, - DW_OP_reg24 = 0x68, - DW_OP_reg25 = 0x69, - DW_OP_reg26 = 
0x6a, - DW_OP_reg27 = 0x6b, - DW_OP_reg28 = 0x6c, - DW_OP_reg29 = 0x6d, - DW_OP_reg30 = 0x6e, - DW_OP_reg31 = 0x6f, - DW_OP_breg0 = 0x70, - DW_OP_breg1 = 0x71, - DW_OP_breg2 = 0x72, - DW_OP_breg3 = 0x73, - DW_OP_breg4 = 0x74, - DW_OP_breg5 = 0x75, - DW_OP_breg6 = 0x76, - DW_OP_breg7 = 0x77, - DW_OP_breg8 = 0x78, - DW_OP_breg9 = 0x79, - DW_OP_breg10 = 0x7a, - DW_OP_breg11 = 0x7b, - DW_OP_breg12 = 0x7c, - DW_OP_breg13 = 0x7d, - DW_OP_breg14 = 0x7e, - DW_OP_breg15 = 0x7f, - DW_OP_breg16 = 0x80, - DW_OP_breg17 = 0x81, - DW_OP_breg18 = 0x82, - DW_OP_breg19 = 0x83, - DW_OP_breg20 = 0x84, - DW_OP_breg21 = 0x85, - DW_OP_breg22 = 0x86, - DW_OP_breg23 = 0x87, - DW_OP_breg24 = 0x88, - DW_OP_breg25 = 0x89, - DW_OP_breg26 = 0x8a, - DW_OP_breg27 = 0x8b, - DW_OP_breg28 = 0x8c, - DW_OP_breg29 = 0x8d, - DW_OP_breg30 = 0x8e, - DW_OP_breg31 = 0x8f, - DW_OP_regx = 0x90, - DW_OP_fbreg = 0x91, - DW_OP_bregx = 0x92, - DW_OP_piece = 0x93, - DW_OP_deref_size = 0x94, - DW_OP_xderef_size = 0x95, - DW_OP_nop = 0x96, - DW_OP_push_object_address = 0x97, - DW_OP_call2 = 0x98, - DW_OP_call4 = 0x99, - DW_OP_call_ref = 0x9a, - DW_OP_form_tls_address = 0x9b, - DW_OP_call_frame_cfa = 0x9c, - DW_OP_bit_piece = 0x9d, - DW_OP_implicit_value = 0x9e, - DW_OP_stack_value = 0x9f, -#ifdef INCLUDE_DWARF5_VALUES - // Values to be added in Dwarf 5. Final value not yet specified. Values listed - // may be different than other implementations. Use with caution. - // TODO Update these values when Dwarf 5 is released. - DW_OP_entry_value = 0xa0, - DW_OP_const_type = 0xa1, - DW_OP_regval_type = 0xa2, - DW_OP_deref_type = 0xa3, - DW_OP_xderef_type = 0xa4, - DW_OP_convert = 0xa5, - DW_OP_reinterpret = 0xa6, -#endif - DW_OP_lo_user = 0xe0, - DW_OP_hi_user = 0xff -}; - -enum BaseTypeEncoding : uint8_t { - DW_ATE_address = 0x01, - DW_ATE_boolean = 0x02, - DW_ATE_complex_float = 0x03, - DW_ATE_float = 0x04, - DW_ATE_signed = 0x05, - DW_ATE_signed_char = 0x06, - DW_ATE_unsigned = 0x07, - DW_ATE_unsigned_char = 0x08, - DW_ATE_imaginary_float = 0x09, - DW_ATE_packed_decimal = 0x0a, - DW_ATE_numeric_string = 0x0b, - DW_ATE_edited = 0x0c, - DW_ATE_signed_fixed = 0x0d, - DW_ATE_unsigned_fixed = 0x0e, - DW_ATE_decimal_float = 0x0f, - DW_ATE_UTF = 0x10, - DW_ATE_lo_user = 0x80, - DW_ATE_hi_user = 0xff -}; - -enum DecimalSign : uint8_t { - DW_DS_unsigned = 0x01, - DW_DS_leading_overpunch = 0x02, - DW_DS_trailing_overpunch = 0x03, - DW_DS_leading_separate = 0x04, - DW_DS_trailing_separate = 0x05 -}; - -enum Endianity : uint8_t { - DW_END_default = 0x00, - DW_END_big = 0x01, - DW_END_little = 0x02, - DW_END_lo_user = 0x40, - DW_END_hi_user = 0xff -}; - -enum Accessibility : uint8_t { - DW_ACCESS_public = 0x01, - DW_ACCESS_protected = 0x02, - DW_ACCESS_private = 0x03 -}; - -enum Visibility : uint8_t { - DW_VIS_local = 0x01, - DW_VIS_exported = 0x02, - DW_VIS_qualified = 0x03 -}; - -enum Virtuality : uint8_t { - DW_VIRTUALITY_none = 0x00, - DW_VIRTUALITY_virtual = 0x01, - DW_VIRTUALITY_pure_virtual = 0x02 -}; - -enum Language { - DW_LANG_C89 = 0x01, - DW_LANG_C = 0x02, - DW_LANG_Ada83 = 0x03, - DW_LANG_C_plus_plus = 0x04, - DW_LANG_Cobol74 = 0x05, - DW_LANG_Cobol85 = 0x06, - DW_LANG_Fortran77 = 0x07, - DW_LANG_Fortran90 = 0x08, - DW_LANG_Pascal83 = 0x09, - DW_LANG_Modula2 = 0x0a, - DW_LANG_Java = 0x0b, - DW_LANG_C99 = 0x0c, - DW_LANG_Ada95 = 0x0d, - DW_LANG_Fortran95 = 0x0e, - DW_LANG_PLI = 0x0f, - DW_LANG_ObjC = 0x10, - DW_LANG_ObjC_plus_plus = 0x11, - DW_LANG_UPC = 0x12, - DW_LANG_D = 0x13, - DW_LANG_Python = 0x14, -#ifdef INCLUDE_DWARF5_VALUES - // Values 
to be added in Dwarf 5. Final value not yet specified. Values listed - // may be different than other implementations. Use with caution. - // TODO Update these values when Dwarf 5 is released. - DW_LANG_OpenCL = 0x15, - DW_LANG_Go = 0x16, - DW_LANG_Modula3 = 0x17, - DW_LANG_Haskell = 0x18, - DW_LANG_C_plus_plus_03 = 0x19, - DW_LANG_C_plus_plus_11 = 0x1a, - DW_LANG_OCaml = 0x1b, - DW_LANG_Rust = 0x1c, - DW_LANG_C11 = 0x1d, - DW_LANG_Swift = 0x1e, - DW_LANG_Julia = 0x1f, -#endif - DW_LANG_lo_user = 0x8000, - DW_LANG_hi_user = 0xffff -}; - -enum Identifier : uint8_t { - DW_ID_case_sensitive = 0x00, - DW_ID_up_case = 0x01, - DW_ID_down_case = 0x02, - DW_ID_case_insensitive = 0x03 -}; - -enum CallingConvention : uint8_t { - DW_CC_normal = 0x01, - DW_CC_program = 0x02, - DW_CC_nocall = 0x03, - DW_CC_lo_user = 0x40, - DW_CC_hi_user = 0xff -}; - -enum Inline : uint8_t { - DW_INL_not_inlined = 0x00, - DW_INL_inlined = 0x01, - DW_INL_declared_not_inlined = 0x02, - DW_INL_declared_inlined = 0x03 -}; - -enum ArrayOrdering : uint8_t { - DW_ORD_row_major = 0x00, - DW_ORD_col_major = 0x01 -}; - -enum DiscriminantList : uint8_t { - DW_DSC_label = 0x00, - DW_DSC_range = 0x01 -}; - -enum LineNumberOpcode : uint8_t { - DW_LNS_copy = 0x01, - DW_LNS_advance_pc = 0x02, - DW_LNS_advance_line = 0x03, - DW_LNS_set_file = 0x04, - DW_LNS_set_column = 0x05, - DW_LNS_negate_stmt = 0x06, - DW_LNS_set_basic_block = 0x07, - DW_LNS_const_add_pc = 0x08, - DW_LNS_fixed_advance_pc = 0x09, - DW_LNS_set_prologue_end = 0x0a, - DW_LNS_set_epilogue_begin = 0x0b, - DW_LNS_set_isa = 0x0c -}; - -enum LineNumberExtendedOpcode : uint8_t { - DW_LNE_end_sequence = 0x01, - DW_LNE_set_address = 0x02, - DW_LNE_define_file = 0x03, - DW_LNE_set_discriminator = 0x04, - DW_LNE_lo_user = 0x80, - DW_LNE_hi_user = 0xff -}; - -#ifdef INCLUDE_DWARF5_VALUES -enum LineNumberFormat : uint8_t { - // Values to be added in Dwarf 5. Final value not yet specified. Values listed - // may be different than other implementations. Use with caution. - // TODO Update these values when Dwarf 5 is released. - // - DW_LNF_path = 0x1, - DW_LNF_include_index = 0x2, - DW_LNF_timestamp = 0x3, - DW_LNF_size = 0x4, - DW_LNF_MD5 = 0x5, - DW_LNF_lo_user = 0x2000, - DW_LNF_hi_user = 0x3fff -}; -#endif - -enum MacroInfo : uint8_t { - DW_MACINFO_define = 0x01, - DW_MACINFO_undef = 0x02, - DW_MACINFO_start_file = 0x03, - DW_MACINFO_end_file = 0x04, - DW_MACINFO_vendor_ext = 0xff -}; - -#ifdef INCLUDE_DWARF5_VALUES -enum Macro : uint8_t { - // Values to be added in Dwarf 5. Final value not yet specified. Values listed - // may be different than other implementations. Use with caution. - // TODO Update these values when Dwarf 5 is released. 
- DW_MACRO_define = 0x01, - DW_MACRO_undef = 0x02, - DW_MACRO_start_file = 0x03, - DW_MACRO_end_file = 0x04, - DW_MACRO_define_indirect = 0x05, - DW_MACRO_undef_indirect = 0x06, - DW_MACRO_transparent_include = 0x07, - DW_MACRO_define_indirectx = 0x0b, - DW_MACRO_undef_indirectx = 0x0c, - DW_MACRO_lo_user = 0xe0, - DW_MACRO_hi_user = 0xff -}; -#endif - -const uint32_t CIE_ID_32 = 0xffffffff; -const uint64_t CIE_ID_64 = 0xffffffffffffffff; - -enum CallFrameInstruction : uint8_t { - DW_CFA_advance_loc = 0x40, - DW_CFA_offset = 0x80, - DW_CFA_restore = 0xc0, - DW_CFA_nop = 0x00, - DW_CFA_set_loc = 0x01, - DW_CFA_advance_loc1 = 0x02, - DW_CFA_advance_loc2 = 0x03, - DW_CFA_advance_loc4 = 0x04, - DW_CFA_offset_extended = 0x05, - DW_CFA_restore_extended = 0x06, - DW_CFA_undefined = 0x07, - DW_CFA_same_value = 0x08, - DW_CFA_register = 0x09, - DW_CFA_remember_state = 0x0a, - DW_CFA_restore_state = 0x0b, - DW_CFA_def_cfa = 0x0c, - DW_CFA_def_cfa_register = 0x0d, - DW_CFA_def_cfa_offset = 0x0e, - DW_CFA_def_cfa_expression = 0x0f, - DW_CFA_expression = 0x10, - DW_CFA_offset_extended_sf = 0x11, - DW_CFA_def_cfa_sf = 0x12, - DW_CFA_def_cfa_offset_sf = 0x13, - DW_CFA_val_offset = 0x14, - DW_CFA_val_offset_sf = 0x15, - DW_CFA_val_expression = 0x16, - DW_CFA_lo_user = 0x1c, - DW_CFA_hi_user = 0x3f -}; - -enum ExceptionHeaderValueFormat : uint8_t { - DW_EH_PE_native = 0x00, - DW_EH_PE_uleb128 = 0x01, - DW_EH_PE_udata2 = 0x02, - DW_EH_PE_udata4 = 0x03, - DW_EH_PE_udata8 = 0x04, - DW_EH_PE_sleb128 = 0x09, - DW_EH_PE_sdata2 = 0x0A, - DW_EH_PE_sdata4 = 0x0B, - DW_EH_PE_sdata8 = 0x0C, - DW_EH_PE_omit = 0xFF, -}; - -enum ExceptionHeaderValueApplication : uint8_t { - DW_EH_PE_absptr = 0x00, - DW_EH_PE_pcrel = 0x10, - DW_EH_PE_textrel = 0x20, - DW_EH_PE_datarel = 0x30, - DW_EH_PE_funcrel = 0x40, - DW_EH_PE_aligned = 0x50, -}; - -enum CFIFormat : uint8_t { - // This is the original format as defined by the specification. - // It is used for the .debug_frame section. - DW_DEBUG_FRAME_FORMAT, - // Slightly modified format used for the .eh_frame section. - DW_EH_FRAME_FORMAT -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_DWARF_CONSTANTS_H_ diff --git a/compiler/debug/dwarf/dwarf_test.cc b/compiler/debug/dwarf/dwarf_test.cc index 933034f593..5946af8d55 100644 --- a/compiler/debug/dwarf/dwarf_test.cc +++ b/compiler/debug/dwarf/dwarf_test.cc @@ -16,11 +16,11 @@ #include "dwarf_test.h" -#include "debug/dwarf/debug_frame_opcode_writer.h" -#include "debug/dwarf/debug_info_entry_writer.h" -#include "debug/dwarf/debug_line_opcode_writer.h" -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/headers.h" +#include "dwarf/debug_frame_opcode_writer.h" +#include "dwarf/debug_info_entry_writer.h" +#include "dwarf/debug_line_opcode_writer.h" +#include "dwarf/dwarf_constants.h" +#include "dwarf/headers.h" #include "gtest/gtest.h" namespace art { @@ -29,8 +29,6 @@ namespace dwarf { // Run the tests only on host since we need objdump. #ifndef ART_TARGET_ANDROID -constexpr CFIFormat kCFIFormat = DW_DEBUG_FRAME_FORMAT; - TEST_F(DwarfTest, DebugFrame) { const bool is64bit = false; @@ -75,13 +73,11 @@ TEST_F(DwarfTest, DebugFrame) { opcodes.SameValue(reg); DW_CHECK_NEXT("DW_CFA_same_value: r6 (esi)"); opcodes.Offset(Reg(0x3F), -offset); - // Bad register likely means that it does not exist on x86, - // but we want to test high register numbers anyway. 
- DW_CHECK_NEXT("DW_CFA_offset: bad register: r63 at cfa-40000"); + DW_CHECK_NEXT("DW_CFA_offset: r63 at cfa-40000"); opcodes.Offset(Reg(0x40), -offset); - DW_CHECK_NEXT("DW_CFA_offset_extended: bad register: r64 at cfa-40000"); + DW_CHECK_NEXT("DW_CFA_offset_extended: r64 at cfa-40000"); opcodes.Offset(Reg(0x40), offset); - DW_CHECK_NEXT("DW_CFA_offset_extended_sf: bad register: r64 at cfa+40000"); + DW_CHECK_NEXT("DW_CFA_offset_extended_sf: r64 at cfa+40000"); opcodes.ValOffset(reg, -offset); DW_CHECK_NEXT("DW_CFA_val_offset: r6 (esi) at cfa-40000"); opcodes.ValOffset(reg, offset); @@ -122,31 +118,30 @@ TEST_F(DwarfTest, DebugFrame) { DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)"); DebugFrameOpCodeWriter<> initial_opcodes; - WriteCIE(is64bit, Reg(is64bit ? 16 : 8), - initial_opcodes, kCFIFormat, &debug_frame_data_); - std::vector<uintptr_t> debug_frame_patches; - std::vector<uintptr_t> expected_patches = { 28 }; - WriteFDE(is64bit, 0, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()), - kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches); + WriteCIE(is64bit, Reg(is64bit ? 16 : 8), initial_opcodes, &debug_frame_data_); + WriteFDE(is64bit, + /* cie_pointer= */ 0, + 0x01000000, + 0x01000000, + ArrayRef<const uint8_t>(*opcodes.data()), + &debug_frame_data_); - EXPECT_EQ(expected_patches, debug_frame_patches); CheckObjdumpOutput(is64bit, "-W"); } -TEST_F(DwarfTest, DebugFrame64) { +TEST_F(DwarfTest, DISABLED_DebugFrame64) { constexpr bool is64bit = true; DebugFrameOpCodeWriter<> initial_opcodes; - WriteCIE(is64bit, Reg(16), - initial_opcodes, kCFIFormat, &debug_frame_data_); + WriteCIE(is64bit, Reg(16), initial_opcodes, &debug_frame_data_); DebugFrameOpCodeWriter<> opcodes; - std::vector<uintptr_t> debug_frame_patches; - std::vector<uintptr_t> expected_patches = { 32 }; - WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000, + WriteFDE(is64bit, + /* cie_pointer= */ 0, + 0x0100000000000000, + 0x0200000000000000, ArrayRef<const uint8_t>(*opcodes.data()), - kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches); + &debug_frame_data_); DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000"); - EXPECT_EQ(expected_patches, debug_frame_patches); CheckObjdumpOutput(is64bit, "-W"); } @@ -176,17 +171,18 @@ TEST_F(DwarfTest, x86_64_RegisterMapping) { DW_CHECK_NEXT("DW_CFA_offset: r14 (r14)"); DW_CHECK_NEXT("DW_CFA_offset: r15 (r15)"); DebugFrameOpCodeWriter<> initial_opcodes; - WriteCIE(is64bit, Reg(16), - initial_opcodes, kCFIFormat, &debug_frame_data_); - std::vector<uintptr_t> debug_frame_patches; - WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000, + WriteCIE(is64bit, Reg(16), initial_opcodes, &debug_frame_data_); + WriteFDE(is64bit, + /* cie_pointer= */ 0, + 0x0100000000000000, + 0x0200000000000000, ArrayRef<const uint8_t>(*opcodes.data()), - kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches); + &debug_frame_data_); CheckObjdumpOutput(is64bit, "-W"); } -TEST_F(DwarfTest, DebugLine) { +TEST_F(DwarfTest, DISABLED_DebugLine) { const bool is64bit = false; const int code_factor_bits = 1; DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits); @@ -236,12 +232,8 @@ TEST_F(DwarfTest, DebugLine) { DW_CHECK_NEXT("Entry\tDir\tTime\tSize\tName"); DW_CHECK_NEXT("1\t0\t1000\t2000\tfile.c"); - std::vector<uintptr_t> debug_line_patches; - std::vector<uintptr_t> expected_patches = { 87 }; - WriteDebugLineTable(include_directories, files, opcodes, - 0, &debug_line_data_, &debug_line_patches); + WriteDebugLineTable(include_directories, files, opcodes, 
&debug_line_data_); - EXPECT_EQ(expected_patches, debug_line_patches); CheckObjdumpOutput(is64bit, "-W"); } @@ -276,9 +268,7 @@ TEST_F(DwarfTest, DebugLineSpecialOpcodes) { std::vector<std::string> directories; std::vector<FileEntry> files = { { "file.c", 0, 1000, 2000 } }; - std::vector<uintptr_t> debug_line_patches; - WriteDebugLineTable(directories, files, opcodes, - 0, &debug_line_data_, &debug_line_patches); + WriteDebugLineTable(directories, files, opcodes, &debug_line_data_); CheckObjdumpOutput(is64bit, "-W -WL"); } @@ -332,12 +322,8 @@ TEST_F(DwarfTest, DebugInfo) { DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr"); DW_CHECK("3 DW_TAG_compile_unit [no children]"); - std::vector<uintptr_t> debug_info_patches; - std::vector<uintptr_t> expected_patches = { 16, 20, 29, 33, 42, 46 }; - dwarf::WriteDebugInfoCU(0 /* debug_abbrev_offset */, info, - 0, &debug_info_data_, &debug_info_patches); + dwarf::WriteDebugInfoCU(/* debug_abbrev_offset= */ 0, info, &debug_info_data_); - EXPECT_EQ(expected_patches, debug_info_patches); CheckObjdumpOutput(is64bit, "-W"); } diff --git a/compiler/debug/dwarf/dwarf_test.h b/compiler/debug/dwarf/dwarf_test.h index 9a7c604ca1..e51f807dd4 100644 --- a/compiler/debug/dwarf/dwarf_test.h +++ b/compiler/debug/dwarf/dwarf_test.h @@ -28,10 +28,10 @@ #include "base/os.h" #include "base/unix_file/fd_file.h" -#include "common_runtime_test.h" +#include "common_compiler_test.h" +#include "elf/elf_builder.h" #include "gtest/gtest.h" -#include "linker/elf_builder.h" -#include "linker/file_output_stream.h" +#include "stream/file_output_stream.h" namespace art { namespace dwarf { @@ -39,7 +39,7 @@ namespace dwarf { #define DW_CHECK(substring) Check(substring, false, __FILE__, __LINE__) #define DW_CHECK_NEXT(substring) Check(substring, true, __FILE__, __LINE__) -class DwarfTest : public CommonRuntimeTest { +class DwarfTest : public CommonCompilerTest { public: static constexpr bool kPrintObjdumpOutput = false; // debugging. @@ -63,8 +63,8 @@ class DwarfTest : public CommonRuntimeTest { InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? InstructionSet::kX86_64 : InstructionSet::kX86; ScratchFile file; - linker::FileOutputStream output_stream(file.GetFile()); - linker::ElfBuilder<ElfTypes> builder(isa, nullptr, &output_stream); + FileOutputStream output_stream(file.GetFile()); + ElfBuilder<ElfTypes> builder(isa, &output_stream); builder.Start(); if (!debug_info_data_.empty()) { builder.WriteSection(".debug_info", &debug_info_data_); diff --git a/compiler/debug/dwarf/expression.h b/compiler/debug/dwarf/expression.h deleted file mode 100644 index fafc0462d6..0000000000 --- a/compiler/debug/dwarf/expression.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_DEBUG_DWARF_EXPRESSION_H_ -#define ART_COMPILER_DEBUG_DWARF_EXPRESSION_H_ - -#include <cstddef> -#include <cstdint> - -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/writer.h" - -namespace art { -namespace dwarf { - -// Writer for DWARF expressions which are used in .debug_info and .debug_loc sections. -// See the DWARF specification for the precise meaning of the opcodes. -// If multiple equivalent encodings are possible, it will choose the most compact one. -// The writer is not exhaustive - it only implements opcodes we have needed so far. -class Expression : private Writer<> { - public: - using Writer<>::data; - using Writer<>::size; - - // Push signed integer on the stack. - void WriteOpConsts(int32_t value) { - if (0 <= value && value < 32) { - PushUint8(DW_OP_lit0 + value); - } else { - PushUint8(DW_OP_consts); - PushSleb128(value); - } - } - - // Push unsigned integer on the stack. - void WriteOpConstu(uint32_t value) { - if (value < 32) { - PushUint8(DW_OP_lit0 + value); - } else { - PushUint8(DW_OP_constu); - PushUleb128(value); - } - } - - // Variable is stored in given register. - void WriteOpReg(uint32_t dwarf_reg_num) { - if (dwarf_reg_num < 32) { - PushUint8(DW_OP_reg0 + dwarf_reg_num); - } else { - PushUint8(DW_OP_regx); - PushUleb128(dwarf_reg_num); - } - } - - // Variable is stored on stack. Also see DW_AT_frame_base. - void WriteOpFbreg(int32_t stack_offset) { - PushUint8(DW_OP_fbreg); - PushSleb128(stack_offset); - } - - // The variable is stored in multiple locations (pieces). - void WriteOpPiece(uint32_t num_bytes) { - PushUint8(DW_OP_piece); - PushUleb128(num_bytes); - } - - // Loads 32-bit or 64-bit value depending on architecture. - void WriteOpDeref() { PushUint8(DW_OP_deref); } - - // Loads value of given byte size. - void WriteOpDerefSize(uint8_t num_bytes) { - PushUint8(DW_OP_deref_size); - PushUint8(num_bytes); - } - - // Pop two values and push their sum. - void WriteOpPlus() { PushUint8(DW_OP_plus); } - - // Add constant value to value on top of stack. - void WriteOpPlusUconst(uint32_t offset) { - PushUint8(DW_OP_plus_uconst); - PushUleb128(offset); - } - - // Negate top of stack. - void WriteOpNeg() { PushUint8(DW_OP_neg); } - - // Pop two values and push their bitwise-AND. - void WriteOpAnd() { PushUint8(DW_OP_and); } - - // Push stack base pointer as determined from .debug_frame. - void WriteOpCallFrameCfa() { PushUint8(DW_OP_call_frame_cfa); } - - // Push address of the variable we are working with. - void WriteOpPushObjectAddress() { PushUint8(DW_OP_push_object_address); } - - // Return the top stack as the value of the variable. - // Otherwise, the top of stack is the variable's location. - void WriteOpStackValue() { PushUint8(DW_OP_stack_value); } - - explicit Expression(std::vector<uint8_t>* buffer) : Writer<>(buffer) { - buffer->clear(); - } -}; -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_EXPRESSION_H_ diff --git a/compiler/debug/dwarf/headers.h b/compiler/debug/dwarf/headers.h deleted file mode 100644 index 28f108423e..0000000000 --- a/compiler/debug/dwarf/headers.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
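The Expression helper above always prefers the compact one-byte forms (DW_OP_reg0 + n, DW_OP_lit0 + n) and only falls back to the ULEB128-operand opcodes DW_OP_regx / DW_OP_constu when the value does not fit. A minimal sketch of the register case, with the opcode values taken from the dwarf_constants.h listing earlier in this diff:

#include <cstdint>
#include <vector>

constexpr uint8_t DW_OP_reg0 = 0x50;
constexpr uint8_t DW_OP_regx = 0x90;

void WriteOpReg(std::vector<uint8_t>* expr, uint32_t dwarf_reg_num) {
  if (dwarf_reg_num < 32) {
    expr->push_back(static_cast<uint8_t>(DW_OP_reg0 + dwarf_reg_num));  // one-byte form
  } else {
    expr->push_back(DW_OP_regx);  // opcode followed by a ULEB128 register number
    // The real writer appends PushUleb128(dwarf_reg_num) here.
  }
}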
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_HEADERS_H_ -#define ART_COMPILER_DEBUG_DWARF_HEADERS_H_ - -#include <cstdint> - -#include "base/array_ref.h" -#include "debug/dwarf/debug_frame_opcode_writer.h" -#include "debug/dwarf/debug_info_entry_writer.h" -#include "debug/dwarf/debug_line_opcode_writer.h" -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/register.h" -#include "debug/dwarf/writer.h" - -namespace art { -namespace dwarf { - -// Note that all headers start with 32-bit length. -// DWARF also supports 64-bit lengths, but we never use that. -// It is intended to support very large debug sections (>4GB), -// and compilers are expected *not* to use it by default. -// In particular, it is not related to machine architecture. - -// Write common information entry (CIE) to .debug_frame or .eh_frame section. -template<typename Vector> -void WriteCIE(bool is64bit, - Reg return_address_register, - const DebugFrameOpCodeWriter<Vector>& opcodes, - CFIFormat format, - std::vector<uint8_t>* buffer) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - Writer<> writer(buffer); - size_t cie_header_start_ = writer.data()->size(); - writer.PushUint32(0); // Length placeholder. - writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF); // CIE id. - writer.PushUint8(1); // Version. - writer.PushString("zR"); - writer.PushUleb128(DebugFrameOpCodeWriter<Vector>::kCodeAlignmentFactor); - writer.PushSleb128(DebugFrameOpCodeWriter<Vector>::kDataAlignmentFactor); - writer.PushUleb128(return_address_register.num()); // ubyte in DWARF2. - writer.PushUleb128(1); // z: Augmentation data size. - if (is64bit) { - if (format == DW_EH_FRAME_FORMAT) { - writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8); // R: Pointer encoding. - } else { - DCHECK(format == DW_DEBUG_FRAME_FORMAT); - writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8); // R: Pointer encoding. - } - } else { - if (format == DW_EH_FRAME_FORMAT) { - writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4); // R: Pointer encoding. - } else { - DCHECK(format == DW_DEBUG_FRAME_FORMAT); - writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4); // R: Pointer encoding. - } - } - writer.PushData(opcodes.data()); - writer.Pad(is64bit ? 8 : 4); - writer.UpdateUint32(cie_header_start_, writer.data()->size() - cie_header_start_ - 4); -} - -// Write frame description entry (FDE) to .debug_frame or .eh_frame section. -inline -void WriteFDE(bool is64bit, - uint64_t section_address, // Absolute address of the section. - uint64_t cie_address, // Absolute address of last CIE. - uint64_t code_address, - uint64_t code_size, - const ArrayRef<const uint8_t>& opcodes, - CFIFormat format, - uint64_t buffer_address, // Address of buffer in linked application. - std::vector<uint8_t>* buffer, - std::vector<uintptr_t>* patch_locations) { - CHECK_GE(cie_address, section_address); - CHECK_GE(buffer_address, section_address); - - Writer<> writer(buffer); - size_t fde_header_start = writer.data()->size(); - writer.PushUint32(0); // Length placeholder. 
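WriteCIE() above, and the WriteFDE() body that continues below, both use the same framing trick: push a 32-bit length placeholder, emit the record, then back-patch the placeholder with the record size minus the four length bytes themselves. A standalone sketch of that pattern (little-endian patching, matching the Writer::UpdateUint32() shown later in this diff):

#include <cstddef>
#include <cstdint>
#include <vector>

void UpdateUint32(std::vector<uint8_t>* buf, size_t offset, uint32_t value) {
  for (int i = 0; i < 4; ++i) {
    (*buf)[offset + i] = static_cast<uint8_t>(value >> (8 * i));  // little-endian patch
  }
}

void WriteRecordWithLength(std::vector<uint8_t>* buf, const std::vector<uint8_t>& body) {
  size_t start = buf->size();
  buf->insert(buf->end(), {0, 0, 0, 0});              // length placeholder
  buf->insert(buf->end(), body.begin(), body.end());  // record body
  UpdateUint32(buf, start, static_cast<uint32_t>(buf->size() - start - 4));
}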
- if (format == DW_EH_FRAME_FORMAT) { - uint32_t cie_pointer = (buffer_address + buffer->size()) - cie_address; - writer.PushUint32(cie_pointer); - } else { - DCHECK(format == DW_DEBUG_FRAME_FORMAT); - uint32_t cie_pointer = cie_address - section_address; - writer.PushUint32(cie_pointer); - } - if (format == DW_EH_FRAME_FORMAT) { - // .eh_frame encodes the location as relative address. - code_address -= buffer_address + buffer->size(); - } else { - DCHECK(format == DW_DEBUG_FRAME_FORMAT); - // Relocate code_address if it has absolute value. - patch_locations->push_back(buffer_address + buffer->size() - section_address); - } - if (is64bit) { - writer.PushUint64(code_address); - writer.PushUint64(code_size); - } else { - writer.PushUint32(code_address); - writer.PushUint32(code_size); - } - writer.PushUleb128(0); // Augmentation data size. - writer.PushData(opcodes.data(), opcodes.size()); - writer.Pad(is64bit ? 8 : 4); - writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4); -} - -// Write compilation unit (CU) to .debug_info section. -template<typename Vector> -void WriteDebugInfoCU(uint32_t debug_abbrev_offset, - const DebugInfoEntryWriter<Vector>& entries, - size_t debug_info_offset, // offset from start of .debug_info. - std::vector<uint8_t>* debug_info, - std::vector<uintptr_t>* debug_info_patches) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - Writer<> writer(debug_info); - size_t start = writer.data()->size(); - writer.PushUint32(0); // Length placeholder. - writer.PushUint16(4); // Version. - writer.PushUint32(debug_abbrev_offset); - writer.PushUint8(entries.Is64bit() ? 8 : 4); - size_t entries_offset = writer.data()->size(); - DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize); - writer.PushData(entries.data()); - writer.UpdateUint32(start, writer.data()->size() - start - 4); - // Copy patch locations and make them relative to .debug_info section. - for (uintptr_t patch_location : entries.GetPatchLocations()) { - debug_info_patches->push_back(debug_info_offset + entries_offset + patch_location); - } -} - -struct FileEntry { - std::string file_name; - int directory_index; - int modification_time; - int file_size; -}; - -// Write line table to .debug_line section. -template<typename Vector> -void WriteDebugLineTable(const std::vector<std::string>& include_directories, - const std::vector<FileEntry>& files, - const DebugLineOpCodeWriter<Vector>& opcodes, - size_t debug_line_offset, // offset from start of .debug_line. - std::vector<uint8_t>* debug_line, - std::vector<uintptr_t>* debug_line_patches) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - Writer<> writer(debug_line); - size_t header_start = writer.data()->size(); - writer.PushUint32(0); // Section-length placeholder. - writer.PushUint16(3); // .debug_line version. - size_t header_length_pos = writer.data()->size(); - writer.PushUint32(0); // Header-length placeholder. - writer.PushUint8(1 << opcodes.GetCodeFactorBits()); - writer.PushUint8(DebugLineOpCodeWriter<Vector>::kDefaultIsStmt ? 
1 : 0); - writer.PushInt8(DebugLineOpCodeWriter<Vector>::kLineBase); - writer.PushUint8(DebugLineOpCodeWriter<Vector>::kLineRange); - writer.PushUint8(DebugLineOpCodeWriter<Vector>::kOpcodeBase); - static const int opcode_lengths[DebugLineOpCodeWriter<Vector>::kOpcodeBase] = { - 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 }; - for (int i = 1; i < DebugLineOpCodeWriter<Vector>::kOpcodeBase; i++) { - writer.PushUint8(opcode_lengths[i]); - } - for (const std::string& directory : include_directories) { - writer.PushData(directory.data(), directory.size() + 1); - } - writer.PushUint8(0); // Terminate include_directories list. - for (const FileEntry& file : files) { - writer.PushData(file.file_name.data(), file.file_name.size() + 1); - writer.PushUleb128(file.directory_index); - writer.PushUleb128(file.modification_time); - writer.PushUleb128(file.file_size); - } - writer.PushUint8(0); // Terminate file list. - writer.UpdateUint32(header_length_pos, writer.data()->size() - header_length_pos - 4); - size_t opcodes_offset = writer.data()->size(); - writer.PushData(opcodes.data()); - writer.UpdateUint32(header_start, writer.data()->size() - header_start - 4); - // Copy patch locations and make them relative to .debug_line section. - for (uintptr_t patch_location : opcodes.GetPatchLocations()) { - debug_line_patches->push_back(debug_line_offset + opcodes_offset + patch_location); - } -} - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_HEADERS_H_ diff --git a/compiler/debug/dwarf/register.h b/compiler/debug/dwarf/register.h deleted file mode 100644 index 24bacac292..0000000000 --- a/compiler/debug/dwarf/register.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_REGISTER_H_ -#define ART_COMPILER_DEBUG_DWARF_REGISTER_H_ - -namespace art { -namespace dwarf { - -// Represents DWARF register. -class Reg { - public: - explicit Reg(int reg_num) : num_(reg_num) { } - int num() const { return num_; } - - // TODO: Arm S0–S31 register mapping is obsolescent. - // We should use VFP-v3/Neon D0-D31 mapping instead. - // However, D0 is aliased to pair of S0 and S1, so using that - // mapping we cannot easily say S0 is spilled and S1 is not. - // There are ways around this in DWARF but they are complex. - // It would be much simpler to always spill whole D registers. - // Arm64 mapping is correct since we already do this there. - // libunwind might struggle with the new mapping as well. - - static Reg ArmCore(int num) { return Reg(num); } // R0-R15. - static Reg ArmFp(int num) { return Reg(64 + num); } // S0–S31. - static Reg ArmDp(int num) { return Reg(256 + num); } // D0–D31. - static Reg Arm64Core(int num) { return Reg(num); } // X0-X31. - static Reg Arm64Fp(int num) { return Reg(64 + num); } // V0-V31. 
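The Reg factory functions above fix the DWARF register numbering shared by the CFI and debug-info writers: ARM core registers keep their hardware numbers, S registers start at 64 and D registers at 256 (the comment above flags the S mapping as obsolescent). A small self-contained check, with the mappings restated as plain functions for illustration:

#include <cassert>

constexpr int ArmCore(int num) { return num; }       // R0-R15
constexpr int ArmFp(int num) { return 64 + num; }    // S0-S31
constexpr int ArmDp(int num) { return 256 + num; }   // D0-D31

int main() {
  assert(ArmCore(14) == 14);  // LR, used as the CIE return-address register later in this diff
  assert(ArmFp(16) == 80);    // S16
  assert(ArmDp(8) == 264);    // D8
}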
- static Reg MipsCore(int num) { return Reg(num); } - static Reg Mips64Core(int num) { return Reg(num); } - static Reg MipsFp(int num) { return Reg(32 + num); } - static Reg Mips64Fp(int num) { return Reg(32 + num); } - static Reg X86Core(int num) { return Reg(num); } - static Reg X86Fp(int num) { return Reg(21 + num); } - static Reg X86_64Core(int num) { - static const int map[8] = {0, 2, 1, 3, 7, 6, 4, 5}; - return Reg(num < 8 ? map[num] : num); - } - static Reg X86_64Fp(int num) { return Reg(17 + num); } - - private: - int num_; -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_REGISTER_H_ diff --git a/compiler/debug/dwarf/writer.h b/compiler/debug/dwarf/writer.h deleted file mode 100644 index c09d97aa79..0000000000 --- a/compiler/debug/dwarf/writer.h +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_DWARF_WRITER_H_ -#define ART_COMPILER_DEBUG_DWARF_WRITER_H_ - -#include <type_traits> -#include <vector> - -#include <android-base/logging.h> - -#include "base/bit_utils.h" -#include "base/leb128.h" - -namespace art { -namespace dwarf { - -// The base class for all DWARF writers. -template <typename Vector = std::vector<uint8_t>> -class Writer { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - - public: - void PushUint8(int value) { - DCHECK_GE(value, 0); - DCHECK_LE(value, UINT8_MAX); - data_->push_back(value & 0xff); - } - - void PushUint16(int value) { - DCHECK_GE(value, 0); - DCHECK_LE(value, UINT16_MAX); - data_->push_back((value >> 0) & 0xff); - data_->push_back((value >> 8) & 0xff); - } - - void PushUint32(uint32_t value) { - data_->push_back((value >> 0) & 0xff); - data_->push_back((value >> 8) & 0xff); - data_->push_back((value >> 16) & 0xff); - data_->push_back((value >> 24) & 0xff); - } - - void PushUint32(int value) { - DCHECK_GE(value, 0); - PushUint32(static_cast<uint32_t>(value)); - } - - void PushUint32(uint64_t value) { - DCHECK_LE(value, UINT32_MAX); - PushUint32(static_cast<uint32_t>(value)); - } - - void PushUint64(uint64_t value) { - data_->push_back((value >> 0) & 0xff); - data_->push_back((value >> 8) & 0xff); - data_->push_back((value >> 16) & 0xff); - data_->push_back((value >> 24) & 0xff); - data_->push_back((value >> 32) & 0xff); - data_->push_back((value >> 40) & 0xff); - data_->push_back((value >> 48) & 0xff); - data_->push_back((value >> 56) & 0xff); - } - - void PushInt8(int value) { - DCHECK_GE(value, INT8_MIN); - DCHECK_LE(value, INT8_MAX); - PushUint8(static_cast<uint8_t>(value)); - } - - void PushInt16(int value) { - DCHECK_GE(value, INT16_MIN); - DCHECK_LE(value, INT16_MAX); - PushUint16(static_cast<uint16_t>(value)); - } - - void PushInt32(int value) { - PushUint32(static_cast<uint32_t>(value)); - } - - void PushInt64(int64_t value) { - PushUint64(static_cast<uint64_t>(value)); - } - - // Variable-length encoders. 
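The PushUleb128()/PushSleb128() helpers that follow delegate to the shared encoders in base/leb128.h; the wire format packs the value into 7-bit groups, setting the high bit on every byte except the last. A minimal unsigned encoder, shown only to illustrate that format (project code keeps using EncodeUnsignedLeb128):

#include <cassert>
#include <cstdint>
#include <vector>

void EncodeUleb128(std::vector<uint8_t>* out, uint32_t value) {
  do {
    uint8_t byte = value & 0x7F;
    value >>= 7;
    out->push_back(value != 0 ? static_cast<uint8_t>(byte | 0x80) : byte);  // high bit: more bytes follow
  } while (value != 0);
}

int main() {
  std::vector<uint8_t> buf;
  EncodeUleb128(&buf, 300);  // 300 = 0x12C
  assert((buf == std::vector<uint8_t>{0xAC, 0x02}));
}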
- - void PushUleb128(uint32_t value) { - EncodeUnsignedLeb128(data_, value); - } - - void PushUleb128(int value) { - DCHECK_GE(value, 0); - EncodeUnsignedLeb128(data_, value); - } - - void PushSleb128(int value) { - EncodeSignedLeb128(data_, value); - } - - // Miscellaneous functions. - - void PushString(const char* value) { - data_->insert(data_->end(), value, value + strlen(value) + 1); - } - - void PushData(const uint8_t* ptr, size_t num_bytes) { - data_->insert(data_->end(), ptr, ptr + num_bytes); - } - - void PushData(const char* ptr, size_t num_bytes) { - data_->insert(data_->end(), ptr, ptr + num_bytes); - } - - void PushData(const Vector* buffer) { - data_->insert(data_->end(), buffer->begin(), buffer->end()); - } - - void UpdateUint32(size_t offset, uint32_t value) { - DCHECK_LT(offset + 3, data_->size()); - (*data_)[offset + 0] = (value >> 0) & 0xFF; - (*data_)[offset + 1] = (value >> 8) & 0xFF; - (*data_)[offset + 2] = (value >> 16) & 0xFF; - (*data_)[offset + 3] = (value >> 24) & 0xFF; - } - - void UpdateUint64(size_t offset, uint64_t value) { - DCHECK_LT(offset + 7, data_->size()); - (*data_)[offset + 0] = (value >> 0) & 0xFF; - (*data_)[offset + 1] = (value >> 8) & 0xFF; - (*data_)[offset + 2] = (value >> 16) & 0xFF; - (*data_)[offset + 3] = (value >> 24) & 0xFF; - (*data_)[offset + 4] = (value >> 32) & 0xFF; - (*data_)[offset + 5] = (value >> 40) & 0xFF; - (*data_)[offset + 6] = (value >> 48) & 0xFF; - (*data_)[offset + 7] = (value >> 56) & 0xFF; - } - - void UpdateUleb128(size_t offset, uint32_t value) { - DCHECK_LE(offset + UnsignedLeb128Size(value), data_->size()); - UpdateUnsignedLeb128(data_->data() + offset, value); - } - - void Pop() { - return data_->pop_back(); - } - - void Pad(int alignment) { - DCHECK_NE(alignment, 0); - data_->resize(RoundUp(data_->size(), alignment), 0); - } - - const Vector* data() const { - return data_; - } - - size_t size() const { - return data_->size(); - } - - explicit Writer(Vector* buffer) : data_(buffer) { } - - private: - Vector* const data_; - - DISALLOW_COPY_AND_ASSIGN(Writer); -}; - -} // namespace dwarf -} // namespace art - -#endif // ART_COMPILER_DEBUG_DWARF_WRITER_H_ diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h index 27b70c8caa..31bfed6d24 100644 --- a/compiler/debug/elf_debug_frame_writer.h +++ b/compiler/debug/elf_debug_frame_writer.h @@ -20,18 +20,18 @@ #include <vector> #include "arch/instruction_set.h" -#include "debug/dwarf/debug_frame_opcode_writer.h" -#include "debug/dwarf/dwarf_constants.h" -#include "debug/dwarf/headers.h" #include "debug/method_debug_info.h" -#include "linker/elf_builder.h" +#include "dwarf/debug_frame_opcode_writer.h" +#include "dwarf/dwarf_constants.h" +#include "dwarf/headers.h" +#include "elf/elf_builder.h" namespace art { namespace debug { -static void WriteCIE(InstructionSet isa, - dwarf::CFIFormat format, - std::vector<uint8_t>* buffer) { +static constexpr bool kWriteDebugFrameHdr = false; + +static void WriteCIE(InstructionSet isa, /*inout*/ std::vector<uint8_t>* buffer) { using Reg = dwarf::Reg; // Scratch registers should be marked as undefined. This tells the // debugger that its value in the previous frame is not recoverable. @@ -58,7 +58,7 @@ static void WriteCIE(InstructionSet isa, } } auto return_reg = Reg::ArmCore(14); // R14(LR). 
- WriteCIE(is64bit, return_reg, opcodes, format, buffer); + WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kArm64: { @@ -81,7 +81,7 @@ static void WriteCIE(InstructionSet isa, } } auto return_reg = Reg::Arm64Core(30); // R30(LR). - WriteCIE(is64bit, return_reg, opcodes, format, buffer); + WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kMips: @@ -105,7 +105,7 @@ static void WriteCIE(InstructionSet isa, } } auto return_reg = Reg::MipsCore(31); // R31(RA). - WriteCIE(is64bit, return_reg, opcodes, format, buffer); + WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kX86: { @@ -131,7 +131,7 @@ static void WriteCIE(InstructionSet isa, } } auto return_reg = Reg::X86Core(8); // R8(EIP). - WriteCIE(is64bit, return_reg, opcodes, format, buffer); + WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kX86_64: { @@ -157,7 +157,7 @@ static void WriteCIE(InstructionSet isa, } } auto return_reg = Reg::X86_64Core(16); // R16(RIP). - WriteCIE(is64bit, return_reg, opcodes, format, buffer); + WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kNone: @@ -168,11 +168,8 @@ static void WriteCIE(InstructionSet isa, } template<typename ElfTypes> -void WriteCFISection(linker::ElfBuilder<ElfTypes>* builder, - const ArrayRef<const MethodDebugInfo>& method_infos, - dwarf::CFIFormat format, - bool write_oat_patches) { - CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT || format == dwarf::DW_EH_FRAME_FORMAT); +void WriteCFISection(ElfBuilder<ElfTypes>* builder, + const ArrayRef<const MethodDebugInfo>& method_infos) { typedef typename ElfTypes::Addr Elf_Addr; // The methods can be written in any order. @@ -199,80 +196,57 @@ void WriteCFISection(linker::ElfBuilder<ElfTypes>* builder, }); std::vector<uint32_t> binary_search_table; - std::vector<uintptr_t> patch_locations; - if (format == dwarf::DW_EH_FRAME_FORMAT) { + if (kWriteDebugFrameHdr) { binary_search_table.reserve(2 * sorted_method_infos.size()); - } else { - patch_locations.reserve(sorted_method_infos.size()); } - // Write .eh_frame/.debug_frame section. - const bool is_debug_frame = format == dwarf::DW_DEBUG_FRAME_FORMAT; - auto* cfi_section = (is_debug_frame ? builder->GetDebugFrame() : builder->GetEhFrame()); + // Write .debug_frame section. + auto* cfi_section = builder->GetDebugFrame(); { cfi_section->Start(); const bool is64bit = Is64BitInstructionSet(builder->GetIsa()); - const Elf_Addr cfi_address = (is_debug_frame ? 0 : cfi_section->GetAddress()); - const Elf_Addr cie_address = cfi_address; - Elf_Addr buffer_address = cfi_address; std::vector<uint8_t> buffer; // Small temporary buffer. - WriteCIE(builder->GetIsa(), format, &buffer); + WriteCIE(builder->GetIsa(), &buffer); cfi_section->WriteFully(buffer.data(), buffer.size()); - buffer_address += buffer.size(); buffer.clear(); for (const MethodDebugInfo* mi : sorted_method_infos) { DCHECK(!mi->deduped); DCHECK(!mi->cfi.empty()); const Elf_Addr code_address = mi->code_address + (mi->is_code_address_text_relative ? 
builder->GetText()->GetAddress() : 0); - if (format == dwarf::DW_EH_FRAME_FORMAT) { + if (kWriteDebugFrameHdr) { binary_search_table.push_back(dchecked_integral_cast<uint32_t>(code_address)); - binary_search_table.push_back(dchecked_integral_cast<uint32_t>(buffer_address)); + binary_search_table.push_back(cfi_section->GetPosition()); } - WriteFDE(is64bit, cfi_address, cie_address, - code_address, mi->code_size, - mi->cfi, format, buffer_address, &buffer, - &patch_locations); + dwarf::WriteFDE(is64bit, + /* cie_pointer= */ 0, + code_address, + mi->code_size, + mi->cfi, + &buffer); cfi_section->WriteFully(buffer.data(), buffer.size()); - buffer_address += buffer.size(); buffer.clear(); } cfi_section->End(); } - if (format == dwarf::DW_EH_FRAME_FORMAT) { - auto* header_section = builder->GetEhFrameHdr(); - header_section->Start(); - uint32_t header_address = dchecked_integral_cast<int32_t>(header_section->GetAddress()); - // Write .eh_frame_hdr section. - std::vector<uint8_t> buffer; - dwarf::Writer<> header(&buffer); + if (kWriteDebugFrameHdr) { + std::sort(binary_search_table.begin(), binary_search_table.end()); + + // Custom Android section. It is very similar to the official .eh_frame_hdr format. + std::vector<uint8_t> header_buffer; + dwarf::Writer<> header(&header_buffer); header.PushUint8(1); // Version. - // Encoding of .eh_frame pointer - libunwind does not honor datarel here, - // so we have to use pcrel which means relative to the pointer's location. - header.PushUint8(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - // Encoding of binary search table size. - header.PushUint8(dwarf::DW_EH_PE_udata4); - // Encoding of binary search table addresses - libunwind supports only this - // specific combination, which means relative to the start of .eh_frame_hdr. - header.PushUint8(dwarf::DW_EH_PE_datarel | dwarf::DW_EH_PE_sdata4); - // .eh_frame pointer - header.PushInt32(cfi_section->GetAddress() - (header_address + 4u)); - // Binary search table size (number of entries). + header.PushUint8(dwarf::DW_EH_PE_omit); // Encoding of .eh_frame pointer - none. + header.PushUint8(dwarf::DW_EH_PE_udata4); // Encoding of binary search table size. + header.PushUint8(dwarf::DW_EH_PE_udata4); // Encoding of binary search table data. header.PushUint32(dchecked_integral_cast<uint32_t>(binary_search_table.size()/2)); - header_section->WriteFully(buffer.data(), buffer.size()); - // Binary search table. - for (size_t i = 0; i < binary_search_table.size(); i++) { - // Make addresses section-relative since we know the header address now. 
- binary_search_table[i] -= header_address; - } + + auto* header_section = builder->GetDebugFrameHdr(); + header_section->Start(); + header_section->WriteFully(header_buffer.data(), header_buffer.size()); header_section->WriteFully(binary_search_table.data(), binary_search_table.size()); header_section->End(); - } else { - if (write_oat_patches) { - builder->WritePatches(".debug_frame.oat_patches", - ArrayRef<const uintptr_t>(patch_locations)); - } } } diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h index 893cad288b..986c7e820f 100644 --- a/compiler/debug/elf_debug_info_writer.h +++ b/compiler/debug/elf_debug_info_writer.h @@ -22,45 +22,34 @@ #include <vector> #include "art_field-inl.h" -#include "debug/dwarf/debug_abbrev_writer.h" -#include "debug/dwarf/debug_info_entry_writer.h" #include "debug/elf_compilation_unit.h" #include "debug/elf_debug_loc_writer.h" #include "debug/method_debug_info.h" #include "dex/code_item_accessors-inl.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" +#include "dwarf/debug_abbrev_writer.h" +#include "dwarf/debug_info_entry_writer.h" +#include "elf/elf_builder.h" #include "heap_poisoning.h" #include "linear_alloc.h" -#include "linker/elf_builder.h" #include "mirror/array.h" #include "mirror/class-inl.h" #include "mirror/class.h" #include "oat_file.h" +#include "obj_ptr-inl.h" namespace art { namespace debug { -typedef std::vector<DexFile::LocalInfo> LocalInfos; - -static void LocalInfoCallback(void* ctx, const DexFile::LocalInfo& entry) { - static_cast<LocalInfos*>(ctx)->push_back(entry); -} - static std::vector<const char*> GetParamNames(const MethodDebugInfo* mi) { std::vector<const char*> names; + DCHECK(mi->dex_file != nullptr); CodeItemDebugInfoAccessor accessor(*mi->dex_file, mi->code_item, mi->dex_method_index); if (accessor.HasCodeItem()) { - DCHECK(mi->dex_file != nullptr); - const uint8_t* stream = mi->dex_file->GetDebugInfoStream(accessor.DebugInfoOffset()); - if (stream != nullptr) { - DecodeUnsignedLeb128(&stream); // line. 
- uint32_t parameters_size = DecodeUnsignedLeb128(&stream); - for (uint32_t i = 0; i < parameters_size; ++i) { - uint32_t id = DecodeUnsignedLeb128P1(&stream); - names.push_back(mi->dex_file->StringDataByIdx(dex::StringIndex(id))); - } - } + accessor.VisitParameterNames([&](const dex::StringIndex& id) { + names.push_back(mi->dex_file->StringDataByIdx(id)); + }); } return names; } @@ -71,7 +60,7 @@ class ElfDebugInfoWriter { using Elf_Addr = typename ElfTypes::Addr; public: - explicit ElfDebugInfoWriter(linker::ElfBuilder<ElfTypes>* builder) + explicit ElfDebugInfoWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder), debug_abbrev_(&debug_abbrev_buffer_) { } @@ -80,12 +69,8 @@ class ElfDebugInfoWriter { builder_->GetDebugInfo()->Start(); } - void End(bool write_oat_patches) { + void End() { builder_->GetDebugInfo()->End(); - if (write_oat_patches) { - builder_->WritePatches(".debug_info.oat_patches", - ArrayRef<const uintptr_t>(debug_info_patches_)); - } builder_->WriteSection(".debug_abbrev", &debug_abbrev_buffer_); if (!debug_loc_.empty()) { builder_->WriteSection(".debug_loc", &debug_loc_); @@ -96,8 +81,7 @@ class ElfDebugInfoWriter { } private: - linker::ElfBuilder<ElfTypes>* builder_; - std::vector<uintptr_t> debug_info_patches_; + ElfBuilder<ElfTypes>* builder_; std::vector<uint8_t> debug_abbrev_buffer_; dwarf::DebugAbbrevWriter<> debug_abbrev_; std::vector<uint8_t> debug_loc_; @@ -164,9 +148,9 @@ class ElfCompilationUnitWriter { DCHECK(mi->dex_file != nullptr); const DexFile* dex = mi->dex_file; CodeItemDebugInfoAccessor accessor(*dex, mi->code_item, mi->dex_method_index); - const DexFile::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index); - const DexFile::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method); - const DexFile::TypeList* dex_params = dex->GetProtoParameters(dex_proto); + const dex::MethodId& dex_method = dex->GetMethodId(mi->dex_method_index); + const dex::ProtoId& dex_proto = dex->GetMethodPrototype(dex_method); + const dex::TypeList* dex_params = dex->GetProtoParameters(dex_proto); const char* dex_class_desc = dex->GetMethodDeclaringClassDescriptor(dex_method); const bool is_static = (mi->access_flags & kAccStatic) != 0; @@ -204,14 +188,12 @@ class ElfCompilationUnitWriter { // Decode dex register locations for all stack maps. // It might be expensive, so do it just once and reuse the result. + std::unique_ptr<const CodeInfo> code_info; std::vector<DexRegisterMap> dex_reg_maps; if (accessor.HasCodeItem() && mi->code_info != nullptr) { - const CodeInfo code_info(mi->code_info); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - for (size_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); ++s) { - const StackMap& stack_map = code_info.GetStackMapAt(s, encoding); - dex_reg_maps.push_back(code_info.GetDexRegisterMapOf( - stack_map, encoding, accessor.RegistersSize())); + code_info.reset(new CodeInfo(mi->code_info)); + for (StackMap stack_map : code_info->GetStackMaps()) { + dex_reg_maps.push_back(code_info->GetDexRegisterMapOf(stack_map)); } } @@ -259,11 +241,12 @@ class ElfCompilationUnitWriter { } // Write local variables. 
- LocalInfos local_infos; + std::vector<DexFile::LocalInfo> local_infos; if (accessor.DecodeDebugLocalInfo(is_static, mi->dex_method_index, - LocalInfoCallback, - &local_infos)) { + [&](const DexFile::LocalInfo& entry) { + local_infos.push_back(entry); + })) { for (const DexFile::LocalInfo& var : local_infos) { if (var.reg_ < accessor.RegistersSize() - accessor.InsSize()) { info_.StartTag(DW_TAG_variable); @@ -294,10 +277,9 @@ class ElfCompilationUnitWriter { CHECK_EQ(info_.Depth(), 0); std::vector<uint8_t> buffer; buffer.reserve(info_.data()->size() + KB); - const size_t offset = owner_->builder_->GetDebugInfo()->GetPosition(); // All compilation units share single table which is at the start of .debug_abbrev. const size_t debug_abbrev_offset = 0; - WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_); + WriteDebugInfoCU(debug_abbrev_offset, info_, &buffer); owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size()); } @@ -322,7 +304,7 @@ class ElfCompilationUnitWriter { WriteTypeDeclaration(type->GetDescriptor(nullptr)); } } else if (type->IsArrayClass()) { - mirror::Class* element_type = type->GetComponentType(); + ObjPtr<mirror::Class> element_type = type->GetComponentType(); uint32_t component_size = type->GetComponentSize(); uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value(); uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value(); @@ -385,10 +367,10 @@ class ElfCompilationUnitWriter { } // Base class. - mirror::Class* base_class = type->GetSuperClass(); + ObjPtr<mirror::Class> base_class = type->GetSuperClass(); if (base_class != nullptr) { info_.StartTag(DW_TAG_inheritance); - base_class_references.emplace(info_.size(), base_class); + base_class_references.emplace(info_.size(), base_class.Ptr()); info_.WriteRef4(DW_AT_type, 0); info_.WriteUdata(DW_AT_data_member_location, 0); info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public); @@ -459,10 +441,9 @@ class ElfCompilationUnitWriter { CHECK_EQ(info_.Depth(), 0); std::vector<uint8_t> buffer; buffer.reserve(info_.data()->size() + KB); - const size_t offset = owner_->builder_->GetDebugInfo()->GetPosition(); // All compilation units share single table which is at the start of .debug_abbrev. 
const size_t debug_abbrev_offset = 0; - WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_); + WriteDebugInfoCU(debug_abbrev_offset, info_, &buffer); owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size()); } diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index 44504c1efb..479725be99 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -20,12 +20,12 @@ #include <unordered_set> #include <vector> -#include "debug/dwarf/debug_line_opcode_writer.h" -#include "debug/dwarf/headers.h" #include "debug/elf_compilation_unit.h" #include "debug/src_map_elem.h" #include "dex/dex_file-inl.h" -#include "linker/elf_builder.h" +#include "dwarf/debug_line_opcode_writer.h" +#include "dwarf/headers.h" +#include "elf/elf_builder.h" #include "oat_file.h" #include "stack_map.h" @@ -34,17 +34,12 @@ namespace debug { typedef std::vector<DexFile::PositionInfo> PositionInfos; -static bool PositionInfoCallback(void* ctx, const DexFile::PositionInfo& entry) { - static_cast<PositionInfos*>(ctx)->push_back(entry); - return false; -} - template<typename ElfTypes> class ElfDebugLineWriter { using Elf_Addr = typename ElfTypes::Addr; public: - explicit ElfDebugLineWriter(linker::ElfBuilder<ElfTypes>* builder) : builder_(builder) { + explicit ElfDebugLineWriter(ElfBuilder<ElfTypes>* builder) : builder_(builder) { } void Start() { @@ -100,15 +95,12 @@ class ElfDebugLineWriter { if (mi->code_info != nullptr) { // Use stack maps to create mapping table from pc to dex. const CodeInfo code_info(mi->code_info); - const CodeInfoEncoding encoding = code_info.ExtractEncoding(); - pc2dex_map.reserve(code_info.GetNumberOfStackMaps(encoding)); - for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); s++) { - StackMap stack_map = code_info.GetStackMapAt(s, encoding); - DCHECK(stack_map.IsValid()); - const uint32_t pc = stack_map.GetNativePcOffset(encoding.stack_map.encoding, isa); - const int32_t dex = stack_map.GetDexPc(encoding.stack_map.encoding); + pc2dex_map.reserve(code_info.GetNumberOfStackMaps()); + for (StackMap stack_map : code_info.GetStackMaps()) { + const uint32_t pc = stack_map.GetNativePcOffset(isa); + const int32_t dex = stack_map.GetDexPc(); pc2dex_map.push_back({pc, dex}); - if (stack_map.HasDexRegisterMap(encoding.stack_map.encoding)) { + if (stack_map.HasDexRegisterMap()) { // Guess that the first map with local variables is the end of prologue. 
prologue_end = std::min(prologue_end, pc); } @@ -157,11 +149,14 @@ class ElfDebugLineWriter { Elf_Addr method_address = base_address + mi->code_address; PositionInfos dex2line_map; - DCHECK(mi->dex_file != nullptr); const DexFile* dex = mi->dex_file; + DCHECK(dex != nullptr); CodeItemDebugInfoAccessor accessor(*dex, mi->code_item, mi->dex_method_index); - const uint32_t debug_info_offset = accessor.DebugInfoOffset(); - if (!dex->DecodeDebugPositionInfo(debug_info_offset, PositionInfoCallback, &dex2line_map)) { + if (!accessor.DecodeDebugPositionInfo( + [&](const DexFile::PositionInfo& entry) { + dex2line_map.push_back(entry); + return false; + })) { continue; } @@ -268,23 +263,17 @@ class ElfDebugLineWriter { } std::vector<uint8_t> buffer; buffer.reserve(opcodes.data()->size() + KB); - size_t offset = builder_->GetDebugLine()->GetPosition(); - WriteDebugLineTable(directories, files, opcodes, offset, &buffer, &debug_line_patches_); + WriteDebugLineTable(directories, files, opcodes, &buffer); builder_->GetDebugLine()->WriteFully(buffer.data(), buffer.size()); return buffer.size(); } - void End(bool write_oat_patches) { + void End() { builder_->GetDebugLine()->End(); - if (write_oat_patches) { - builder_->WritePatches(".debug_line.oat_patches", - ArrayRef<const uintptr_t>(debug_line_patches_)); - } } private: - linker::ElfBuilder<ElfTypes>* builder_; - std::vector<uintptr_t> debug_line_patches_; + ElfBuilder<ElfTypes>* builder_; }; } // namespace debug diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h index 9ea9f01cd9..a5a84bbb10 100644 --- a/compiler/debug/elf_debug_loc_writer.h +++ b/compiler/debug/elf_debug_loc_writer.h @@ -22,9 +22,9 @@ #include "arch/instruction_set.h" #include "compiled_method.h" -#include "debug/dwarf/debug_info_entry_writer.h" -#include "debug/dwarf/register.h" #include "debug/method_debug_info.h" +#include "dwarf/debug_info_entry_writer.h" +#include "dwarf/register.h" #include "stack_map.h" namespace art { @@ -99,12 +99,11 @@ static std::vector<VariableLocation> GetVariableLocations( // Get stack maps sorted by pc (they might not be sorted internally). // TODO(dsrbecky) Remove this once stackmaps get sorted by pc. const CodeInfo code_info(method_info->code_info); - const CodeInfoEncoding encoding = code_info.ExtractEncoding(); std::map<uint32_t, uint32_t> stack_maps; // low_pc -> stack_map_index. - for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(encoding); s++) { - StackMap stack_map = code_info.GetStackMapAt(s, encoding); + for (uint32_t s = 0; s < code_info.GetNumberOfStackMaps(); s++) { + StackMap stack_map = code_info.GetStackMapAt(s); DCHECK(stack_map.IsValid()); - if (!stack_map.HasDexRegisterMap(encoding.stack_map.encoding)) { + if (!stack_map.HasDexRegisterMap()) { // The compiler creates stackmaps without register maps at the start of // basic blocks in order to keep instruction-accurate line number mapping. // However, we never stop at those (breakpoint locations always have map). @@ -112,7 +111,7 @@ static std::vector<VariableLocation> GetVariableLocations( // The main reason for this is to save space by avoiding undefined gaps. 
continue; } - const uint32_t pc_offset = stack_map.GetNativePcOffset(encoding.stack_map.encoding, isa); + const uint32_t pc_offset = stack_map.GetNativePcOffset(isa); DCHECK_LE(pc_offset, method_info->code_size); DCHECK_LE(compilation_unit_code_address, method_info->code_address); const uint32_t low_pc = dchecked_integral_cast<uint32_t>( @@ -124,7 +123,7 @@ static std::vector<VariableLocation> GetVariableLocations( for (auto it = stack_maps.begin(); it != stack_maps.end(); it++) { const uint32_t low_pc = it->first; const uint32_t stack_map_index = it->second; - const StackMap& stack_map = code_info.GetStackMapAt(stack_map_index, encoding); + const StackMap stack_map = code_info.GetStackMapAt(stack_map_index); auto next_it = it; next_it++; const uint32_t high_pc = next_it != stack_maps.end() @@ -136,7 +135,7 @@ static std::vector<VariableLocation> GetVariableLocations( } // Check that the stack map is in the requested range. - uint32_t dex_pc = stack_map.GetDexPc(encoding.stack_map.encoding); + uint32_t dex_pc = stack_map.GetDexPc(); if (!(dex_pc_low <= dex_pc && dex_pc < dex_pc_high)) { // The variable is not in scope at this PC. Therefore omit the entry. // Note that this is different to None() entry which means in scope, but unknown location. @@ -148,13 +147,11 @@ static std::vector<VariableLocation> GetVariableLocations( DexRegisterLocation reg_hi = DexRegisterLocation::None(); DCHECK_LT(stack_map_index, dex_register_maps.size()); DexRegisterMap dex_register_map = dex_register_maps[stack_map_index]; - DCHECK(dex_register_map.IsValid()); + DCHECK(!dex_register_map.empty()); CodeItemDataAccessor accessor(*method_info->dex_file, method_info->code_item); - reg_lo = dex_register_map.GetDexRegisterLocation( - vreg, accessor.RegistersSize(), code_info, encoding); + reg_lo = dex_register_map[vreg]; if (is64bitValue) { - reg_hi = dex_register_map.GetDexRegisterLocation( - vreg + 1, accessor.RegistersSize(), code_info, encoding); + reg_hi = dex_register_map[vreg + 1]; } // Add location entry for this address range. diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index 59a080fc21..10f673b2d6 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -16,39 +16,43 @@ #include "elf_debug_writer.h" -#include <vector> +#include <type_traits> #include <unordered_map> +#include <vector> #include "base/array_ref.h" -#include "debug/dwarf/dwarf_constants.h" +#include "base/stl_util.h" #include "debug/elf_compilation_unit.h" #include "debug/elf_debug_frame_writer.h" #include "debug/elf_debug_info_writer.h" #include "debug/elf_debug_line_writer.h" #include "debug/elf_debug_loc_writer.h" -#include "debug/elf_gnu_debugdata_writer.h" #include "debug/elf_symtab_writer.h" #include "debug/method_debug_info.h" -#include "linker/elf_builder.h" -#include "linker/vector_output_stream.h" +#include "dwarf/dwarf_constants.h" +#include "elf/elf_builder.h" +#include "elf/elf_debug_reader.h" +#include "elf/elf_utils.h" +#include "elf/xz_utils.h" #include "oat.h" +#include "stream/vector_output_stream.h" namespace art { namespace debug { +using ElfRuntimeTypes = std::conditional<sizeof(void*) == 4, ElfTypes32, ElfTypes64>::type; + template <typename ElfTypes> -void WriteDebugInfo(linker::ElfBuilder<ElfTypes>* builder, - const DebugInfo& debug_info, - dwarf::CFIFormat cfi_format, - bool write_oat_patches) { +void WriteDebugInfo(ElfBuilder<ElfTypes>* builder, + const DebugInfo& debug_info) { // Write .strtab and .symtab. 
- WriteDebugSymbols(builder, false /* mini-debug-info */, debug_info); + WriteDebugSymbols(builder, /* mini-debug-info= */ false, debug_info); // Write .debug_frame. - WriteCFISection(builder, debug_info.compiled_methods, cfi_format, write_oat_patches); + WriteCFISection(builder, debug_info.compiled_methods); // Group the methods into compilation units based on class. - std::unordered_map<const DexFile::ClassDef*, ElfCompilationUnit> class_to_compilation_unit; + std::unordered_map<const dex::ClassDef*, ElfCompilationUnit> class_to_compilation_unit; for (const MethodDebugInfo& mi : debug_info.compiled_methods) { if (mi.dex_file != nullptr) { auto& dex_class_def = mi.dex_file->GetClassDef(mi.class_def_index); @@ -90,7 +94,7 @@ void WriteDebugInfo(linker::ElfBuilder<ElfTypes>* builder, for (auto& compilation_unit : compilation_units) { line_writer.WriteCompilationUnit(compilation_unit); } - line_writer.End(write_oat_patches); + line_writer.End(); } // Write .debug_info section. @@ -101,8 +105,43 @@ void WriteDebugInfo(linker::ElfBuilder<ElfTypes>* builder, ElfCompilationUnitWriter<ElfTypes> cu_writer(&info_writer); cu_writer.Write(compilation_unit); } - info_writer.End(write_oat_patches); + info_writer.End(); + } +} + +template <typename ElfTypes> +static std::vector<uint8_t> MakeMiniDebugInfoInternal( + InstructionSet isa, + const InstructionSetFeatures* features ATTRIBUTE_UNUSED, + typename ElfTypes::Addr text_section_address, + size_t text_section_size, + typename ElfTypes::Addr dex_section_address, + size_t dex_section_size, + const DebugInfo& debug_info) { + std::vector<uint8_t> buffer; + buffer.reserve(KB); + VectorOutputStream out("Mini-debug-info ELF file", &buffer); + std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out)); + builder->Start(/* write_program_headers= */ false); + // Mirror ELF sections as NOBITS since the added symbols will reference them. 
+ if (text_section_size != 0) { + builder->GetText()->AllocateVirtualMemory(text_section_address, text_section_size); + } + if (dex_section_size != 0) { + builder->GetDex()->AllocateVirtualMemory(dex_section_address, dex_section_size); + } + if (!debug_info.Empty()) { + WriteDebugSymbols(builder.get(), /* mini-debug-info= */ true, debug_info); } + if (!debug_info.compiled_methods.empty()) { + WriteCFISection(builder.get(), debug_info.compiled_methods); + } + builder->End(); + CHECK(builder->Good()); + std::vector<uint8_t> compressed_buffer; + compressed_buffer.reserve(buffer.size() / 4); + XzCompress(ArrayRef<const uint8_t>(buffer), &compressed_buffer); + return compressed_buffer; } std::vector<uint8_t> MakeMiniDebugInfo( @@ -132,118 +171,205 @@ std::vector<uint8_t> MakeMiniDebugInfo( } } -template <typename ElfTypes> -static std::vector<uint8_t> MakeElfFileForJITInternal( +std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, - const InstructionSetFeatures* features, + const InstructionSetFeatures* features ATTRIBUTE_UNUSED, bool mini_debug_info, - ArrayRef<const MethodDebugInfo> method_infos) { - CHECK_GT(method_infos.size(), 0u); - uint64_t min_address = std::numeric_limits<uint64_t>::max(); - uint64_t max_address = 0; - for (const MethodDebugInfo& mi : method_infos) { - CHECK_EQ(mi.is_code_address_text_relative, false); - min_address = std::min(min_address, mi.code_address); - max_address = std::max(max_address, mi.code_address + mi.code_size); - } + const MethodDebugInfo& method_info) { + using ElfTypes = ElfRuntimeTypes; + CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); + CHECK_EQ(method_info.is_code_address_text_relative, false); DebugInfo debug_info{}; - debug_info.compiled_methods = method_infos; + debug_info.compiled_methods = ArrayRef<const MethodDebugInfo>(&method_info, 1); std::vector<uint8_t> buffer; buffer.reserve(KB); - linker::VectorOutputStream out("Debug ELF file", &buffer); - std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder( - new linker::ElfBuilder<ElfTypes>(isa, features, &out)); + VectorOutputStream out("Debug ELF file", &buffer); + std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out)); // No program headers since the ELF file is not linked and has no allocated sections. - builder->Start(false /* write_program_headers */); + builder->Start(/* write_program_headers= */ false); + builder->GetText()->AllocateVirtualMemory(method_info.code_address, method_info.code_size); if (mini_debug_info) { - if (method_infos.size() > 1) { - std::vector<uint8_t> mdi = MakeMiniDebugInfo(isa, - features, - min_address, - max_address - min_address, - /* dex_section_address */ 0, - /* dex_section_size */ 0, - debug_info); - builder->WriteSection(".gnu_debugdata", &mdi); - } else { - // The compression is great help for multiple methods but it is not worth it for a - // single method due to the overheads so skip the compression here for performance. - builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address); - WriteDebugSymbols(builder.get(), true /* mini-debug-info */, debug_info); - WriteCFISection(builder.get(), - debug_info.compiled_methods, - dwarf::DW_DEBUG_FRAME_FORMAT, - false /* write_oat_paches */); - } + // The compression is great help for multiple methods but it is not worth it for a + // single method due to the overheads so skip the compression here for performance. 
+ WriteDebugSymbols(builder.get(), /* mini-debug-info= */ true, debug_info); + WriteCFISection(builder.get(), debug_info.compiled_methods); } else { - builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address); - WriteDebugInfo(builder.get(), - debug_info, - dwarf::DW_DEBUG_FRAME_FORMAT, - false /* write_oat_patches */); + WriteDebugInfo(builder.get(), debug_info); } builder->End(); CHECK(builder->Good()); + // Verify the ELF file by reading it back using the trivial reader. + if (kIsDebugBuild) { + using Elf_Sym = typename ElfTypes::Sym; + size_t num_syms = 0; + size_t num_cies = 0; + size_t num_fdes = 0; + using Reader = ElfDebugReader<ElfTypes>; + Reader reader(buffer); + reader.VisitFunctionSymbols([&](Elf_Sym sym, const char*) { + DCHECK_EQ(sym.st_value, method_info.code_address + CompiledMethod::CodeDelta(isa)); + DCHECK_EQ(sym.st_size, method_info.code_size); + num_syms++; + }); + reader.VisitDebugFrame([&](const Reader::CIE* cie ATTRIBUTE_UNUSED) { + num_cies++; + }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) { + DCHECK_EQ(fde->sym_addr, method_info.code_address); + DCHECK_EQ(fde->sym_size, method_info.code_size); + num_fdes++; + }); + DCHECK_EQ(num_syms, 1u); + DCHECK_LE(num_cies, 1u); + DCHECK_LE(num_fdes, 1u); + } return buffer; } -std::vector<uint8_t> MakeElfFileForJIT( +// Combine several mini-debug-info ELF files into one, while filtering some symbols. +std::vector<uint8_t> PackElfFileForJIT( InstructionSet isa, - const InstructionSetFeatures* features, - bool mini_debug_info, - ArrayRef<const MethodDebugInfo> method_infos) { - if (Is64BitInstructionSet(isa)) { - return MakeElfFileForJITInternal<ElfTypes64>(isa, features, mini_debug_info, method_infos); - } else { - return MakeElfFileForJITInternal<ElfTypes32>(isa, features, mini_debug_info, method_infos); + const InstructionSetFeatures* features ATTRIBUTE_UNUSED, + std::vector<ArrayRef<const uint8_t>>& added_elf_files, + std::vector<const void*>& removed_symbols, + /*out*/ size_t* num_symbols) { + using ElfTypes = ElfRuntimeTypes; + using Elf_Addr = typename ElfTypes::Addr; + using Elf_Sym = typename ElfTypes::Sym; + CHECK_EQ(sizeof(Elf_Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); + auto is_removed_symbol = [&removed_symbols](Elf_Addr addr) { + const void* code_ptr = reinterpret_cast<const void*>(addr); + return std::binary_search(removed_symbols.begin(), removed_symbols.end(), code_ptr); + }; + uint64_t min_address = std::numeric_limits<uint64_t>::max(); + uint64_t max_address = 0; + + // Produce the inner ELF file. + // It will contain the symbols (.symtab) and unwind information (.debug_frame). + std::vector<uint8_t> inner_elf_file; + { + inner_elf_file.reserve(1 * KB); // Approximate size of ELF file with a single symbol. + VectorOutputStream out("Mini-debug-info ELF file for JIT", &inner_elf_file); + std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out)); + builder->Start(/*write_program_headers=*/ false); + auto* text = builder->GetText(); + auto* strtab = builder->GetStrTab(); + auto* symtab = builder->GetSymTab(); + auto* debug_frame = builder->GetDebugFrame(); + std::deque<Elf_Sym> symbols; + + using Reader = ElfDebugReader<ElfTypes>; + std::deque<Reader> readers; + for (ArrayRef<const uint8_t> added_elf_file : added_elf_files) { + readers.emplace_back(added_elf_file); + } + + // Write symbols names. All other data is buffered. + strtab->Start(); + strtab->Write(""); // strtab should start with empty string. 
+ for (Reader& reader : readers) { + reader.VisitFunctionSymbols([&](Elf_Sym sym, const char* name) { + if (is_removed_symbol(sym.st_value)) { + return; + } + sym.st_name = strtab->Write(name); + symbols.push_back(sym); + min_address = std::min<uint64_t>(min_address, sym.st_value); + max_address = std::max<uint64_t>(max_address, sym.st_value + sym.st_size); + }); + } + strtab->End(); + + // Create .text covering the code range. Needed for gdb to find the symbols. + if (max_address > min_address) { + text->AllocateVirtualMemory(min_address, max_address - min_address); + } + + // Add the symbols. + *num_symbols = symbols.size(); + for (; !symbols.empty(); symbols.pop_front()) { + symtab->Add(symbols.front(), text); + } + symtab->WriteCachedSection(); + + // Add the CFI/unwind section. + debug_frame->Start(); + // ART always produces the same CIE, so we copy the first one and ignore the rest. + bool copied_cie = false; + for (Reader& reader : readers) { + reader.VisitDebugFrame([&](const Reader::CIE* cie) { + if (!copied_cie) { + debug_frame->WriteFully(cie->data(), cie->size()); + copied_cie = true; + } + }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) { + DCHECK(copied_cie); + DCHECK_EQ(fde->cie_pointer, 0); + if (!is_removed_symbol(fde->sym_addr)) { + debug_frame->WriteFully(fde->data(), fde->size()); + } + }); + } + debug_frame->End(); + + builder->End(); + CHECK(builder->Good()); } + + // Produce the outer ELF file. + // It contains only the inner ELF file compressed as .gnu_debugdata section. + // This extra wrapping is not necessary but the compression saves space. + std::vector<uint8_t> outer_elf_file; + { + std::vector<uint8_t> gnu_debugdata; + gnu_debugdata.reserve(inner_elf_file.size() / 4); + XzCompress(ArrayRef<const uint8_t>(inner_elf_file), &gnu_debugdata); + + outer_elf_file.reserve(KB + gnu_debugdata.size()); + VectorOutputStream out("Mini-debug-info ELF file for JIT", &outer_elf_file); + std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out)); + builder->Start(/*write_program_headers=*/ false); + if (max_address > min_address) { + builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address); + } + builder->WriteSection(".gnu_debugdata", &gnu_debugdata); + builder->End(); + CHECK(builder->Good()); + } + + return outer_elf_file; } -template <typename ElfTypes> -static std::vector<uint8_t> WriteDebugElfFileForClassesInternal( +std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, - const InstructionSetFeatures* features, + const InstructionSetFeatures* features ATTRIBUTE_UNUSED, const ArrayRef<mirror::Class*>& types) REQUIRES_SHARED(Locks::mutator_lock_) { + using ElfTypes = ElfRuntimeTypes; + CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); std::vector<uint8_t> buffer; buffer.reserve(KB); - linker::VectorOutputStream out("Debug ELF file", &buffer); - std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder( - new linker::ElfBuilder<ElfTypes>(isa, features, &out)); + VectorOutputStream out("Debug ELF file", &buffer); + std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out)); // No program headers since the ELF file is not linked and has no allocated sections. 
- builder->Start(false /* write_program_headers */); + builder->Start(/* write_program_headers= */ false); ElfDebugInfoWriter<ElfTypes> info_writer(builder.get()); info_writer.Start(); ElfCompilationUnitWriter<ElfTypes> cu_writer(&info_writer); cu_writer.Write(types); - info_writer.End(false /* write_oat_patches */); + info_writer.End(); builder->End(); CHECK(builder->Good()); return buffer; } -std::vector<uint8_t> WriteDebugElfFileForClasses(InstructionSet isa, - const InstructionSetFeatures* features, - const ArrayRef<mirror::Class*>& types) { - if (Is64BitInstructionSet(isa)) { - return WriteDebugElfFileForClassesInternal<ElfTypes64>(isa, features, types); - } else { - return WriteDebugElfFileForClassesInternal<ElfTypes32>(isa, features, types); - } -} - // Explicit instantiations template void WriteDebugInfo<ElfTypes32>( - linker::ElfBuilder<ElfTypes32>* builder, - const DebugInfo& debug_info, - dwarf::CFIFormat cfi_format, - bool write_oat_patches); + ElfBuilder<ElfTypes32>* builder, + const DebugInfo& debug_info); template void WriteDebugInfo<ElfTypes64>( - linker::ElfBuilder<ElfTypes64>* builder, - const DebugInfo& debug_info, - dwarf::CFIFormat cfi_format, - bool write_oat_patches); + ElfBuilder<ElfTypes64>* builder, + const DebugInfo& debug_info); } // namespace debug } // namespace art diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h index e442e0016c..14a5edbace 100644 --- a/compiler/debug/elf_debug_writer.h +++ b/compiler/debug/elf_debug_writer.h @@ -19,12 +19,13 @@ #include <vector> +#include "arch/instruction_set_features.h" #include "base/array_ref.h" #include "base/macros.h" #include "base/mutex.h" -#include "debug/dwarf/dwarf_constants.h" #include "debug/debug_info.h" -#include "linker/elf_builder.h" +#include "dwarf/dwarf_constants.h" +#include "elf/elf_builder.h" namespace art { class OatHeader; @@ -36,10 +37,8 @@ struct MethodDebugInfo; template <typename ElfTypes> void WriteDebugInfo( - linker::ElfBuilder<ElfTypes>* builder, - const DebugInfo& debug_info, - dwarf::CFIFormat cfi_format, - bool write_oat_patches); + ElfBuilder<ElfTypes>* builder, + const DebugInfo& debug_info); std::vector<uint8_t> MakeMiniDebugInfo( InstructionSet isa, @@ -54,7 +53,14 @@ std::vector<uint8_t> MakeElfFileForJIT( InstructionSet isa, const InstructionSetFeatures* features, bool mini_debug_info, - ArrayRef<const MethodDebugInfo> method_infos); + const MethodDebugInfo& method_info); + +std::vector<uint8_t> PackElfFileForJIT( + InstructionSet isa, + const InstructionSetFeatures* features, + std::vector<ArrayRef<const uint8_t>>& added_elf_files, + std::vector<const void*>& removed_symbols, + /*out*/ size_t* num_symbols); std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, diff --git a/compiler/debug/elf_gnu_debugdata_writer.h b/compiler/debug/elf_gnu_debugdata_writer.h deleted file mode 100644 index a88c5cb213..0000000000 --- a/compiler/debug/elf_gnu_debugdata_writer.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEBUG_ELF_GNU_DEBUGDATA_WRITER_H_ -#define ART_COMPILER_DEBUG_ELF_GNU_DEBUGDATA_WRITER_H_ - -#include <vector> - -#include "arch/instruction_set.h" -#include "linker/elf_builder.h" -#include "linker/vector_output_stream.h" - -// liblzma. -#include "7zCrc.h" -#include "XzCrc64.h" -#include "XzEnc.h" - -namespace art { -namespace debug { - -static void XzCompress(const std::vector<uint8_t>* src, std::vector<uint8_t>* dst) { - // Configure the compression library. - CrcGenerateTable(); - Crc64GenerateTable(); - CLzma2EncProps lzma2Props; - Lzma2EncProps_Init(&lzma2Props); - lzma2Props.lzmaProps.level = 1; // Fast compression. - Lzma2EncProps_Normalize(&lzma2Props); - CXzProps props; - XzProps_Init(&props); - props.lzma2Props = &lzma2Props; - // Implement the required interface for communication (written in C so no virtual methods). - struct XzCallbacks : public ISeqInStream, public ISeqOutStream, public ICompressProgress { - static SRes ReadImpl(void* p, void* buf, size_t* size) { - auto* ctx = static_cast<XzCallbacks*>(reinterpret_cast<ISeqInStream*>(p)); - *size = std::min(*size, ctx->src_->size() - ctx->src_pos_); - memcpy(buf, ctx->src_->data() + ctx->src_pos_, *size); - ctx->src_pos_ += *size; - return SZ_OK; - } - static size_t WriteImpl(void* p, const void* buf, size_t size) { - auto* ctx = static_cast<XzCallbacks*>(reinterpret_cast<ISeqOutStream*>(p)); - const uint8_t* buffer = reinterpret_cast<const uint8_t*>(buf); - ctx->dst_->insert(ctx->dst_->end(), buffer, buffer + size); - return size; - } - static SRes ProgressImpl(void* , UInt64, UInt64) { - return SZ_OK; - } - size_t src_pos_; - const std::vector<uint8_t>* src_; - std::vector<uint8_t>* dst_; - }; - XzCallbacks callbacks; - callbacks.Read = XzCallbacks::ReadImpl; - callbacks.Write = XzCallbacks::WriteImpl; - callbacks.Progress = XzCallbacks::ProgressImpl; - callbacks.src_pos_ = 0; - callbacks.src_ = src; - callbacks.dst_ = dst; - // Compress. - SRes res = Xz_Encode(&callbacks, &callbacks, &props, &callbacks); - CHECK_EQ(res, SZ_OK); -} - -template <typename ElfTypes> -static std::vector<uint8_t> MakeMiniDebugInfoInternal( - InstructionSet isa, - const InstructionSetFeatures* features, - typename ElfTypes::Addr text_section_address, - size_t text_section_size, - typename ElfTypes::Addr dex_section_address, - size_t dex_section_size, - const DebugInfo& debug_info) { - std::vector<uint8_t> buffer; - buffer.reserve(KB); - linker::VectorOutputStream out("Mini-debug-info ELF file", &buffer); - std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder( - new linker::ElfBuilder<ElfTypes>(isa, features, &out)); - builder->Start(false /* write_program_headers */); - // Mirror ELF sections as NOBITS since the added symbols will reference them. 
- builder->GetText()->AllocateVirtualMemory(text_section_address, text_section_size); - if (dex_section_size != 0) { - builder->GetDex()->AllocateVirtualMemory(dex_section_address, dex_section_size); - } - WriteDebugSymbols(builder.get(), true /* mini-debug-info */, debug_info); - WriteCFISection(builder.get(), - debug_info.compiled_methods, - dwarf::DW_DEBUG_FRAME_FORMAT, - false /* write_oat_paches */); - builder->End(); - CHECK(builder->Good()); - std::vector<uint8_t> compressed_buffer; - compressed_buffer.reserve(buffer.size() / 4); - XzCompress(&buffer, &compressed_buffer); - return compressed_buffer; -} - -} // namespace debug -} // namespace art - -#endif // ART_COMPILER_DEBUG_ELF_GNU_DEBUGDATA_WRITER_H_ - diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h index 7a8e29191a..2ed3a4b1a4 100644 --- a/compiler/debug/elf_symtab_writer.h +++ b/compiler/debug/elf_symtab_writer.h @@ -25,7 +25,7 @@ #include "debug/method_debug_info.h" #include "dex/dex_file-inl.h" #include "dex/code_item_accessors.h" -#include "linker/elf_builder.h" +#include "elf/elf_builder.h" namespace art { namespace debug { @@ -45,7 +45,7 @@ constexpr bool kGenerateArmMappingSymbol = true; constexpr const char* kDexFileSymbolName = "$dexfile"; template <typename ElfTypes> -static void WriteDebugSymbols(linker::ElfBuilder<ElfTypes>* builder, +static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, bool mini_debug_info, const DebugInfo& debug_info) { uint64_t mapping_symbol_address = std::numeric_limits<uint64_t>::max(); diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h index d0b03ec441..152db6eaf0 100644 --- a/compiler/debug/method_debug_info.h +++ b/compiler/debug/method_debug_info.h @@ -32,7 +32,7 @@ struct MethodDebugInfo { size_t class_def_index; uint32_t dex_method_index; uint32_t access_flags; - const DexFile::CodeItem* code_item; + const dex::CodeItem* code_item; InstructionSet isa; bool deduped; bool is_native_debuggable; @@ -41,7 +41,7 @@ struct MethodDebugInfo { uint64_t code_address; uint32_t code_size; uint32_t frame_size_in_bytes; - const void* code_info; + const uint8_t* code_info; ArrayRef<const uint8_t> cfi; }; diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc deleted file mode 100644 index be8641fd86..0000000000 --- a/compiler/dex/dex_to_dex_compiler.cc +++ /dev/null @@ -1,688 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "dex_to_dex_compiler.h" - -#include <android-base/logging.h> -#include <android-base/stringprintf.h> - -#include "art_field-inl.h" -#include "art_method-inl.h" -#include "base/logging.h" // For VLOG -#include "base/macros.h" -#include "base/mutex.h" -#include "compiled_method.h" -#include "dex/bytecode_utils.h" -#include "dex/dex_file-inl.h" -#include "dex/dex_instruction-inl.h" -#include "dex_to_dex_decompiler.h" -#include "driver/compiler_driver.h" -#include "driver/dex_compilation_unit.h" -#include "mirror/dex_cache.h" -#include "quicken_info.h" -#include "thread-current-inl.h" - -namespace art { -namespace optimizer { - -using android::base::StringPrintf; - -// Controls quickening activation. -const bool kEnableQuickening = true; -// Control check-cast elision. -const bool kEnableCheckCastEllision = true; - -// Holds the state for compiling a single method. -struct DexToDexCompiler::CompilationState { - struct QuickenedInfo { - QuickenedInfo(uint32_t pc, uint16_t index) : dex_pc(pc), dex_member_index(index) {} - - uint32_t dex_pc; - uint16_t dex_member_index; - }; - - CompilationState(DexToDexCompiler* compiler, - const DexCompilationUnit& unit, - const CompilationLevel compilation_level, - const std::vector<uint8_t>* quicken_data); - - const std::vector<QuickenedInfo>& GetQuickenedInfo() const { - return quickened_info_; - } - - // Returns the quickening info, or an empty array if it was not quickened. - // If already_quickened is true, then don't change anything but still return what the quicken - // data would have been. - std::vector<uint8_t> Compile(); - - const DexFile& GetDexFile() const; - - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where - // a barrier is required. - void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); - - // Compiles a CHECK-CAST into 2 NOP instructions if it is known to be safe. In - // this case, returns the second NOP instruction pointer. Otherwise, returns - // the given "inst". - Instruction* CompileCheckCast(Instruction* inst, uint32_t dex_pc); - - // Compiles a field access into a quick field access. - // The field index is replaced by an offset within an Object where we can read - // from / write to this field. Therefore, this does not involve any resolution - // at runtime. - // Since the field index is encoded with 16 bits, we can replace it only if the - // field offset can be encoded with 16 bits too. - void CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_put); - - // Compiles a virtual method invocation into a quick virtual method invocation. - // The method index is replaced by the vtable index where the corresponding - // executable can be found. Therefore, this does not involve any resolution - // at runtime. - // Since the method index is encoded with 16 bits, we can replace it only if the - // vtable index can be encoded with 16 bits too. - void CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, - Instruction::Code new_opcode, bool is_range); - - // Return the next index. - uint16_t NextIndex(); - - // Returns the dequickened index if an instruction is quickened, otherwise return index. - uint16_t GetIndexForInstruction(const Instruction* inst, uint32_t index); - - DexToDexCompiler* const compiler_; - CompilerDriver& driver_; - const DexCompilationUnit& unit_; - const CompilationLevel compilation_level_; - - // Filled by the compiler when quickening, in order to encode that information - // in the .oat file. 
The runtime will use that information to get to the original - // opcodes. - std::vector<QuickenedInfo> quickened_info_; - - // True if we optimized a return void to a return void no barrier. - bool optimized_return_void_ = false; - - // If the code item was already quickened previously. - const bool already_quickened_; - const QuickenInfoTable existing_quicken_info_; - uint32_t quicken_index_ = 0u; - - DISALLOW_COPY_AND_ASSIGN(CompilationState); -}; - -DexToDexCompiler::DexToDexCompiler(CompilerDriver* driver) - : driver_(driver), - lock_("Quicken lock", kDexToDexCompilerLock) { - DCHECK(driver != nullptr); -} - -void DexToDexCompiler::ClearState() { - MutexLock lock(Thread::Current(), lock_); - active_dex_file_ = nullptr; - active_bit_vector_ = nullptr; - should_quicken_.clear(); - shared_code_item_quicken_info_.clear(); -} - -size_t DexToDexCompiler::NumCodeItemsToQuicken(Thread* self) const { - MutexLock lock(self, lock_); - return num_code_items_; -} - -BitVector* DexToDexCompiler::GetOrAddBitVectorForDex(const DexFile* dex_file) { - if (active_dex_file_ != dex_file) { - active_dex_file_ = dex_file; - auto inserted = should_quicken_.emplace(dex_file, - BitVector(dex_file->NumMethodIds(), - /*expandable*/ false, - Allocator::GetMallocAllocator())); - active_bit_vector_ = &inserted.first->second; - } - return active_bit_vector_; -} - -void DexToDexCompiler::MarkForCompilation(Thread* self, - const MethodReference& method_ref) { - MutexLock lock(self, lock_); - BitVector* const bitmap = GetOrAddBitVectorForDex(method_ref.dex_file); - DCHECK(bitmap != nullptr); - DCHECK(!bitmap->IsBitSet(method_ref.index)); - bitmap->SetBit(method_ref.index); - ++num_code_items_; -} - -DexToDexCompiler::CompilationState::CompilationState(DexToDexCompiler* compiler, - const DexCompilationUnit& unit, - const CompilationLevel compilation_level, - const std::vector<uint8_t>* quicken_data) - : compiler_(compiler), - driver_(*compiler->GetDriver()), - unit_(unit), - compilation_level_(compilation_level), - already_quickened_(quicken_data != nullptr), - existing_quicken_info_(already_quickened_ - ? ArrayRef<const uint8_t>(*quicken_data) : ArrayRef<const uint8_t>()) {} - -uint16_t DexToDexCompiler::CompilationState::NextIndex() { - DCHECK(already_quickened_); - if (kIsDebugBuild && quicken_index_ >= existing_quicken_info_.NumIndices()) { - for (const DexInstructionPcPair& pair : unit_.GetCodeItemAccessor()) { - LOG(ERROR) << pair->DumpString(nullptr); - } - LOG(FATAL) << "Mismatched number of quicken slots."; - } - const uint16_t ret = existing_quicken_info_.GetData(quicken_index_); - quicken_index_++; - return ret; -} - -uint16_t DexToDexCompiler::CompilationState::GetIndexForInstruction(const Instruction* inst, - uint32_t index) { - if (UNLIKELY(already_quickened_)) { - return inst->IsQuickened() ? NextIndex() : index; - } - DCHECK(!inst->IsQuickened()); - return index; -} - -bool DexToDexCompiler::ShouldCompileMethod(const MethodReference& ref) { - // TODO: It's probably safe to avoid the lock here if the active_dex_file_ matches since we only - // only call ShouldCompileMethod on one dex at a time. 
- MutexLock lock(Thread::Current(), lock_); - return GetOrAddBitVectorForDex(ref.dex_file)->IsBitSet(ref.index); -} - -std::vector<uint8_t> DexToDexCompiler::CompilationState::Compile() { - DCHECK_EQ(compilation_level_, CompilationLevel::kOptimize); - const CodeItemDataAccessor& instructions = unit_.GetCodeItemAccessor(); - for (DexInstructionIterator it = instructions.begin(); it != instructions.end(); ++it) { - const uint32_t dex_pc = it.DexPc(); - Instruction* inst = const_cast<Instruction*>(&it.Inst()); - - if (!already_quickened_) { - DCHECK(!inst->IsQuickened()); - } - - switch (inst->Opcode()) { - case Instruction::RETURN_VOID: - CompileReturnVoid(inst, dex_pc); - break; - - case Instruction::CHECK_CAST: - inst = CompileCheckCast(inst, dex_pc); - if (inst->Opcode() == Instruction::NOP) { - // We turned the CHECK_CAST into two NOPs, avoid visiting the second NOP twice since this - // would add 2 quickening info entries. - ++it; - } - break; - - case Instruction::IGET: - case Instruction::IGET_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_QUICK, false); - break; - - case Instruction::IGET_WIDE: - case Instruction::IGET_WIDE_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_WIDE_QUICK, false); - break; - - case Instruction::IGET_OBJECT: - case Instruction::IGET_OBJECT_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_OBJECT_QUICK, false); - break; - - case Instruction::IGET_BOOLEAN: - case Instruction::IGET_BOOLEAN_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BOOLEAN_QUICK, false); - break; - - case Instruction::IGET_BYTE: - case Instruction::IGET_BYTE_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_BYTE_QUICK, false); - break; - - case Instruction::IGET_CHAR: - case Instruction::IGET_CHAR_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_CHAR_QUICK, false); - break; - - case Instruction::IGET_SHORT: - case Instruction::IGET_SHORT_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IGET_SHORT_QUICK, false); - break; - - case Instruction::IPUT: - case Instruction::IPUT_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_QUICK, true); - break; - - case Instruction::IPUT_BOOLEAN: - case Instruction::IPUT_BOOLEAN_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BOOLEAN_QUICK, true); - break; - - case Instruction::IPUT_BYTE: - case Instruction::IPUT_BYTE_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_BYTE_QUICK, true); - break; - - case Instruction::IPUT_CHAR: - case Instruction::IPUT_CHAR_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_CHAR_QUICK, true); - break; - - case Instruction::IPUT_SHORT: - case Instruction::IPUT_SHORT_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_SHORT_QUICK, true); - break; - - case Instruction::IPUT_WIDE: - case Instruction::IPUT_WIDE_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_WIDE_QUICK, true); - break; - - case Instruction::IPUT_OBJECT: - case Instruction::IPUT_OBJECT_QUICK: - CompileInstanceFieldAccess(inst, dex_pc, Instruction::IPUT_OBJECT_QUICK, true); - break; - - case Instruction::INVOKE_VIRTUAL: - case Instruction::INVOKE_VIRTUAL_QUICK: - CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_QUICK, false); - break; - - case Instruction::INVOKE_VIRTUAL_RANGE: - case Instruction::INVOKE_VIRTUAL_RANGE_QUICK: - CompileInvokeVirtual(inst, dex_pc, Instruction::INVOKE_VIRTUAL_RANGE_QUICK, true); - break; 
- - case Instruction::NOP: - if (already_quickened_) { - const uint16_t reference_index = NextIndex(); - quickened_info_.push_back(QuickenedInfo(dex_pc, reference_index)); - if (reference_index == DexFile::kDexNoIndex16) { - // This means it was a normal nop and not a check-cast. - break; - } - const uint16_t type_index = NextIndex(); - if (driver_.IsSafeCast(&unit_, dex_pc)) { - quickened_info_.push_back(QuickenedInfo(dex_pc, type_index)); - } - ++it; - } else { - // We need to differentiate between check cast inserted NOP and normal NOP, put an invalid - // index in the map for normal nops. This should be rare in real code. - quickened_info_.push_back(QuickenedInfo(dex_pc, DexFile::kDexNoIndex16)); - } - break; - - default: - // Nothing to do. - break; - } - } - - if (already_quickened_) { - DCHECK_EQ(quicken_index_, existing_quicken_info_.NumIndices()); - } - - // Even if there are no indicies, generate an empty quicken info so that we know the method was - // quickened. - - std::vector<uint8_t> quicken_data; - if (kIsDebugBuild) { - // Double check that the counts line up with the size of the quicken info. - size_t quicken_count = 0; - for (const DexInstructionPcPair& pair : instructions) { - if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { - ++quicken_count; - } - } - CHECK_EQ(quicken_count, GetQuickenedInfo().size()); - } - - QuickenInfoTable::Builder builder(&quicken_data, GetQuickenedInfo().size()); - // Length is encoded by the constructor. - for (const CompilationState::QuickenedInfo& info : GetQuickenedInfo()) { - // Dex pc is not serialized, only used for checking the instructions. Since we access the - // array based on the index of the quickened instruction, the indexes must line up perfectly. - // The reader side uses the NeedsIndexForInstruction function too. - const Instruction& inst = instructions.InstructionAt(info.dex_pc); - CHECK(QuickenInfoTable::NeedsIndexForInstruction(&inst)) << inst.Opcode(); - builder.AddIndex(info.dex_member_index); - } - DCHECK(!quicken_data.empty()); - return quicken_data; -} - -void DexToDexCompiler::CompilationState::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { - DCHECK_EQ(inst->Opcode(), Instruction::RETURN_VOID); - if (unit_.IsConstructor()) { - // Are we compiling a non clinit constructor which needs a barrier ? - if (!unit_.IsStatic() && - driver_.RequiresConstructorBarrier(Thread::Current(), unit_.GetDexFile(), - unit_.GetClassDefIndex())) { - return; - } - } - // Replace RETURN_VOID by RETURN_VOID_NO_BARRIER. - VLOG(compiler) << "Replacing " << Instruction::Name(inst->Opcode()) - << " by " << Instruction::Name(Instruction::RETURN_VOID_NO_BARRIER) - << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " - << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER); - optimized_return_void_ = true; -} - -Instruction* DexToDexCompiler::CompilationState::CompileCheckCast(Instruction* inst, - uint32_t dex_pc) { - if (!kEnableCheckCastEllision) { - return inst; - } - if (!driver_.IsSafeCast(&unit_, dex_pc)) { - return inst; - } - // Ok, this is a safe cast. Since the "check-cast" instruction size is 2 code - // units and a "nop" instruction size is 1 code unit, we need to replace it by - // 2 consecutive NOP instructions. - // Because the caller loops over instructions by calling Instruction::Next onto - // the current instruction, we need to return the 2nd NOP instruction. 
Indeed, - // its next instruction is the former check-cast's next instruction. - VLOG(compiler) << "Removing " << Instruction::Name(inst->Opcode()) - << " by replacing it with 2 NOPs at dex pc " - << StringPrintf("0x%x", dex_pc) << " in method " - << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - if (!already_quickened_) { - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegA_21c())); - quickened_info_.push_back(QuickenedInfo(dex_pc, inst->VRegB_21c())); - - // We are modifying 4 consecutive bytes. - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. - // Get to next instruction which is the second half of check-cast and replace - // it by a NOP. - inst = const_cast<Instruction*>(inst->Next()); - inst->SetOpcode(Instruction::NOP); - inst->SetVRegA_10x(0u); // keep compliant with verifier. - } - return inst; -} - -void DexToDexCompiler::CompilationState::CompileInstanceFieldAccess(Instruction* inst, - uint32_t dex_pc, - Instruction::Code new_opcode, - bool is_put) { - if (!kEnableQuickening) { - return; - } - uint32_t field_idx = GetIndexForInstruction(inst, inst->VRegC_22c()); - MemberOffset field_offset(0u); - bool is_volatile; - bool fast_path = driver_.ComputeInstanceFieldInfo(field_idx, &unit_, is_put, - &field_offset, &is_volatile); - if (fast_path && !is_volatile && IsUint<16>(field_offset.Int32Value())) { - VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) - << " to " << Instruction::Name(new_opcode) - << " by replacing field index " << field_idx - << " by field offset " << field_offset.Int32Value() - << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " - << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - if (!already_quickened_) { - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace field index by field offset. - inst->SetVRegC_22c(static_cast<uint16_t>(field_offset.Int32Value())); - } - quickened_info_.push_back(QuickenedInfo(dex_pc, field_idx)); - } -} - -const DexFile& DexToDexCompiler::CompilationState::GetDexFile() const { - return *unit_.GetDexFile(); -} - -void DexToDexCompiler::CompilationState::CompileInvokeVirtual(Instruction* inst, - uint32_t dex_pc, - Instruction::Code new_opcode, - bool is_range) { - if (!kEnableQuickening) { - return; - } - uint32_t method_idx = GetIndexForInstruction(inst, - is_range ? inst->VRegB_3rc() : inst->VRegB_35c()); - ScopedObjectAccess soa(Thread::Current()); - - ClassLinker* class_linker = unit_.GetClassLinker(); - ArtMethod* resolved_method = - class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( - method_idx, - unit_.GetDexCache(), - unit_.GetClassLoader(), - /* referrer */ nullptr, - kVirtual); - - if (UNLIKELY(resolved_method == nullptr)) { - // Clean up any exception left by type resolution. - soa.Self()->ClearException(); - return; - } - - uint32_t vtable_idx = resolved_method->GetMethodIndex(); - DCHECK(IsUint<16>(vtable_idx)); - VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode()) - << "(" << GetDexFile().PrettyMethod(method_idx, true) << ")" - << " to " << Instruction::Name(new_opcode) - << " by replacing method index " << method_idx - << " by vtable index " << vtable_idx - << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method " - << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true); - if (!already_quickened_) { - // We are modifying 4 consecutive bytes. - inst->SetOpcode(new_opcode); - // Replace method index by vtable index. 
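CompileInvokeVirtual ends by writing the resolved vtable index back into the invoke instruction, so later executions can dispatch without resolving the method id again. The toy model below (hypothetical Klass and Method types, not ART's runtime structures) contrasts the two dispatch forms to show what that rewrite buys.

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

struct Method {
  std::string name;   // kept only for illustration
  void (*entry)();    // code entry point
};

struct Klass {
  std::vector<Method> vtable;                         // slot -> method
  std::unordered_map<uint32_t, uint16_t> id_to_slot;  // method index -> vtable slot
};

// Unquickened form: resolve the symbolic method index first, then dispatch.
void InvokeByMethodIndex(const Klass& k, uint32_t method_idx) {
  uint16_t slot = k.id_to_slot.at(method_idx);  // resolution step on every call
  k.vtable[slot].entry();
}

// Quickened form: the rewritten instruction already carries the slot.
void InvokeByVtableSlot(const Klass& k, uint16_t slot) {
  k.vtable[slot].entry();  // no resolution needed
}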
- if (is_range) { - inst->SetVRegB_3rc(static_cast<uint16_t>(vtable_idx)); - } else { - inst->SetVRegB_35c(static_cast<uint16_t>(vtable_idx)); - } - } - quickened_info_.push_back(QuickenedInfo(dex_pc, method_idx)); -} - -CompiledMethod* DexToDexCompiler::CompileMethod( - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type ATTRIBUTE_UNUSED, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - CompilationLevel compilation_level) { - if (compilation_level == CompilationLevel::kDontDexToDexCompile) { - return nullptr; - } - - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - art::DexCompilationUnit unit( - class_loader, - class_linker, - dex_file, - code_item, - class_def_idx, - method_idx, - access_flags, - driver_->GetVerifiedMethod(&dex_file, method_idx), - hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file))); - - std::vector<uint8_t> quicken_data; - // If the code item is shared with multiple different method ids, make sure that we quicken only - // once and verify that all the dequicken maps match. - if (UNLIKELY(shared_code_items_.find(code_item) != shared_code_items_.end())) { - // Avoid quickening the shared code items for now because the existing conflict detection logic - // does not currently handle cases where the code item is quickened in one place but - // compiled in another. - static constexpr bool kAvoidQuickeningSharedCodeItems = true; - if (kAvoidQuickeningSharedCodeItems) { - return nullptr; - } - // For shared code items, use a lock to prevent races. - MutexLock mu(soa.Self(), lock_); - auto existing = shared_code_item_quicken_info_.find(code_item); - QuickenState* existing_data = nullptr; - std::vector<uint8_t>* existing_quicken_data = nullptr; - if (existing != shared_code_item_quicken_info_.end()) { - existing_data = &existing->second; - if (existing_data->conflict_) { - return nullptr; - } - existing_quicken_data = &existing_data->quicken_data_; - } - bool optimized_return_void; - { - CompilationState state(this, unit, compilation_level, existing_quicken_data); - quicken_data = state.Compile(); - optimized_return_void = state.optimized_return_void_; - } - - // Already quickened, check that the data matches what was previously seen. - MethodReference method_ref(&dex_file, method_idx); - if (existing_data != nullptr) { - if (*existing_quicken_data != quicken_data || - existing_data->optimized_return_void_ != optimized_return_void) { - VLOG(compiler) << "Quicken data mismatch, for method " - << dex_file.PrettyMethod(method_idx); - // Mark the method as a conflict to never attempt to quicken it in the future. - existing_data->conflict_ = true; - } - existing_data->methods_.push_back(method_ref); - } else { - QuickenState new_state; - new_state.methods_.push_back(method_ref); - new_state.quicken_data_ = quicken_data; - new_state.optimized_return_void_ = optimized_return_void; - bool inserted = shared_code_item_quicken_info_.emplace(code_item, new_state).second; - CHECK(inserted) << "Failed to insert " << dex_file.PrettyMethod(method_idx); - } - - // Easy sanity check is to check that the existing stuff matches by re-quickening using the - // newly produced quicken data. - // Note that this needs to be behind the lock for this case since we may unquicken in another - // thread. 
- if (kIsDebugBuild) { - CompilationState state2(this, unit, compilation_level, &quicken_data); - std::vector<uint8_t> new_data = state2.Compile(); - CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; - } - } else { - CompilationState state(this, unit, compilation_level, /*quicken_data*/ nullptr); - quicken_data = state.Compile(); - - // Easy sanity check is to check that the existing stuff matches by re-quickening using the - // newly produced quicken data. - if (kIsDebugBuild) { - CompilationState state2(this, unit, compilation_level, &quicken_data); - std::vector<uint8_t> new_data = state2.Compile(); - CHECK(new_data == quicken_data) << "Mismatch producing new quicken data"; - } - } - - if (quicken_data.empty()) { - return nullptr; - } - - // Create a `CompiledMethod`, with the quickened information in the vmap table. - InstructionSet instruction_set = driver_->GetInstructionSet(); - if (instruction_set == InstructionSet::kThumb2) { - // Don't use the thumb2 instruction set to avoid the one off code delta. - instruction_set = InstructionSet::kArm; - } - CompiledMethod* ret = CompiledMethod::SwapAllocCompiledMethod( - driver_, - instruction_set, - ArrayRef<const uint8_t>(), // no code - 0, - 0, - 0, - ArrayRef<const uint8_t>(), // method_info - ArrayRef<const uint8_t>(quicken_data), // vmap_table - ArrayRef<const uint8_t>(), // cfi data - ArrayRef<const linker::LinkerPatch>()); - DCHECK(ret != nullptr); - return ret; -} - -void DexToDexCompiler::SetDexFiles(const std::vector<const DexFile*>& dex_files) { - // Record what code items are already seen to detect when multiple methods have the same code - // item. - std::unordered_set<const DexFile::CodeItem*> seen_code_items; - for (const DexFile* dex_file : dex_files) { - for (size_t i = 0; i < dex_file->NumClassDefs(); ++i) { - const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); - const uint8_t* class_data = dex_file->GetClassData(class_def); - if (class_data == nullptr) { - continue; - } - ClassDataItemIterator it(*dex_file, class_data); - it.SkipAllFields(); - for (; it.HasNextMethod(); it.Next()) { - const DexFile::CodeItem* code_item = it.GetMethodCodeItem(); - // Detect the shared code items. - if (!seen_code_items.insert(code_item).second) { - shared_code_items_.insert(code_item); - } - } - } - } - VLOG(compiler) << "Shared code items " << shared_code_items_.size(); -} - -void DexToDexCompiler::UnquickenConflictingMethods() { - MutexLock mu(Thread::Current(), lock_); - size_t unquicken_count = 0; - for (const auto& pair : shared_code_item_quicken_info_) { - const DexFile::CodeItem* code_item = pair.first; - const QuickenState& state = pair.second; - CHECK_GE(state.methods_.size(), 1u); - if (state.conflict_) { - // Unquicken using the existing quicken data. - // TODO: Do we really need to pass a dex file in? - optimizer::ArtDecompileDEX(*state.methods_[0].dex_file, - *code_item, - ArrayRef<const uint8_t>(state.quicken_data_), - /* decompile_return_instruction*/ true); - ++unquicken_count; - // Go clear the vmaps for all the methods that were already quickened to avoid writing them - // out during oat writing. - for (const MethodReference& ref : state.methods_) { - CompiledMethod* method = driver_->RemoveCompiledMethod(ref); - if (method != nullptr) { - // There is up to one compiled method for each method ref. Releasing it leaves the - // deduped data intact, this means its safe to do even when other threads might be - // compiling. 
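The kIsDebugBuild blocks above re-run CompilationState::Compile() with the quicken data that was just produced and CHECK that the second pass reproduces it exactly. That is a generic determinism check; a minimal standalone version (hypothetical Blob alias and compile callback, not the ART classes) looks like this:

#include <cassert>
#include <cstdint>
#include <functional>
#include <vector>

using Blob = std::vector<uint8_t>;

// Runs `compile` twice and verifies the output is reproducible. This mirrors
// the debug-only re-quickening check: a mismatch means the pass depends on
// state it should not depend on.
Blob CompileAndVerifyDeterministic(const std::function<Blob(const Blob*)>& compile) {
  Blob first = compile(/*existing_data=*/nullptr);
  Blob second = compile(&first);  // second pass sees the first pass's output
  assert(second == first && "Mismatch producing new quicken data");
  return first;
}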
- CompiledMethod::ReleaseSwapAllocatedCompiledMethod(driver_, method); - } - } - } - } -} - -} // namespace optimizer - -} // namespace art diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h deleted file mode 100644 index 7536c3126a..0000000000 --- a/compiler/dex/dex_to_dex_compiler.h +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ -#define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ - -#include <set> -#include <unordered_map> -#include <unordered_set> - -#include "base/bit_vector.h" -#include "dex/dex_file.h" -#include "dex/invoke_type.h" -#include "dex/method_reference.h" -#include "handle.h" -#include "quicken_info.h" - -namespace art { - -class CompiledMethod; -class CompilerDriver; -class DexCompilationUnit; - -namespace mirror { -class ClassLoader; -} // namespace mirror - -namespace optimizer { - -class DexToDexCompiler { - public: - enum class CompilationLevel { - kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kOptimize // Perform peep-hole optimizations. - }; - - explicit DexToDexCompiler(CompilerDriver* driver); - - CompiledMethod* CompileMethod(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - const CompilationLevel compilation_level) WARN_UNUSED; - - void MarkForCompilation(Thread* self, - const MethodReference& method_ref); - - void ClearState(); - - // Unquicken all methods that have conflicting quicken info. This is not done during the - // quickening process to avoid race conditions. - void UnquickenConflictingMethods(); - - CompilerDriver* GetDriver() { - return driver_; - } - - bool ShouldCompileMethod(const MethodReference& ref); - - // Return the number of code items to quicken. - size_t NumCodeItemsToQuicken(Thread* self) const; - - void SetDexFiles(const std::vector<const DexFile*>& dex_files); - - private: - // Holds the state for compiling a single method. - struct CompilationState; - - // Quicken state for a code item, may be referenced by multiple methods. - struct QuickenState { - std::vector<MethodReference> methods_; - std::vector<uint8_t> quicken_data_; - bool optimized_return_void_ = false; - bool conflict_ = false; - }; - - BitVector* GetOrAddBitVectorForDex(const DexFile* dex_file) REQUIRES(lock_); - - CompilerDriver* const driver_; - - // State for adding methods (should this be in its own class?). - const DexFile* active_dex_file_ = nullptr; - BitVector* active_bit_vector_ = nullptr; - - // Lock that guards duplicate code items and the bitmap. - mutable Mutex lock_; - // Record what method references are going to get quickened. - std::unordered_map<const DexFile*, BitVector> should_quicken_; - // Guarded by lock_ during writing, accessed without a lock during quickening. 
- // This is safe because no thread is adding to the shared code items during the quickening phase. - std::unordered_set<const DexFile::CodeItem*> shared_code_items_; - // Blacklisted code items are unquickened in UnquickenConflictingMethods. - std::unordered_map<const DexFile::CodeItem*, QuickenState> shared_code_item_quicken_info_ - GUARDED_BY(lock_); - // Number of added code items. - size_t num_code_items_ GUARDED_BY(lock_) = 0u; -}; - -std::ostream& operator<<(std::ostream& os, const DexToDexCompiler::CompilationLevel& rhs); - -} // namespace optimizer - -} // namespace art - -#endif // ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc deleted file mode 100644 index 19b190093f..0000000000 --- a/compiler/dex/dex_to_dex_decompiler_test.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dex_to_dex_decompiler.h" - -#include "class_linker.h" -#include "common_compiler_test.h" -#include "compiled_method-inl.h" -#include "compiler_callbacks.h" -#include "dex/dex_file.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" -#include "handle_scope-inl.h" -#include "mirror/class_loader.h" -#include "runtime.h" -#include "scoped_thread_state_change-inl.h" -#include "thread.h" -#include "verifier/method_verifier-inl.h" -#include "verifier/verifier_deps.h" - -namespace art { - -class DexToDexDecompilerTest : public CommonCompilerTest { - public: - void CompileAll(jobject class_loader) REQUIRES(!Locks::mutator_lock_) { - TimingLogger timings("CompilerDriverTest::CompileAll", false, false); - TimingLogger::ScopedTiming t(__FUNCTION__, &timings); - compiler_options_->boot_image_ = false; - compiler_options_->SetCompilerFilter(CompilerFilter::kQuicken); - // Create the main VerifierDeps, here instead of in the compiler since we want to aggregate - // the results for all the dex files, not just the results for the current dex file. - Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps( - new verifier::VerifierDeps(GetDexFiles(class_loader))); - compiler_driver_->SetDexFilesForOatFile(GetDexFiles(class_loader)); - compiler_driver_->CompileAll(class_loader, GetDexFiles(class_loader), &timings); - } - - void RunTest(const char* dex_name) { - Thread* self = Thread::Current(); - // First load the original dex file. - jobject original_class_loader; - { - ScopedObjectAccess soa(self); - original_class_loader = LoadDex(dex_name); - } - const DexFile* original_dex_file = GetDexFiles(original_class_loader)[0]; - - // Load the dex file again and make it writable to quicken them. 
- jobject class_loader; - const DexFile* updated_dex_file = nullptr; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex(dex_name); - updated_dex_file = GetDexFiles(class_loader)[0]; - Runtime::Current()->GetClassLinker()->RegisterDexFile( - *updated_dex_file, soa.Decode<mirror::ClassLoader>(class_loader).Ptr()); - } - // The dex files should be identical. - int cmp = memcmp(original_dex_file->Begin(), - updated_dex_file->Begin(), - updated_dex_file->Size()); - ASSERT_EQ(0, cmp); - - updated_dex_file->EnableWrite(); - CompileAll(class_loader); - // The dex files should be different after quickening. - cmp = memcmp(original_dex_file->Begin(), updated_dex_file->Begin(), updated_dex_file->Size()); - ASSERT_NE(0, cmp); - - // Unquicken the dex file. - for (uint32_t i = 0; i < updated_dex_file->NumClassDefs(); ++i) { - const DexFile::ClassDef& class_def = updated_dex_file->GetClassDef(i); - const uint8_t* class_data = updated_dex_file->GetClassData(class_def); - if (class_data == nullptr) { - continue; - } - ClassDataItemIterator it(*updated_dex_file, class_data); - it.SkipAllFields(); - - // Unquicken each method. - while (it.HasNextMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - CompiledMethod* compiled_method = - compiler_driver_->GetCompiledMethod(MethodReference(updated_dex_file, method_idx)); - ArrayRef<const uint8_t> table; - if (compiled_method != nullptr) { - table = compiled_method->GetVmapTable(); - } - optimizer::ArtDecompileDEX(*updated_dex_file, - *it.GetMethodCodeItem(), - table, - /* decompile_return_instruction */ true); - it.Next(); - } - DCHECK(!it.HasNext()); - } - - // Make sure after unquickening we go back to the same contents as the original dex file. - cmp = memcmp(original_dex_file->Begin(), updated_dex_file->Begin(), updated_dex_file->Size()); - ASSERT_EQ(0, cmp); - } -}; - -TEST_F(DexToDexDecompilerTest, VerifierDeps) { - RunTest("VerifierDeps"); -} - -TEST_F(DexToDexDecompilerTest, DexToDexDecompiler) { - RunTest("DexToDexDecompiler"); -} - -} // namespace art diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc index dc044c1210..b0f025d092 100644 --- a/compiler/dex/inline_method_analyser.cc +++ b/compiler/dex/inline_method_analyser.cc @@ -41,7 +41,7 @@ namespace { // anonymous namespace class Matcher { public: // Match function type. 
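The deleted DexToDexDecompilerTest above exercises a full round trip: quicken the dex file, check that its bytes changed, unquicken it with ArtDecompileDEX, and check that the original bytes come back. Reduced to its shape (Quicken and Unquicken are stand-in function pointers, not the real passes), the property being tested is:

#include <cassert>
#include <cstdint>
#include <vector>

using Image = std::vector<uint8_t>;

// Round-trip property: unquickening must restore the exact original bytes.
void CheckQuickenRoundTrip(const Image& original,
                           Image (*Quicken)(const Image&),
                           Image (*Unquicken)(const Image&)) {
  Image quickened = Quicken(original);
  assert(quickened != original);    // quickening must change something
  Image restored = Unquicken(quickened);
  assert(restored == original);     // and must be fully reversible
}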
- typedef bool MatchFn(Matcher* matcher); + using MatchFn = bool(Matcher*); template <size_t size> static bool Match(const CodeItemDataAccessor* code_item, MatchFn* const (&pattern)[size]); @@ -216,7 +216,7 @@ bool RecordConstructorIPut(ArtMethod* method, DCHECK(IsInstructionIPut(new_iput->Opcode())); uint32_t field_index = new_iput->VRegC_22c(); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ArtField* field = class_linker->LookupResolvedField(field_index, method, /* is_static */ false); + ArtField* field = class_linker->LookupResolvedField(field_index, method, /* is_static= */ false); if (UNLIKELY(field == nullptr)) { return false; } @@ -228,7 +228,7 @@ bool RecordConstructorIPut(ArtMethod* method, } ArtField* f = class_linker->LookupResolvedField(iputs[old_pos].field_index, method, - /* is_static */ false); + /* is_static= */ false); DCHECK(f != nullptr); if (f == field) { auto back_it = std::copy(iputs + old_pos + 1, iputs + arraysize(iputs), iputs + old_pos); @@ -511,7 +511,7 @@ bool InlineMethodAnalyser::AnalyseMethodCode(const CodeItemDataAccessor* code_it } bool InlineMethodAnalyser::IsSyntheticAccessor(MethodReference ref) { - const DexFile::MethodId& method_id = ref.dex_file->GetMethodId(ref.index); + const dex::MethodId& method_id = ref.dex_file->GetMethodId(ref.index); const char* method_name = ref.dex_file->GetMethodName(method_id); // javac names synthetic accessors "access$nnn", // jack names them "-getN", "-putN", "-wrapN". @@ -713,7 +713,7 @@ bool InlineMethodAnalyser::ComputeSpecialAccessorInfo(ArtMethod* method, } ObjPtr<mirror::DexCache> dex_cache = method->GetDexCache(); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ArtField* field = class_linker->LookupResolvedField(field_idx, method, /* is_static */ false); + ArtField* field = class_linker->LookupResolvedField(field_idx, method, /* is_static= */ false); if (field == nullptr || field->IsStatic()) { return false; } @@ -724,7 +724,8 @@ bool InlineMethodAnalyser::ComputeSpecialAccessorInfo(ArtMethod* method, return false; } DCHECK_GE(field->GetOffset().Int32Value(), 0); - // Do not interleave function calls with bit field writes to placate valgrind. Bug: 27552451. + // Historical note: We made sure not to interleave function calls with bit field writes to + // placate Valgrind. Bug: 27552451. uint32_t field_offset = field->GetOffset().Uint32Value(); bool is_volatile = field->IsVolatile(); result->field_idx = field_idx; diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc deleted file mode 100644 index baf97a852e..0000000000 --- a/compiler/dex/quick_compiler_callbacks.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "quick_compiler_callbacks.h" - -#include "driver/compiler_driver.h" -#include "mirror/class-inl.h" -#include "mirror/object.h" -#include "obj_ptr-inl.h" -#include "thread-current-inl.h" -#include "verification_results.h" -#include "verifier/method_verifier-inl.h" - -namespace art { - -void QuickCompilerCallbacks::MethodVerified(verifier::MethodVerifier* verifier) { - if (verification_results_ != nullptr) { - verification_results_->ProcessVerifiedMethod(verifier); - } -} - -void QuickCompilerCallbacks::ClassRejected(ClassReference ref) { - if (verification_results_ != nullptr) { - verification_results_->AddRejectedClass(ref); - } -} - -ClassStatus QuickCompilerCallbacks::GetPreviousClassState(ClassReference ref) { - // If we don't have class unloading enabled in the compiler, we will never see class that were - // previously verified. Return false to avoid overhead from the lookup in the compiler driver. - if (!does_class_unloading_) { - return ClassStatus::kNotReady; - } - DCHECK(compiler_driver_ != nullptr); - // In the case of the quicken filter: avoiding verification of quickened instructions, which the - // verifier doesn't currently support. - // In the case of the verify filter, avoiding verifiying twice. - return compiler_driver_->GetClassStatus(ref); -} - -void QuickCompilerCallbacks::UpdateClassState(ClassReference ref, ClassStatus status) { - // Driver is null when bootstrapping the runtime. - if (compiler_driver_ != nullptr) { - compiler_driver_->RecordClassStatus(ref, status); - } -} - -bool QuickCompilerCallbacks::CanUseOatStatusForVerification(mirror::Class* klass) { - // No dex files: conservatively false. - if (dex_files_ == nullptr) { - return false; - } - - // If the class isn't from one of the dex files, accept oat file data. - const DexFile* dex_file = &klass->GetDexFile(); - return std::find(dex_files_->begin(), dex_files_->end(), dex_file) == dex_files_->end(); -} - -} // namespace art diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h deleted file mode 100644 index 8a07e9c12c..0000000000 --- a/compiler/dex/quick_compiler_callbacks.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_ -#define ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_ - -#include "compiler_callbacks.h" -#include "verifier/verifier_deps.h" - -namespace art { - -class CompilerDriver; -class DexFile; -class VerificationResults; - -class QuickCompilerCallbacks FINAL : public CompilerCallbacks { - public: - explicit QuickCompilerCallbacks(CompilerCallbacks::CallbackMode mode) - : CompilerCallbacks(mode), dex_files_(nullptr) {} - - ~QuickCompilerCallbacks() { } - - void MethodVerified(verifier::MethodVerifier* verifier) - REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE; - - void ClassRejected(ClassReference ref) OVERRIDE; - - // We are running in an environment where we can call patchoat safely so we should. - bool IsRelocationPossible() OVERRIDE { - return true; - } - - verifier::VerifierDeps* GetVerifierDeps() const OVERRIDE { - return verifier_deps_.get(); - } - - void SetVerifierDeps(verifier::VerifierDeps* deps) OVERRIDE { - verifier_deps_.reset(deps); - } - - void SetVerificationResults(VerificationResults* verification_results) { - verification_results_ = verification_results; - } - - ClassStatus GetPreviousClassState(ClassReference ref) OVERRIDE; - - void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver) - OVERRIDE { - does_class_unloading_ = does_class_unloading; - compiler_driver_ = compiler_driver; - DCHECK(!does_class_unloading || compiler_driver_ != nullptr); - } - - void UpdateClassState(ClassReference ref, ClassStatus state) OVERRIDE; - - bool CanUseOatStatusForVerification(mirror::Class* klass) OVERRIDE - REQUIRES_SHARED(Locks::mutator_lock_); - - void SetDexFiles(const std::vector<const DexFile*>* dex_files) { - dex_files_ = dex_files; - } - - private: - VerificationResults* verification_results_ = nullptr; - bool does_class_unloading_ = false; - CompilerDriver* compiler_driver_ = nullptr; - std::unique_ptr<verifier::VerifierDeps> verifier_deps_; - const std::vector<const DexFile*>* dex_files_; -}; - -} // namespace art - -#endif // ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_ diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc index 1e0b94de81..e7a3817c56 100644 --- a/compiler/dex/verification_results.cc +++ b/compiler/dex/verification_results.cc @@ -20,7 +20,6 @@ #include "base/mutex-inl.h" #include "base/stl_util.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "runtime.h" #include "thread-current-inl.h" @@ -79,7 +78,7 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method if (inserted) { // Successfully added, release the unique_ptr since we no longer have ownership. DCHECK_EQ(GetVerifiedMethod(ref), verified_method.get()); - verified_method.release(); + verified_method.release(); // NOLINT b/117926937 } else { // TODO: Investigate why are we doing the work again for this method and try to avoid it. 
LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod(); @@ -97,7 +96,7 @@ void VerificationResults::ProcessVerifiedMethod(verifier::MethodVerifier* method } } -const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) { +const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref) const { const VerifiedMethod* ret = nullptr; if (atomic_verified_methods_.Get(ref, &ret)) { return ret; @@ -112,12 +111,12 @@ void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) { // which have no verifier error, nor has methods that we know will throw // at runtime. std::unique_ptr<VerifiedMethod> verified_method = std::make_unique<VerifiedMethod>( - /* encountered_error_types */ 0, /* has_runtime_throw */ false); + /* encountered_error_types= */ 0, /* has_runtime_throw= */ false); if (atomic_verified_methods_.Insert(ref, /*expected*/ nullptr, verified_method.get()) == AtomicMap::InsertResult::kInsertResultSuccess) { - verified_method.release(); + verified_method.release(); // NOLINT b/117926937 } } @@ -129,13 +128,13 @@ void VerificationResults::AddRejectedClass(ClassReference ref) { DCHECK(IsClassRejected(ref)); } -bool VerificationResults::IsClassRejected(ClassReference ref) { +bool VerificationResults::IsClassRejected(ClassReference ref) const { ReaderMutexLock mu(Thread::Current(), rejected_classes_lock_); return (rejected_classes_.find(ref) != rejected_classes_.end()); } bool VerificationResults::IsCandidateForCompilation(MethodReference&, - const uint32_t access_flags) { + const uint32_t access_flags) const { if (!compiler_options_->IsAotCompilationEnabled()) { return false; } diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h index 56f00309c0..04c4fa65e6 100644 --- a/compiler/dex/verification_results.h +++ b/compiler/dex/verification_results.h @@ -51,13 +51,13 @@ class VerificationResults { void CreateVerifiedMethodFor(MethodReference ref) REQUIRES(!verified_methods_lock_); - const VerifiedMethod* GetVerifiedMethod(MethodReference ref) + const VerifiedMethod* GetVerifiedMethod(MethodReference ref) const REQUIRES(!verified_methods_lock_); void AddRejectedClass(ClassReference ref) REQUIRES(!rejected_classes_lock_); - bool IsClassRejected(ClassReference ref) REQUIRES(!rejected_classes_lock_); + bool IsClassRejected(ClassReference ref) const REQUIRES(!rejected_classes_lock_); - bool IsCandidateForCompilation(MethodReference& method_ref, const uint32_t access_flags); + bool IsCandidateForCompilation(MethodReference& method_ref, const uint32_t access_flags) const; // Add a dex file to enable using the atomic map. void AddDexFile(const DexFile* dex_file) REQUIRES(!verified_methods_lock_); @@ -74,10 +74,12 @@ class VerificationResults { // GetVerifiedMethod. AtomicMap atomic_verified_methods_; - ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + // TODO: External locking during CompilerDriver::PreCompile(), no locking during compilation. + mutable ReaderWriterMutex verified_methods_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; // Rejected classes. - ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; + // TODO: External locking during CompilerDriver::PreCompile(), no locking during compilation. 
+ mutable ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_); friend class verifier::VerifierDepsTest; diff --git a/compiler/dex/verified_method.cc b/compiler/dex/verified_method.cc index f2da3ffc2f..54f216a64d 100644 --- a/compiler/dex/verified_method.cc +++ b/compiler/dex/verified_method.cc @@ -82,7 +82,7 @@ void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifi method_verifier->ResolveCheckedClass(dex::TypeIndex(inst.VRegB_21c())); // Pass null for the method verifier to not record the VerifierDeps dependency // if the types are not assignable. - if (cast_type.IsStrictlyAssignableFrom(reg_type, /* method_verifier */ nullptr)) { + if (cast_type.IsStrictlyAssignableFrom(reg_type, /* verifier= */ nullptr)) { // The types are assignable, we record that dependency in the VerifierDeps so // that if this changes after OTA, we will re-verify again. // We check if reg_type has a class, as the verifier may have inferred it's @@ -92,8 +92,8 @@ void VerifiedMethod::GenerateSafeCastSet(verifier::MethodVerifier* method_verifi verifier::VerifierDeps::MaybeRecordAssignability(method_verifier->GetDexFile(), cast_type.GetClass(), reg_type.GetClass(), - /* strict */ true, - /* assignable */ true); + /* is_strict= */ true, + /* is_assignable= */ true); } if (safe_cast_set_ == nullptr) { safe_cast_set_.reset(new SafeCastSet()); diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index a26a985ff9..31062fb390 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -21,6 +21,7 @@ #include <android-base/logging.h> +#include "base/data_hash.h" #include "base/utils.h" #include "compiled_method.h" #include "linker/linker_patch.h" @@ -80,65 +81,7 @@ class CompiledMethodStorage::DedupeHashFunc { public: size_t operator()(const ArrayRef<ContentType>& array) const { - const uint8_t* data = reinterpret_cast<const uint8_t*>(array.data()); - // TODO: More reasonable assertion. 
- // static_assert(IsPowerOfTwo(sizeof(ContentType)), - // "ContentType is not power of two, don't know whether array layout is as assumed"); - uint32_t len = sizeof(ContentType) * array.size(); - if (kUseMurmur3Hash) { - static constexpr uint32_t c1 = 0xcc9e2d51; - static constexpr uint32_t c2 = 0x1b873593; - static constexpr uint32_t r1 = 15; - static constexpr uint32_t r2 = 13; - static constexpr uint32_t m = 5; - static constexpr uint32_t n = 0xe6546b64; - - uint32_t hash = 0; - - const int nblocks = len / 4; - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; - const unaligned_uint32_t *blocks = reinterpret_cast<const uint32_t*>(data); - int i; - for (i = 0; i < nblocks; i++) { - uint32_t k = blocks[i]; - k *= c1; - k = (k << r1) | (k >> (32 - r1)); - k *= c2; - - hash ^= k; - hash = ((hash << r2) | (hash >> (32 - r2))) * m + n; - } - - const uint8_t *tail = reinterpret_cast<const uint8_t*>(data + nblocks * 4); - uint32_t k1 = 0; - - switch (len & 3) { - case 3: - k1 ^= tail[2] << 16; - FALLTHROUGH_INTENDED; - case 2: - k1 ^= tail[1] << 8; - FALLTHROUGH_INTENDED; - case 1: - k1 ^= tail[0]; - - k1 *= c1; - k1 = (k1 << r1) | (k1 >> (32 - r1)); - k1 *= c2; - hash ^= k1; - } - - hash ^= len; - hash ^= (hash >> 16); - hash *= 0x85ebca6b; - hash ^= (hash >> 13); - hash *= 0xc2b2ae35; - hash ^= (hash >> 16); - - return hash; - } else { - return HashBytes(data, len); - } + return DataHash()(array); } }; @@ -161,17 +104,57 @@ class CompiledMethodStorage::LengthPrefixedArrayAlloc { SwapSpace* const swap_space_; }; +class CompiledMethodStorage::ThunkMapKey { + public: + ThunkMapKey(linker::LinkerPatch::Type type, uint32_t custom_value1, uint32_t custom_value2) + : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) {} + + bool operator<(const ThunkMapKey& other) const { + if (custom_value1_ != other.custom_value1_) { + return custom_value1_ < other.custom_value1_; + } + if (custom_value2_ != other.custom_value2_) { + return custom_value2_ < other.custom_value2_; + } + return type_ < other.type_; + } + + private: + linker::LinkerPatch::Type type_; + uint32_t custom_value1_; + uint32_t custom_value2_; +}; + +class CompiledMethodStorage::ThunkMapValue { + public: + ThunkMapValue(std::vector<uint8_t, SwapAllocator<uint8_t>>&& code, + const std::string& debug_name) + : code_(std::move(code)), debug_name_(debug_name) {} + + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(code_); + } + + const std::string& GetDebugName() const { + return debug_name_; + } + + private: + std::vector<uint8_t, SwapAllocator<uint8_t>> code_; + std::string debug_name_; +}; + CompiledMethodStorage::CompiledMethodStorage(int swap_fd) : swap_space_(swap_fd == -1 ? 
nullptr : new SwapSpace(swap_fd, 10 * MB)), dedupe_enabled_(true), dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), - dedupe_method_info_("dedupe method info", - LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_vmap_table_("dedupe vmap table", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())), dedupe_linker_patches_("dedupe cfi info", - LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())) { + LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())), + thunk_map_lock_("thunk_map_lock"), + thunk_map_(std::less<ThunkMapKey>(), SwapAllocator<ThunkMapValueType>(swap_space_.get())) { } CompiledMethodStorage::~CompiledMethodStorage() { @@ -200,15 +183,6 @@ void CompiledMethodStorage::ReleaseCode(const LengthPrefixedArray<uint8_t>* code ReleaseArrayIfNotDeduplicated(code); } -const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateMethodInfo( - const ArrayRef<const uint8_t>& src_map) { - return AllocateOrDeduplicateArray(src_map, &dedupe_method_info_); -} - -void CompiledMethodStorage::ReleaseMethodInfo(const LengthPrefixedArray<uint8_t>* method_info) { - ReleaseArrayIfNotDeduplicated(method_info); -} - const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable( const ArrayRef<const uint8_t>& table) { return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_); @@ -237,4 +211,55 @@ void CompiledMethodStorage::ReleaseLinkerPatches( ReleaseArrayIfNotDeduplicated(linker_patches); } +CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey( + const linker::LinkerPatch& linker_patch) { + uint32_t custom_value1 = 0u; + uint32_t custom_value2 = 0u; + switch (linker_patch.GetType()) { + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + custom_value1 = linker_patch.GetBakerCustomValue1(); + custom_value2 = linker_patch.GetBakerCustomValue2(); + break; + case linker::LinkerPatch::Type::kCallRelative: + // No custom values. + break; + default: + LOG(FATAL) << "Unexpected patch type: " << linker_patch.GetType(); + UNREACHABLE(); + } + return ThunkMapKey(linker_patch.GetType(), custom_value1, custom_value2); +} + +ArrayRef<const uint8_t> CompiledMethodStorage::GetThunkCode(const linker::LinkerPatch& linker_patch, + /*out*/ std::string* debug_name) { + ThunkMapKey key = GetThunkMapKey(linker_patch); + MutexLock lock(Thread::Current(), thunk_map_lock_); + auto it = thunk_map_.find(key); + if (it != thunk_map_.end()) { + const ThunkMapValue& value = it->second; + if (debug_name != nullptr) { + *debug_name = value.GetDebugName(); + } + return value.GetCode(); + } else { + if (debug_name != nullptr) { + *debug_name = std::string(); + } + return ArrayRef<const uint8_t>(); + } +} + +void CompiledMethodStorage::SetThunkCode(const linker::LinkerPatch& linker_patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name) { + DCHECK(!code.empty()); + ThunkMapKey key = GetThunkMapKey(linker_patch); + std::vector<uint8_t, SwapAllocator<uint8_t>> code_copy( + code.begin(), code.end(), SwapAllocator<uint8_t>(swap_space_.get())); + ThunkMapValue value(std::move(code_copy), debug_name); + MutexLock lock(Thread::Current(), thunk_map_lock_); + // Note: Multiple threads can try and compile the same thunk, so this may not create a new entry. 
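GetThunkCode and SetThunkCode above form a small compute-once cache: both sides take thunk_map_lock_, and when two threads race to compile the same thunk the later emplace simply finds the key already present. A minimal sketch of that pattern with standard containers (ThunkCache, string keys and std::mutex are stand-ins for the real ThunkMapKey, SwapAllocator-backed map and ART Mutex) is:

#include <cstdint>
#include <map>
#include <mutex>
#include <string>
#include <vector>

class ThunkCache {
 public:
  // Returns the cached code, or an empty vector if nothing was set yet.
  std::vector<uint8_t> Get(const std::string& key) const {
    std::lock_guard<std::mutex> guard(lock_);
    auto it = cache_.find(key);
    return it != cache_.end() ? it->second : std::vector<uint8_t>();
  }
  // A racing second producer loses quietly: the first inserted value wins.
  void Set(const std::string& key, std::vector<uint8_t> code) {
    std::lock_guard<std::mutex> guard(lock_);
    cache_.emplace(key, std::move(code));  // no-op if the key is already present
  }
 private:
  mutable std::mutex lock_;
  std::map<std::string, std::vector<uint8_t>> cache_;
};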
+ thunk_map_.emplace(key, std::move(value)); +} + } // namespace art diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h index 249f06c20f..a5a7691e12 100644 --- a/compiler/driver/compiled_method_storage.h +++ b/compiler/driver/compiled_method_storage.h @@ -18,6 +18,7 @@ #define ART_COMPILER_DRIVER_COMPILED_METHOD_STORAGE_H_ #include <iosfwd> +#include <map> #include <memory> #include "base/array_ref.h" @@ -53,10 +54,6 @@ class CompiledMethodStorage { const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code); void ReleaseCode(const LengthPrefixedArray<uint8_t>* code); - const LengthPrefixedArray<uint8_t>* DeduplicateMethodInfo( - const ArrayRef<const uint8_t>& method_info); - void ReleaseMethodInfo(const LengthPrefixedArray<uint8_t>* method_info); - const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table); void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table); @@ -67,7 +64,29 @@ class CompiledMethodStorage { const ArrayRef<const linker::LinkerPatch>& linker_patches); void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches); + // Returns the code associated with the given patch. + // If the code has not been set, returns empty data. + // If `debug_name` is not null, stores the associated debug name in `*debug_name`. + ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& linker_patch, + /*out*/ std::string* debug_name = nullptr); + + // Sets the code and debug name associated with the given patch. + void SetThunkCode(const linker::LinkerPatch& linker_patch, + ArrayRef<const uint8_t> code, + const std::string& debug_name); + private: + class ThunkMapKey; + class ThunkMapValue; + using ThunkMapValueType = std::pair<const ThunkMapKey, ThunkMapValue>; + using ThunkMap = std::map<ThunkMapKey, + ThunkMapValue, + std::less<ThunkMapKey>, + SwapAllocator<ThunkMapValueType>>; + static_assert(std::is_same<ThunkMapValueType, ThunkMap::value_type>::value, "Value type check."); + + static ThunkMapKey GetThunkMapKey(const linker::LinkerPatch& linker_patch); + template <typename T, typename DedupeSetType> const LengthPrefixedArray<T>* AllocateOrDeduplicateArray(const ArrayRef<const T>& data, DedupeSetType* dedupe_set); @@ -97,11 +116,13 @@ class CompiledMethodStorage { bool dedupe_enabled_; ArrayDedupeSet<uint8_t> dedupe_code_; - ArrayDedupeSet<uint8_t> dedupe_method_info_; ArrayDedupeSet<uint8_t> dedupe_vmap_table_; ArrayDedupeSet<uint8_t> dedupe_cfi_info_; ArrayDedupeSet<linker::LinkerPatch> dedupe_linker_patches_; + Mutex thunk_map_lock_; + ThunkMap thunk_map_ GUARDED_BY(thunk_map_lock_); + DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage); }; diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 0769561d0e..05eacd848d 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ b/compiler/driver/compiled_method_storage_test.cc @@ -19,29 +19,13 @@ #include <gtest/gtest.h> #include "compiled_method-inl.h" -#include "compiler_driver.h" -#include "compiler_options.h" -#include "dex/verification_results.h" namespace art { TEST(CompiledMethodStorage, Deduplicate) { - CompilerOptions compiler_options; - VerificationResults verification_results(&compiler_options); - CompilerDriver driver(&compiler_options, - &verification_results, - Compiler::kOptimizing, - /* instruction_set_ */ InstructionSet::kNone, - /* instruction_set_features */ nullptr, - /* image_classes */ nullptr, - /* 
compiled_classes */ nullptr, - /* compiled_methods */ nullptr, - /* thread_count */ 1u, - /* swap_fd */ -1, - /* profile_compilation_info */ nullptr); - CompiledMethodStorage* storage = driver.GetCompiledMethodStorage(); + CompiledMethodStorage storage(/* swap_fd= */ -1); - ASSERT_TRUE(storage->DedupeEnabled()); // The default. + ASSERT_TRUE(storage.DedupeEnabled()); // The default. const uint8_t raw_code1[] = { 1u, 2u, 3u }; const uint8_t raw_code2[] = { 4u, 3u, 2u, 1u }; @@ -49,12 +33,6 @@ TEST(CompiledMethodStorage, Deduplicate) { ArrayRef<const uint8_t>(raw_code1), ArrayRef<const uint8_t>(raw_code2), }; - const uint8_t raw_method_info_map1[] = { 1u, 2u, 3u, 4u, 5u, 6u }; - const uint8_t raw_method_info_map2[] = { 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u }; - ArrayRef<const uint8_t> method_info[] = { - ArrayRef<const uint8_t>(raw_method_info_map1), - ArrayRef<const uint8_t>(raw_method_info_map2), - }; const uint8_t raw_vmap_table1[] = { 2, 4, 6 }; const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 }; ArrayRef<const uint8_t> vmap_table[] = { @@ -68,11 +46,11 @@ TEST(CompiledMethodStorage, Deduplicate) { ArrayRef<const uint8_t>(raw_cfi_info2), }; const linker::LinkerPatch raw_patches1[] = { - linker::LinkerPatch::CodePatch(0u, nullptr, 1u), + linker::LinkerPatch::IntrinsicReferencePatch(0u, 0u, 0u), linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u), }; const linker::LinkerPatch raw_patches2[] = { - linker::LinkerPatch::CodePatch(0u, nullptr, 1u), + linker::LinkerPatch::IntrinsicReferencePatch(0u, 0u, 0u), linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u), }; ArrayRef<const linker::LinkerPatch> patches[] = { @@ -81,38 +59,32 @@ TEST(CompiledMethodStorage, Deduplicate) { }; std::vector<CompiledMethod*> compiled_methods; - compiled_methods.reserve(1u << 7); + compiled_methods.reserve(1u << 4); for (auto&& c : code) { - for (auto&& s : method_info) { - for (auto&& v : vmap_table) { - for (auto&& f : cfi_info) { - for (auto&& p : patches) { - compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod( - &driver, InstructionSet::kNone, c, 0u, 0u, 0u, s, v, f, p)); - } + for (auto&& v : vmap_table) { + for (auto&& f : cfi_info) { + for (auto&& p : patches) { + compiled_methods.push_back(CompiledMethod::SwapAllocCompiledMethod( + &storage, InstructionSet::kNone, c, v, f, p)); } } } } - constexpr size_t code_bit = 1u << 4; - constexpr size_t src_map_bit = 1u << 3; + constexpr size_t code_bit = 1u << 3; constexpr size_t vmap_table_bit = 1u << 2; constexpr size_t cfi_info_bit = 1u << 1; constexpr size_t patches_bit = 1u << 0; - CHECK_EQ(compiled_methods.size(), 1u << 5); + CHECK_EQ(compiled_methods.size(), 1u << 4); for (size_t i = 0; i != compiled_methods.size(); ++i) { for (size_t j = 0; j != compiled_methods.size(); ++j) { CompiledMethod* lhs = compiled_methods[i]; CompiledMethod* rhs = compiled_methods[j]; bool same_code = ((i ^ j) & code_bit) == 0u; - bool same_src_map = ((i ^ j) & src_map_bit) == 0u; bool same_vmap_table = ((i ^ j) & vmap_table_bit) == 0u; bool same_cfi_info = ((i ^ j) & cfi_info_bit) == 0u; bool same_patches = ((i ^ j) & patches_bit) == 0u; ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data()) << i << " " << j; - ASSERT_EQ(same_src_map, lhs->GetMethodInfo().data() == rhs->GetMethodInfo().data()) - << i << " " << j; ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data()) << i << " " << j; ASSERT_EQ(same_cfi_info, lhs->GetCFIInfo().data() == rhs->GetCFIInfo().data()) @@ -122,7 +94,7 @@ 
TEST(CompiledMethodStorage, Deduplicate) { } } for (CompiledMethod* method : compiled_methods) { - CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&driver, method); + CompiledMethod::ReleaseSwapAllocatedCompiledMethod(&storage, method); } } diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h deleted file mode 100644 index 294072d7e7..0000000000 --- a/compiler/driver/compiler_driver-inl.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_ -#define ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_ - -#include "compiler_driver.h" - -#include "art_field-inl.h" -#include "art_method-inl.h" -#include "base/enums.h" -#include "class_linker-inl.h" -#include "dex_compilation_unit.h" -#include "handle_scope-inl.h" -#include "mirror/class_loader.h" -#include "mirror/dex_cache-inl.h" -#include "runtime.h" -#include "scoped_thread_state_change-inl.h" - -namespace art { - -inline ObjPtr<mirror::Class> CompilerDriver::ResolveClass( - const ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - dex::TypeIndex cls_index, - const DexCompilationUnit* mUnit) { - DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile()); - DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); - ObjPtr<mirror::Class> cls = - mUnit->GetClassLinker()->ResolveType(cls_index, dex_cache, class_loader); - DCHECK_EQ(cls == nullptr, soa.Self()->IsExceptionPending()); - if (UNLIKELY(cls == nullptr)) { - // Clean up any exception left by type resolution. - soa.Self()->ClearException(); - } - return cls; -} - -inline ObjPtr<mirror::Class> CompilerDriver::ResolveCompilingMethodsClass( - const ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - const DexCompilationUnit* mUnit) { - DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile()); - DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); - const DexFile::MethodId& referrer_method_id = - mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex()); - return ResolveClass(soa, dex_cache, class_loader, referrer_method_id.class_idx_, mUnit); -} - -inline ArtField* CompilerDriver::ResolveField(const ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - uint32_t field_idx, - bool is_static) { - ArtField* resolved_field = Runtime::Current()->GetClassLinker()->ResolveField( - field_idx, dex_cache, class_loader, is_static); - DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending()); - if (UNLIKELY(resolved_field == nullptr)) { - // Clean up any exception left by type resolution. - soa.Self()->ClearException(); - return nullptr; - } - if (UNLIKELY(resolved_field->IsStatic() != is_static)) { - // ClassLinker can return a field of the wrong kind directly from the DexCache. 
- // Silently return null on such incompatible class change. - return nullptr; - } - return resolved_field; -} - -inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField( - ObjPtr<mirror::DexCache> dex_cache, - ObjPtr<mirror::Class> referrer_class, - ArtField* resolved_field, - uint16_t field_idx) { - DCHECK(!resolved_field->IsStatic()); - ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass(); - bool fast_get = referrer_class != nullptr && - referrer_class->CanAccessResolvedField(fields_class, - resolved_field, - dex_cache, - field_idx); - bool fast_put = fast_get && (!resolved_field->IsFinal() || fields_class == referrer_class); - return std::make_pair(fast_get, fast_put); -} - -inline ArtMethod* CompilerDriver::ResolveMethod( - ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - const DexCompilationUnit* mUnit, - uint32_t method_idx, - InvokeType invoke_type) { - DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); - ArtMethod* resolved_method = - mUnit->GetClassLinker()->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( - method_idx, dex_cache, class_loader, /* referrer */ nullptr, invoke_type); - if (UNLIKELY(resolved_method == nullptr)) { - DCHECK(soa.Self()->IsExceptionPending()); - // Clean up any exception left by type resolution. - soa.Self()->ClearException(); - } - return resolved_method; -} - -inline VerificationResults* CompilerDriver::GetVerificationResults() const { - DCHECK(Runtime::Current()->IsAotCompiler()); - return verification_results_; -} - -} // namespace art - -#endif // ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc deleted file mode 100644 index 53604761d1..0000000000 --- a/compiler/driver/compiler_driver.cc +++ /dev/null @@ -1,3031 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "compiler_driver.h" - -#include <unistd.h> -#include <unordered_set> -#include <vector> - -#ifndef __APPLE__ -#include <malloc.h> // For mallinfo -#endif - -#include "android-base/strings.h" - -#include "art_field-inl.h" -#include "art_method-inl.h" -#include "base/arena_allocator.h" -#include "base/array_ref.h" -#include "base/bit_vector.h" -#include "base/enums.h" -#include "base/logging.h" // For VLOG -#include "base/stl_util.h" -#include "base/systrace.h" -#include "base/time_utils.h" -#include "base/timing_logger.h" -#include "class_linker-inl.h" -#include "compiled_method-inl.h" -#include "compiler.h" -#include "compiler_callbacks.h" -#include "compiler_driver-inl.h" -#include "dex/descriptors_names.h" -#include "dex/dex_file-inl.h" -#include "dex/dex_file_annotations.h" -#include "dex/dex_instruction-inl.h" -#include "dex/dex_to_dex_compiler.h" -#include "dex/verification_results.h" -#include "dex/verified_method.h" -#include "dex_compilation_unit.h" -#include "driver/compiler_options.h" -#include "gc/accounting/card_table-inl.h" -#include "gc/accounting/heap_bitmap.h" -#include "gc/space/image_space.h" -#include "gc/space/space.h" -#include "handle_scope-inl.h" -#include "intrinsics_enum.h" -#include "jit/profile_compilation_info.h" -#include "jni_internal.h" -#include "linker/linker_patch.h" -#include "mirror/class-inl.h" -#include "mirror/class_loader.h" -#include "mirror/dex_cache-inl.h" -#include "mirror/object-inl.h" -#include "mirror/object-refvisitor-inl.h" -#include "mirror/object_array-inl.h" -#include "mirror/throwable.h" -#include "nativehelper/ScopedLocalRef.h" -#include "object_lock.h" -#include "runtime.h" -#include "runtime_intrinsics.h" -#include "scoped_thread_state_change-inl.h" -#include "thread.h" -#include "thread_list.h" -#include "thread_pool.h" -#include "trampolines/trampoline_compiler.h" -#include "transaction.h" -#include "utils/atomic_dex_ref_map-inl.h" -#include "utils/dex_cache_arrays_layout-inl.h" -#include "utils/swap_space.h" -#include "vdex_file.h" -#include "verifier/method_verifier-inl.h" -#include "verifier/method_verifier.h" -#include "verifier/verifier_deps.h" -#include "verifier/verifier_enums.h" - -namespace art { - -static constexpr bool kTimeCompileMethod = !kIsDebugBuild; - -// Print additional info during profile guided compilation. -static constexpr bool kDebugProfileGuidedCompilation = false; - -// Max encoded fields allowed for initializing app image. Hardcode the number for now -// because 5000 should be large enough. 
-static constexpr uint32_t kMaxEncodedFields = 5000; - -static double Percentage(size_t x, size_t y) { - return 100.0 * (static_cast<double>(x)) / (static_cast<double>(x + y)); -} - -static void DumpStat(size_t x, size_t y, const char* str) { - if (x == 0 && y == 0) { - return; - } - LOG(INFO) << Percentage(x, y) << "% of " << str << " for " << (x + y) << " cases"; -} - -class CompilerDriver::AOTCompilationStats { - public: - AOTCompilationStats() - : stats_lock_("AOT compilation statistics lock"), - resolved_types_(0), unresolved_types_(0), - resolved_instance_fields_(0), unresolved_instance_fields_(0), - resolved_local_static_fields_(0), resolved_static_fields_(0), unresolved_static_fields_(0), - type_based_devirtualization_(0), - safe_casts_(0), not_safe_casts_(0) { - for (size_t i = 0; i <= kMaxInvokeType; i++) { - resolved_methods_[i] = 0; - unresolved_methods_[i] = 0; - virtual_made_direct_[i] = 0; - direct_calls_to_boot_[i] = 0; - direct_methods_to_boot_[i] = 0; - } - } - - void Dump() { - DumpStat(resolved_types_, unresolved_types_, "types resolved"); - DumpStat(resolved_instance_fields_, unresolved_instance_fields_, "instance fields resolved"); - DumpStat(resolved_local_static_fields_ + resolved_static_fields_, unresolved_static_fields_, - "static fields resolved"); - DumpStat(resolved_local_static_fields_, resolved_static_fields_ + unresolved_static_fields_, - "static fields local to a class"); - DumpStat(safe_casts_, not_safe_casts_, "check-casts removed based on type information"); - // Note, the code below subtracts the stat value so that when added to the stat value we have - // 100% of samples. TODO: clean this up. - DumpStat(type_based_devirtualization_, - resolved_methods_[kVirtual] + unresolved_methods_[kVirtual] + - resolved_methods_[kInterface] + unresolved_methods_[kInterface] - - type_based_devirtualization_, - "virtual/interface calls made direct based on type information"); - - for (size_t i = 0; i <= kMaxInvokeType; i++) { - std::ostringstream oss; - oss << static_cast<InvokeType>(i) << " methods were AOT resolved"; - DumpStat(resolved_methods_[i], unresolved_methods_[i], oss.str().c_str()); - if (virtual_made_direct_[i] > 0) { - std::ostringstream oss2; - oss2 << static_cast<InvokeType>(i) << " methods made direct"; - DumpStat(virtual_made_direct_[i], - resolved_methods_[i] + unresolved_methods_[i] - virtual_made_direct_[i], - oss2.str().c_str()); - } - if (direct_calls_to_boot_[i] > 0) { - std::ostringstream oss2; - oss2 << static_cast<InvokeType>(i) << " method calls are direct into boot"; - DumpStat(direct_calls_to_boot_[i], - resolved_methods_[i] + unresolved_methods_[i] - direct_calls_to_boot_[i], - oss2.str().c_str()); - } - if (direct_methods_to_boot_[i] > 0) { - std::ostringstream oss2; - oss2 << static_cast<InvokeType>(i) << " method calls have methods in boot"; - DumpStat(direct_methods_to_boot_[i], - resolved_methods_[i] + unresolved_methods_[i] - direct_methods_to_boot_[i], - oss2.str().c_str()); - } - } - } - -// Allow lossy statistics in non-debug builds. 
-#ifndef NDEBUG -#define STATS_LOCK() MutexLock mu(Thread::Current(), stats_lock_) -#else -#define STATS_LOCK() -#endif - - void TypeDoesntNeedAccessCheck() REQUIRES(!stats_lock_) { - STATS_LOCK(); - resolved_types_++; - } - - void TypeNeedsAccessCheck() REQUIRES(!stats_lock_) { - STATS_LOCK(); - unresolved_types_++; - } - - void ResolvedInstanceField() REQUIRES(!stats_lock_) { - STATS_LOCK(); - resolved_instance_fields_++; - } - - void UnresolvedInstanceField() REQUIRES(!stats_lock_) { - STATS_LOCK(); - unresolved_instance_fields_++; - } - - void ResolvedLocalStaticField() REQUIRES(!stats_lock_) { - STATS_LOCK(); - resolved_local_static_fields_++; - } - - void ResolvedStaticField() REQUIRES(!stats_lock_) { - STATS_LOCK(); - resolved_static_fields_++; - } - - void UnresolvedStaticField() REQUIRES(!stats_lock_) { - STATS_LOCK(); - unresolved_static_fields_++; - } - - // Indicate that type information from the verifier led to devirtualization. - void PreciseTypeDevirtualization() REQUIRES(!stats_lock_) { - STATS_LOCK(); - type_based_devirtualization_++; - } - - // A check-cast could be eliminated due to verifier type analysis. - void SafeCast() REQUIRES(!stats_lock_) { - STATS_LOCK(); - safe_casts_++; - } - - // A check-cast couldn't be eliminated due to verifier type analysis. - void NotASafeCast() REQUIRES(!stats_lock_) { - STATS_LOCK(); - not_safe_casts_++; - } - - private: - Mutex stats_lock_; - - size_t resolved_types_; - size_t unresolved_types_; - - size_t resolved_instance_fields_; - size_t unresolved_instance_fields_; - - size_t resolved_local_static_fields_; - size_t resolved_static_fields_; - size_t unresolved_static_fields_; - // Type based devirtualization for invoke interface and virtual. - size_t type_based_devirtualization_; - - size_t resolved_methods_[kMaxInvokeType + 1]; - size_t unresolved_methods_[kMaxInvokeType + 1]; - size_t virtual_made_direct_[kMaxInvokeType + 1]; - size_t direct_calls_to_boot_[kMaxInvokeType + 1]; - size_t direct_methods_to_boot_[kMaxInvokeType + 1]; - - size_t safe_casts_; - size_t not_safe_casts_; - - DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats); -}; - -CompilerDriver::CompilerDriver( - const CompilerOptions* compiler_options, - VerificationResults* verification_results, - Compiler::Kind compiler_kind, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features, - std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, - size_t thread_count, - int swap_fd, - const ProfileCompilationInfo* profile_compilation_info) - : compiler_options_(compiler_options), - verification_results_(verification_results), - compiler_(Compiler::Create(this, compiler_kind)), - compiler_kind_(compiler_kind), - instruction_set_( - instruction_set == InstructionSet::kArm ? 
InstructionSet::kThumb2 : instruction_set), - instruction_set_features_(instruction_set_features), - requires_constructor_barrier_lock_("constructor barrier lock"), - non_relative_linker_patch_count_(0u), - image_classes_(image_classes), - classes_to_compile_(compiled_classes), - methods_to_compile_(compiled_methods), - number_of_soft_verifier_failures_(0), - had_hard_verifier_failure_(false), - parallel_thread_count_(thread_count), - stats_(new AOTCompilationStats), - compiler_context_(nullptr), - support_boot_image_fixup_(true), - compiled_method_storage_(swap_fd), - profile_compilation_info_(profile_compilation_info), - max_arena_alloc_(0), - dex_to_dex_compiler_(this) { - DCHECK(compiler_options_ != nullptr); - - compiler_->Init(); - - if (GetCompilerOptions().IsBootImage()) { - CHECK(image_classes_.get() != nullptr) << "Expected image classes for boot image"; - } - - compiled_method_storage_.SetDedupeEnabled(compiler_options_->DeduplicateCode()); -} - -CompilerDriver::~CompilerDriver() { - compiled_methods_.Visit([this](const DexFileReference& ref ATTRIBUTE_UNUSED, - CompiledMethod* method) { - if (method != nullptr) { - CompiledMethod::ReleaseSwapAllocatedCompiledMethod(this, method); - } - }); - compiler_->UnInit(); -} - - -#define CREATE_TRAMPOLINE(type, abi, offset) \ - if (Is64BitInstructionSet(instruction_set_)) { \ - return CreateTrampoline64(instruction_set_, abi, \ - type ## _ENTRYPOINT_OFFSET(PointerSize::k64, offset)); \ - } else { \ - return CreateTrampoline32(instruction_set_, abi, \ - type ## _ENTRYPOINT_OFFSET(PointerSize::k32, offset)); \ - } - -std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateJniDlsymLookup() const { - CREATE_TRAMPOLINE(JNI, kJniAbi, pDlsymLookup) -} - -std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickGenericJniTrampoline() - const { - CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickGenericJniTrampoline) -} - -std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickImtConflictTrampoline() - const { - CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickImtConflictTrampoline) -} - -std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickResolutionTrampoline() - const { - CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickResolutionTrampoline) -} - -std::unique_ptr<const std::vector<uint8_t>> CompilerDriver::CreateQuickToInterpreterBridge() - const { - CREATE_TRAMPOLINE(QUICK, kQuickAbi, pQuickToInterpreterBridge) -} -#undef CREATE_TRAMPOLINE - -void CompilerDriver::CompileAll(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - DCHECK(!Runtime::Current()->IsStarted()); - - InitializeThreadPools(); - - VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false); - // Precompile: - // 1) Load image classes - // 2) Resolve all classes - // 3) Attempt to verify all classes - // 4) Attempt to initialize image classes, and trivially initialized classes - PreCompile(class_loader, dex_files, timings); - if (GetCompilerOptions().IsBootImage()) { - // We don't need to setup the intrinsics for non boot image compilation, as - // those compilations will pick up a boot image that have the ArtMethod already - // set with the intrinsics flag. - InitializeIntrinsics(); - } - // Compile: - // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex - // compilation. 
- if (GetCompilerOptions().IsAnyCompilationEnabled()) { - Compile(class_loader, dex_files, timings); - } - if (GetCompilerOptions().GetDumpStats()) { - stats_->Dump(); - } - - FreeThreadPools(); -} - -static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( - Thread* self, const CompilerDriver& driver, Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, const DexFile::ClassDef& class_def) - REQUIRES_SHARED(Locks::mutator_lock_) { - // When the dex file is uncompressed in the APK, we do not generate a copy in the .vdex - // file. As a result, dex2oat will map the dex file read-only, and we only need to check - // that to know if we can do quickening. - if (dex_file.GetContainer() != nullptr && dex_file.GetContainer()->IsReadOnly()) { - return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; - } - auto* const runtime = Runtime::Current(); - DCHECK(driver.GetCompilerOptions().IsQuickeningCompilationEnabled()); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ClassLinker* class_linker = runtime->GetClassLinker(); - mirror::Class* klass = class_linker->FindClass(self, descriptor, class_loader); - if (klass == nullptr) { - CHECK(self->IsExceptionPending()); - self->ClearException(); - return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; - } - // DexToDex at the kOptimize level may introduce quickened opcodes, which replace symbolic - // references with actual offsets. We cannot re-verify such instructions. - // - // We store the verification information in the class status in the oat file, which the linker - // can validate (checksums) and use to skip load-time verification. It is thus safe to - // optimize when a class has been fully verified before. - optimizer::DexToDexCompiler::CompilationLevel max_level = - optimizer::DexToDexCompiler::CompilationLevel::kOptimize; - if (driver.GetCompilerOptions().GetDebuggable()) { - // We are debuggable so definitions of classes might be changed. We don't want to do any - // optimizations that could break that. - max_level = optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; - } - if (klass->IsVerified()) { - // Class is verified so we can enable DEX-to-DEX compilation for performance. - return max_level; - } else { - // Class verification has failed: do not run DEX-to-DEX optimizations. - return optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile; - } -} - -static optimizer::DexToDexCompiler::CompilationLevel GetDexToDexCompilationLevel( - Thread* self, - const CompilerDriver& driver, - jobject jclass_loader, - const DexFile& dex_file, - const DexFile::ClassDef& class_def) { - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - return GetDexToDexCompilationLevel(self, driver, class_loader, dex_file, class_def); -} - -// Does the runtime for the InstructionSet provide an implementation returned by -// GetQuickGenericJniStub allowing down calls that aren't compiled using a JNI compiler? 
-static bool InstructionSetHasGenericJniStub(InstructionSet isa) { - switch (isa) { - case InstructionSet::kArm: - case InstructionSet::kArm64: - case InstructionSet::kThumb2: - case InstructionSet::kMips: - case InstructionSet::kMips64: - case InstructionSet::kX86: - case InstructionSet::kX86_64: return true; - default: return false; - } -} - -template <typename CompileFn> -static void CompileMethodHarness( - Thread* self, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled, - Handle<mirror::DexCache> dex_cache, - CompileFn compile_fn) { - DCHECK(driver != nullptr); - CompiledMethod* compiled_method; - uint64_t start_ns = kTimeCompileMethod ? NanoTime() : 0; - MethodReference method_ref(&dex_file, method_idx); - - compiled_method = compile_fn(self, - driver, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - dex_to_dex_compilation_level, - compilation_enabled, - dex_cache); - - if (kTimeCompileMethod) { - uint64_t duration_ns = NanoTime() - start_ns; - if (duration_ns > MsToNs(driver->GetCompiler()->GetMaximumCompilationTimeBeforeWarning())) { - LOG(WARNING) << "Compilation of " << dex_file.PrettyMethod(method_idx) - << " took " << PrettyDuration(duration_ns); - } - } - - if (compiled_method != nullptr) { - // Count non-relative linker patches. - size_t non_relative_linker_patch_count = 0u; - for (const linker::LinkerPatch& patch : compiled_method->GetPatches()) { - if (!patch.IsPcRelative()) { - ++non_relative_linker_patch_count; - } - } - bool compile_pic = driver->GetCompilerOptions().GetCompilePic(); // Off by default - // When compiling with PIC, there should be zero non-relative linker patches - CHECK(!compile_pic || non_relative_linker_patch_count == 0u); - - driver->AddCompiledMethod(method_ref, compiled_method, non_relative_linker_patch_count); - } - - if (self->IsExceptionPending()) { - ScopedObjectAccess soa(self); - LOG(FATAL) << "Unexpected exception compiling: " << dex_file.PrettyMethod(method_idx) << "\n" - << self->GetException()->Dump(); - } -} - -static void CompileMethodDex2Dex( - Thread* self, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled, - Handle<mirror::DexCache> dex_cache) { - auto dex_2_dex_fn = [](Thread* self ATTRIBUTE_UNUSED, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled ATTRIBUTE_UNUSED, - Handle<mirror::DexCache> dex_cache ATTRIBUTE_UNUSED) -> CompiledMethod* { - DCHECK(driver != nullptr); - MethodReference method_ref(&dex_file, method_idx); - - optimizer::DexToDexCompiler* const compiler = &driver->GetDexToDexCompiler(); - - if (compiler->ShouldCompileMethod(method_ref)) { - VerificationResults* results = driver->GetVerificationResults(); - 
DCHECK(results != nullptr); - const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); - // Do not optimize if a VerifiedMethod is missing. SafeCast elision, - // for example, relies on it. - return compiler->CompileMethod( - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - (verified_method != nullptr) - ? dex_to_dex_compilation_level - : optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile); - } - return nullptr; - }; - CompileMethodHarness(self, - driver, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - dex_to_dex_compilation_level, - compilation_enabled, - dex_cache, - dex_2_dex_fn); -} - -static void CompileMethodQuick( - Thread* self, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled, - Handle<mirror::DexCache> dex_cache) { - auto quick_fn = []( - Thread* self, - CompilerDriver* driver, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level, - bool compilation_enabled, - Handle<mirror::DexCache> dex_cache) { - DCHECK(driver != nullptr); - CompiledMethod* compiled_method = nullptr; - MethodReference method_ref(&dex_file, method_idx); - - if ((access_flags & kAccNative) != 0) { - // Are we extracting only and have support for generic JNI down calls? - if (!driver->GetCompilerOptions().IsJniCompilationEnabled() && - InstructionSetHasGenericJniStub(driver->GetInstructionSet())) { - // Leaving this empty will trigger the generic JNI version - } else { - // Query any JNI optimization annotations such as @FastNative or @CriticalNative. - access_flags |= annotations::GetNativeMethodAnnotationAccessFlags( - dex_file, dex_file.GetClassDef(class_def_idx), method_idx); - - compiled_method = driver->GetCompiler()->JniCompile( - access_flags, method_idx, dex_file, dex_cache); - CHECK(compiled_method != nullptr); - } - } else if ((access_flags & kAccAbstract) != 0) { - // Abstract methods don't have code. - } else { - VerificationResults* results = driver->GetVerificationResults(); - DCHECK(results != nullptr); - const VerifiedMethod* verified_method = results->GetVerifiedMethod(method_ref); - bool compile = compilation_enabled && - // Basic checks, e.g., not <clinit>. - results->IsCandidateForCompilation(method_ref, access_flags) && - // Did not fail to create VerifiedMethod metadata. - verified_method != nullptr && - // Do not have failures that should punt to the interpreter. - !verified_method->HasRuntimeThrow() && - (verified_method->GetEncounteredVerificationFailures() & - (verifier::VERIFY_ERROR_FORCE_INTERPRETER | verifier::VERIFY_ERROR_LOCKING)) == 0 && - // Is eligible for compilation by methods-to-compile filter. - driver->IsMethodToCompile(method_ref) && - driver->ShouldCompileBasedOnProfile(method_ref); - - if (compile) { - // NOTE: if compiler declines to compile this method, it will return null.
- compiled_method = driver->GetCompiler()->Compile(code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - dex_cache); - } - if (compiled_method == nullptr && - dex_to_dex_compilation_level != - optimizer::DexToDexCompiler::CompilationLevel::kDontDexToDexCompile) { - DCHECK(!Runtime::Current()->UseJitCompilation()); - // TODO: add a command-line option to disable DEX-to-DEX compilation ? - driver->GetDexToDexCompiler().MarkForCompilation(self, method_ref); - } - } - return compiled_method; - }; - CompileMethodHarness(self, - driver, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - dex_file, - dex_to_dex_compilation_level, - compilation_enabled, - dex_cache, - quick_fn); -} - -void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) { - DCHECK(!Runtime::Current()->IsStarted()); - jobject jclass_loader; - const DexFile* dex_file; - uint16_t class_def_idx; - uint32_t method_idx = method->GetDexMethodIndex(); - uint32_t access_flags = method->GetAccessFlags(); - InvokeType invoke_type = method->GetInvokeType(); - StackHandleScope<2> hs(self); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(method->GetDeclaringClass()->GetClassLoader())); - { - ScopedObjectAccessUnchecked soa(self); - ScopedLocalRef<jobject> local_class_loader( - soa.Env(), soa.AddLocalReference<jobject>(class_loader.Get())); - jclass_loader = soa.Env()->NewGlobalRef(local_class_loader.get()); - // Find the dex_file - dex_file = method->GetDexFile(); - class_def_idx = method->GetClassDefIndex(); - } - const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); - - // Go to native so that we don't block GC during compilation. - ScopedThreadSuspension sts(self, kNative); - - std::vector<const DexFile*> dex_files; - dex_files.push_back(dex_file); - - InitializeThreadPools(); - - PreCompile(jclass_loader, dex_files, timings); - - // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = - GetDexToDexCompilationLevel(self, - *this, - jclass_loader, - *dex_file, - dex_file->GetClassDef(class_def_idx)); - - CompileMethodQuick(self, - this, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_to_dex_compilation_level, - true, - dex_cache); - - const size_t num_methods = dex_to_dex_compiler_.NumCodeItemsToQuicken(self); - if (num_methods != 0) { - DCHECK_EQ(num_methods, 1u); - CompileMethodDex2Dex(self, - this, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_to_dex_compilation_level, - true, - dex_cache); - dex_to_dex_compiler_.ClearState(); - } - - FreeThreadPools(); - - self->GetJniEnv()->DeleteGlobalRef(jclass_loader); -} - -void CompilerDriver::Resolve(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - // Resolution allocates classes and needs to run single-threaded to be deterministic. - bool force_determinism = GetCompilerOptions().IsForceDeterminism(); - ThreadPool* resolve_thread_pool = force_determinism - ? single_thread_pool_.get() - : parallel_thread_pool_.get(); - size_t resolve_thread_count = force_determinism ? 
1U : parallel_thread_count_; - - for (size_t i = 0; i != dex_files.size(); ++i) { - const DexFile* dex_file = dex_files[i]; - CHECK(dex_file != nullptr); - ResolveDexFile(class_loader, - *dex_file, - dex_files, - resolve_thread_pool, - resolve_thread_count, - timings); - } -} - -// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right now -// this is single-threaded for simplicity. -// TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a -// stable order. - -static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, - const DexFile& dex_file, - const DexFile::CodeItem* code_item) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (code_item == nullptr) { - // Abstract or native method. - return; - } - - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - for (const DexInstructionPcPair& inst : CodeItemInstructionAccessor(dex_file, code_item)) { - switch (inst->Opcode()) { - case Instruction::CONST_STRING: - case Instruction::CONST_STRING_JUMBO: { - dex::StringIndex string_index((inst->Opcode() == Instruction::CONST_STRING) - ? inst->VRegB_21c() - : inst->VRegB_31c()); - ObjPtr<mirror::String> string = class_linker->ResolveString(string_index, dex_cache); - CHECK(string != nullptr) << "Could not allocate a string when forcing determinism"; - break; - } - - default: - break; - } - } -} - -static void ResolveConstStrings(CompilerDriver* driver, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); - - for (const DexFile* dex_file : dex_files) { - dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file)); - TimingLogger::ScopedTiming t("Resolve const-string Strings", timings); - - size_t class_def_count = dex_file->NumClassDefs(); - for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) { - const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); - - const uint8_t* class_data = dex_file->GetClassData(class_def); - if (class_data == nullptr) { - // empty class, probably a marker interface - continue; - } - - ClassDataItemIterator it(*dex_file, class_data); - it.SkipAllFields(); - - bool compilation_enabled = driver->IsClassToCompile( - dex_file->StringByTypeIdx(class_def.class_idx_)); - if (!compilation_enabled) { - // Compilation is skipped, do not resolve const-string in code of this class. - // TODO: Make sure that inlining honors this. - continue; - } - - // Direct and virtual methods. 
- int64_t previous_method_idx = -1; - while (it.HasNextMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 - it.Next(); - continue; - } - previous_method_idx = method_idx; - ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem()); - it.Next(); - } - DCHECK(!it.HasNext()); - } - } -} - -inline void CompilerDriver::CheckThreadPools() { - DCHECK(parallel_thread_pool_ != nullptr); - DCHECK(single_thread_pool_ != nullptr); -} - -static void EnsureVerifiedOrVerifyAtRuntime(jobject jclass_loader, - const std::vector<const DexFile*>& dex_files) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr)); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - - for (const DexFile* dex_file : dex_files) { - for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) { - const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); - const char* descriptor = dex_file->GetClassDescriptor(class_def); - cls.Assign(class_linker->FindClass(soa.Self(), descriptor, class_loader)); - if (cls == nullptr) { - soa.Self()->ClearException(); - } else if (&cls->GetDexFile() == dex_file) { - DCHECK(cls->IsErroneous() || cls->IsVerified() || cls->ShouldVerifyAtRuntime()) - << cls->PrettyClass() - << " " << cls->GetStatus(); - } - } - } -} - -void CompilerDriver::PreCompile(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - CheckThreadPools(); - - LoadImageClasses(timings); - VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false); - - if (compiler_options_->IsAnyCompilationEnabled()) { - // Avoid adding the dex files in the case where we aren't going to add compiled methods. - // This reduces RAM usage for this case. - for (const DexFile* dex_file : dex_files) { - // Can be already inserted if the caller is CompileOne. This happens for gtests. - if (!compiled_methods_.HaveDexFile(dex_file)) { - compiled_methods_.AddDexFile(dex_file); - } - } - // Resolve eagerly to prepare for compilation. - Resolve(class_loader, dex_files, timings); - VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false); - } - - if (compiler_options_->AssumeClassesAreVerified()) { - VLOG(compiler) << "Verify none mode specified, skipping verification."; - SetVerified(class_loader, dex_files, timings); - } - - if (!compiler_options_->IsVerificationEnabled()) { - return; - } - - if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) { - // Resolve strings from const-string. Do this now to have a deterministic image. - ResolveConstStrings(this, dex_files, timings); - VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false); - } - - Verify(class_loader, dex_files, timings); - VLOG(compiler) << "Verify: " << GetMemoryUsageString(false); - - if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) { - // Avoid dumping threads. Even if we shut down the thread pools, there will still be three - // instances of this thread's stack. - LOG(FATAL_WITHOUT_ABORT) << "Had a hard failure verifying all classes, and was asked to abort " - << "in such situations. 
Please check the log."; - _exit(1); - } else if (number_of_soft_verifier_failures_ > 0 && - GetCompilerOptions().AbortOnSoftVerifierFailure()) { - LOG(FATAL_WITHOUT_ABORT) << "Had " << number_of_soft_verifier_failures_ << " soft failure(s) " - << "verifying all classes, and was asked to abort in such situations. " - << "Please check the log."; - _exit(1); - } - - if (compiler_options_->IsAnyCompilationEnabled()) { - if (kIsDebugBuild) { - EnsureVerifiedOrVerifyAtRuntime(class_loader, dex_files); - } - InitializeClasses(class_loader, dex_files, timings); - VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false); - } - - UpdateImageClasses(timings); - VLOG(compiler) << "UpdateImageClasses: " << GetMemoryUsageString(false); -} - -bool CompilerDriver::IsImageClass(const char* descriptor) const { - if (image_classes_ != nullptr) { - // If we have a set of image classes, use those. - return image_classes_->find(descriptor) != image_classes_->end(); - } - // No set of image classes, assume we include all the classes. - // NOTE: Currently only reachable from InitImageMethodVisitor for the app image case. - return !GetCompilerOptions().IsBootImage(); -} - -bool CompilerDriver::IsClassToCompile(const char* descriptor) const { - if (classes_to_compile_ == nullptr) { - return true; - } - return classes_to_compile_->find(descriptor) != classes_to_compile_->end(); -} - -bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const { - if (methods_to_compile_ == nullptr) { - return true; - } - - std::string tmp = method_ref.PrettyMethod(); - return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end(); -} - -bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const { - // Profile compilation info may be null if no profile is passed. - if (!CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter())) { - // Use the compiler filter instead of the presence of profile_compilation_info_ since - // we may want to have full speed compilation along with profile based layout optimizations. - return true; - } - // If we are using a profile filter but do not have a profile compilation info, compile nothing. - if (profile_compilation_info_ == nullptr) { - return false; - } - // Compile only hot methods, it is the profile saver's job to decide what startup methods to mark - // as hot. - bool result = profile_compilation_info_->GetMethodHotness(method_ref).IsHot(); - - if (kDebugProfileGuidedCompilation) { - LOG(INFO) << "[ProfileGuidedCompilation] " - << (result ? 
"Compiled" : "Skipped") << " method:" << method_ref.PrettyMethod(true); - } - return result; -} - -class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { - public: - ResolveCatchBlockExceptionsClassVisitor() : classes_() {} - - virtual bool operator()(ObjPtr<mirror::Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { - classes_.push_back(c); - return true; - } - - void FindExceptionTypesToResolve( - std::set<std::pair<dex::TypeIndex, const DexFile*>>* exceptions_to_resolve) - REQUIRES_SHARED(Locks::mutator_lock_) { - const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - for (ObjPtr<mirror::Class> klass : classes_) { - for (ArtMethod& method : klass->GetMethods(pointer_size)) { - FindExceptionTypesToResolveForMethod(&method, exceptions_to_resolve); - } - } - } - - private: - void FindExceptionTypesToResolveForMethod( - ArtMethod* method, - std::set<std::pair<dex::TypeIndex, const DexFile*>>* exceptions_to_resolve) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (method->GetCodeItem() == nullptr) { - return; // native or abstract method - } - CodeItemDataAccessor accessor(method->DexInstructionData()); - if (accessor.TriesSize() == 0) { - return; // nothing to process - } - const uint8_t* encoded_catch_handler_list = accessor.GetCatchHandlerData(); - size_t num_encoded_catch_handlers = DecodeUnsignedLeb128(&encoded_catch_handler_list); - for (size_t i = 0; i < num_encoded_catch_handlers; i++) { - int32_t encoded_catch_handler_size = DecodeSignedLeb128(&encoded_catch_handler_list); - bool has_catch_all = false; - if (encoded_catch_handler_size <= 0) { - encoded_catch_handler_size = -encoded_catch_handler_size; - has_catch_all = true; - } - for (int32_t j = 0; j < encoded_catch_handler_size; j++) { - dex::TypeIndex encoded_catch_handler_handlers_type_idx = - dex::TypeIndex(DecodeUnsignedLeb128(&encoded_catch_handler_list)); - // Add to set of types to resolve if not already in the dex cache resolved types - if (!method->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { - exceptions_to_resolve->emplace(encoded_catch_handler_handlers_type_idx, - method->GetDexFile()); - } - // ignore address associated with catch handler - DecodeUnsignedLeb128(&encoded_catch_handler_list); - } - if (has_catch_all) { - // ignore catch all address - DecodeUnsignedLeb128(&encoded_catch_handler_list); - } - } - } - - std::vector<ObjPtr<mirror::Class>> classes_; -}; - -class RecordImageClassesVisitor : public ClassVisitor { - public: - explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes) - : image_classes_(image_classes) {} - - bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { - std::string temp; - image_classes_->insert(klass->GetDescriptor(&temp)); - return true; - } - - private: - std::unordered_set<std::string>* const image_classes_; -}; - -// Make a list of descriptors for classes to include in the image -void CompilerDriver::LoadImageClasses(TimingLogger* timings) { - CHECK(timings != nullptr); - if (!GetCompilerOptions().IsBootImage()) { - return; - } - - TimingLogger::ScopedTiming t("LoadImageClasses", timings); - // Make a first class to load all classes explicitly listed in the file - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - CHECK(image_classes_.get() != nullptr); - for (auto it = image_classes_->begin(), end = image_classes_->end(); it != end;) { - const std::string& 
descriptor(*it); - StackHandleScope<1> hs(self); - Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindSystemClass(self, descriptor.c_str()))); - if (klass == nullptr) { - VLOG(compiler) << "Failed to find class " << descriptor; - image_classes_->erase(it++); - self->ClearException(); - } else { - ++it; - } - } - - // Resolve exception classes referenced by the loaded classes. The catch logic assumes - // exceptions are resolved by the verifier when there is a catch block in an interested method. - // Do this here so that exception classes appear to have been specified as image classes. - std::set<std::pair<dex::TypeIndex, const DexFile*>> unresolved_exception_types; - StackHandleScope<1> hs(self); - Handle<mirror::Class> java_lang_Throwable( - hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;"))); - do { - unresolved_exception_types.clear(); - { - // Thread suspension is not allowed while ResolveCatchBlockExceptionsClassVisitor - // is using a std::vector<ObjPtr<mirror::Class>>. - ScopedAssertNoThreadSuspension ants(__FUNCTION__); - ResolveCatchBlockExceptionsClassVisitor visitor; - class_linker->VisitClasses(&visitor); - visitor.FindExceptionTypesToResolve(&unresolved_exception_types); - } - for (const auto& exception_type : unresolved_exception_types) { - dex::TypeIndex exception_type_idx = exception_type.first; - const DexFile* dex_file = exception_type.second; - StackHandleScope<1> hs2(self); - Handle<mirror::DexCache> dex_cache(hs2.NewHandle(class_linker->RegisterDexFile(*dex_file, - nullptr))); - ObjPtr<mirror::Class> klass = - (dex_cache != nullptr) - ? class_linker->ResolveType(exception_type_idx, - dex_cache, - ScopedNullHandle<mirror::ClassLoader>()) - : nullptr; - if (klass == nullptr) { - const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx); - const char* descriptor = dex_file->GetTypeDescriptor(type_id); - LOG(FATAL) << "Failed to resolve class " << descriptor; - } - DCHECK(java_lang_Throwable->IsAssignableFrom(klass)); - } - // Resolving exceptions may load classes that reference more exceptions, iterate until no - // more are found - } while (!unresolved_exception_types.empty()); - - // We walk the roots looking for classes so that we'll pick up the - // above classes plus any classes they depend on, such as super - // classes, interfaces, and the required ClassLinker roots. - RecordImageClassesVisitor visitor(image_classes_.get()); - class_linker->VisitClasses(&visitor); - - CHECK_NE(image_classes_->size(), 0U); -} - -static void MaybeAddToImageClasses(Thread* self, - ObjPtr<mirror::Class> klass, - std::unordered_set<std::string>* image_classes) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK_EQ(self, Thread::Current()); - StackHandleScope<1> hs(self); - std::string temp; - const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - while (!klass->IsObjectClass()) { - const char* descriptor = klass->GetDescriptor(&temp); - std::pair<std::unordered_set<std::string>::iterator, bool> result = - image_classes->insert(descriptor); - if (!result.second) { // Previously inserted.
- break; - } - VLOG(compiler) << "Adding " << descriptor << " to image classes"; - for (size_t i = 0, num_interfaces = klass->NumDirectInterfaces(); i != num_interfaces; ++i) { - ObjPtr<mirror::Class> interface = mirror::Class::GetDirectInterface(self, klass, i); - DCHECK(interface != nullptr); - MaybeAddToImageClasses(self, interface, image_classes); - } - for (auto& m : klass->GetVirtualMethods(pointer_size)) { - MaybeAddToImageClasses(self, m.GetDeclaringClass(), image_classes); - } - if (klass->IsArrayClass()) { - MaybeAddToImageClasses(self, klass->GetComponentType(), image_classes); - } - klass.Assign(klass->GetSuperClass()); - } -} - -// Keeps all the data for the update together. Also doubles as the reference visitor. -// Note: we can use object pointers because we suspend all threads. -class ClinitImageUpdate { - public: - static ClinitImageUpdate* Create(VariableSizedHandleScope& hs, - std::unordered_set<std::string>* image_class_descriptors, - Thread* self, - ClassLinker* linker) { - std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(hs, - image_class_descriptors, - self, - linker)); - return res.release(); - } - - ~ClinitImageUpdate() { - // Allow others to suspend again. - self_->EndAssertNoThreadSuspension(old_cause_); - } - - // Visitor for VisitReferences. - void operator()(ObjPtr<mirror::Object> object, - MemberOffset field_offset, - bool /* is_static */) const - REQUIRES_SHARED(Locks::mutator_lock_) { - mirror::Object* ref = object->GetFieldObject<mirror::Object>(field_offset); - if (ref != nullptr) { - VisitClinitClassesObject(ref); - } - } - - // java.lang.ref.Reference visitor for VisitReferences. - void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED, - ObjPtr<mirror::Reference> ref ATTRIBUTE_UNUSED) const {} - - // Ignore class native roots. - void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) - const {} - void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {} - - void Walk() REQUIRES_SHARED(Locks::mutator_lock_) { - // Use the initial classes as roots for a search. - for (Handle<mirror::Class> klass_root : image_classes_) { - VisitClinitClassesObject(klass_root.Get()); - } - Thread* self = Thread::Current(); - ScopedAssertNoThreadSuspension ants(__FUNCTION__); - for (Handle<mirror::Class> h_klass : to_insert_) { - MaybeAddToImageClasses(self, h_klass.Get(), image_class_descriptors_); - } - } - - private: - class FindImageClassesVisitor : public ClassVisitor { - public: - explicit FindImageClassesVisitor(VariableSizedHandleScope& hs, - ClinitImageUpdate* data) - : data_(data), - hs_(hs) {} - - bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { - std::string temp; - const char* name = klass->GetDescriptor(&temp); - if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) { - data_->image_classes_.push_back(hs_.NewHandle(klass)); - } else { - // Check whether it is initialized and has a clinit. They must be kept, too. 
- if (klass->IsInitialized() && klass->FindClassInitializer( - Runtime::Current()->GetClassLinker()->GetImagePointerSize()) != nullptr) { - data_->image_classes_.push_back(hs_.NewHandle(klass)); - } - } - return true; - } - - private: - ClinitImageUpdate* const data_; - VariableSizedHandleScope& hs_; - }; - - ClinitImageUpdate(VariableSizedHandleScope& hs, - std::unordered_set<std::string>* image_class_descriptors, - Thread* self, - ClassLinker* linker) REQUIRES_SHARED(Locks::mutator_lock_) - : hs_(hs), - image_class_descriptors_(image_class_descriptors), - self_(self) { - CHECK(linker != nullptr); - CHECK(image_class_descriptors != nullptr); - - // Make sure nobody interferes with us. - old_cause_ = self->StartAssertNoThreadSuspension("Boot image closure"); - - // Find all the already-marked classes. - WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); - FindImageClassesVisitor visitor(hs_, this); - linker->VisitClasses(&visitor); - } - - void VisitClinitClassesObject(mirror::Object* object) const - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(object != nullptr); - if (marked_objects_.find(object) != marked_objects_.end()) { - // Already processed. - return; - } - - // Mark it. - marked_objects_.insert(object); - - if (object->IsClass()) { - // Add to the TODO list since MaybeAddToImageClasses may cause thread suspension. Thread - // suspension is not safe to do in VisitObjects or VisitReferences. - to_insert_.push_back(hs_.NewHandle(object->AsClass())); - } else { - // Else visit the object's class. - VisitClinitClassesObject(object->GetClass()); - } - - // If it is not a DexCache, visit all references. - if (!object->IsDexCache()) { - object->VisitReferences(*this, *this); - } - } - - VariableSizedHandleScope& hs_; - mutable std::vector<Handle<mirror::Class>> to_insert_; - mutable std::unordered_set<mirror::Object*> marked_objects_; - std::unordered_set<std::string>* const image_class_descriptors_; - std::vector<Handle<mirror::Class>> image_classes_; - Thread* const self_; - const char* old_cause_; - - DISALLOW_COPY_AND_ASSIGN(ClinitImageUpdate); -}; - -void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { - if (GetCompilerOptions().IsBootImage()) { - TimingLogger::ScopedTiming t("UpdateImageClasses", timings); - - Runtime* runtime = Runtime::Current(); - - // Suspend all threads. - ScopedSuspendAll ssa(__FUNCTION__); - - VariableSizedHandleScope hs(Thread::Current()); - std::string error_msg; - std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(hs, - image_classes_.get(), - Thread::Current(), - runtime->GetClassLinker())); - - // Do the marking. - update->Walk(); - } -} - -bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { - Runtime* runtime = Runtime::Current(); - if (!runtime->IsAotCompiler()) { - DCHECK(runtime->UseJitCompilation()); - // Having the klass reference here implies that the klass is already loaded. - return true; - } - if (!GetCompilerOptions().IsBootImage()) { - // Assume loaded only if klass is in the boot image. App classes cannot be assumed - // loaded because we don't even know what class loader will be used to load them.
- bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace(); - return class_in_image; - } - std::string temp; - const char* descriptor = klass->GetDescriptor(&temp); - return IsImageClass(descriptor); -} - -bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, - ObjPtr<mirror::Class> resolved_class) { - if (resolved_class == nullptr) { - stats_->TypeNeedsAccessCheck(); - return false; // Unknown class needs access checks. - } - bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible. - if (!is_accessible) { - if (referrer_class == nullptr) { - stats_->TypeNeedsAccessCheck(); - return false; // Incomplete referrer knowledge needs access check. - } - // Perform access check, will return true if access is ok or false if we're going to have to - // check this at runtime (for example for class loaders). - is_accessible = referrer_class->CanAccess(resolved_class); - } - if (is_accessible) { - stats_->TypeDoesntNeedAccessCheck(); - } else { - stats_->TypeNeedsAccessCheck(); - } - return is_accessible; -} - -bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, - ObjPtr<mirror::Class> resolved_class, - bool* finalizable) { - if (resolved_class == nullptr) { - stats_->TypeNeedsAccessCheck(); - // Be conservative. - *finalizable = true; - return false; // Unknown class needs access checks. - } - *finalizable = resolved_class->IsFinalizable(); - bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible. - if (!is_accessible) { - if (referrer_class == nullptr) { - stats_->TypeNeedsAccessCheck(); - return false; // Incomplete referrer knowledge needs access check. - } - // Perform access and instantiable checks, will return true if access is ok or false if we're - // going to have to check this at runtime (for example for class loaders). - is_accessible = referrer_class->CanAccess(resolved_class); - } - bool result = is_accessible && resolved_class->IsInstantiable(); - if (result) { - stats_->TypeDoesntNeedAccessCheck(); - } else { - stats_->TypeNeedsAccessCheck(); - } - return result; -} - -void CompilerDriver::ProcessedInstanceField(bool resolved) { - if (!resolved) { - stats_->UnresolvedInstanceField(); - } else { - stats_->ResolvedInstanceField(); - } -} - -void CompilerDriver::ProcessedStaticField(bool resolved, bool local) { - if (!resolved) { - stats_->UnresolvedStaticField(); - } else if (local) { - stats_->ResolvedLocalStaticField(); - } else { - stats_->ResolvedStaticField(); - } -} - -ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, - const DexCompilationUnit* mUnit, - bool is_put, - const ScopedObjectAccess& soa) { - // Try to resolve the field and compiling method's class. - ArtField* resolved_field; - ObjPtr<mirror::Class> referrer_class; - Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache()); - { - Handle<mirror::ClassLoader> class_loader = mUnit->GetClassLoader(); - resolved_field = ResolveField(soa, dex_cache, class_loader, field_idx, /* is_static */ false); - referrer_class = resolved_field != nullptr - ? ResolveCompilingMethodsClass(soa, dex_cache, class_loader, mUnit) : nullptr; - } - bool can_link = false; - if (resolved_field != nullptr && referrer_class != nullptr) { - std::pair<bool, bool> fast_path = IsFastInstanceField( - dex_cache.Get(), referrer_class, resolved_field, field_idx); - can_link = is_put ? 
fast_path.second : fast_path.first; - } - ProcessedInstanceField(can_link); - return can_link ? resolved_field : nullptr; -} - -bool CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, - bool is_put, MemberOffset* field_offset, - bool* is_volatile) { - ScopedObjectAccess soa(Thread::Current()); - ArtField* resolved_field = ComputeInstanceFieldInfo(field_idx, mUnit, is_put, soa); - - if (resolved_field == nullptr) { - // Conservative defaults. - *is_volatile = true; - *field_offset = MemberOffset(static_cast<size_t>(-1)); - return false; - } else { - *is_volatile = resolved_field->IsVolatile(); - *field_offset = resolved_field->GetOffset(); - return true; - } -} - -const VerifiedMethod* CompilerDriver::GetVerifiedMethod(const DexFile* dex_file, - uint32_t method_idx) const { - MethodReference ref(dex_file, method_idx); - return verification_results_->GetVerifiedMethod(ref); -} - -bool CompilerDriver::IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc) { - if (!compiler_options_->IsVerificationEnabled()) { - // If we didn't verify, every cast has to be treated as non-safe. - return false; - } - DCHECK(mUnit->GetVerifiedMethod() != nullptr); - bool result = mUnit->GetVerifiedMethod()->IsSafeCast(dex_pc); - if (result) { - stats_->SafeCast(); - } else { - stats_->NotASafeCast(); - } - return result; -} - -class CompilationVisitor { - public: - virtual ~CompilationVisitor() {} - virtual void Visit(size_t index) = 0; -}; - -class ParallelCompilationManager { - public: - ParallelCompilationManager(ClassLinker* class_linker, - jobject class_loader, - CompilerDriver* compiler, - const DexFile* dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool) - : index_(0), - class_linker_(class_linker), - class_loader_(class_loader), - compiler_(compiler), - dex_file_(dex_file), - dex_files_(dex_files), - thread_pool_(thread_pool) {} - - ClassLinker* GetClassLinker() const { - CHECK(class_linker_ != nullptr); - return class_linker_; - } - - jobject GetClassLoader() const { - return class_loader_; - } - - CompilerDriver* GetCompiler() const { - CHECK(compiler_ != nullptr); - return compiler_; - } - - const DexFile* GetDexFile() const { - CHECK(dex_file_ != nullptr); - return dex_file_; - } - - const std::vector<const DexFile*>& GetDexFiles() const { - return dex_files_; - } - - void ForAll(size_t begin, size_t end, CompilationVisitor* visitor, size_t work_units) - REQUIRES(!*Locks::mutator_lock_) { - ForAllLambda(begin, end, [visitor](size_t index) { visitor->Visit(index); }, work_units); - } - - template <typename Fn> - void ForAllLambda(size_t begin, size_t end, Fn fn, size_t work_units) - REQUIRES(!*Locks::mutator_lock_) { - Thread* self = Thread::Current(); - self->AssertNoPendingException(); - CHECK_GT(work_units, 0U); - - index_.StoreRelaxed(begin); - for (size_t i = 0; i < work_units; ++i) { - thread_pool_->AddTask(self, new ForAllClosureLambda<Fn>(this, end, fn)); - } - thread_pool_->StartWorkers(self); - - // Ensure we're suspended while we're blocked waiting for the other threads to finish (worker - // thread destructor's called below perform join). - CHECK_NE(self->GetState(), kRunnable); - - // Wait for all the worker threads to finish. - thread_pool_->Wait(self, true, false); - - // And stop the workers accepting jobs. 
- thread_pool_->StopWorkers(self); - } - - size_t NextIndex() { - return index_.FetchAndAddSequentiallyConsistent(1); - } - - private: - template <typename Fn> - class ForAllClosureLambda : public Task { - public: - ForAllClosureLambda(ParallelCompilationManager* manager, size_t end, Fn fn) - : manager_(manager), - end_(end), - fn_(fn) {} - - void Run(Thread* self) OVERRIDE { - while (true) { - const size_t index = manager_->NextIndex(); - if (UNLIKELY(index >= end_)) { - break; - } - fn_(index); - self->AssertNoPendingException(); - } - } - - void Finalize() OVERRIDE { - delete this; - } - - private: - ParallelCompilationManager* const manager_; - const size_t end_; - Fn fn_; - }; - - AtomicInteger index_; - ClassLinker* const class_linker_; - const jobject class_loader_; - CompilerDriver* const compiler_; - const DexFile* const dex_file_; - const std::vector<const DexFile*>& dex_files_; - ThreadPool* const thread_pool_; - - DISALLOW_COPY_AND_ASSIGN(ParallelCompilationManager); -}; - -// A fast version of SkipClass above if the class pointer is available -// that avoids the expensive FindInClassPath search. -static bool SkipClass(jobject class_loader, const DexFile& dex_file, ObjPtr<mirror::Class> klass) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(klass != nullptr); - const DexFile& original_dex_file = *klass->GetDexCache()->GetDexFile(); - if (&dex_file != &original_dex_file) { - if (class_loader == nullptr) { - LOG(WARNING) << "Skipping class " << klass->PrettyDescriptor() << " from " - << dex_file.GetLocation() << " previously found in " - << original_dex_file.GetLocation(); - } - return true; - } - return false; -} - -static void CheckAndClearResolveException(Thread* self) - REQUIRES_SHARED(Locks::mutator_lock_) { - CHECK(self->IsExceptionPending()); - mirror::Throwable* exception = self->GetException(); - std::string temp; - const char* descriptor = exception->GetClass()->GetDescriptor(&temp); - const char* expected_exceptions[] = { - "Ljava/lang/IllegalAccessError;", - "Ljava/lang/IncompatibleClassChangeError;", - "Ljava/lang/InstantiationError;", - "Ljava/lang/LinkageError;", - "Ljava/lang/NoClassDefFoundError;", - "Ljava/lang/NoSuchFieldError;", - "Ljava/lang/NoSuchMethodError;" - }; - bool found = false; - for (size_t i = 0; (found == false) && (i < arraysize(expected_exceptions)); ++i) { - if (strcmp(descriptor, expected_exceptions[i]) == 0) { - found = true; - } - } - if (!found) { - LOG(FATAL) << "Unexpected exception " << exception->Dump(); - } - self->ClearException(); -} - -bool CompilerDriver::RequiresConstructorBarrier(const DexFile& dex_file, - uint16_t class_def_idx) const { - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_idx); - const uint8_t* class_data = dex_file.GetClassData(class_def); - if (class_data == nullptr) { - // Empty class such as a marker interface. - return false; - } - ClassDataItemIterator it(dex_file, class_data); - it.SkipStaticFields(); - // We require a constructor barrier if there are final instance fields. 
- while (it.HasNextInstanceField()) { - if (it.MemberIsFinal()) { - return true; - } - it.Next(); - } - return false; -} - -class ResolveClassFieldsAndMethodsVisitor : public CompilationVisitor { - public: - explicit ResolveClassFieldsAndMethodsVisitor(const ParallelCompilationManager* manager) - : manager_(manager) {} - - void Visit(size_t class_def_index) OVERRIDE REQUIRES(!Locks::mutator_lock_) { - ScopedTrace trace(__FUNCTION__); - Thread* const self = Thread::Current(); - jobject jclass_loader = manager_->GetClassLoader(); - const DexFile& dex_file = *manager_->GetDexFile(); - ClassLinker* class_linker = manager_->GetClassLinker(); - - // If an instance field is final then we need to have a barrier on the return, static final - // fields are assigned within the lock held for class initialization. Conservatively assume - // constructor barriers are always required. - bool requires_constructor_barrier = true; - - // Method and Field are the worst. We can't resolve without either - // context from the code use (to disambiguate virtual vs direct - // method and instance vs static field) or from class - // definitions. While the compiler will resolve what it can as it - // needs it, here we try to resolve fields and methods used in class - // definitions, since many of them many never be referenced by - // generated code. - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ScopedObjectAccess soa(self); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache( - soa.Self(), dex_file))); - // Resolve the class. - ObjPtr<mirror::Class> klass = - class_linker->ResolveType(class_def.class_idx_, dex_cache, class_loader); - bool resolve_fields_and_methods; - if (klass == nullptr) { - // Class couldn't be resolved, for example, super-class is in a different dex file. Don't - // attempt to resolve methods and fields when there is no declaring class. - CheckAndClearResolveException(soa.Self()); - resolve_fields_and_methods = false; - } else { - // We successfully resolved a class, should we skip it? - if (SkipClass(jclass_loader, dex_file, klass)) { - return; - } - // We want to resolve the methods and fields eagerly. - resolve_fields_and_methods = true; - } - // Note the class_data pointer advances through the headers, - // static fields, instance fields, direct methods, and virtual - // methods. - const uint8_t* class_data = dex_file.GetClassData(class_def); - if (class_data == nullptr) { - // Empty class such as a marker interface. - requires_constructor_barrier = false; - } else { - ClassDataItemIterator it(dex_file, class_data); - while (it.HasNextStaticField()) { - if (resolve_fields_and_methods) { - ArtField* field = class_linker->ResolveField( - it.GetMemberIndex(), dex_cache, class_loader, /* is_static */ true); - if (field == nullptr) { - CheckAndClearResolveException(soa.Self()); - } - } - it.Next(); - } - // We require a constructor barrier if there are final instance fields. 
- requires_constructor_barrier = false; - while (it.HasNextInstanceField()) { - if (it.MemberIsFinal()) { - requires_constructor_barrier = true; - } - if (resolve_fields_and_methods) { - ArtField* field = class_linker->ResolveField( - it.GetMemberIndex(), dex_cache, class_loader, /* is_static */ false); - if (field == nullptr) { - CheckAndClearResolveException(soa.Self()); - } - } - it.Next(); - } - if (resolve_fields_and_methods) { - while (it.HasNextMethod()) { - ArtMethod* method = class_linker->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( - it.GetMemberIndex(), - dex_cache, - class_loader, - /* referrer */ nullptr, - it.GetMethodInvokeType(class_def)); - if (method == nullptr) { - CheckAndClearResolveException(soa.Self()); - } - it.Next(); - } - DCHECK(!it.HasNext()); - } - } - manager_->GetCompiler()->SetRequiresConstructorBarrier(self, - &dex_file, - class_def_index, - requires_constructor_barrier); - } - - private: - const ParallelCompilationManager* const manager_; -}; - -class ResolveTypeVisitor : public CompilationVisitor { - public: - explicit ResolveTypeVisitor(const ParallelCompilationManager* manager) : manager_(manager) { - } - void Visit(size_t type_idx) OVERRIDE REQUIRES(!Locks::mutator_lock_) { - // Class derived values are more complicated, they require the linker and loader. - ScopedObjectAccess soa(Thread::Current()); - ClassLinker* class_linker = manager_->GetClassLinker(); - const DexFile& dex_file = *manager_->GetDexFile(); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(manager_->GetClassLoader()))); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->RegisterDexFile( - dex_file, - class_loader.Get()))); - ObjPtr<mirror::Class> klass = (dex_cache != nullptr) - ? class_linker->ResolveType(dex::TypeIndex(type_idx), dex_cache, class_loader) - : nullptr; - - if (klass == nullptr) { - soa.Self()->AssertPendingException(); - mirror::Throwable* exception = soa.Self()->GetException(); - VLOG(compiler) << "Exception during type resolution: " << exception->Dump(); - if (exception->GetClass()->DescriptorEquals("Ljava/lang/OutOfMemoryError;")) { - // There's little point continuing compilation if the heap is exhausted. - LOG(FATAL) << "Out of memory during type resolution for compilation"; - } - soa.Self()->ClearException(); - } - } - - private: - const ParallelCompilationManager* const manager_; -}; - -void CompilerDriver::ResolveDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - - // TODO: we could resolve strings here, although the string table is largely filled with class - // and method names. - - ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, - thread_pool); - if (GetCompilerOptions().IsBootImage()) { - // For images we resolve all types, such as array, whereas for applications just those with - // classdefs are resolved by ResolveClassFieldsAndMethods. 
- TimingLogger::ScopedTiming t("Resolve Types", timings); - ResolveTypeVisitor visitor(&context); - context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count); - } - - TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings); - ResolveClassFieldsAndMethodsVisitor visitor(&context); - context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count); -} - -void CompilerDriver::SetVerified(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - // This can be run in parallel. - for (const DexFile* dex_file : dex_files) { - CHECK(dex_file != nullptr); - SetVerifiedDexFile(class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings); - } -} - -static void PopulateVerifiedMethods(const DexFile& dex_file, - uint32_t class_def_index, - VerificationResults* verification_results) { - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const uint8_t* class_data = dex_file.GetClassData(class_def); - if (class_data == nullptr) { - return; - } - ClassDataItemIterator it(dex_file, class_data); - it.SkipAllFields(); - - while (it.HasNextMethod()) { - verification_results->CreateVerifiedMethodFor(MethodReference(&dex_file, it.GetMemberIndex())); - it.Next(); - } - DCHECK(!it.HasNext()); -} - -static void LoadAndUpdateStatus(const DexFile& dex_file, - const DexFile::ClassDef& class_def, - ClassStatus status, - Handle<mirror::ClassLoader> class_loader, - Thread* self) - REQUIRES_SHARED(Locks::mutator_lock_) { - StackHandleScope<1> hs(self); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Handle<mirror::Class> cls(hs.NewHandle<mirror::Class>( - class_linker->FindClass(self, descriptor, class_loader))); - if (cls != nullptr) { - // Check that the class is resolved with the current dex file. We might get - // a boot image class, or a class in a different dex file for multidex, and - // we should not update the status in that case. - if (&cls->GetDexFile() == &dex_file) { - ObjectLock<mirror::Class> lock(self, cls); - mirror::Class::SetStatus(cls, status, self); - } - } else { - DCHECK(self->IsExceptionPending()); - self->ClearException(); - } -} - -bool CompilerDriver::FastVerify(jobject jclass_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - verifier::VerifierDeps* verifier_deps = - Runtime::Current()->GetCompilerCallbacks()->GetVerifierDeps(); - // If there exist VerifierDeps that aren't the ones we just created to output, use them to verify. - if (verifier_deps == nullptr || verifier_deps->OutputOnly()) { - return false; - } - TimingLogger::ScopedTiming t("Fast Verify", timings); - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - if (!verifier_deps->ValidateDependencies(class_loader, soa.Self())) { - return false; - } - - bool compiler_only_verifies = !GetCompilerOptions().IsAnyCompilationEnabled(); - - // We successfully validated the dependencies, now update class status - // of verified classes. Note that the dependencies also record which classes - // could not be fully verified; we could try again, but that would hurt verification - // time. So instead we assume these classes still need to be verified at - // runtime. - for (const DexFile* dex_file : dex_files) { - // Fetch the list of unverified classes. 
- const std::set<dex::TypeIndex>& unverified_classes = - verifier_deps->GetUnverifiedClasses(*dex_file); - for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) { - const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); - if (unverified_classes.find(class_def.class_idx_) == unverified_classes.end()) { - if (compiler_only_verifies) { - // Just update the compiled_classes_ map. The compiler doesn't need to resolve - // the type. - ClassReference ref(dex_file, i); - ClassStatus existing = ClassStatus::kNotReady; - DCHECK(compiled_classes_.Get(ref, &existing)) << ref.dex_file->GetLocation(); - ClassStateTable::InsertResult result = - compiled_classes_.Insert(ref, existing, ClassStatus::kVerified); - CHECK_EQ(result, ClassStateTable::kInsertResultSuccess); - } else { - // Update the class status, so later compilation stages know they don't need to verify - // the class. - LoadAndUpdateStatus( - *dex_file, class_def, ClassStatus::kVerified, class_loader, soa.Self()); - // Create `VerifiedMethod`s for each methods, the compiler expects one for - // quickening or compiling. - // Note that this means: - // - We're only going to compile methods that did verify. - // - Quickening will not do checkcast ellision. - // TODO(ngeoffray): Reconsider this once we refactor compiler filters. - PopulateVerifiedMethods(*dex_file, i, verification_results_); - } - } else if (!compiler_only_verifies) { - // Make sure later compilation stages know they should not try to verify - // this class again. - LoadAndUpdateStatus(*dex_file, - class_def, - ClassStatus::kRetryVerificationAtRuntime, - class_loader, - soa.Self()); - } - } - } - return true; -} - -void CompilerDriver::Verify(jobject jclass_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - if (FastVerify(jclass_loader, dex_files, timings)) { - return; - } - - // If there is no existing `verifier_deps` (because of non-existing vdex), or - // the existing `verifier_deps` is not valid anymore, create a new one for - // non boot image compilation. The verifier will need it to record the new dependencies. - // Then dex2oat can update the vdex file with these new dependencies. - if (!GetCompilerOptions().IsBootImage()) { - // Dex2oat creates the verifier deps. - // Create the main VerifierDeps, and set it to this thread. - verifier::VerifierDeps* verifier_deps = - Runtime::Current()->GetCompilerCallbacks()->GetVerifierDeps(); - CHECK(verifier_deps != nullptr); - Thread::Current()->SetVerifierDeps(verifier_deps); - // Create per-thread VerifierDeps to avoid contention on the main one. - // We will merge them after verification. - for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) { - worker->GetThread()->SetVerifierDeps(new verifier::VerifierDeps(dex_files_for_oat_file_)); - } - } - - // Verification updates VerifierDeps and needs to run single-threaded to be deterministic. - bool force_determinism = GetCompilerOptions().IsForceDeterminism(); - ThreadPool* verify_thread_pool = - force_determinism ? single_thread_pool_.get() : parallel_thread_pool_.get(); - size_t verify_thread_count = force_determinism ? 1U : parallel_thread_count_; - for (const DexFile* dex_file : dex_files) { - CHECK(dex_file != nullptr); - VerifyDexFile(jclass_loader, - *dex_file, - dex_files, - verify_thread_pool, - verify_thread_count, - timings); - } - - if (!GetCompilerOptions().IsBootImage()) { - // Merge all VerifierDeps into the main one. 
- verifier::VerifierDeps* verifier_deps = Thread::Current()->GetVerifierDeps(); - for (ThreadPoolWorker* worker : parallel_thread_pool_->GetWorkers()) { - verifier::VerifierDeps* thread_deps = worker->GetThread()->GetVerifierDeps(); - worker->GetThread()->SetVerifierDeps(nullptr); - verifier_deps->MergeWith(*thread_deps, dex_files_for_oat_file_); - delete thread_deps; - } - Thread::Current()->SetVerifierDeps(nullptr); - } -} - -class VerifyClassVisitor : public CompilationVisitor { - public: - VerifyClassVisitor(const ParallelCompilationManager* manager, verifier::HardFailLogMode log_level) - : manager_(manager), log_level_(log_level) {} - - virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { - ScopedTrace trace(__FUNCTION__); - ScopedObjectAccess soa(Thread::Current()); - const DexFile& dex_file = *manager_->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ClassLinker* class_linker = manager_->GetClassLinker(); - jobject jclass_loader = manager_->GetClassLoader(); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); - verifier::FailureKind failure_kind; - if (klass == nullptr) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); - - /* - * At compile time, we can still structurally verify the class even if FindClass fails. - * This is to ensure the class is structurally sound for compilation. An unsound class - * will be rejected by the verifier and later skipped during compilation in the compiler. - */ - Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache( - soa.Self(), dex_file))); - std::string error_msg; - failure_kind = - verifier::MethodVerifier::VerifyClass(soa.Self(), - &dex_file, - dex_cache, - class_loader, - class_def, - Runtime::Current()->GetCompilerCallbacks(), - true /* allow soft failures */, - log_level_, - &error_msg); - if (failure_kind == verifier::FailureKind::kHardFailure) { - LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor) - << " because: " << error_msg; - manager_->GetCompiler()->SetHadHardVerifierFailure(); - } else if (failure_kind == verifier::FailureKind::kSoftFailure) { - manager_->GetCompiler()->AddSoftVerifierFailure(); - } else { - // Force a soft failure for the VerifierDeps. This is a sanity measure, as - // the vdex file already records that the class hasn't been resolved. It avoids - // trying to do future verification optimizations when processing the vdex file. - DCHECK(failure_kind == verifier::FailureKind::kNoFailure) << failure_kind; - failure_kind = verifier::FailureKind::kSoftFailure; - } - } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) { - CHECK(klass->IsResolved()) << klass->PrettyClass(); - failure_kind = class_linker->VerifyClass(soa.Self(), klass, log_level_); - - if (klass->IsErroneous()) { - // ClassLinker::VerifyClass throws, which isn't useful in the compiler. 
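// An illustrative standalone sketch of the accumulate-then-merge scheme used by Verify() above:
// each worker thread records into its own dependency object without synchronization, and all
// per-thread results are merged into the main instance after the workers have joined. The
// DepsSketch type and RunAndMergeDeps helper are simplified stand-ins, not ART's VerifierDeps API.
#include <cstddef>
#include <functional>
#include <set>
#include <thread>
#include <vector>

struct DepsSketch {
  std::set<int> verified_class_ids;
  void MergeWith(const DepsSketch& other) {
    verified_class_ids.insert(other.verified_class_ids.begin(),
                              other.verified_class_ids.end());
  }
};

DepsSketch RunAndMergeDeps(size_t num_workers,
                           const std::function<void(size_t, DepsSketch*)>& work) {
  std::vector<DepsSketch> per_thread(num_workers);
  std::vector<std::thread> threads;
  for (size_t i = 0; i < num_workers; ++i) {
    // Each worker writes only to its own slot, so no locking is needed while recording.
    threads.emplace_back([&work, &per_thread, i]() { work(i, &per_thread[i]); });
  }
  for (std::thread& t : threads) {
    t.join();
  }
  DepsSketch main_deps;
  for (const DepsSketch& deps : per_thread) {
    main_deps.MergeWith(deps);
  }
  return main_deps;
}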
- CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); - manager_->GetCompiler()->SetHadHardVerifierFailure(); - } else if (failure_kind == verifier::FailureKind::kSoftFailure) { - manager_->GetCompiler()->AddSoftVerifierFailure(); - } - - CHECK(klass->ShouldVerifyAtRuntime() || klass->IsVerified() || klass->IsErroneous()) - << klass->PrettyDescriptor() << ": state=" << klass->GetStatus(); - - // Class has a meaningful status for the compiler now, record it. - ClassReference ref(manager_->GetDexFile(), class_def_index); - manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); - - // It is *very* problematic if there are resolution errors in the boot classpath. - // - // It is also bad if classes fail verification. For example, we rely on things working - // OK without verification when the decryption dialog is brought up. It is thus highly - // recommended to compile the boot classpath with - // --abort-on-hard-verifier-error --abort-on-soft-verifier-error - // which is the default build system configuration. - if (kIsDebugBuild) { - if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) { - if (!klass->IsResolved() || klass->IsErroneous()) { - LOG(FATAL) << "Boot classpath class " << klass->PrettyClass() - << " failed to resolve/is erroneous: state= " << klass->GetStatus(); - UNREACHABLE(); - } - } - if (klass->IsVerified()) { - DCHECK_EQ(failure_kind, verifier::FailureKind::kNoFailure); - } else if (klass->ShouldVerifyAtRuntime()) { - DCHECK_EQ(failure_kind, verifier::FailureKind::kSoftFailure); - } else { - DCHECK_EQ(failure_kind, verifier::FailureKind::kHardFailure); - } - } - } else { - // Make the skip a soft failure, essentially being considered as verify at runtime. - failure_kind = verifier::FailureKind::kSoftFailure; - } - verifier::VerifierDeps::MaybeRecordVerificationStatus( - dex_file, class_def.class_idx_, failure_kind); - soa.Self()->AssertNoPendingException(); - } - - private: - const ParallelCompilationManager* const manager_; - const verifier::HardFailLogMode log_level_; -}; - -void CompilerDriver::VerifyDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) { - TimingLogger::ScopedTiming t("Verify Dex File", timings); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, - thread_pool); - bool abort_on_verifier_failures = GetCompilerOptions().AbortOnHardVerifierFailure() - || GetCompilerOptions().AbortOnSoftVerifierFailure(); - verifier::HardFailLogMode log_level = abort_on_verifier_failures - ? 
verifier::HardFailLogMode::kLogInternalFatal - : verifier::HardFailLogMode::kLogWarning; - VerifyClassVisitor visitor(&context, log_level); - context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count); -} - -class SetVerifiedClassVisitor : public CompilationVisitor { - public: - explicit SetVerifiedClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} - - virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { - ScopedTrace trace(__FUNCTION__); - ScopedObjectAccess soa(Thread::Current()); - const DexFile& dex_file = *manager_->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ClassLinker* class_linker = manager_->GetClassLinker(); - jobject jclass_loader = manager_->GetClassLoader(); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); - // Class might have failed resolution. Then don't set it to verified. - if (klass != nullptr) { - // Only do this if the class is resolved. If even resolution fails, quickening will go very, - // very wrong. - if (klass->IsResolved() && !klass->IsErroneousResolved()) { - if (klass->GetStatus() < ClassStatus::kVerified) { - ObjectLock<mirror::Class> lock(soa.Self(), klass); - // Set class status to verified. - mirror::Class::SetStatus(klass, ClassStatus::kVerified, soa.Self()); - // Mark methods as pre-verified. If we don't do this, the interpreter will run with - // access checks. - klass->SetSkipAccessChecksFlagOnAllMethods( - GetInstructionSetPointerSize(manager_->GetCompiler()->GetInstructionSet())); - klass->SetVerificationAttempted(); - } - // Record the final class status if necessary. 
- ClassReference ref(manager_->GetDexFile(), class_def_index); - manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus()); - } - } else { - Thread* self = soa.Self(); - DCHECK(self->IsExceptionPending()); - self->ClearException(); - } - } - - private: - const ParallelCompilationManager* const manager_; -}; - -void CompilerDriver::SetVerifiedDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) { - TimingLogger::ScopedTiming t("Verify Dex File", timings); - if (!compiled_classes_.HaveDexFile(&dex_file)) { - compiled_classes_.AddDexFile(&dex_file); - } - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, - thread_pool); - SetVerifiedClassVisitor visitor(&context); - context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count); -} - -class InitializeClassVisitor : public CompilationVisitor { - public: - explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} - - void Visit(size_t class_def_index) OVERRIDE { - ScopedTrace trace(__FUNCTION__); - jobject jclass_loader = manager_->GetClassLoader(); - const DexFile& dex_file = *manager_->GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def.class_idx_); - const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_); - - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader))); - - if (klass != nullptr && !SkipClass(manager_->GetClassLoader(), dex_file, klass.Get())) { - TryInitializeClass(klass, class_loader); - } - // Clear any class not found or verification exceptions. - soa.Self()->ClearException(); - } - - // A helper function for initializing klass. - void TryInitializeClass(Handle<mirror::Class> klass, Handle<mirror::ClassLoader>& class_loader) - REQUIRES_SHARED(Locks::mutator_lock_) { - const DexFile& dex_file = klass->GetDexFile(); - const DexFile::ClassDef* class_def = klass->GetClassDef(); - const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def->class_idx_); - const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_); - ScopedObjectAccessUnchecked soa(Thread::Current()); - StackHandleScope<3> hs(soa.Self()); - const bool is_boot_image = manager_->GetCompiler()->GetCompilerOptions().IsBootImage(); - const bool is_app_image = manager_->GetCompiler()->GetCompilerOptions().IsAppImage(); - - ClassStatus old_status = klass->GetStatus(); - // Don't initialize classes in boot space when compiling app image - if (is_app_image && klass->IsBootStrapClassLoaded()) { - // Also return early and don't store the class status in the recorded class status. - return; - } - // Only try to initialize classes that were successfully verified. - if (klass->IsVerified()) { - // Attempt to initialize the class but bail if we either need to initialize the super-class - // or static fields. 
- manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false); - old_status = klass->GetStatus(); - if (!klass->IsInitialized()) { - // We don't want non-trivial class initialization occurring on multiple threads due to - // deadlock problems. For example, a parent class is initialized (holding its lock) that - // refers to a sub-class in its static/class initializer causing it to try to acquire the - // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock) - // after first initializing its parents, whose locks are acquired. This leads to a - // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock. - // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather - // than use a special Object for the purpose we use the Class of java.lang.Class. - Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass())); - ObjectLock<mirror::Class> lock(soa.Self(), h_klass); - // Attempt to initialize allowing initialization of parent classes but still not static - // fields. - // Initialize dependencies first only for app image, to make TryInitialize recursive. - bool is_superclass_initialized = !is_app_image ? true : - InitializeDependencies(klass, class_loader, soa.Self()); - if (!is_app_image || (is_app_image && is_superclass_initialized)) { - manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true); - } - // Otherwise it's in app image but superclasses can't be initialized, no need to proceed. - old_status = klass->GetStatus(); - - bool too_many_encoded_fields = false; - if (!is_boot_image && klass->NumStaticFields() > kMaxEncodedFields) { - too_many_encoded_fields = true; - } - // If the class was not initialized, we can proceed to see if we can initialize static - // fields. Limit the max number of encoded fields. - if (!klass->IsInitialized() && - (is_app_image || is_boot_image) && - is_superclass_initialized && - !too_many_encoded_fields && - manager_->GetCompiler()->IsImageClass(descriptor)) { - bool can_init_static_fields = false; - if (is_boot_image) { - // We need to initialize static fields, we only do this for image classes that aren't - // marked with the $NoPreloadHolder (which implies this should not be initialized - // early). - can_init_static_fields = !StringPiece(descriptor).ends_with("$NoPreloadHolder;"); - } else { - CHECK(is_app_image); - // The boot image case doesn't need to recursively initialize the dependencies with - // special logic since the class linker already does this. - can_init_static_fields = - ClassLinker::kAppImageMayContainStrings && - !soa.Self()->IsExceptionPending() && - is_superclass_initialized && - NoClinitInDependency(klass, soa.Self(), &class_loader); - // TODO The checking for clinit can be removed since it's already - // checked when init superclass. Currently keep it because it contains - // processing of intern strings. Will be removed later when intern strings - // and clinit are both initialized. - } - - if (can_init_static_fields) { - VLOG(compiler) << "Initializing: " << descriptor; - // TODO multithreading support. We should ensure the current compilation thread has - // exclusive access to the runtime and the transaction. To achieve this, we could use - // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity - // checks in Thread::AssertThreadSuspensionIsAllowable. - Runtime* const runtime = Runtime::Current(); - // Run the class initializer in transaction mode. 
- runtime->EnterTransactionMode(is_app_image, klass.Get()); - bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true, - true); - // TODO we detach transaction from runtime to indicate we quit the transactional - // mode which prevents the GC from visiting objects modified during the transaction. - // Ensure GC is not run so don't access freed objects when aborting transaction. - - { - ScopedAssertNoThreadSuspension ants("Transaction end"); - - if (success) { - runtime->ExitTransactionMode(); - DCHECK(!runtime->IsActiveTransaction()); - } - - if (!success) { - CHECK(soa.Self()->IsExceptionPending()); - mirror::Throwable* exception = soa.Self()->GetException(); - VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " - << exception->Dump(); - std::ostream* file_log = manager_->GetCompiler()-> - GetCompilerOptions().GetInitFailureOutput(); - if (file_log != nullptr) { - *file_log << descriptor << "\n"; - *file_log << exception->Dump() << "\n"; - } - soa.Self()->ClearException(); - runtime->RollbackAllTransactions(); - CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; - } else if (is_boot_image) { - // For boot image, we want to put the updated status in the oat class since we can't - // reject the image anyways. - old_status = klass->GetStatus(); - } - } - - if (!success) { - // On failure, still intern strings of static fields and seen in <clinit>, as these - // will be created in the zygote. This is separated from the transaction code just - // above as we will allocate strings, so must be allowed to suspend. - if (&klass->GetDexFile() == manager_->GetDexFile()) { - InternStrings(klass, class_loader); - } else { - DCHECK(!is_boot_image) << "Boot image must have equal dex files"; - } - } - } - } - // If the class still isn't initialized, at least try some checks that initialization - // would do so they can be skipped at runtime. - if (!klass->IsInitialized() && - manager_->GetClassLinker()->ValidateSuperClassDescriptors(klass)) { - old_status = ClassStatus::kSuperclassValidated; - } else { - soa.Self()->ClearException(); - } - soa.Self()->AssertNoPendingException(); - } - } - // Record the final class status if necessary. - ClassReference ref(&dex_file, klass->GetDexClassDefIndex()); - // Back up the status before doing initialization for static encoded fields, - // because the static encoded branch wants to keep the status to uninitialized. - manager_->GetCompiler()->RecordClassStatus(ref, old_status); - } - - private: - void InternStrings(Handle<mirror::Class> klass, Handle<mirror::ClassLoader> class_loader) - REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(manager_->GetCompiler()->GetCompilerOptions().IsBootImage()); - DCHECK(klass->IsVerified()); - DCHECK(!klass->IsInitialized()); - - StackHandleScope<1> hs(Thread::Current()); - Handle<mirror::DexCache> dex_cache = hs.NewHandle(klass->GetDexCache()); - const DexFile::ClassDef* class_def = klass->GetClassDef(); - ClassLinker* class_linker = manager_->GetClassLinker(); - - // Check encoded final field values for strings and intern. - annotations::RuntimeEncodedStaticFieldValueIterator value_it(dex_cache, - class_loader, - manager_->GetClassLinker(), - *class_def); - for ( ; value_it.HasNext(); value_it.Next()) { - if (value_it.GetValueType() == annotations::RuntimeEncodedStaticFieldValueIterator::kString) { - // Resolve the string. This will intern the string. 
- art::ObjPtr<mirror::String> resolved = class_linker->ResolveString( - dex::StringIndex(value_it.GetJavaValue().i), dex_cache); - CHECK(resolved != nullptr); - } - } - - // Intern strings seen in <clinit>. - ArtMethod* clinit = klass->FindClassInitializer(class_linker->GetImagePointerSize()); - if (clinit != nullptr) { - for (const DexInstructionPcPair& inst : clinit->DexInstructions()) { - if (inst->Opcode() == Instruction::CONST_STRING) { - ObjPtr<mirror::String> s = class_linker->ResolveString( - dex::StringIndex(inst->VRegB_21c()), dex_cache); - CHECK(s != nullptr); - } else if (inst->Opcode() == Instruction::CONST_STRING_JUMBO) { - ObjPtr<mirror::String> s = class_linker->ResolveString( - dex::StringIndex(inst->VRegB_31c()), dex_cache); - CHECK(s != nullptr); - } - } - } - } - - bool ResolveTypesOfMethods(Thread* self, ArtMethod* m) - REQUIRES_SHARED(Locks::mutator_lock_) { - // Return value of ResolveReturnType() is discarded because resolve will be done internally. - ObjPtr<mirror::Class> rtn_type = m->ResolveReturnType(); - if (rtn_type == nullptr) { - self->ClearException(); - return false; - } - const DexFile::TypeList* types = m->GetParameterTypeList(); - if (types != nullptr) { - for (uint32_t i = 0; i < types->Size(); ++i) { - dex::TypeIndex param_type_idx = types->GetTypeItem(i).type_idx_; - ObjPtr<mirror::Class> param_type = m->ResolveClassFromTypeIndex(param_type_idx); - if (param_type == nullptr) { - self->ClearException(); - return false; - } - } - } - return true; - } - - // Pre resolve types mentioned in all method signatures before start a transaction - // since ResolveType doesn't work in transaction mode. - bool PreResolveTypes(Thread* self, const Handle<mirror::Class>& klass) - REQUIRES_SHARED(Locks::mutator_lock_) { - PointerSize pointer_size = manager_->GetClassLinker()->GetImagePointerSize(); - for (ArtMethod& m : klass->GetMethods(pointer_size)) { - if (!ResolveTypesOfMethods(self, &m)) { - return false; - } - } - if (klass->IsInterface()) { - return true; - } else if (klass->HasSuperClass()) { - StackHandleScope<1> hs(self); - MutableHandle<mirror::Class> super_klass(hs.NewHandle<mirror::Class>(klass->GetSuperClass())); - for (int i = super_klass->GetVTableLength() - 1; i >= 0; --i) { - ArtMethod* m = klass->GetVTableEntry(i, pointer_size); - ArtMethod* super_m = super_klass->GetVTableEntry(i, pointer_size); - if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) { - return false; - } - } - for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) { - super_klass.Assign(klass->GetIfTable()->GetInterface(i)); - if (klass->GetClassLoader() != super_klass->GetClassLoader()) { - uint32_t num_methods = super_klass->NumVirtualMethods(); - for (uint32_t j = 0; j < num_methods; ++j) { - ArtMethod* m = klass->GetIfTable()->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>( - j, pointer_size); - ArtMethod* super_m = super_klass->GetVirtualMethod(j, pointer_size); - if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) { - return false; - } - } - } - } - } - return true; - } - - // Initialize the klass's dependencies recursively before initializing itself. - // Checking for interfaces is also necessary since interfaces can contain - // both default methods and static encoded fields. 
- bool InitializeDependencies(const Handle<mirror::Class>& klass, - Handle<mirror::ClassLoader> class_loader, - Thread* self) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (klass->HasSuperClass()) { - ObjPtr<mirror::Class> super_class = klass->GetSuperClass(); - StackHandleScope<1> hs(self); - Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class)); - if (!handle_scope_super->IsInitialized()) { - this->TryInitializeClass(handle_scope_super, class_loader); - if (!handle_scope_super->IsInitialized()) { - return false; - } - } - } - - uint32_t num_if = klass->NumDirectInterfaces(); - for (size_t i = 0; i < num_if; i++) { - ObjPtr<mirror::Class> - interface = mirror::Class::GetDirectInterface(self, klass.Get(), i); - StackHandleScope<1> hs(self); - Handle<mirror::Class> handle_interface(hs.NewHandle(interface)); - - TryInitializeClass(handle_interface, class_loader); - - if (!handle_interface->IsInitialized()) { - return false; - } - } - - return PreResolveTypes(self, klass); - } - - // In this phase the classes containing class initializers are ignored. Make sure no - // clinit appears in kalss's super class chain and interfaces. - bool NoClinitInDependency(const Handle<mirror::Class>& klass, - Thread* self, - Handle<mirror::ClassLoader>* class_loader) - REQUIRES_SHARED(Locks::mutator_lock_) { - ArtMethod* clinit = - klass->FindClassInitializer(manager_->GetClassLinker()->GetImagePointerSize()); - if (clinit != nullptr) { - VLOG(compiler) << klass->PrettyClass() << ' ' << clinit->PrettyMethod(true); - return false; - } - if (klass->HasSuperClass()) { - ObjPtr<mirror::Class> super_class = klass->GetSuperClass(); - StackHandleScope<1> hs(self); - Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class)); - if (!NoClinitInDependency(handle_scope_super, self, class_loader)) { - return false; - } - } - - uint32_t num_if = klass->NumDirectInterfaces(); - for (size_t i = 0; i < num_if; i++) { - ObjPtr<mirror::Class> - interface = mirror::Class::GetDirectInterface(self, klass.Get(), i); - StackHandleScope<1> hs(self); - Handle<mirror::Class> handle_interface(hs.NewHandle(interface)); - if (!NoClinitInDependency(handle_interface, self, class_loader)) { - return false; - } - } - - return true; - } - - const ParallelCompilationManager* const manager_; -}; - -void CompilerDriver::InitializeClasses(jobject jni_class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - TimingLogger::ScopedTiming t("InitializeNoClinit", timings); - - // Initialization allocates objects and needs to run single-threaded to be deterministic. - bool force_determinism = GetCompilerOptions().IsForceDeterminism(); - ThreadPool* init_thread_pool = force_determinism - ? single_thread_pool_.get() - : parallel_thread_pool_.get(); - size_t init_thread_count = force_determinism ? 1U : parallel_thread_count_; - - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files, - init_thread_pool); - - if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) { - // Set the concurrency thread to 1 to support initialization for App Images since transaction - // doesn't support multithreading now. - // TODO: remove this when transactional mode supports multithreading. 
- init_thread_count = 1U; - } - InitializeClassVisitor visitor(&context); - context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count); -} - -class InitializeArrayClassesAndCreateConflictTablesVisitor : public ClassVisitor { - public: - explicit InitializeArrayClassesAndCreateConflictTablesVisitor(VariableSizedHandleScope& hs) - : hs_(hs) {} - - virtual bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE - REQUIRES_SHARED(Locks::mutator_lock_) { - if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) { - return true; - } - if (klass->IsArrayClass()) { - StackHandleScope<1> hs(Thread::Current()); - auto h_klass = hs.NewHandleWrapper(&klass); - Runtime::Current()->GetClassLinker()->EnsureInitialized(hs.Self(), h_klass, true, true); - } - // Collect handles since there may be thread suspension in future EnsureInitialized. - to_visit_.push_back(hs_.NewHandle(klass)); - return true; - } - - void FillAllIMTAndConflictTables() REQUIRES_SHARED(Locks::mutator_lock_) { - for (Handle<mirror::Class> c : to_visit_) { - // Create the conflict tables. - FillIMTAndConflictTables(c.Get()); - } - } - - private: - void FillIMTAndConflictTables(ObjPtr<mirror::Class> klass) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (!klass->ShouldHaveImt()) { - return; - } - if (visited_classes_.find(klass) != visited_classes_.end()) { - return; - } - if (klass->HasSuperClass()) { - FillIMTAndConflictTables(klass->GetSuperClass()); - } - if (!klass->IsTemp()) { - Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass); - } - visited_classes_.insert(klass); - } - - VariableSizedHandleScope& hs_; - std::vector<Handle<mirror::Class>> to_visit_; - std::unordered_set<ObjPtr<mirror::Class>, HashObjPtr> visited_classes_; -}; - -void CompilerDriver::InitializeClasses(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - for (size_t i = 0; i != dex_files.size(); ++i) { - const DexFile* dex_file = dex_files[i]; - CHECK(dex_file != nullptr); - InitializeClasses(class_loader, *dex_file, dex_files, timings); - } - if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) { - // Make sure that we call EnsureIntiailized on all the array classes to call - // SetVerificationAttempted so that the access flags are set. If we do not do this they get - // changed at runtime resulting in more dirty image pages. - // Also create conflict tables. - // Only useful if we are compiling an image (image_classes_ is not null). - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope hs(soa.Self()); - InitializeArrayClassesAndCreateConflictTablesVisitor visitor(hs); - Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(&visitor); - visitor.FillAllIMTAndConflictTables(); - } - if (GetCompilerOptions().IsBootImage()) { - // Prune garbage objects created during aborted transactions. 
- Runtime::Current()->GetHeap()->CollectGarbage(/* clear_soft_references */ true); - } -} - -template <typename CompileFn> -static void CompileDexFile(CompilerDriver* driver, - jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings, - const char* timing_name, - CompileFn compile_fn) { - TimingLogger::ScopedTiming t(timing_name, timings); - ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), - class_loader, - driver, - &dex_file, - dex_files, - thread_pool); - - auto compile = [&context, &compile_fn](size_t class_def_index) { - ScopedTrace trace(__FUNCTION__); - const DexFile& dex_file = *context.GetDexFile(); - const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); - ClassLinker* class_linker = context.GetClassLinker(); - jobject jclass_loader = context.GetClassLoader(); - ClassReference ref(&dex_file, class_def_index); - // Skip compiling classes with generic verifier failures since they will still fail at runtime - if (context.GetCompiler()->GetVerificationResults()->IsClassRejected(ref)) { - return; - } - // Use a scoped object access to perform to the quick SkipClass check. - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<3> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader))); - Handle<mirror::Class> klass( - hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader))); - Handle<mirror::DexCache> dex_cache; - if (klass == nullptr) { - soa.Self()->AssertPendingException(); - soa.Self()->ClearException(); - dex_cache = hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); - } else if (SkipClass(jclass_loader, dex_file, klass.Get())) { - return; - } else { - dex_cache = hs.NewHandle(klass->GetDexCache()); - } - - const uint8_t* class_data = dex_file.GetClassData(class_def); - if (class_data == nullptr) { - // empty class, probably a marker interface - return; - } - - // Go to native so that we don't block GC during compilation. - ScopedThreadSuspension sts(soa.Self(), kNative); - - CompilerDriver* const driver = context.GetCompiler(); - - // Can we run DEX-to-DEX compiler on this class ? - optimizer::DexToDexCompiler::CompilationLevel dex_to_dex_compilation_level = - GetDexToDexCompilationLevel(soa.Self(), *driver, jclass_loader, dex_file, class_def); - - ClassDataItemIterator it(dex_file, class_data); - it.SkipAllFields(); - - bool compilation_enabled = driver->IsClassToCompile( - dex_file.StringByTypeIdx(class_def.class_idx_)); - - // Compile direct and virtual methods. 
- int64_t previous_method_idx = -1; - while (it.HasNextMethod()) { - uint32_t method_idx = it.GetMemberIndex(); - if (method_idx == previous_method_idx) { - // smali can create dex files with two encoded_methods sharing the same method_idx - // http://code.google.com/p/smali/issues/detail?id=119 - it.Next(); - continue; - } - previous_method_idx = method_idx; - compile_fn(soa.Self(), - driver, - it.GetMethodCodeItem(), - it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), - class_def_index, - method_idx, - class_loader, - dex_file, - dex_to_dex_compilation_level, - compilation_enabled, - dex_cache); - it.Next(); - } - DCHECK(!it.HasNext()); - }; - context.ForAllLambda(0, dex_file.NumClassDefs(), compile, thread_count); -} - -void CompilerDriver::Compile(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) { - if (kDebugProfileGuidedCompilation) { - LOG(INFO) << "[ProfileGuidedCompilation] " << - ((profile_compilation_info_ == nullptr) - ? "null" - : profile_compilation_info_->DumpInfo(&dex_files)); - } - - dex_to_dex_compiler_.ClearState(); - for (const DexFile* dex_file : dex_files) { - CHECK(dex_file != nullptr); - CompileDexFile(this, - class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings, - "Compile Dex File Quick", - CompileMethodQuick); - const ArenaPool* const arena_pool = Runtime::Current()->GetArenaPool(); - const size_t arena_alloc = arena_pool->GetBytesAllocated(); - max_arena_alloc_ = std::max(arena_alloc, max_arena_alloc_); - Runtime::Current()->ReclaimArenaPoolMemory(); - } - - if (dex_to_dex_compiler_.NumCodeItemsToQuicken(Thread::Current()) > 0u) { - // TODO: Not visit all of the dex files, its probably rare that only one would have quickened - // methods though. - for (const DexFile* dex_file : dex_files) { - CompileDexFile(this, - class_loader, - *dex_file, - dex_files, - parallel_thread_pool_.get(), - parallel_thread_count_, - timings, - "Compile Dex File Dex2Dex", - CompileMethodDex2Dex); - } - dex_to_dex_compiler_.ClearState(); - } - - VLOG(compiler) << "Compile: " << GetMemoryUsageString(false); -} - -void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref, - CompiledMethod* const compiled_method, - size_t non_relative_linker_patch_count) { - DCHECK(GetCompiledMethod(method_ref) == nullptr) << method_ref.PrettyMethod(); - MethodTable::InsertResult result = compiled_methods_.Insert(method_ref, - /*expected*/ nullptr, - compiled_method); - CHECK(result == MethodTable::kInsertResultSuccess); - non_relative_linker_patch_count_.FetchAndAddRelaxed(non_relative_linker_patch_count); - DCHECK(GetCompiledMethod(method_ref) != nullptr) << method_ref.PrettyMethod(); -} - -CompiledMethod* CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) { - CompiledMethod* ret = nullptr; - CHECK(compiled_methods_.Remove(method_ref, &ret)); - return ret; -} - -bool CompilerDriver::GetCompiledClass(const ClassReference& ref, ClassStatus* status) const { - DCHECK(status != nullptr); - // The table doesn't know if something wasn't inserted. For this case it will return - // ClassStatus::kNotReady. To handle this, just assume anything we didn't try to verify - // is not compiled. 
- if (!compiled_classes_.Get(ref, status) || - *status < ClassStatus::kRetryVerificationAtRuntime) { - return false; - } - return true; -} - -ClassStatus CompilerDriver::GetClassStatus(const ClassReference& ref) const { - ClassStatus status = ClassStatus::kNotReady; - if (!GetCompiledClass(ref, &status)) { - classpath_classes_.Get(ref, &status); - } - return status; -} - -void CompilerDriver::RecordClassStatus(const ClassReference& ref, ClassStatus status) { - switch (status) { - case ClassStatus::kErrorResolved: - case ClassStatus::kErrorUnresolved: - case ClassStatus::kNotReady: - case ClassStatus::kResolved: - case ClassStatus::kRetryVerificationAtRuntime: - case ClassStatus::kVerified: - case ClassStatus::kSuperclassValidated: - case ClassStatus::kInitialized: - break; // Expected states. - default: - LOG(FATAL) << "Unexpected class status for class " - << PrettyDescriptor( - ref.dex_file->GetClassDescriptor(ref.dex_file->GetClassDef(ref.index))) - << " of " << status; - } - - ClassStateTable::InsertResult result; - ClassStateTable* table = &compiled_classes_; - do { - ClassStatus existing = ClassStatus::kNotReady; - if (!table->Get(ref, &existing)) { - // A classpath class. - if (kIsDebugBuild) { - // Check to make sure it's not a dex file for an oat file we are compiling since these - // should always succeed. These do not include classes in for used libraries. - for (const DexFile* dex_file : GetDexFilesForOatFile()) { - CHECK_NE(ref.dex_file, dex_file) << ref.dex_file->GetLocation(); - } - } - if (!classpath_classes_.HaveDexFile(ref.dex_file)) { - // Boot classpath dex file. - return; - } - table = &classpath_classes_; - table->Get(ref, &existing); - } - if (existing >= status) { - // Existing status is already better than we expect, break. - break; - } - // Update the status if we now have a greater one. This happens with vdex, - // which records a class is verified, but does not resolve it. - result = table->Insert(ref, existing, status); - CHECK(result != ClassStateTable::kInsertResultInvalidDexFile) << ref.dex_file->GetLocation(); - } while (result != ClassStateTable::kInsertResultSuccess); -} - -CompiledMethod* CompilerDriver::GetCompiledMethod(MethodReference ref) const { - CompiledMethod* compiled_method = nullptr; - compiled_methods_.Get(ref, &compiled_method); - return compiled_method; -} - -bool CompilerDriver::IsMethodVerifiedWithoutFailures(uint32_t method_idx, - uint16_t class_def_idx, - const DexFile& dex_file) const { - const VerifiedMethod* verified_method = GetVerifiedMethod(&dex_file, method_idx); - if (verified_method != nullptr) { - return !verified_method->HasVerificationFailures(); - } - - // If we can't find verification metadata, check if this is a system class (we trust that system - // classes have their methods verified). If it's not, be conservative and assume the method - // has not been verified successfully. - - // TODO: When compiling the boot image it should be safe to assume that everything is verified, - // even if methods are not found in the verification cache. 
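// An illustrative standalone sketch of the monotonic update performed by RecordClassStatus()
// above: a class status is only ever raised, and concurrent writers retry until either the raise
// succeeds or another thread has already stored something at least as high. The StatusSketch enum
// and RaiseStatus helper are simplified stand-ins, not ART's ClassStatus/ClassStateTable API.
#include <atomic>
#include <cstdint>

enum class StatusSketch : uint8_t { kNotReady, kResolved, kVerified, kInitialized };

// Returns true if this call raised the stored status to `status`.
bool RaiseStatus(std::atomic<StatusSketch>& slot, StatusSketch status) {
  StatusSketch existing = slot.load(std::memory_order_relaxed);
  while (existing < status) {
    // On failure, compare_exchange_weak reloads `existing`, so the loop exits as soon as the
    // slot already holds an equal or higher status (the "existing >= status" break above).
    if (slot.compare_exchange_weak(existing, status, std::memory_order_relaxed)) {
      return true;
    }
  }
  return false;
}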
- const char* descriptor = dex_file.GetClassDescriptor(dex_file.GetClassDef(class_def_idx)); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - bool is_system_class = class_linker->FindSystemClass(self, descriptor) != nullptr; - if (!is_system_class) { - self->ClearException(); - } - return is_system_class; -} - -size_t CompilerDriver::GetNonRelativeLinkerPatchCount() const { - return non_relative_linker_patch_count_.LoadRelaxed(); -} - -void CompilerDriver::SetRequiresConstructorBarrier(Thread* self, - const DexFile* dex_file, - uint16_t class_def_index, - bool requires) { - WriterMutexLock mu(self, requires_constructor_barrier_lock_); - requires_constructor_barrier_.emplace(ClassReference(dex_file, class_def_index), requires); -} - -bool CompilerDriver::RequiresConstructorBarrier(Thread* self, - const DexFile* dex_file, - uint16_t class_def_index) { - ClassReference class_ref(dex_file, class_def_index); - { - ReaderMutexLock mu(self, requires_constructor_barrier_lock_); - auto it = requires_constructor_barrier_.find(class_ref); - if (it != requires_constructor_barrier_.end()) { - return it->second; - } - } - WriterMutexLock mu(self, requires_constructor_barrier_lock_); - const bool requires = RequiresConstructorBarrier(*dex_file, class_def_index); - requires_constructor_barrier_.emplace(class_ref, requires); - return requires; -} - -std::string CompilerDriver::GetMemoryUsageString(bool extended) const { - std::ostringstream oss; - const gc::Heap* const heap = Runtime::Current()->GetHeap(); - const size_t java_alloc = heap->GetBytesAllocated(); - oss << "arena alloc=" << PrettySize(max_arena_alloc_) << " (" << max_arena_alloc_ << "B)"; - oss << " java alloc=" << PrettySize(java_alloc) << " (" << java_alloc << "B)"; -#if defined(__BIONIC__) || defined(__GLIBC__) - const struct mallinfo info = mallinfo(); - const size_t allocated_space = static_cast<size_t>(info.uordblks); - const size_t free_space = static_cast<size_t>(info.fordblks); - oss << " native alloc=" << PrettySize(allocated_space) << " (" << allocated_space << "B)" - << " free=" << PrettySize(free_space) << " (" << free_space << "B)"; -#endif - compiled_method_storage_.DumpMemoryUsage(oss, extended); - return oss.str(); -} - -bool CompilerDriver::MayInlineInternal(const DexFile* inlined_from, - const DexFile* inlined_into) const { - // We're not allowed to inline across dex files if we're the no-inline-from dex file. - if (inlined_from != inlined_into && - compiler_options_->GetNoInlineFromDexFile() != nullptr && - ContainsElement(*compiler_options_->GetNoInlineFromDexFile(), inlined_from)) { - return false; - } - - return true; -} - -void CompilerDriver::InitializeThreadPools() { - size_t parallel_count = parallel_thread_count_ > 0 ? 
parallel_thread_count_ - 1 : 0; - parallel_thread_pool_.reset( - new ThreadPool("Compiler driver thread pool", parallel_count)); - single_thread_pool_.reset(new ThreadPool("Single-threaded Compiler driver thread pool", 0)); -} - -void CompilerDriver::FreeThreadPools() { - parallel_thread_pool_.reset(); - single_thread_pool_.reset(); -} - -void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files) { - dex_files_for_oat_file_ = dex_files; - compiled_classes_.AddDexFiles(dex_files); - dex_to_dex_compiler_.SetDexFiles(dex_files); -} - -void CompilerDriver::SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files) { - classpath_classes_.AddDexFiles(dex_files); -} - -} // namespace art diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h deleted file mode 100644 index a5462eefe2..0000000000 --- a/compiler/driver/compiler_driver.h +++ /dev/null @@ -1,554 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DRIVER_COMPILER_DRIVER_H_ -#define ART_COMPILER_DRIVER_COMPILER_DRIVER_H_ - -#include <atomic> -#include <set> -#include <string> -#include <unordered_set> -#include <vector> - -#include "android-base/strings.h" - -#include "arch/instruction_set.h" -#include "base/array_ref.h" -#include "base/bit_utils.h" -#include "base/mutex.h" -#include "base/os.h" -#include "base/quasi_atomic.h" -#include "base/safe_map.h" -#include "base/timing_logger.h" -#include "class_status.h" -#include "compiler.h" -#include "dex/class_reference.h" -#include "dex/dex_file.h" -#include "dex/dex_file_types.h" -#include "dex/dex_to_dex_compiler.h" -#include "dex/method_reference.h" -#include "driver/compiled_method_storage.h" -#include "thread_pool.h" -#include "utils/atomic_dex_ref_map.h" -#include "utils/dex_cache_arrays_layout.h" - -namespace art { - -namespace mirror { -class Class; -class DexCache; -} // namespace mirror - -namespace verifier { -class MethodVerifier; -class VerifierDepsTest; -} // namespace verifier - -class ArtField; -class BitVector; -class CompiledMethod; -class CompilerOptions; -class DexCompilationUnit; -template<class T> class Handle; -struct InlineIGetIPutData; -class InstructionSetFeatures; -class InternTable; -enum InvokeType : uint32_t; -class MemberOffset; -template<class MirrorType> class ObjPtr; -class ParallelCompilationManager; -class ProfileCompilationInfo; -class ScopedObjectAccess; -template <class Allocator> class SrcMap; -class TimingLogger; -class VdexFile; -class VerificationResults; -class VerifiedMethod; - -enum EntryPointCallingConvention { - // ABI of invocations to a method's interpreter entry point. - kInterpreterAbi, - // ABI of calls to a method's native code, only used for native methods. - kJniAbi, - // ABI of calls to a method's quick code entry point. - kQuickAbi -}; - -class CompilerDriver { - public: - // Create a compiler targeting the requested "instruction_set". 
- // "image" should be true if image specific optimizations should be - // enabled. "image_classes" lets the compiler know what classes it - // can assume will be in the image, with null implying all available - // classes. - CompilerDriver(const CompilerOptions* compiler_options, - VerificationResults* verification_results, - Compiler::Kind compiler_kind, - InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features, - std::unordered_set<std::string>* image_classes, - std::unordered_set<std::string>* compiled_classes, - std::unordered_set<std::string>* compiled_methods, - size_t thread_count, - int swap_fd, - const ProfileCompilationInfo* profile_compilation_info); - - ~CompilerDriver(); - - // Set dex files associated with the oat file being compiled. - void SetDexFilesForOatFile(const std::vector<const DexFile*>& dex_files); - - // Set dex files classpath. - void SetClasspathDexFiles(const std::vector<const DexFile*>& dex_files); - - // Get dex files associated with the the oat file being compiled. - ArrayRef<const DexFile* const> GetDexFilesForOatFile() const { - return ArrayRef<const DexFile* const>(dex_files_for_oat_file_); - } - - void CompileAll(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - - // Compile a single Method. - void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings) - REQUIRES_SHARED(Locks::mutator_lock_); - - VerificationResults* GetVerificationResults() const; - - InstructionSet GetInstructionSet() const { - return instruction_set_; - } - - const InstructionSetFeatures* GetInstructionSetFeatures() const { - return instruction_set_features_; - } - - const CompilerOptions& GetCompilerOptions() const { - return *compiler_options_; - } - - Compiler* GetCompiler() const { - return compiler_.get(); - } - - const std::unordered_set<std::string>* GetImageClasses() const { - return image_classes_.get(); - } - - // Generate the trampolines that are invoked by unresolved direct methods. - std::unique_ptr<const std::vector<uint8_t>> CreateJniDlsymLookup() const; - std::unique_ptr<const std::vector<uint8_t>> CreateQuickGenericJniTrampoline() const; - std::unique_ptr<const std::vector<uint8_t>> CreateQuickImtConflictTrampoline() const; - std::unique_ptr<const std::vector<uint8_t>> CreateQuickResolutionTrampoline() const; - std::unique_ptr<const std::vector<uint8_t>> CreateQuickToInterpreterBridge() const; - - ClassStatus GetClassStatus(const ClassReference& ref) const; - bool GetCompiledClass(const ClassReference& ref, ClassStatus* status) const; - - CompiledMethod* GetCompiledMethod(MethodReference ref) const; - size_t GetNonRelativeLinkerPatchCount() const; - // Add a compiled method. - void AddCompiledMethod(const MethodReference& method_ref, - CompiledMethod* const compiled_method, - size_t non_relative_linker_patch_count); - CompiledMethod* RemoveCompiledMethod(const MethodReference& method_ref); - - void SetRequiresConstructorBarrier(Thread* self, - const DexFile* dex_file, - uint16_t class_def_index, - bool requires) - REQUIRES(!requires_constructor_barrier_lock_); - - // Do the <init> methods for this class require a constructor barrier (prior to the return)? - // The answer is "yes", if and only if this class has any instance final fields. - // (This must not be called for any non-<init> methods; the answer would be "no"). 
- // - // --- - // - // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end - // of the invoked constructor. The constructor barrier is a conservative implementation means of - // enforcing the freezes happen-before the object being constructed is observable by another - // thread. - // - // Note: This question only makes sense for instance constructors; - // static constructors (despite possibly having finals) never need - // a barrier. - // - // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes - // class initialization as: - // - // lock(class.lock) - // class.state = initializing - // unlock(class.lock) - // - // invoke <clinit> - // - // lock(class.lock) - // class.state = initialized - // unlock(class.lock) <-- acts as a release - // - // The last operation in the above example acts as an atomic release - // for any stores in <clinit>, which ends up being stricter - // than what a constructor barrier needs. - // - // See also QuasiAtomic::ThreadFenceForConstructor(). - bool RequiresConstructorBarrier(Thread* self, - const DexFile* dex_file, - uint16_t class_def_index) - REQUIRES(!requires_constructor_barrier_lock_); - - // Are runtime access checks necessary in the compiled code? - bool CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, - ObjPtr<mirror::Class> resolved_class) - REQUIRES_SHARED(Locks::mutator_lock_); - - // Are runtime access and instantiable checks necessary in the code? - // out_is_finalizable is set to whether the type is finalizable. - bool CanAccessInstantiableTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, - ObjPtr<mirror::Class> resolved_class, - bool* out_is_finalizable) - REQUIRES_SHARED(Locks::mutator_lock_); - - // Resolve compiling method's class. Returns null on failure. - ObjPtr<mirror::Class> ResolveCompilingMethodsClass(const ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - const DexCompilationUnit* mUnit) - REQUIRES_SHARED(Locks::mutator_lock_); - - ObjPtr<mirror::Class> ResolveClass(const ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - dex::TypeIndex type_index, - const DexCompilationUnit* mUnit) - REQUIRES_SHARED(Locks::mutator_lock_); - - // Resolve a field. Returns null on failure, including incompatible class change. - // NOTE: Unlike ClassLinker's ResolveField(), this method enforces is_static. - ArtField* ResolveField(const ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - uint32_t field_idx, - bool is_static) - REQUIRES_SHARED(Locks::mutator_lock_); - - // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset. - std::pair<bool, bool> IsFastInstanceField(ObjPtr<mirror::DexCache> dex_cache, - ObjPtr<mirror::Class> referrer_class, - ArtField* resolved_field, - uint16_t field_idx) - REQUIRES_SHARED(Locks::mutator_lock_); - - // Resolve a method. Returns null on failure, including incompatible class change. - ArtMethod* ResolveMethod( - ScopedObjectAccess& soa, - Handle<mirror::DexCache> dex_cache, - Handle<mirror::ClassLoader> class_loader, - const DexCompilationUnit* mUnit, - uint32_t method_idx, - InvokeType invoke_type) - REQUIRES_SHARED(Locks::mutator_lock_); - - void ProcessedInstanceField(bool resolved); - void ProcessedStaticField(bool resolved, bool local); - - // Can we fast path instance field access? Computes field's offset and volatility. 
- bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, - MemberOffset* field_offset, bool* is_volatile) - REQUIRES(!Locks::mutator_lock_); - - ArtField* ComputeInstanceFieldInfo(uint32_t field_idx, - const DexCompilationUnit* mUnit, - bool is_put, - const ScopedObjectAccess& soa) - REQUIRES_SHARED(Locks::mutator_lock_); - - - const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const; - bool IsSafeCast(const DexCompilationUnit* mUnit, uint32_t dex_pc); - - bool GetSupportBootImageFixup() const { - return support_boot_image_fixup_; - } - - void SetSupportBootImageFixup(bool support_boot_image_fixup) { - support_boot_image_fixup_ = support_boot_image_fixup; - } - - void SetCompilerContext(void* compiler_context) { - compiler_context_ = compiler_context; - } - - void* GetCompilerContext() const { - return compiler_context_; - } - - size_t GetThreadCount() const { - return parallel_thread_count_; - } - - void SetDedupeEnabled(bool dedupe_enabled) { - compiled_method_storage_.SetDedupeEnabled(dedupe_enabled); - } - - bool DedupeEnabled() const { - return compiled_method_storage_.DedupeEnabled(); - } - - // Checks if class specified by type_idx is one of the image_classes_ - bool IsImageClass(const char* descriptor) const; - - // Checks whether the provided class should be compiled, i.e., is in classes_to_compile_. - bool IsClassToCompile(const char* descriptor) const; - - // Checks whether the provided method should be compiled, i.e., is in method_to_compile_. - bool IsMethodToCompile(const MethodReference& method_ref) const; - - // Checks whether profile guided compilation is enabled and if the method should be compiled - // according to the profile file. - bool ShouldCompileBasedOnProfile(const MethodReference& method_ref) const; - - // Checks whether profile guided verification is enabled and if the method should be verified - // according to the profile file. - bool ShouldVerifyClassBasedOnProfile(const DexFile& dex_file, uint16_t class_idx) const; - - void RecordClassStatus(const ClassReference& ref, ClassStatus status); - - // Checks if the specified method has been verified without failures. Returns - // false if the method is not in the verification results (GetVerificationResults). - bool IsMethodVerifiedWithoutFailures(uint32_t method_idx, - uint16_t class_def_idx, - const DexFile& dex_file) const; - - // Get memory usage during compilation. - std::string GetMemoryUsageString(bool extended) const; - - void SetHadHardVerifierFailure() { - had_hard_verifier_failure_ = true; - } - void AddSoftVerifierFailure() { - number_of_soft_verifier_failures_++; - } - - Compiler::Kind GetCompilerKind() { - return compiler_kind_; - } - - CompiledMethodStorage* GetCompiledMethodStorage() { - return &compiled_method_storage_; - } - - // Can we assume that the klass is loaded? - bool CanAssumeClassIsLoaded(mirror::Class* klass) - REQUIRES_SHARED(Locks::mutator_lock_); - - bool MayInline(const DexFile* inlined_from, const DexFile* inlined_into) const { - if (!kIsTargetBuild) { - return MayInlineInternal(inlined_from, inlined_into); - } - return true; - } - - const ProfileCompilationInfo* GetProfileCompilationInfo() const { - return profile_compilation_info_; - } - - // Is `boot_image_filename` the name of a core image (small boot - // image used for ART testing only)? - static bool IsCoreImageFilename(const std::string& boot_image_filename) { - // Look for "core.art" or "core-*.art". 
- if (android::base::EndsWith(boot_image_filename, "core.art")) { - return true; - } - if (!android::base::EndsWith(boot_image_filename, ".art")) { - return false; - } - size_t slash_pos = boot_image_filename.rfind('/'); - if (slash_pos == std::string::npos) { - return android::base::StartsWith(boot_image_filename, "core-"); - } - return boot_image_filename.compare(slash_pos + 1, 5u, "core-") == 0; - } - - optimizer::DexToDexCompiler& GetDexToDexCompiler() { - return dex_to_dex_compiler_; - } - - private: - void PreCompile(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - - void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_); - - // Attempt to resolve all type, methods, fields, and strings - // referenced from code in the dex file following PathClassLoader - // ordering semantics. - void Resolve(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - void ResolveDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - - // Do fast verification through VerifierDeps if possible. Return whether - // verification was successful. - bool FastVerify(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings); - - void Verify(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings); - - void VerifyDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - - void SetVerified(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings); - void SetVerifiedDexFile(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - ThreadPool* thread_pool, - size_t thread_count, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - - void InitializeClasses(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - void InitializeClasses(jobject class_loader, - const DexFile& dex_file, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings) - REQUIRES(!Locks::mutator_lock_); - - void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_); - - void Compile(jobject class_loader, - const std::vector<const DexFile*>& dex_files, - TimingLogger* timings); - - bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const; - - void InitializeThreadPools(); - void FreeThreadPools(); - void CheckThreadPools(); - - bool RequiresConstructorBarrier(const DexFile& dex_file, uint16_t class_def_idx) const; - - const CompilerOptions* const compiler_options_; - VerificationResults* const verification_results_; - - std::unique_ptr<Compiler> compiler_; - Compiler::Kind compiler_kind_; - - const InstructionSet instruction_set_; - const InstructionSetFeatures* const instruction_set_features_; - - // All class references that require constructor barriers. If the class reference is not in the - // set then the result has not yet been computed. 
- mutable ReaderWriterMutex requires_constructor_barrier_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; - std::map<ClassReference, bool> requires_constructor_barrier_ - GUARDED_BY(requires_constructor_barrier_lock_); - - // All class references that this compiler has compiled. Indexed by class defs. - using ClassStateTable = AtomicDexRefMap<ClassReference, ClassStatus>; - ClassStateTable compiled_classes_; - // All class references that are in the classpath. Indexed by class defs. - ClassStateTable classpath_classes_; - - typedef AtomicDexRefMap<MethodReference, CompiledMethod*> MethodTable; - - private: - // All method references that this compiler has compiled. - MethodTable compiled_methods_; - - // Number of non-relative patches in all compiled methods. These patches need space - // in the .oat_patches ELF section if requested in the compiler options. - Atomic<size_t> non_relative_linker_patch_count_; - - // If image_ is true, specifies the classes that will be included in the image. - // Note if image_classes_ is null, all classes are included in the image. - std::unique_ptr<std::unordered_set<std::string>> image_classes_; - - // Specifies the classes that will be compiled. Note that if classes_to_compile_ is null, - // all classes are eligible for compilation (duplication filters etc. will still apply). - // This option may be restricted to the boot image, depending on a flag in the implementation. - std::unique_ptr<std::unordered_set<std::string>> classes_to_compile_; - - // Specifies the methods that will be compiled. Note that if methods_to_compile_ is null, - // all methods are eligible for compilation (compilation filters etc. will still apply). - // This option may be restricted to the boot image, depending on a flag in the implementation. - std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_; - - std::atomic<uint32_t> number_of_soft_verifier_failures_; - bool had_hard_verifier_failure_; - - // A thread pool that can (potentially) run tasks in parallel. - std::unique_ptr<ThreadPool> parallel_thread_pool_; - size_t parallel_thread_count_; - - // A thread pool that guarantees running single-threaded on the main thread. - std::unique_ptr<ThreadPool> single_thread_pool_; - - class AOTCompilationStats; - std::unique_ptr<AOTCompilationStats> stats_; - - typedef void (*CompilerCallbackFn)(CompilerDriver& driver); - typedef MutexLock* (*CompilerMutexLockFn)(CompilerDriver& driver); - - void* compiler_context_; - - bool support_boot_image_fixup_; - - // List of dex files associates with the oat file. - std::vector<const DexFile*> dex_files_for_oat_file_; - - CompiledMethodStorage compiled_method_storage_; - - // Info for profile guided compilation. - const ProfileCompilationInfo* const profile_compilation_info_; - - size_t max_arena_alloc_; - - // Compiler for dex to dex (quickening). 
- optimizer::DexToDexCompiler dex_to_dex_compiler_; - - friend class CompileClassVisitor; - friend class DexToDexDecompilerTest; - friend class verifier::VerifierDepsTest; - DISALLOW_COPY_AND_ASSIGN(CompilerDriver); -}; - -} // namespace art - -#endif // ART_COMPILER_DRIVER_COMPILER_DRIVER_H_ diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc deleted file mode 100644 index 162904c0e7..0000000000 --- a/compiler/driver/compiler_driver_test.cc +++ /dev/null @@ -1,411 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "driver/compiler_driver.h" - -#include <limits> -#include <stdint.h> -#include <stdio.h> -#include <memory> - -#include "art_method-inl.h" -#include "base/casts.h" -#include "class_linker-inl.h" -#include "common_compiler_test.h" -#include "compiler_callbacks.h" -#include "dex/dex_file.h" -#include "dex/dex_file_types.h" -#include "gc/heap.h" -#include "handle_scope-inl.h" -#include "jit/profile_compilation_info.h" -#include "mirror/class-inl.h" -#include "mirror/class_loader.h" -#include "mirror/dex_cache-inl.h" -#include "mirror/object-inl.h" -#include "mirror/object_array-inl.h" -#include "scoped_thread_state_change-inl.h" - -namespace art { - -class CompilerDriverTest : public CommonCompilerTest { - protected: - void CompileAll(jobject class_loader) REQUIRES(!Locks::mutator_lock_) { - TimingLogger timings("CompilerDriverTest::CompileAll", false, false); - TimingLogger::ScopedTiming t(__FUNCTION__, &timings); - dex_files_ = GetDexFiles(class_loader); - compiler_driver_->SetDexFilesForOatFile(dex_files_);; - compiler_driver_->CompileAll(class_loader, dex_files_, &timings); - t.NewTiming("MakeAllExecutable"); - MakeAllExecutable(class_loader); - } - - void EnsureCompiled(jobject class_loader, const char* class_name, const char* method, - const char* signature, bool is_virtual) - REQUIRES(!Locks::mutator_lock_) { - CompileAll(class_loader); - Thread::Current()->TransitionFromSuspendedToRunnable(); - bool started = runtime_->Start(); - CHECK(started); - env_ = Thread::Current()->GetJniEnv(); - class_ = env_->FindClass(class_name); - CHECK(class_ != nullptr) << "Class not found: " << class_name; - if (is_virtual) { - mid_ = env_->GetMethodID(class_, method, signature); - } else { - mid_ = env_->GetStaticMethodID(class_, method, signature); - } - CHECK(mid_ != nullptr) << "Method not found: " << class_name << "." 
<< method << signature; - } - - void MakeAllExecutable(jobject class_loader) { - const std::vector<const DexFile*> class_path = GetDexFiles(class_loader); - for (size_t i = 0; i != class_path.size(); ++i) { - const DexFile* dex_file = class_path[i]; - CHECK(dex_file != nullptr); - MakeDexFileExecutable(class_loader, *dex_file); - } - } - - void MakeDexFileExecutable(jobject class_loader, const DexFile& dex_file) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - for (size_t i = 0; i < dex_file.NumClassDefs(); i++) { - const DexFile::ClassDef& class_def = dex_file.GetClassDef(i); - const char* descriptor = dex_file.GetClassDescriptor(class_def); - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); - mirror::Class* c = class_linker->FindClass(soa.Self(), descriptor, loader); - CHECK(c != nullptr); - const auto pointer_size = class_linker->GetImagePointerSize(); - for (auto& m : c->GetMethods(pointer_size)) { - MakeExecutable(&m); - } - } - } - - JNIEnv* env_; - jclass class_; - jmethodID mid_; - std::vector<const DexFile*> dex_files_; -}; - -// Disabled due to 10 second runtime on host -// TODO: Update the test for hash-based dex cache arrays. Bug: 30627598 -TEST_F(CompilerDriverTest, DISABLED_LARGE_CompileDexLibCore) { - CompileAll(nullptr); - - // All libcore references should resolve - ScopedObjectAccess soa(Thread::Current()); - ASSERT_TRUE(java_lang_dex_file_ != nullptr); - const DexFile& dex = *java_lang_dex_file_; - ObjPtr<mirror::DexCache> dex_cache = class_linker_->FindDexCache(soa.Self(), dex); - EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings()); - for (size_t i = 0; i < dex_cache->NumStrings(); i++) { - const mirror::String* string = dex_cache->GetResolvedString(dex::StringIndex(i)); - EXPECT_TRUE(string != nullptr) << "string_idx=" << i; - } - EXPECT_EQ(dex.NumTypeIds(), dex_cache->NumResolvedTypes()); - for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) { - mirror::Class* type = dex_cache->GetResolvedType(dex::TypeIndex(i)); - EXPECT_TRUE(type != nullptr) << "type_idx=" << i - << " " << dex.GetTypeDescriptor(dex.GetTypeId(dex::TypeIndex(i))); - } - EXPECT_TRUE(dex_cache->StaticMethodSize() == dex_cache->NumResolvedMethods() - || dex.NumMethodIds() == dex_cache->NumResolvedMethods()); - auto* cl = Runtime::Current()->GetClassLinker(); - auto pointer_size = cl->GetImagePointerSize(); - for (size_t i = 0; i < dex_cache->NumResolvedMethods(); i++) { - // FIXME: This is outdated for hash-based method array. - ArtMethod* method = dex_cache->GetResolvedMethod(i, pointer_size); - EXPECT_TRUE(method != nullptr) << "method_idx=" << i - << " " << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i)) - << " " << dex.GetMethodName(dex.GetMethodId(i)); - EXPECT_TRUE(method->GetEntryPointFromQuickCompiledCode() != nullptr) << "method_idx=" << i - << " " << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i)) << " " - << dex.GetMethodName(dex.GetMethodId(i)); - } - EXPECT_TRUE(dex_cache->StaticArtFieldSize() == dex_cache->NumResolvedFields() - || dex.NumFieldIds() == dex_cache->NumResolvedFields()); - for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) { - // FIXME: This is outdated for hash-based field array. 
- ArtField* field = dex_cache->GetResolvedField(i, cl->GetImagePointerSize()); - EXPECT_TRUE(field != nullptr) << "field_idx=" << i - << " " << dex.GetFieldDeclaringClassDescriptor(dex.GetFieldId(i)) - << " " << dex.GetFieldName(dex.GetFieldId(i)); - } - - // TODO check Class::IsVerified for all classes - - // TODO: check that all Method::GetCode() values are non-null -} - -TEST_F(CompilerDriverTest, AbstractMethodErrorStub) { - jobject class_loader; - { - ScopedObjectAccess soa(Thread::Current()); - class_loader = LoadDex("AbstractMethod"); - } - ASSERT_TRUE(class_loader != nullptr); - EnsureCompiled(class_loader, "AbstractClass", "foo", "()V", true); - - // Create a jobj_ of ConcreteClass, NOT AbstractClass. - jclass c_class = env_->FindClass("ConcreteClass"); - - jmethodID constructor = env_->GetMethodID(c_class, "<init>", "()V"); - - jobject jobj_ = env_->NewObject(c_class, constructor); - ASSERT_TRUE(jobj_ != nullptr); - - // Force non-virtual call to AbstractClass foo, will throw AbstractMethodError exception. - env_->CallNonvirtualVoidMethod(jobj_, class_, mid_); - - EXPECT_EQ(env_->ExceptionCheck(), JNI_TRUE); - jthrowable exception = env_->ExceptionOccurred(); - env_->ExceptionClear(); - jclass jlame = env_->FindClass("java/lang/AbstractMethodError"); - EXPECT_TRUE(env_->IsInstanceOf(exception, jlame)); - { - ScopedObjectAccess soa(Thread::Current()); - Thread::Current()->ClearException(); - } -} - -class CompilerDriverMethodsTest : public CompilerDriverTest { - protected: - std::unordered_set<std::string>* GetCompiledMethods() OVERRIDE { - return new std::unordered_set<std::string>({ - "byte StaticLeafMethods.identity(byte)", - "int StaticLeafMethods.sum(int, int, int)", - "double StaticLeafMethods.sum(double, double, double, double)" - }); - } -}; - -TEST_F(CompilerDriverMethodsTest, Selection) { - Thread* self = Thread::Current(); - jobject class_loader; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex("StaticLeafMethods"); - } - ASSERT_NE(class_loader, nullptr); - - // Need to enable dex-file writability. Methods rejected to be compiled will run through the - // dex-to-dex compiler. 
- for (const DexFile* dex_file : GetDexFiles(class_loader)) { - ASSERT_TRUE(dex_file->EnableWrite()); - } - - CompileAll(class_loader); - - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> h_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); - mirror::Class* klass = class_linker->FindClass(self, "LStaticLeafMethods;", h_loader); - ASSERT_NE(klass, nullptr); - - std::unique_ptr<std::unordered_set<std::string>> expected(GetCompiledMethods()); - - const auto pointer_size = class_linker->GetImagePointerSize(); - for (auto& m : klass->GetDirectMethods(pointer_size)) { - std::string name = m.PrettyMethod(true); - const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); - ASSERT_NE(code, nullptr); - if (expected->find(name) != expected->end()) { - expected->erase(name); - EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code)); - } else { - EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code)); - } - } - EXPECT_TRUE(expected->empty()); -} - -class CompilerDriverProfileTest : public CompilerDriverTest { - protected: - ProfileCompilationInfo* GetProfileCompilationInfo() OVERRIDE { - ScopedObjectAccess soa(Thread::Current()); - std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles("ProfileTestMultiDex"); - - ProfileCompilationInfo info; - for (const std::unique_ptr<const DexFile>& dex_file : dex_files) { - profile_info_.AddMethodIndex(ProfileCompilationInfo::MethodHotness::kFlagHot, - MethodReference(dex_file.get(), 1)); - profile_info_.AddMethodIndex(ProfileCompilationInfo::MethodHotness::kFlagHot, - MethodReference(dex_file.get(), 2)); - } - return &profile_info_; - } - - CompilerFilter::Filter GetCompilerFilter() const OVERRIDE { - // Use a profile based filter. 
- return CompilerFilter::kSpeedProfile; - } - - std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) { - if (clazz == "Main") { - return std::unordered_set<std::string>({ - "java.lang.String Main.getA()", - "java.lang.String Main.getB()"}); - } else if (clazz == "Second") { - return std::unordered_set<std::string>({ - "java.lang.String Second.getX()", - "java.lang.String Second.getY()"}); - } else { - return std::unordered_set<std::string>(); - } - } - - void CheckCompiledMethods(jobject class_loader, - const std::string& clazz, - const std::unordered_set<std::string>& expected_methods) { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> h_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); - mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader); - ASSERT_NE(klass, nullptr); - - const auto pointer_size = class_linker->GetImagePointerSize(); - size_t number_of_compiled_methods = 0; - for (auto& m : klass->GetVirtualMethods(pointer_size)) { - std::string name = m.PrettyMethod(true); - const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); - ASSERT_NE(code, nullptr); - if (expected_methods.find(name) != expected_methods.end()) { - number_of_compiled_methods++; - EXPECT_FALSE(class_linker->IsQuickToInterpreterBridge(code)); - } else { - EXPECT_TRUE(class_linker->IsQuickToInterpreterBridge(code)); - } - } - EXPECT_EQ(expected_methods.size(), number_of_compiled_methods); - } - - private: - ProfileCompilationInfo profile_info_; -}; - -TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) { - Thread* self = Thread::Current(); - jobject class_loader; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex("ProfileTestMultiDex"); - } - ASSERT_NE(class_loader, nullptr); - - // Need to enable dex-file writability. Methods rejected to be compiled will run through the - // dex-to-dex compiler. - for (const DexFile* dex_file : GetDexFiles(class_loader)) { - ASSERT_TRUE(dex_file->EnableWrite()); - } - - CompileAll(class_loader); - - std::unordered_set<std::string> m = GetExpectedMethodsForClass("Main"); - std::unordered_set<std::string> s = GetExpectedMethodsForClass("Second"); - CheckCompiledMethods(class_loader, "LMain;", m); - CheckCompiledMethods(class_loader, "LSecond;", s); -} - -// Test that a verify only compiler filter updates the CompiledClass map, -// which will be used for OatClass. 
-class CompilerDriverVerifyTest : public CompilerDriverTest { - protected: - CompilerFilter::Filter GetCompilerFilter() const OVERRIDE { - return CompilerFilter::kVerify; - } - - void CheckVerifiedClass(jobject class_loader, const std::string& clazz) const { - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); - StackHandleScope<1> hs(self); - Handle<mirror::ClassLoader> h_loader( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); - mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader); - ASSERT_NE(klass, nullptr); - EXPECT_TRUE(klass->IsVerified()); - - ClassStatus status; - bool found = compiler_driver_->GetCompiledClass( - ClassReference(&klass->GetDexFile(), klass->GetDexTypeIndex().index_), &status); - ASSERT_TRUE(found); - EXPECT_EQ(status, ClassStatus::kVerified); - } -}; - -TEST_F(CompilerDriverVerifyTest, VerifyCompilation) { - Thread* self = Thread::Current(); - jobject class_loader; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex("ProfileTestMultiDex"); - } - ASSERT_NE(class_loader, nullptr); - - CompileAll(class_loader); - - CheckVerifiedClass(class_loader, "LMain;"); - CheckVerifiedClass(class_loader, "LSecond;"); -} - -// Test that a class of status ClassStatus::kRetryVerificationAtRuntime is indeed -// recorded that way in the driver. -TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) { - Thread* const self = Thread::Current(); - jobject class_loader; - std::vector<const DexFile*> dex_files; - const DexFile* dex_file = nullptr; - { - ScopedObjectAccess soa(self); - class_loader = LoadDex("ProfileTestMultiDex"); - ASSERT_NE(class_loader, nullptr); - dex_files = GetDexFiles(class_loader); - ASSERT_GT(dex_files.size(), 0u); - dex_file = dex_files.front(); - } - compiler_driver_->SetDexFilesForOatFile(dex_files); - callbacks_->SetDoesClassUnloading(true, compiler_driver_.get()); - ClassReference ref(dex_file, 0u); - // Test that the status is read from the compiler driver as expected. - static_assert(enum_cast<size_t>(ClassStatus::kLast) < std::numeric_limits<size_t>::max(), - "Make sure incrementing the class status does not overflow."); - for (size_t i = enum_cast<size_t>(ClassStatus::kRetryVerificationAtRuntime); - i <= enum_cast<size_t>(ClassStatus::kLast); - ++i) { - const ClassStatus expected_status = enum_cast<ClassStatus>(i); - // Skip unsupported status that are not supposed to be ever recorded. 
- if (expected_status == ClassStatus::kVerifyingAtRuntime || - expected_status == ClassStatus::kInitializing) { - continue; - } - compiler_driver_->RecordClassStatus(ref, expected_status); - ClassStatus status = {}; - ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status)); - EXPECT_EQ(status, expected_status); - } -} - -// TODO: need check-cast test (when stub complete & we can throw/catch - -} // namespace art diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 2d82d79c4a..6f39488cc7 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -17,14 +17,23 @@ #include "compiler_options.h" #include <fstream> +#include <string_view> #include "android-base/stringprintf.h" +#include "android-base/strings.h" +#include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/runtime_debug.h" #include "base/variant_map.h" +#include "class_linker.h" #include "cmdline_parser.h" #include "compiler_options_map-inl.h" +#include "dex/dex_file-inl.h" +#include "dex/verification_results.h" +#include "dex/verified_method.h" #include "runtime.h" +#include "scoped_thread_state_change-inl.h" #include "simple_compiler_options_map.h" namespace art { @@ -37,11 +46,15 @@ CompilerOptions::CompilerOptions() tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), inline_max_code_units_(kUnsetInlineMaxCodeUnits), - no_inline_from_(nullptr), - boot_image_(false), - core_image_(false), - app_image_(false), - top_k_profile_threshold_(kDefaultTopKProfileThreshold), + instruction_set_(kRuntimeISA == InstructionSet::kArm ? InstructionSet::kThumb2 : kRuntimeISA), + instruction_set_features_(nullptr), + no_inline_from_(), + dex_files_for_oat_file_(), + image_classes_(), + verification_results_(nullptr), + image_type_(ImageType::kNone), + compiling_with_core_image_(false), + baseline_(false), debuggable_(false), generate_debug_info_(kDefaultGenerateDebugInfo), generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo), @@ -51,7 +64,10 @@ CompilerOptions::CompilerOptions() implicit_suspend_checks_(false), compile_pic_(false), dump_timings_(false), + dump_pass_timings_(false), dump_stats_(false), + top_k_profile_threshold_(kDefaultTopKProfileThreshold), + profile_compilation_info_(nullptr), verbose_methods_(), abort_on_hard_verifier_failure_(false), abort_on_soft_verifier_failure_(false), @@ -61,13 +77,16 @@ CompilerOptions::CompilerOptions() force_determinism_(false), deduplicate_code_(true), count_hotness_in_compiled_code_(false), + resolve_startup_const_strings_(false), + check_profiled_methods_(ProfileMethodsCheck::kNone), + max_image_block_size_(std::numeric_limits<uint32_t>::max()), register_allocation_strategy_(RegisterAllocator::kRegisterAllocatorDefault), passes_to_run_(nullptr) { } CompilerOptions::~CompilerOptions() { - // The destructor looks empty but it destroys a PassManagerOptions object. We keep it here - // because we don't want to include the PassManagerOptions definition from the header file. + // Everything done by member destructors. + // The definitions of classes forward-declared in the header have now been #included. 
} namespace { @@ -109,9 +128,6 @@ bool CompilerOptions::ParseRegisterAllocationStrategy(const std::string& option, return true; } -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wframe-larger-than=" - bool CompilerOptions::ParseCompilerOptions(const std::vector<std::string>& options, bool ignore_unrecognized, std::string* error_msg) { @@ -126,6 +142,62 @@ bool CompilerOptions::ParseCompilerOptions(const std::vector<std::string>& optio return ReadCompilerOptions(args, this, error_msg); } -#pragma GCC diagnostic pop +bool CompilerOptions::IsImageClass(const char* descriptor) const { + // Historical note: We used to hold the set indirectly and there was a distinction between an + // empty set and a null, null meaning to include all classes. However, the distiction has been + // removed; if we don't have a profile, we treat it as an empty set of classes. b/77340429 + return image_classes_.find(std::string_view(descriptor)) != image_classes_.end(); +} + +const VerificationResults* CompilerOptions::GetVerificationResults() const { + DCHECK(Runtime::Current()->IsAotCompiler()); + return verification_results_; +} + +const VerifiedMethod* CompilerOptions::GetVerifiedMethod(const DexFile* dex_file, + uint32_t method_idx) const { + MethodReference ref(dex_file, method_idx); + return verification_results_->GetVerifiedMethod(ref); +} + +bool CompilerOptions::IsMethodVerifiedWithoutFailures(uint32_t method_idx, + uint16_t class_def_idx, + const DexFile& dex_file) const { + const VerifiedMethod* verified_method = GetVerifiedMethod(&dex_file, method_idx); + if (verified_method != nullptr) { + return !verified_method->HasVerificationFailures(); + } + + // If we can't find verification metadata, check if this is a system class (we trust that system + // classes have their methods verified). If it's not, be conservative and assume the method + // has not been verified successfully. + + // TODO: When compiling the boot image it should be safe to assume that everything is verified, + // even if methods are not found in the verification cache. + const char* descriptor = dex_file.GetClassDescriptor(dex_file.GetClassDef(class_def_idx)); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + bool is_system_class = class_linker->FindSystemClass(self, descriptor) != nullptr; + if (!is_system_class) { + self->ClearException(); + } + return is_system_class; +} + +bool CompilerOptions::IsCoreImageFilename(const std::string& boot_image_filename) { + // Look for "core.art" or "core-*.art". 
+ if (android::base::EndsWith(boot_image_filename, "core.art")) { + return true; + } + if (!android::base::EndsWith(boot_image_filename, ".art")) { + return false; + } + size_t slash_pos = boot_image_filename.rfind('/'); + if (slash_pos == std::string::npos) { + return android::base::StartsWith(boot_image_filename, "core-"); + } + return boot_image_filename.compare(slash_pos + 1, 5u, "core-") == 0; +} } // namespace art diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 05d8805e81..0ab5ff1907 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -17,25 +17,47 @@ #ifndef ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_ #define ART_COMPILER_DRIVER_COMPILER_OPTIONS_H_ +#include <memory> #include <ostream> #include <string> #include <vector> +#include "base/globals.h" +#include "base/hash_set.h" #include "base/macros.h" #include "base/utils.h" #include "compiler_filter.h" -#include "globals.h" #include "optimizing/register_allocator.h" namespace art { +namespace jit { +class JitCompiler; +} // namespace jit + namespace verifier { class VerifierDepsTest; } // namespace verifier +namespace linker { +class Arm64RelativePatcherTest; +} // namespace linker + class DexFile; +enum class InstructionSet; +class InstructionSetFeatures; +class ProfileCompilationInfo; +class VerificationResults; +class VerifiedMethod; + +// Enum for CheckProfileMethodsCompiled. Outside CompilerOptions so it can be forward-declared. +enum class ProfileMethodsCheck : uint8_t { + kNone, + kLog, + kAbort, +}; -class CompilerOptions FINAL { +class CompilerOptions final { public: // Guide heuristics to determine whether to compile method if profile data not available. static const size_t kDefaultHugeMethodThreshold = 10000; @@ -49,6 +71,13 @@ class CompilerOptions FINAL { static const size_t kDefaultInlineMaxCodeUnits = 32; static constexpr size_t kUnsetInlineMaxCodeUnits = -1; + enum class ImageType : uint8_t { + kNone, // JIT or AOT app compilation producing only an oat file but no image. + kBootImage, // Creating boot image. + kAppImage, // Creating app image. + kApexBootImage, // Creating the apex image for jit/zygote experiment b/119800099. + }; + CompilerOptions(); ~CompilerOptions(); @@ -182,23 +211,27 @@ class CompilerOptions FINAL { // Are we compiling a boot image? bool IsBootImage() const { - return boot_image_; + return image_type_ == ImageType::kBootImage || image_type_ == ImageType::kApexBootImage; + } + + bool IsApexBootImage() const { + return image_type_ == ImageType::kApexBootImage; } - // Are we compiling a core image (small boot image only used for ART testing)? - bool IsCoreImage() const { - // Ensure that `core_image_` => `boot_image_`. - DCHECK(!core_image_ || boot_image_); - return core_image_; + bool IsBaseline() const { + return baseline_; } // Are we compiling an app image? bool IsAppImage() const { - return app_image_; + return image_type_ == ImageType::kAppImage; } - void DisableAppImage() { - app_image_ = false; + // Returns whether we are compiling against a "core" image, which + // is an indicative we are running tests. The compiler will use that + // information for checking invariants. + bool CompilingWithCoreImage() const { + return compiling_with_core_image_; } // Should the code be compiled as position independent? 
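IsCoreImageFilename(), which the hunks above move from CompilerDriver into CompilerOptions, treats a boot image path as a "core" (test-only) image when it ends in "core.art", or when it ends in ".art" and its basename starts with "core-". A standalone restatement of that rule, using plain std::string in place of the android::base helpers; the example paths in main() are illustrative only:

#include <cassert>
#include <string>

// Same matching rule as CompilerOptions::IsCoreImageFilename() above,
// restated without android::base so it compiles on its own.
bool LooksLikeCoreImage(const std::string& name) {
  auto ends_with = [&name](const std::string& suffix) {
    return name.size() >= suffix.size() &&
           name.compare(name.size() - suffix.size(), suffix.size(), suffix) == 0;
  };
  if (ends_with("core.art")) {
    return true;  // Any path ending in "core.art".
  }
  if (!ends_with(".art")) {
    return false;  // Not an image file at all.
  }
  size_t slash_pos = name.rfind('/');
  size_t basename_pos = (slash_pos == std::string::npos) ? 0u : slash_pos + 1u;
  return name.compare(basename_pos, 5u, "core-") == 0;  // "core-*.art"
}

int main() {
  assert(LooksLikeCoreImage("/data/art-test/core.art"));
  assert(LooksLikeCoreImage("core-optimizing.art"));
  assert(!LooksLikeCoreImage("/system/framework/boot.art"));
  return 0;
}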
@@ -206,6 +239,10 @@ class CompilerOptions FINAL { return compile_pic_; } + const ProfileCompilationInfo* GetProfileCompilationInfo() const { + return profile_compilation_info_; + } + bool HasVerboseMethods() const { return !verbose_methods_.empty(); } @@ -230,10 +267,39 @@ class CompilerOptions FINAL { return abort_on_soft_verifier_failure_; } - const std::vector<const DexFile*>* GetNoInlineFromDexFile() const { + InstructionSet GetInstructionSet() const { + return instruction_set_; + } + + const InstructionSetFeatures* GetInstructionSetFeatures() const { + return instruction_set_features_.get(); + } + + + const std::vector<const DexFile*>& GetNoInlineFromDexFile() const { return no_inline_from_; } + const std::vector<const DexFile*>& GetDexFilesForOatFile() const { + return dex_files_for_oat_file_; + } + + const HashSet<std::string>& GetImageClasses() const { + return image_classes_; + } + + bool IsImageClass(const char* descriptor) const; + + const VerificationResults* GetVerificationResults() const; + + const VerifiedMethod* GetVerifiedMethod(const DexFile* dex_file, uint32_t method_idx) const; + + // Checks if the specified method has been verified without failures. Returns + // false if the method is not in the verification results (GetVerificationResults). + bool IsMethodVerifiedWithoutFailures(uint32_t method_idx, + uint16_t class_def_idx, + const DexFile& dex_file) const; + bool ParseCompilerOptions(const std::vector<std::string>& options, bool ignore_unrecognized, std::string* error_msg); @@ -270,6 +336,10 @@ class CompilerOptions FINAL { return dump_timings_; } + bool GetDumpPassTimings() const { + return dump_pass_timings_; + } + bool GetDumpStats() const { return dump_stats_; } @@ -278,15 +348,28 @@ class CompilerOptions FINAL { return count_hotness_in_compiled_code_; } + bool ResolveStartupConstStrings() const { + return resolve_startup_const_strings_; + } + + ProfileMethodsCheck CheckProfiledMethodsCompiled() const { + return check_profiled_methods_; + } + + uint32_t MaxImageBlockSize() const { + return max_image_block_size_; + } + + void SetMaxImageBlockSize(uint32_t size) { + max_image_block_size_ = size; + } + + // Is `boot_image_filename` the name of a core image (small boot + // image used for ART testing only)? + static bool IsCoreImageFilename(const std::string& boot_image_filename); + private: bool ParseDumpInitFailures(const std::string& option, std::string* error_msg); - void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage); - void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage); - void ParseNumDexMethods(const StringPiece& option, UsageFn Usage); - void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage); - void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage); - void ParseLargeMethodMax(const StringPiece& option, UsageFn Usage); - void ParseHugeMethodMax(const StringPiece& option, UsageFn Usage); bool ParseRegisterAllocationStrategy(const std::string& option, std::string* error_msg); CompilerFilter::Filter compiler_filter_; @@ -297,16 +380,27 @@ class CompilerOptions FINAL { size_t num_dex_methods_threshold_; size_t inline_max_code_units_; - // Dex files from which we should not inline code. + InstructionSet instruction_set_; + std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + + // Dex files from which we should not inline code. Does not own the dex files. // This is usually a very short list (i.e. 
a single dex file), so we // prefer vector<> over a lookup-oriented container, such as set<>. - const std::vector<const DexFile*>* no_inline_from_; + std::vector<const DexFile*> no_inline_from_; - bool boot_image_; - bool core_image_; - bool app_image_; - // When using a profile file only the top K% of the profiled samples will be compiled. - double top_k_profile_threshold_; + // List of dex files associated with the oat file, empty for JIT. + std::vector<const DexFile*> dex_files_for_oat_file_; + + // Image classes, specifies the classes that will be included in the image if creating an image. + // Must not be empty for real boot image, only for tests pretending to compile boot image. + HashSet<std::string> image_classes_; + + // Results of AOT verification. + const VerificationResults* verification_results_; + + ImageType image_type_; + bool compiling_with_core_image_; + bool baseline_; bool debuggable_; bool generate_debug_info_; bool generate_mini_debug_info_; @@ -316,8 +410,15 @@ class CompilerOptions FINAL { bool implicit_suspend_checks_; bool compile_pic_; bool dump_timings_; + bool dump_pass_timings_; bool dump_stats_; + // When using a profile file only the top K% of the profiled samples will be compiled. + double top_k_profile_threshold_; + + // Info for profile guided compilation. + const ProfileCompilationInfo* profile_compilation_info_; + // Vector of methods to have verbose output enabled for. std::vector<std::string> verbose_methods_; @@ -344,6 +445,17 @@ class CompilerOptions FINAL { // won't be atomic for performance reasons, so we accept races, just like in interpreter. bool count_hotness_in_compiled_code_; + // Whether we eagerly resolve all of the const strings that are loaded from startup methods in the + // profile. + bool resolve_startup_const_strings_; + + // When running profile-guided compilation, check that methods intended to be compiled end + // up compiled and are not punted. + ProfileMethodsCheck check_profiled_methods_; + + // Maximum solid block size in the generated image. + uint32_t max_image_block_size_; + RegisterAllocator::Strategy register_allocation_strategy_; // If not null, specifies optimization passes which will be run instead of defaults. 
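The check_profiled_methods_ member above selects what happens when a method the profile marked for compilation ends up not compiled: ignore it, log it, or abort. A self-contained sketch of that policy; the enum is a local stand-in mirroring the ProfileMethodsCheck declared in this header, and std::cerr / std::abort() take the place of ART's logging macros purely to keep the snippet compilable:

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <string>

// Local stand-in mirroring the ProfileMethodsCheck values selected by
// --check-profiled-methods=log|abort in this change.
enum class ProfileMethodsCheck : uint8_t { kNone, kLog, kAbort };

// Reacts to a profiled method that was expected to be compiled but was not.
void ReportUncompiledProfiledMethod(ProfileMethodsCheck mode, const std::string& method) {
  if (mode == ProfileMethodsCheck::kNone) {
    return;  // Checking disabled (the default).
  }
  std::cerr << "Profiled method not compiled: " << method << '\n';
  if (mode == ProfileMethodsCheck::kAbort) {
    std::abort();  // kAbort turns the mismatch into a hard failure.
  }
}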
@@ -356,8 +468,11 @@ class CompilerOptions FINAL { friend class Dex2Oat; friend class DexToDexDecompilerTest; + friend class CommonCompilerDriverTest; friend class CommonCompilerTest; + friend class jit::JitCompiler; friend class verifier::VerifierDepsTest; + friend class linker::Arm64RelativePatcherTest; template <class Base> friend bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string* error_msg); diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h index 3b18db09fc..7e2a64b52b 100644 --- a/compiler/driver/compiler_options_map-inl.h +++ b/compiler/driver/compiler_options_map-inl.h @@ -43,9 +43,6 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string } options->SetCompilerFilter(compiler_filter); } - if (map.Exists(Base::PIC)) { - options->compile_pic_ = true; - } map.AssignIfExists(Base::HugeMethodMaxThreshold, &options->huge_method_threshold_); map.AssignIfExists(Base::LargeMethodMaxThreshold, &options->large_method_threshold_); map.AssignIfExists(Base::SmallMethodMaxThreshold, &options->small_method_threshold_); @@ -58,6 +55,9 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string if (map.Exists(Base::Debuggable)) { options->debuggable_ = true; } + if (map.Exists(Base::Baseline)) { + options->baseline_ = true; + } map.AssignIfExists(Base::TopKProfileThreshold, &options->top_k_profile_threshold_); map.AssignIfExists(Base::AbortOnHardVerifierFailure, &options->abort_on_hard_verifier_failure_); map.AssignIfExists(Base::AbortOnSoftVerifierFailure, &options->abort_on_soft_verifier_failure_); @@ -80,11 +80,20 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string if (map.Exists(Base::CountHotnessInCompiledCode)) { options->count_hotness_in_compiled_code_ = true; } + map.AssignIfExists(Base::ResolveStartupConstStrings, &options->resolve_startup_const_strings_); + if (map.Exists(Base::CheckProfiledMethods)) { + options->check_profiled_methods_ = *map.Get(Base::CheckProfiledMethods); + } + map.AssignIfExists(Base::MaxImageBlockSize, &options->max_image_block_size_); if (map.Exists(Base::DumpTimings)) { options->dump_timings_ = true; } + if (map.Exists(Base::DumpPassTimings)) { + options->dump_pass_timings_ = true; + } + if (map.Exists(Base::DumpStats)) { options->dump_stats_ = true; } @@ -102,9 +111,6 @@ inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { .template WithType<std::string>() .IntoKey(Map::CompilerFilter) - .Define("--compile-pic") - .IntoKey(Map::PIC) - .Define("--huge-method-max=_") .template WithType<unsigned int>() .IntoKey(Map::HugeMethodMaxThreshold) @@ -143,15 +149,27 @@ inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { .Define({"--count-hotness-in-compiled-code"}) .IntoKey(Map::CountHotnessInCompiledCode) + .Define({"--check-profiled-methods=_"}) + .template WithType<ProfileMethodsCheck>() + .WithValueMap({{"log", ProfileMethodsCheck::kLog}, + {"abort", ProfileMethodsCheck::kAbort}}) + .IntoKey(Map::CheckProfiledMethods) + .Define({"--dump-timings"}) .IntoKey(Map::DumpTimings) + .Define({"--dump-pass-timings"}) + .IntoKey(Map::DumpPassTimings) + .Define({"--dump-stats"}) .IntoKey(Map::DumpStats) .Define("--debuggable") .IntoKey(Map::Debuggable) + .Define("--baseline") + .IntoKey(Map::Baseline) + .Define("--top-k-profile-threshold=_") .template WithType<double>().WithRange(0.0, 100.0) .IntoKey(Map::TopKProfileThreshold) @@ -177,9 +195,18 @@ inline void 
AddCompilerOptionsArgumentParserOptions(Builder& b) { .template WithType<std::string>() .IntoKey(Map::RegisterAllocationStrategy) + .Define("--resolve-startup-const-strings=_") + .template WithType<bool>() + .WithValueMap({{"false", false}, {"true", true}}) + .IntoKey(Map::ResolveStartupConstStrings) + .Define("--verbose-methods=_") .template WithType<ParseStringList<','>>() - .IntoKey(Map::VerboseMethods); + .IntoKey(Map::VerboseMethods) + + .Define("--max-image-block-size=_") + .template WithType<unsigned int>() + .IntoKey(Map::MaxImageBlockSize); } #pragma GCC diagnostic pop diff --git a/compiler/driver/compiler_options_map.def b/compiler/driver/compiler_options_map.def index acddae7299..0a9c873988 100644 --- a/compiler/driver/compiler_options_map.def +++ b/compiler/driver/compiler_options_map.def @@ -48,18 +48,23 @@ COMPILER_OPTIONS_KEY (bool, GenerateDebugInfo) COMPILER_OPTIONS_KEY (bool, GenerateMiniDebugInfo) COMPILER_OPTIONS_KEY (bool, GenerateBuildID) COMPILER_OPTIONS_KEY (Unit, Debuggable) +COMPILER_OPTIONS_KEY (Unit, Baseline) COMPILER_OPTIONS_KEY (double, TopKProfileThreshold) COMPILER_OPTIONS_KEY (bool, AbortOnHardVerifierFailure) COMPILER_OPTIONS_KEY (bool, AbortOnSoftVerifierFailure) +COMPILER_OPTIONS_KEY (bool, ResolveStartupConstStrings, false) COMPILER_OPTIONS_KEY (std::string, DumpInitFailures) COMPILER_OPTIONS_KEY (std::string, DumpCFG) COMPILER_OPTIONS_KEY (Unit, DumpCFGAppend) // TODO: Add type parser. COMPILER_OPTIONS_KEY (std::string, RegisterAllocationStrategy) COMPILER_OPTIONS_KEY (ParseStringList<','>, VerboseMethods) -COMPILER_OPTIONS_KEY (bool, DeduplicateCode, true) +COMPILER_OPTIONS_KEY (bool, DeduplicateCode, true) COMPILER_OPTIONS_KEY (Unit, CountHotnessInCompiledCode) +COMPILER_OPTIONS_KEY (ProfileMethodsCheck, CheckProfiledMethods) COMPILER_OPTIONS_KEY (Unit, DumpTimings) +COMPILER_OPTIONS_KEY (Unit, DumpPassTimings) COMPILER_OPTIONS_KEY (Unit, DumpStats) +COMPILER_OPTIONS_KEY (unsigned int, MaxImageBlockSize) #undef COMPILER_OPTIONS_KEY diff --git a/compiler/driver/compiler_options_map.h b/compiler/driver/compiler_options_map.h index b9bc8b6ea1..af212d66a1 100644 --- a/compiler/driver/compiler_options_map.h +++ b/compiler/driver/compiler_options_map.h @@ -25,6 +25,8 @@ namespace art { +enum class ProfileMethodsCheck : uint8_t; + // Defines a type-safe heterogeneous key->value map. This is to be used as the base for // an extended map. 
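The parser entries above make the new flags reachable from the command line. A sketch of feeding them through CompilerOptions::ParseCompilerOptions(), whose signature appears earlier in this diff; the include path assumes the ART source tree, and the wrapper function name and flag values are illustrative:

#include <string>
#include <vector>

#include "driver/compiler_options.h"  // ART-internal header; assumes the ART source tree.

// Exercises the newly added options end to end through the existing parser.
bool ConfigureBaselineCompile(art::CompilerOptions* options, std::string* error_msg) {
  const std::vector<std::string> args = {
      "--baseline",
      "--dump-pass-timings",
      "--check-profiled-methods=log",
      "--resolve-startup-const-strings=true",
      "--max-image-block-size=1048576",
  };
  return options->ParseCompilerOptions(args, /* ignore_unrecognized= */ false, error_msg);
}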
template <typename Base, template <typename TV> class KeyType> diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc index c90c37d54a..0d0f074917 100644 --- a/compiler/driver/dex_compilation_unit.cc +++ b/compiler/driver/dex_compilation_unit.cc @@ -16,22 +16,27 @@ #include "dex_compilation_unit.h" +#include "art_field.h" #include "base/utils.h" +#include "dex/class_accessor-inl.h" #include "dex/code_item_accessors-inl.h" #include "dex/descriptors_names.h" +#include "mirror/class-inl.h" #include "mirror/dex_cache.h" +#include "scoped_thread_state_change-inl.h" namespace art { DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, ClassLinker* class_linker, const DexFile& dex_file, - const DexFile::CodeItem* code_item, + const dex::CodeItem* code_item, uint16_t class_def_idx, uint32_t method_idx, uint32_t access_flags, const VerifiedMethod* verified_method, - Handle<mirror::DexCache> dex_cache) + Handle<mirror::DexCache> dex_cache, + Handle<mirror::Class> compiling_class) : class_loader_(class_loader), class_linker_(class_linker), dex_file_(&dex_file), @@ -41,7 +46,8 @@ DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, access_flags_(access_flags), verified_method_(verified_method), dex_cache_(dex_cache), - code_item_accessor_(dex_file, code_item) {} + code_item_accessor_(dex_file, code_item), + compiling_class_(compiling_class) {} const std::string& DexCompilationUnit::GetSymbol() { if (symbol_.empty()) { @@ -51,4 +57,32 @@ const std::string& DexCompilationUnit::GetSymbol() { return symbol_; } +bool DexCompilationUnit::RequiresConstructorBarrier() const { + // Constructor barriers are applicable only for <init> methods. + DCHECK(!IsStatic()); + DCHECK(IsConstructor()); + + // We require a constructor barrier if there are final instance fields. + if (GetCompilingClass().GetReference() != nullptr && !GetCompilingClass().IsNull()) { + // Decoding class data can be slow, so iterate over fields of the compiling class if resolved. + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> compiling_class = GetCompilingClass().Get(); + for (size_t i = 0, size = compiling_class->NumInstanceFields(); i != size; ++i) { + ArtField* field = compiling_class->GetInstanceField(i); + if (field->IsFinal()) { + return true; + } + } + } else { + // Iterate over field definitions in the class data. 
+ ClassAccessor accessor(*GetDexFile(), GetClassDefIndex()); + for (const ClassAccessor::Field& field : accessor.GetInstanceFields()) { + if (field.IsFinal()) { + return true; + } + } + } + return false; +} + } // namespace art diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h index c1ae3c938b..def90fa4e1 100644 --- a/compiler/driver/dex_compilation_unit.h +++ b/compiler/driver/dex_compilation_unit.h @@ -23,10 +23,10 @@ #include "dex/code_item_accessors.h" #include "dex/dex_file.h" #include "handle.h" -#include "jni.h" namespace art { namespace mirror { +class Class; class ClassLoader; class DexCache; } // namespace mirror @@ -38,12 +38,13 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, ClassLinker* class_linker, const DexFile& dex_file, - const DexFile::CodeItem* code_item, + const dex::CodeItem* code_item, uint16_t class_def_idx, uint32_t method_idx, uint32_t access_flags, const VerifiedMethod* verified_method, - Handle<mirror::DexCache> dex_cache); + Handle<mirror::DexCache> dex_cache, + Handle<mirror::Class> compiling_class = Handle<mirror::Class>()); Handle<mirror::ClassLoader> GetClassLoader() const { return class_loader_; @@ -65,17 +66,17 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { return dex_method_idx_; } - const DexFile::CodeItem* GetCodeItem() const { + const dex::CodeItem* GetCodeItem() const { return code_item_; } const char* GetShorty() const { - const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); + const dex::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); return dex_file_->GetMethodShorty(method_id); } const char* GetShorty(uint32_t* shorty_len) const { - const DexFile::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); + const dex::MethodId& method_id = dex_file_->GetMethodId(dex_method_idx_); return dex_file_->GetMethodShorty(method_id, shorty_len); } @@ -117,6 +118,45 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { return code_item_accessor_; } + Handle<mirror::Class> GetCompilingClass() const { + return compiling_class_; + } + + // Does this <init> method require a constructor barrier (prior to the return)? + // The answer is "yes", if and only if the class has any instance final fields. + // (This must not be called for any non-<init> methods; the answer would be "no"). + // + // --- + // + // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end + // of the invoked constructor. The constructor barrier is a conservative implementation means of + // enforcing the freezes happen-before the object being constructed is observable by another + // thread. + // + // Note: This question only makes sense for instance constructors; + // static constructors (despite possibly having finals) never need + // a barrier. + // + // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes + // class initialization as: + // + // lock(class.lock) + // class.state = initializing + // unlock(class.lock) + // + // invoke <clinit> + // + // lock(class.lock) + // class.state = initialized + // unlock(class.lock) <-- acts as a release + // + // The last operation in the above example acts as an atomic release + // for any stores in <clinit>, which ends up being stricter + // than what a constructor barrier needs. + // + // See also QuasiAtomic::ThreadFenceForConstructor(). 
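The rule documented above — an instance constructor needs the barrier exactly when its class declares at least one final instance field — boils down to a small predicate. A standalone restatement; FieldInfo is an illustrative stand-in for per-field dex metadata, not an ART type:

#include <algorithm>
#include <vector>

// Illustrative stand-in for per-field dex metadata.
struct FieldInfo {
  bool is_static;
  bool is_final;
};

// An <init> needs a release barrier iff some *instance* field is final;
// static finals are covered by the <clinit> release described above.
bool NeedsConstructorBarrier(const std::vector<FieldInfo>& fields) {
  return std::any_of(fields.begin(), fields.end(), [](const FieldInfo& f) {
    return !f.is_static && f.is_final;
  });
}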
+ bool RequiresConstructorBarrier() const; + private: const Handle<mirror::ClassLoader> class_loader_; @@ -124,7 +164,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { const DexFile* const dex_file_; - const DexFile::CodeItem* const code_item_; + const dex::CodeItem* const code_item_; const uint16_t class_def_idx_; const uint32_t dex_method_idx_; const uint32_t access_flags_; @@ -134,6 +174,8 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { const CodeItemDataAccessor code_item_accessor_; + Handle<mirror::Class> compiling_class_; + std::string symbol_; }; diff --git a/compiler/driver/simple_compiler_options_map.h b/compiler/driver/simple_compiler_options_map.h index 3860da9f66..e7a51a4995 100644 --- a/compiler/driver/simple_compiler_options_map.h +++ b/compiler/driver/simple_compiler_options_map.h @@ -50,7 +50,7 @@ using Parser = CmdlineParser<SimpleParseArgumentMap, SimpleParseArgumentMapKey>; static inline Parser CreateSimpleParser(bool ignore_unrecognized) { std::unique_ptr<Parser::Builder> parser_builder = - std::unique_ptr<Parser::Builder>(new Parser::Builder()); + std::make_unique<Parser::Builder>(); AddCompilerOptionsArgumentParserOptions<SimpleParseArgumentMap>(*parser_builder); diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index f582341b18..633e124d07 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -15,11 +15,13 @@ */ #include <memory> +#include <type_traits> #include "base/arena_allocator.h" #include "base/callee_save_type.h" #include "base/enums.h" #include "base/leb128.h" +#include "base/malloc_arena_pool.h" #include "class_linker.h" #include "common_runtime_test.h" #include "dex/code_item_accessors-inl.h" @@ -31,8 +33,9 @@ #include "mirror/class-inl.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" -#include "mirror/stack_trace_element.h" +#include "mirror/stack_trace_element-inl.h" #include "oat_quick_method_header.h" +#include "obj_ptr-inl.h" #include "optimizing/stack_map_stream.h" #include "runtime-inl.h" #include "scoped_thread_state_change-inl.h" @@ -47,7 +50,7 @@ class ExceptionTest : public CommonRuntimeTest { // which always points to the first source statement. 
static constexpr const uint32_t kDexPc = 0; - virtual void SetUp() { + void SetUp() override { CommonRuntimeTest::SetUp(); ScopedObjectAccess soa(Thread::Current()); @@ -67,47 +70,37 @@ class ExceptionTest : public CommonRuntimeTest { fake_code_.push_back(0x70 | i); } - ArenaPool pool; + const uint32_t native_pc_offset = 4u; + CHECK_ALIGNED_PARAM(native_pc_offset, GetInstructionSetInstructionAlignment(kRuntimeISA)); + + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stack_maps(&allocator, kRuntimeISA); - stack_maps.BeginStackMapEntry(kDexPc, - /* native_pc_offset */ 3u, - /* register_mask */ 0u, - /* sp_mask */ nullptr, - /* num_dex_registers */ 0u, - /* inlining_depth */ 0u); + stack_maps.BeginMethod(4 * sizeof(void*), 0u, 0u, 0u); + stack_maps.BeginStackMapEntry(kDexPc, native_pc_offset); stack_maps.EndStackMapEntry(); - size_t stack_maps_size = stack_maps.PrepareForFillIn(); - size_t stack_maps_offset = stack_maps_size + sizeof(OatQuickMethodHeader); - - fake_header_code_and_maps_.resize(stack_maps_offset + fake_code_.size()); - MemoryRegion stack_maps_region(&fake_header_code_and_maps_[0], stack_maps_size); - stack_maps.FillInCodeInfo(stack_maps_region); - OatQuickMethodHeader method_header(stack_maps_offset, 0u, 4 * sizeof(void*), 0u, 0u, code_size); - memcpy(&fake_header_code_and_maps_[stack_maps_size], &method_header, sizeof(method_header)); - std::copy(fake_code_.begin(), - fake_code_.end(), - fake_header_code_and_maps_.begin() + stack_maps_offset); - - // Align the code. - const size_t alignment = GetInstructionSetAlignment(kRuntimeISA); - fake_header_code_and_maps_.reserve(fake_header_code_and_maps_.size() + alignment); - const void* unaligned_code_ptr = - fake_header_code_and_maps_.data() + (fake_header_code_and_maps_.size() - code_size); - size_t offset = dchecked_integral_cast<size_t>(reinterpret_cast<uintptr_t>(unaligned_code_ptr)); - size_t padding = RoundUp(offset, alignment) - offset; - // Make sure no resizing takes place. - CHECK_GE(fake_header_code_and_maps_.capacity(), fake_header_code_and_maps_.size() + padding); - fake_header_code_and_maps_.insert(fake_header_code_and_maps_.begin(), padding, 0); - const void* code_ptr = reinterpret_cast<const uint8_t*>(unaligned_code_ptr) + padding; - CHECK_EQ(code_ptr, - static_cast<const void*>(fake_header_code_and_maps_.data() + - (fake_header_code_and_maps_.size() - code_size))); + stack_maps.EndMethod(); + ScopedArenaVector<uint8_t> stack_map = stack_maps.Encode(); + + const size_t stack_maps_size = stack_map.size(); + const size_t header_size = sizeof(OatQuickMethodHeader); + const size_t code_alignment = GetInstructionSetAlignment(kRuntimeISA); + + fake_header_code_and_maps_.resize(stack_maps_size + header_size + code_size + code_alignment); + // NB: The start of the vector might not have been allocated the desired alignment. + uint8_t* code_ptr = + AlignUp(&fake_header_code_and_maps_[stack_maps_size + header_size], code_alignment); + + memcpy(&fake_header_code_and_maps_[0], stack_map.data(), stack_maps_size); + OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_.data(), code_size); + static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); + memcpy(code_ptr - header_size, &method_header, header_size); + memcpy(code_ptr, fake_code_.data(), fake_code_.size()); if (kRuntimeISA == InstructionSet::kArm) { // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer(). 
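Note: the rewritten ExceptionTest::SetUp() above no longer inserts padding at the front of the buffer; it over-allocates by one alignment quantum and aligns the code pointer in place, giving the layout [stack maps | method header | aligned code]. A self-contained sketch of that layout trick; AlignUpPtr and all sizes here are local stand-ins, not ART's helpers:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Round 'ptr' up to the next multiple of 'alignment' (a power of two).
    static uint8_t* AlignUpPtr(uint8_t* ptr, size_t alignment) {
      uintptr_t value = reinterpret_cast<uintptr_t>(ptr);
      uintptr_t aligned = (value + alignment - 1) & ~(alignment - 1);
      return reinterpret_cast<uint8_t*>(aligned);
    }

    int main() {
      const std::vector<uint8_t> maps(12, 0xAA);   // stand-in for the encoded stack maps
      const std::vector<uint8_t> header(8, 0xBB);  // stand-in for the method header bytes
      const std::vector<uint8_t> code(16, 0xCC);   // stand-in for the fake code
      const size_t alignment = 16;                 // stand-in for the ISA code alignment

      // Over-allocate by one alignment quantum so the code can always be aligned in place.
      std::vector<uint8_t> buffer(maps.size() + header.size() + code.size() + alignment);
      uint8_t* code_ptr = AlignUpPtr(buffer.data() + maps.size() + header.size(), alignment);

      memcpy(buffer.data(), maps.data(), maps.size());                  // [maps ...
      memcpy(code_ptr - header.size(), header.data(), header.size());   //  ... header |
      memcpy(code_ptr, code.data(), code.size());                       //  aligned code]

      assert(reinterpret_cast<uintptr_t>(code_ptr) % alignment == 0);
      assert(code_ptr + code.size() <= buffer.data() + buffer.size());
      return 0;
    }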
- CHECK_ALIGNED(stack_maps_offset, 2); + CHECK_ALIGNED(code_ptr, 2); } method_f_ = my_klass_->FindClassMethod("f", "()I", kRuntimePointerSize); @@ -130,7 +123,7 @@ class ExceptionTest : public CommonRuntimeTest { ArtMethod* method_g_; private: - mirror::Class* my_klass_; + ObjPtr<mirror::Class> my_klass_; }; TEST_F(ExceptionTest, FindCatchHandler) { @@ -142,8 +135,8 @@ TEST_F(ExceptionTest, FindCatchHandler) { ASSERT_EQ(2u, accessor.TriesSize()); ASSERT_NE(0u, accessor.InsnsSizeInCodeUnits()); - const DexFile::TryItem& t0 = accessor.TryItems().begin()[0]; - const DexFile::TryItem& t1 = accessor.TryItems().begin()[1]; + const dex::TryItem& t0 = accessor.TryItems().begin()[0]; + const dex::TryItem& t1 = accessor.TryItems().begin()[1]; EXPECT_LE(t0.start_addr_, t1.start_addr_); { CatchHandlerIterator iter(accessor, 4 /* Dex PC in the first try block */); @@ -194,14 +187,14 @@ TEST_F(ExceptionTest, StackTraceElement) { } fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc( - method_g_, kDexPc, /* is_catch_handler */ false)); // return pc + method_g_, kDexPc, /* is_for_catch_handler= */ false)); // return pc // Create/push fake 16byte stack frame for method g fake_stack.push_back(reinterpret_cast<uintptr_t>(method_g_)); fake_stack.push_back(0); fake_stack.push_back(0); fake_stack.push_back(method_g_->GetOatQuickMethodHeader(0)->ToNativeQuickPc( - method_g_, kDexPc, /* is_catch_handler */ false)); // return pc + method_g_, kDexPc, /* is_for_catch_handler= */ false)); // return pc // Create/push fake 16byte stack frame for method f fake_stack.push_back(reinterpret_cast<uintptr_t>(method_f_)); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index ac5c6fb01f..1957c82ef5 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -22,22 +22,17 @@ #include "arch/instruction_set_features.h" #include "art_method-inl.h" #include "base/logging.h" // For VLOG -#include "base/stringpiece.h" +#include "base/string_view_cpp20.h" #include "base/systrace.h" #include "base/time_utils.h" #include "base/timing_logger.h" -#include "base/unix_file/fd_file.h" +#include "compiler.h" #include "debug/elf_debug_writer.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "jit/debugger_interface.h" #include "jit/jit.h" #include "jit/jit_code_cache.h" -#include "oat_file-inl.h" -#include "oat_quick_method_header.h" -#include "object_lock.h" -#include "optimizing/register_allocator.h" -#include "thread_list.h" +#include "jit/jit_logger.h" namespace art { namespace jit { @@ -46,53 +41,16 @@ JitCompiler* JitCompiler::Create() { return new JitCompiler(); } -extern "C" void* jit_load(bool* generate_debug_info) { - VLOG(jit) << "loading jit compiler"; - auto* const jit_compiler = JitCompiler::Create(); - CHECK(jit_compiler != nullptr); - *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); - VLOG(jit) << "Done loading jit compiler"; - return jit_compiler; -} - -extern "C" void jit_unload(void* handle) { - DCHECK(handle != nullptr); - delete reinterpret_cast<JitCompiler*>(handle); -} - -extern "C" bool jit_compile_method( - void* handle, ArtMethod* method, Thread* self, bool osr) - REQUIRES_SHARED(Locks::mutator_lock_) { - auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); - DCHECK(jit_compiler != nullptr); - return jit_compiler->CompileMethod(self, method, osr); -} - -extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count) - REQUIRES_SHARED(Locks::mutator_lock_) 
{ - auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); - DCHECK(jit_compiler != nullptr); - if (jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo()) { - const ArrayRef<mirror::Class*> types_array(types, count); - std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses( - kRuntimeISA, jit_compiler->GetCompilerDriver()->GetInstructionSetFeatures(), types_array); - MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); - // We never free debug info for types, so we don't need to provide a handle - // (which would have been otherwise used as identifier to remove it later). - AddNativeDebugInfoForJit(nullptr /* handle */, elf_file); - } -} - -JitCompiler::JitCompiler() { - compiler_options_.reset(new CompilerOptions()); +void JitCompiler::ParseCompilerOptions() { // Special case max code units for inlining, whose default is "unset" (implictly - // meaning no limit). Do this before parsing the actuall passed options. + // meaning no limit). Do this before parsing the actual passed options. compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits); + Runtime* runtime = Runtime::Current(); { std::string error_msg; - if (!compiler_options_->ParseCompilerOptions(Runtime::Current()->GetCompilerOptions(), - true /* ignore_unrecognized */, - &error_msg)) { + if (!compiler_options_->ParseCompilerOptions(runtime->GetCompilerOptions(), + /*ignore_unrecognized=*/ true, + &error_msg)) { LOG(FATAL) << error_msg; UNREACHABLE(); } @@ -100,73 +58,130 @@ JitCompiler::JitCompiler() { // JIT is never PIC, no matter what the runtime compiler options specify. compiler_options_->SetNonPic(); - // Set debuggability based on the runtime value. - compiler_options_->SetDebuggable(Runtime::Current()->IsJavaDebuggable()); + // If the options don't provide whether we generate debuggable code, set + // debuggability based on the runtime value. 
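Note: the jit_load / jit_unload / jit_compile_method entry points being reshuffled in this file keep the familiar plugin shape: extern "C" functions that hand the runtime an opaque void* handle and cast it back on every call. A stripped-down sketch of that shape, with a stand-in Compiler class rather than ART's JitCompiler:

    #include <cassert>

    namespace {
    // Stand-in for the real compiler object owned by the plugin.
    class Compiler {
     public:
      bool Compile(int method_id) { ++compiled_; return method_id >= 0; }
      int compiled_ = 0;
    };
    }  // namespace

    // C linkage keeps the symbols unmangled so the host can look them up with dlsym().
    extern "C" void* plugin_load() {
      return new Compiler();
    }

    extern "C" bool plugin_compile(void* handle, int method_id) {
      // The host only ever sees void*; the plugin casts back to its own type.
      return reinterpret_cast<Compiler*>(handle)->Compile(method_id);
    }

    extern "C" void plugin_unload(void* handle) {
      delete reinterpret_cast<Compiler*>(handle);
    }

    int main() {
      void* handle = plugin_load();
      assert(plugin_compile(handle, 7));
      plugin_unload(handle);
      return 0;
    }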
+ if (!compiler_options_->GetDebuggable()) { + compiler_options_->SetDebuggable(runtime->IsJavaDebuggable()); + } - const InstructionSet instruction_set = kRuntimeISA; - for (const StringPiece option : Runtime::Current()->GetCompilerOptions()) { + const InstructionSet instruction_set = compiler_options_->GetInstructionSet(); + if (kRuntimeISA == InstructionSet::kArm) { + DCHECK_EQ(instruction_set, InstructionSet::kThumb2); + } else { + DCHECK_EQ(instruction_set, kRuntimeISA); + } + std::unique_ptr<const InstructionSetFeatures> instruction_set_features; + for (const std::string& option : runtime->GetCompilerOptions()) { VLOG(compiler) << "JIT compiler option " << option; std::string error_msg; - if (option.starts_with("--instruction-set-variant=")) { - StringPiece str = option.substr(strlen("--instruction-set-variant=")).data(); + if (StartsWith(option, "--instruction-set-variant=")) { + const char* str = option.c_str() + strlen("--instruction-set-variant="); VLOG(compiler) << "JIT instruction set variant " << str; - instruction_set_features_ = InstructionSetFeatures::FromVariant( - instruction_set, str.as_string(), &error_msg); - if (instruction_set_features_ == nullptr) { + instruction_set_features = InstructionSetFeatures::FromVariant( + instruction_set, str, &error_msg); + if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; } - } else if (option.starts_with("--instruction-set-features=")) { - StringPiece str = option.substr(strlen("--instruction-set-features=")).data(); + } else if (StartsWith(option, "--instruction-set-features=")) { + const char* str = option.c_str() + strlen("--instruction-set-features="); VLOG(compiler) << "JIT instruction set features " << str; - if (instruction_set_features_ == nullptr) { - instruction_set_features_ = InstructionSetFeatures::FromVariant( + if (instruction_set_features == nullptr) { + instruction_set_features = InstructionSetFeatures::FromVariant( instruction_set, "default", &error_msg); - if (instruction_set_features_ == nullptr) { + if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; } } - instruction_set_features_ = - instruction_set_features_->AddFeaturesFromString(str.as_string(), &error_msg); - if (instruction_set_features_ == nullptr) { + instruction_set_features = + instruction_set_features->AddFeaturesFromString(str, &error_msg); + if (instruction_set_features == nullptr) { LOG(WARNING) << "Error parsing " << option << " message=" << error_msg; } } } - if (instruction_set_features_ == nullptr) { - instruction_set_features_ = InstructionSetFeatures::FromCppDefines(); + + if (instruction_set_features == nullptr) { + // '--instruction-set-features/--instruction-set-variant' were not used. + // Use build-time defined features. + instruction_set_features = InstructionSetFeatures::FromCppDefines(); } - compiler_driver_.reset(new CompilerDriver( - compiler_options_.get(), - /* verification_results */ nullptr, - Compiler::kOptimizing, - instruction_set, - instruction_set_features_.get(), - /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, - /* thread_count */ 1, - /* swap_fd */ -1, - /* profile_compilation_info */ nullptr)); - // Disable dedupe so we can remove compiled methods. 
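Note: the option loop above now iterates plain std::string values and peels the value off each "--key=value" flag with StartsWith plus a strlen offset, instead of StringPiece::starts_with. A minimal equivalent built on std::string_view (C++17); the flag names here are invented for illustration:

    #include <iostream>
    #include <string>
    #include <string_view>
    #include <vector>

    // Returns true and stores the value if 'option' looks like "<prefix><value>".
    static bool MatchPrefix(std::string_view option, std::string_view prefix, std::string* value) {
      if (option.substr(0, prefix.size()) != prefix) {
        return false;
      }
      *value = std::string(option.substr(prefix.size()));
      return true;
    }

    int main() {
      const std::vector<std::string> options = {
          "--cpu-variant=generic",          // hypothetical flags, not real ART options
          "--cpu-features=default,extra",
          "--unrelated",
      };
      std::string variant;
      std::string features;
      for (const std::string& option : options) {
        std::string value;
        if (MatchPrefix(option, "--cpu-variant=", &value)) {
          variant = value;
        } else if (MatchPrefix(option, "--cpu-features=", &value)) {
          features = value;
        }  // Unrecognized options are ignored here, mirroring the lenient JIT parsing.
      }
      std::cout << variant << " / " << features << "\n";
      return 0;
    }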
- compiler_driver_->SetDedupeEnabled(false); - compiler_driver_->SetSupportBootImageFixup(false); - - size_t thread_count = compiler_driver_->GetThreadCount(); + compiler_options_->instruction_set_features_ = std::move(instruction_set_features); + compiler_options_->compiling_with_core_image_ = + CompilerOptions::IsCoreImageFilename(runtime->GetImageLocation()); + if (compiler_options_->GetGenerateDebugInfo()) { - DCHECK_EQ(thread_count, 1u) - << "Generating debug info only works with one compiler thread"; jit_logger_.reset(new JitLogger()); jit_logger_->OpenLog(); } } +extern "C" void* jit_load() { + VLOG(jit) << "Create jit compiler"; + auto* const jit_compiler = JitCompiler::Create(); + CHECK(jit_compiler != nullptr); + VLOG(jit) << "Done creating jit compiler"; + return jit_compiler; +} + +extern "C" void jit_unload(void* handle) { + DCHECK(handle != nullptr); + delete reinterpret_cast<JitCompiler*>(handle); +} + +extern "C" bool jit_compile_method( + void* handle, ArtMethod* method, Thread* self, bool baseline, bool osr) + REQUIRES_SHARED(Locks::mutator_lock_) { + auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); + DCHECK(jit_compiler != nullptr); + return jit_compiler->CompileMethod(self, method, baseline, osr); +} + +extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count) + REQUIRES_SHARED(Locks::mutator_lock_) { + auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); + DCHECK(jit_compiler != nullptr); + const CompilerOptions& compiler_options = jit_compiler->GetCompilerOptions(); + if (compiler_options.GetGenerateDebugInfo()) { + const ArrayRef<mirror::Class*> types_array(types, count); + std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForClasses( + kRuntimeISA, compiler_options.GetInstructionSetFeatures(), types_array); + // We never free debug info for types, so we don't need to provide a handle + // (which would have been otherwise used as identifier to remove it later). 
+ AddNativeDebugInfoForJit(Thread::Current(), + /*code_ptr=*/ nullptr, + elf_file, + debug::PackElfFileForJIT, + compiler_options.GetInstructionSet(), + compiler_options.GetInstructionSetFeatures()); + } +} + +extern "C" void jit_update_options(void* handle) { + JitCompiler* jit_compiler = reinterpret_cast<JitCompiler*>(handle); + DCHECK(jit_compiler != nullptr); + jit_compiler->ParseCompilerOptions(); +} + +extern "C" bool jit_generate_debug_info(void* handle) { + JitCompiler* jit_compiler = reinterpret_cast<JitCompiler*>(handle); + DCHECK(jit_compiler != nullptr); + return jit_compiler->GetCompilerOptions().GetGenerateDebugInfo(); +} + +JitCompiler::JitCompiler() { + compiler_options_.reset(new CompilerOptions()); + ParseCompilerOptions(); + compiler_.reset( + Compiler::Create(*compiler_options_, /*storage=*/ nullptr, Compiler::kOptimizing)); +} + JitCompiler::~JitCompiler() { if (compiler_options_->GetGenerateDebugInfo()) { jit_logger_->CloseLog(); } } -bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) { +bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool baseline, bool osr) { SCOPED_TRACE << "JIT compiling " << method->PrettyMethod(); DCHECK(!method->IsProxyMethod()); @@ -182,8 +197,13 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) { { TimingLogger::ScopedTiming t2("Compiling", &logger); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); - success = compiler_driver_->GetCompiler()->JitCompile( - self, code_cache, method, osr, jit_logger_.get()); + uint64_t start_ns = NanoTime(); + success = compiler_->JitCompile(self, code_cache, method, baseline, osr, jit_logger_.get()); + uint64_t duration_ns = NanoTime() - start_ns; + VLOG(jit) << "Compilation of " + << method->PrettyMethod() + << " took " + << PrettyDuration(duration_ns); } // Trim maps to reduce memory usage. diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 31dc9e2fe5..d008de404a 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -18,47 +18,41 @@ #define ART_COMPILER_JIT_JIT_COMPILER_H_ #include "base/mutex.h" -#include "compiled_method.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" -#include "jit_logger.h" namespace art { class ArtMethod; -class InstructionSetFeatures; +class CompiledMethod; +class Compiler; +class CompilerOptions; +class Thread; namespace jit { +class JitLogger; + class JitCompiler { public: static JitCompiler* Create(); virtual ~JitCompiler(); // Compilation entrypoint. Returns whether the compilation succeeded. - bool CompileMethod(Thread* self, ArtMethod* method, bool osr) + bool CompileMethod(Thread* self, ArtMethod* method, bool baseline, bool osr) REQUIRES_SHARED(Locks::mutator_lock_); - CompilerOptions* GetCompilerOptions() const { - return compiler_options_.get(); - } - CompilerDriver* GetCompilerDriver() const { - return compiler_driver_.get(); + const CompilerOptions& GetCompilerOptions() const { + return *compiler_options_.get(); } + void ParseCompilerOptions(); + private: std::unique_ptr<CompilerOptions> compiler_options_; - std::unique_ptr<CompilerDriver> compiler_driver_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<Compiler> compiler_; std::unique_ptr<JitLogger> jit_logger_; JitCompiler(); - // This is in the compiler since the runtime doesn't have access to the compiled method - // structures. 
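Note: CompileMethod() above now brackets the JitCompile call with NanoTime() and logs the elapsed time under VLOG(jit). The same measurement expressed with the standard library only, assuming nothing about ART's timing or logging helpers:

    #include <chrono>
    #include <iostream>
    #include <thread>

    int main() {
      using Clock = std::chrono::steady_clock;  // monotonic, unaffected by wall-clock changes
      const Clock::time_point start = Clock::now();

      // Stand-in for the actual compilation work.
      std::this_thread::sleep_for(std::chrono::milliseconds(5));

      const auto duration = Clock::now() - start;
      std::cout << "compilation took "
                << std::chrono::duration_cast<std::chrono::microseconds>(duration).count()
                << "us\n";
      return 0;
    }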
- bool AddToCodeCache(ArtMethod* method, const CompiledMethod* compiled_method) - REQUIRES_SHARED(Locks::mutator_lock_); - DISALLOW_COPY_AND_ASSIGN(JitCompiler); }; diff --git a/compiler/jit/jit_logger.cc b/compiler/jit/jit_logger.cc index 2199b64139..6b9453f525 100644 --- a/compiler/jit/jit_logger.cc +++ b/compiler/jit/jit_logger.cc @@ -20,7 +20,6 @@ #include "art_method-inl.h" #include "base/time_utils.h" #include "base/unix_file/fd_file.h" -#include "driver/compiler_driver.h" #include "jit/jit.h" #include "jit/jit_code_cache.h" #include "oat_file-inl.h" diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h index 8b39888315..f4ef75a5fe 100644 --- a/compiler/jit/jit_logger.h +++ b/compiler/jit/jit_logger.h @@ -17,10 +17,11 @@ #ifndef ART_COMPILER_JIT_JIT_LOGGER_H_ #define ART_COMPILER_JIT_JIT_LOGGER_H_ +#include <memory> + #include "base/mutex.h" +#include "base/os.h" #include "compiled_method.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" namespace art { diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 236b5c0c2e..b19a2b8843 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -20,6 +20,7 @@ #include "arch/instruction_set.h" #include "base/arena_allocator.h" #include "base/enums.h" +#include "base/malloc_arena_pool.h" #include "cfi_test.h" #include "gtest/gtest.h" #include "jni/quick/calling_convention.h" @@ -61,7 +62,7 @@ class JNICFITest : public CFITest { const bool is_synchronized = false; const char* shorty = "IIFII"; - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); std::unique_ptr<JniCallingConvention> jni_conv( @@ -85,7 +86,7 @@ class JNICFITest : public CFITest { callee_save_regs, mr_conv->EntrySpills()); jni_asm->IncreaseFrameSize(32); jni_asm->DecreaseFrameSize(32); - jni_asm->RemoveFrame(frame_size, callee_save_regs, /* may_suspend */ true); + jni_asm->RemoveFrame(frame_size, callee_save_regs, /* may_suspend= */ true); jni_asm->FinalizeCode(); std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); MemoryRegion code(&actual_asm[0], actual_asm.size()); @@ -94,7 +95,11 @@ class JNICFITest : public CFITest { const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); if (kGenerateExpected) { - GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); + GenerateExpected(stdout, + isa, + isa_str, + ArrayRef<const uint8_t>(actual_asm), + ArrayRef<const uint8_t>(actual_cfi)); } else { EXPECT_EQ(expected_asm, actual_asm); EXPECT_EQ(expected_cfi, actual_cfi); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 451a909965..0d0f8a0c30 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -21,20 +21,20 @@ #include "art_method-inl.h" #include "base/bit_utils.h" +#include "base/mem_map.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiler.h" #include "dex/dex_file.h" #include "gtest/gtest.h" #include "indirect_reference_table.h" -#include "java_vm_ext.h" -#include "jni_internal.h" -#include "mem_map.h" +#include "jni/java_vm_ext.h" +#include "jni/jni_internal.h" #include "mirror/class-inl.h" #include "mirror/class_loader.h" #include "mirror/object-inl.h" #include "mirror/object_array-inl.h" -#include "mirror/stack_trace_element.h" +#include "mirror/stack_trace_element-inl.h" #include "nativehelper/ScopedLocalRef.h" #include "nativeloader/native_loader.h" #include "runtime.h" @@ -221,12 +221,12 @@ struct jni_remove_extra_parameters : public 
remove_extra_parameters_helper<T, fn class JniCompilerTest : public CommonCompilerTest { protected: - void SetUp() OVERRIDE { + void SetUp() override { CommonCompilerTest::SetUp(); check_generic_jni_ = false; } - void TearDown() OVERRIDE { + void TearDown() override { android::ResetNativeLoader(); CommonCompilerTest::TearDown(); } @@ -245,7 +245,7 @@ class JniCompilerTest : public CommonCompilerTest { Handle<mirror::ClassLoader> loader( hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader))); // Compile the native method before starting the runtime - mirror::Class* c = class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader); + ObjPtr<mirror::Class> c = class_linker_->FindClass(soa.Self(), "LMyClassNatives;", loader); const auto pointer_size = class_linker_->GetImagePointerSize(); ArtMethod* method = c->FindClassMethod(method_name, method_sig, pointer_size); ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig; @@ -659,7 +659,7 @@ void JniCompilerTest::CompileAndRunIntMethodThroughStubImpl() { std::string reason; ASSERT_TRUE(Runtime::Current()->GetJavaVM()-> - LoadNativeLibrary(env_, "", class_loader_, &reason)) + LoadNativeLibrary(env_, "", class_loader_, nullptr, &reason)) << reason; jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24); @@ -675,7 +675,7 @@ void JniCompilerTest::CompileAndRunStaticIntMethodThroughStubImpl() { std::string reason; ASSERT_TRUE(Runtime::Current()->GetJavaVM()-> - LoadNativeLibrary(env_, "", class_loader_, &reason)) + LoadNativeLibrary(env_, "", class_loader_, nullptr, &reason)) << reason; jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42); @@ -1188,7 +1188,7 @@ jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { // Check stack trace entries have expected values for (int32_t j = 0; j < trace_array->GetLength(); ++j) { EXPECT_EQ(-2, trace_array->Get(j)->GetLineNumber()); - mirror::StackTraceElement* ste = trace_array->Get(j); + ObjPtr<mirror::StackTraceElement> ste = trace_array->Get(j); EXPECT_STREQ("MyClassNatives.java", ste->GetFileName()->ToModifiedUtf8().c_str()); EXPECT_STREQ("MyClassNatives", ste->GetDeclaringClass()->ToModifiedUtf8().c_str()); EXPECT_EQ(("fooI" + CurrentJniStringSuffix()), ste->GetMethodName()->ToModifiedUtf8()); @@ -1300,15 +1300,15 @@ jint my_gettext(JNIEnv* env, jclass klass, jlong val1, jobject obj1, jlong val2, EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); EXPECT_TRUE(env->IsSameObject(JniCompilerTest::jobj_, obj1)); EXPECT_TRUE(env->IsSameObject(JniCompilerTest::jobj_, obj2)); - EXPECT_EQ(0x12345678ABCDEF88ll, val1); - EXPECT_EQ(0x7FEDCBA987654321ll, val2); + EXPECT_EQ(0x12345678ABCDEF88LL, val1); + EXPECT_EQ(0x7FEDCBA987654321LL, val2); return 42; } void JniCompilerTest::GetTextImpl() { SetUpForTest(true, "getText", "(JLjava/lang/Object;JLjava/lang/Object;)I", CURRENT_JNI_WRAPPER(my_gettext)); - jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 0x12345678ABCDEF88ll, jobj_, + jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 0x12345678ABCDEF88LL, jobj_, INT64_C(0x7FEDCBA987654321), jobj_); EXPECT_EQ(result, 42); } @@ -1322,7 +1322,7 @@ jarray Java_MyClassNatives_GetSinkProperties(JNIEnv*, jobject thisObj, jstring s Thread* self = Thread::Current(); ScopedObjectAccess soa(self); - EXPECT_TRUE(self->HoldsLock(soa.Decode<mirror::Object>(thisObj).Ptr())); + EXPECT_TRUE(self->HoldsLock(soa.Decode<mirror::Object>(thisObj))); return nullptr; } @@ -2196,7 +2196,7 @@ void Java_MyClassNatives_normalNative(JNIEnv*, jclass) 
{ // Methods not annotated with anything are not considered "fast native" // -- Check that the annotation lookup does not find it. void JniCompilerTest::NormalNativeImpl() { - SetUpForTest(/* direct */ true, + SetUpForTest(/* direct= */ true, "normalNative", "()V", CURRENT_JNI_WRAPPER(Java_MyClassNatives_normalNative)); @@ -2218,7 +2218,7 @@ void Java_MyClassNatives_fastNative(JNIEnv*, jclass) { } void JniCompilerTest::FastNativeImpl() { - SetUpForTest(/* direct */ true, + SetUpForTest(/* direct= */ true, "fastNative", "()V", CURRENT_JNI_WRAPPER(Java_MyClassNatives_fastNative)); @@ -2241,7 +2241,7 @@ void Java_MyClassNatives_criticalNative() { } void JniCompilerTest::CriticalNativeImpl() { - SetUpForTest(/* direct */ true, + SetUpForTest(/* direct= */ true, // Important: Don't change the "current jni" yet to avoid a method name suffix. "criticalNative", "()V", diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index 54f193b551..42a4603571 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -18,6 +18,7 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" #include "base/macros.h" #include "handle_scope-inl.h" #include "utils/arm/managed_register_arm.h" @@ -173,7 +174,7 @@ bool ArmManagedRuntimeCallingConvention::IsCurrentParamOnStack() { ManagedRegister ArmManagedRuntimeCallingConvention::CurrentParamRegister() { LOG(FATAL) << "Should not reach here"; - return ManagedRegister::NoRegister(); + UNREACHABLE(); } FrameOffset ArmManagedRuntimeCallingConvention::CurrentParamStackOffset() { diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h index 249f20225d..b327898483 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.h +++ b/compiler/jni/quick/arm/calling_convention_arm.h @@ -25,24 +25,24 @@ namespace arm { constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k32); -class ArmManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { +class ArmManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: ArmManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, PointerSize::k32) {} - ~ArmManagedRuntimeCallingConvention() OVERRIDE {} + ~ArmManagedRuntimeCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // Managed runtime calling convention - ManagedRegister MethodRegister() OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; - const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; + ManagedRegister MethodRegister() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + const ManagedRegisterEntrySpills& EntrySpills() override; private: ManagedRegisterEntrySpills entry_spills_; @@ -50,37 +50,37 @@ class ArmManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon 
DISALLOW_COPY_AND_ASSIGN(ArmManagedRuntimeCallingConvention); }; -class ArmJniCallingConvention FINAL : public JniCallingConvention { +class ArmJniCallingConvention final : public JniCallingConvention { public: ArmJniCallingConvention(bool is_static, bool is_synchronized, bool is_critical_native, const char* shorty); - ~ArmJniCallingConvention() OVERRIDE {} + ~ArmJniCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister IntReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister IntReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // JNI calling convention - void Next() OVERRIDE; // Override default behavior for AAPCS - size_t FrameSize() OVERRIDE; - size_t OutArgSize() OVERRIDE; - ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE; - ManagedRegister ReturnScratchRegister() const OVERRIDE; - uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; + void Next() override; // Override default behavior for AAPCS + size_t FrameSize() override; + size_t OutArgSize() override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ManagedRegister ReturnScratchRegister() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; // AAPCS mandates return values are extended. 
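Note: most of the calling-convention header churn in this change is a mechanical swap of the OVERRIDE/FINAL macros for the C++11 override and final keywords, which lets the compiler reject a method that silently stops overriding its base. A generic illustration with placeholder class names:

    struct CallingConventionBase {
      virtual ~CallingConventionBase() {}
      virtual int FrameSize() const { return 0; }
    };

    // 'final' forbids further derivation; 'override' makes the compiler verify that
    // each method really overrides a virtual from the base class.
    class ArmLikeCallingConvention final : public CallingConventionBase {
     public:
      int FrameSize() const override { return 64; }
      // int FrameSize() override { return 64; }  // would not compile: signature mismatch
    };

    int main() {
      ArmLikeCallingConvention cc;
      const CallingConventionBase& base = cc;
      return base.FrameSize() == 64 ? 0 : 1;
    }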
- bool RequiresSmallResultTypeExtension() const OVERRIDE { + bool RequiresSmallResultTypeExtension() const override { return false; } protected: - size_t NumberOfOutgoingStackArgs() OVERRIDE; + size_t NumberOfOutgoingStackArgs() override; private: // Padding to ensure longs and doubles are not split in AAPCS diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index 328ecbbc5c..4a6a754b5f 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -18,6 +18,7 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" #include "handle_scope-inl.h" #include "utils/arm64/managed_register_arm64.h" @@ -181,7 +182,7 @@ bool Arm64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() { ManagedRegister Arm64ManagedRuntimeCallingConvention::CurrentParamRegister() { LOG(FATAL) << "Should not reach here"; - return ManagedRegister::NoRegister(); + UNREACHABLE(); } FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h index 56189427b6..ed0ddeb1b2 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.h +++ b/compiler/jni/quick/arm64/calling_convention_arm64.h @@ -25,24 +25,24 @@ namespace arm64 { constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64); -class Arm64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { +class Arm64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: Arm64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, PointerSize::k64) {} - ~Arm64ManagedRuntimeCallingConvention() OVERRIDE {} + ~Arm64ManagedRuntimeCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // Managed runtime calling convention - ManagedRegister MethodRegister() OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; - const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; + ManagedRegister MethodRegister() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + const ManagedRegisterEntrySpills& EntrySpills() override; private: ManagedRegisterEntrySpills entry_spills_; @@ -50,36 +50,36 @@ class Arm64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingC DISALLOW_COPY_AND_ASSIGN(Arm64ManagedRuntimeCallingConvention); }; -class Arm64JniCallingConvention FINAL : public JniCallingConvention { +class Arm64JniCallingConvention final : public JniCallingConvention { public: Arm64JniCallingConvention(bool is_static, bool is_synchronized, bool is_critical_native, const char* shorty); - ~Arm64JniCallingConvention() OVERRIDE {} + ~Arm64JniCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister IntReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() 
OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister IntReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // JNI calling convention - size_t FrameSize() OVERRIDE; - size_t OutArgSize() OVERRIDE; - ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE; - ManagedRegister ReturnScratchRegister() const OVERRIDE; - uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; + size_t FrameSize() override; + size_t OutArgSize() override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ManagedRegister ReturnScratchRegister() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; // aarch64 calling convention leaves upper bits undefined. - bool RequiresSmallResultTypeExtension() const OVERRIDE { + bool RequiresSmallResultTypeExtension() const override { return true; } protected: - size_t NumberOfOutgoingStackArgs() OVERRIDE; + size_t NumberOfOutgoingStackArgs() override; private: DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention); diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc index ff814c8a6b..f031b9be82 100644 --- a/compiler/jni/quick/calling_convention.cc +++ b/compiler/jni/quick/calling_convention.cc @@ -18,6 +18,8 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" + #ifdef ART_ENABLE_CODEGEN_arm #include "jni/quick/arm/calling_convention_arm.h" #endif diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index e256ce647d..77a5d595d7 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -27,6 +27,8 @@ namespace art { +enum class InstructionSet; + // Top-level abstraction for different calling conventions. class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConvention> { public: diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index d001cfe4fc..70540783b6 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -27,16 +27,16 @@ #include "base/enums.h" #include "base/logging.h" // For VLOG. #include "base/macros.h" +#include "base/malloc_arena_pool.h" +#include "base/memory_region.h" #include "base/utils.h" #include "calling_convention.h" #include "class_linker.h" -#include "debug/dwarf/debug_frame_opcode_writer.h" +#include "dwarf/debug_frame_opcode_writer.h" #include "dex/dex_file-inl.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "jni_env_ext.h" -#include "memory_region.h" +#include "jni/jni_env_ext.h" #include "thread.h" #include "utils/arm/managed_register_arm.h" #include "utils/arm64/managed_register_arm64.h" @@ -114,7 +114,7 @@ static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint whi // convention. 
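Note: several CurrentParamRegister() stubs in these files drop the dummy "return ManagedRegister::NoRegister();" after LOG(FATAL) in favour of UNREACHABLE(), telling the compiler the fatal path never falls through. The same idea with standard facilities only; Fatal() below is a made-up helper, not ART's macro:

    #include <cstdio>
    #include <cstdlib>

    // Stand-in for LOG(FATAL): report and never return.
    [[noreturn]] static void Fatal(const char* msg) {
      std::fprintf(stderr, "FATAL: %s\n", msg);
      std::abort();
    }

    // Maps a parameter index to a register number in this toy convention.
    static int ParamRegister(int param_index) {
      if (param_index >= 0) {
        return param_index + 4;  // arbitrary mapping for the sketch
      }
      Fatal("Should not reach here");
      // No dummy 'return' is needed after this point: the [[noreturn]] attribute
      // plays the role of UNREACHABLE() and satisfies the missing-return check.
    }

    int main() {
      return ParamRegister(1) == 5 ? 0 : 1;
    }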
// template <PointerSize kPointerSize> -static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, +static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) { @@ -123,8 +123,9 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, const bool is_static = (access_flags & kAccStatic) != 0; const bool is_synchronized = (access_flags & kAccSynchronized) != 0; const char* shorty = dex_file.GetMethodShorty(dex_file.GetMethodId(method_idx)); - InstructionSet instruction_set = driver->GetInstructionSet(); - const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures(); + InstructionSet instruction_set = compiler_options.GetInstructionSet(); + const InstructionSetFeatures* instruction_set_features = + compiler_options.GetInstructionSetFeatures(); // i.e. if the method was annotated with @FastNative const bool is_fast_native = (access_flags & kAccFastNative) != 0u; @@ -150,7 +151,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, // Don't allow both @FastNative and @CriticalNative. They are mutually exclusive. if (UNLIKELY(is_fast_native && is_critical_native)) { LOG(FATAL) << "JniCompile: Method cannot be both @CriticalNative and @FastNative" - << dex_file.PrettyMethod(method_idx, /* with_signature */ true); + << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); } // @CriticalNative - extra checks: @@ -161,20 +162,20 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, CHECK(is_static) << "@CriticalNative functions cannot be virtual since that would" << "require passing a reference parameter (this), which is illegal " - << dex_file.PrettyMethod(method_idx, /* with_signature */ true); + << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); CHECK(!is_synchronized) << "@CriticalNative functions cannot be synchronized since that would" << "require passing a (class and/or this) reference parameter, which is illegal " - << dex_file.PrettyMethod(method_idx, /* with_signature */ true); + << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); for (size_t i = 0; i < strlen(shorty); ++i) { CHECK_NE(Primitive::kPrimNot, Primitive::GetType(shorty[i])) << "@CriticalNative methods' shorty types must not have illegal references " - << dex_file.PrettyMethod(method_idx, /* with_signature */ true); + << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); } } } - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); // Calling conventions used to iterate over parameters to method @@ -215,16 +216,9 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, // Assembler that holds generated instructions std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm = GetMacroAssembler<kPointerSize>(&allocator, instruction_set, instruction_set_features); - const CompilerOptions& compiler_options = driver->GetCompilerOptions(); jni_asm->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode()); - // Offsets into data structures - // TODO: if cross compiling these offsets are for the host not the target - const Offset functions(OFFSETOF_MEMBER(JNIEnvExt, functions)); - const Offset monitor_enter(OFFSETOF_MEMBER(JNINativeInterface, MonitorEnter)); - const Offset monitor_exit(OFFSETOF_MEMBER(JNINativeInterface, 
MonitorExit)); - // 1. Build the frame saving all callee saves, Method*, and PC return address. const size_t frame_size(main_jni_conv->FrameSize()); // Excludes outgoing args. ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters(); @@ -638,7 +632,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(CompilerDriver* driver, __ DecreaseFrameSize(current_out_arg_size); // 15. Process pending exceptions from JNI call or monitor exit. - __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0 /* stack_adjust */); + __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0 /* stack_adjust= */); // 16. Remove activation - need to restore callee save registers since the GC may have changed // them. @@ -770,16 +764,16 @@ static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm, } } -JniCompiledMethod ArtQuickJniCompileMethod(CompilerDriver* compiler, +JniCompiledMethod ArtQuickJniCompileMethod(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) { - if (Is64BitInstructionSet(compiler->GetInstructionSet())) { + if (Is64BitInstructionSet(compiler_options.GetInstructionSet())) { return ArtJniCompileMethodInternal<PointerSize::k64>( - compiler, access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); } else { return ArtJniCompileMethodInternal<PointerSize::k32>( - compiler, access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); } } diff --git a/compiler/jni/quick/jni_compiler.h b/compiler/jni/quick/jni_compiler.h index 11419947a0..313fcd361e 100644 --- a/compiler/jni/quick/jni_compiler.h +++ b/compiler/jni/quick/jni_compiler.h @@ -25,7 +25,7 @@ namespace art { class ArtMethod; -class CompilerDriver; +class CompilerOptions; class DexFile; class JniCompiledMethod { @@ -62,7 +62,7 @@ class JniCompiledMethod { std::vector<uint8_t> cfi_; }; -JniCompiledMethod ArtQuickJniCompileMethod(CompilerDriver* compiler, +JniCompiledMethod ArtQuickJniCompileMethod(const CompilerOptions& compiler_options, uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file); diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc index 5ec1addcb9..c69854d19a 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.cc +++ b/compiler/jni/quick/mips/calling_convention_mips.cc @@ -18,6 +18,7 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" #include "handle_scope-inl.h" #include "utils/mips/managed_register_mips.h" @@ -124,7 +125,7 @@ bool MipsManagedRuntimeCallingConvention::IsCurrentParamOnStack() { ManagedRegister MipsManagedRuntimeCallingConvention::CurrentParamRegister() { LOG(FATAL) << "Should not reach here"; - return ManagedRegister::NoRegister(); + UNREACHABLE(); } FrameOffset MipsManagedRuntimeCallingConvention::CurrentParamStackOffset() { diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h index ad3f118bad..8b395a0300 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.h +++ b/compiler/jni/quick/mips/calling_convention_mips.h @@ -27,24 +27,24 @@ constexpr size_t kFramePointerSize = 4; static_assert(kFramePointerSize == static_cast<size_t>(PointerSize::k32), "Invalid frame pointer size"); -class MipsManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { +class MipsManagedRuntimeCallingConvention final : public 
ManagedRuntimeCallingConvention { public: MipsManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, PointerSize::k32) {} - ~MipsManagedRuntimeCallingConvention() OVERRIDE {} + ~MipsManagedRuntimeCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // Managed runtime calling convention - ManagedRegister MethodRegister() OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; - const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; + ManagedRegister MethodRegister() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + const ManagedRegisterEntrySpills& EntrySpills() override; private: ManagedRegisterEntrySpills entry_spills_; @@ -52,42 +52,42 @@ class MipsManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCo DISALLOW_COPY_AND_ASSIGN(MipsManagedRuntimeCallingConvention); }; -class MipsJniCallingConvention FINAL : public JniCallingConvention { +class MipsJniCallingConvention final : public JniCallingConvention { public: MipsJniCallingConvention(bool is_static, bool is_synchronized, bool is_critical_native, const char* shorty); - ~MipsJniCallingConvention() OVERRIDE {} + ~MipsJniCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister IntReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister IntReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // JNI calling convention - void Next() OVERRIDE; // Override default behavior for o32. - size_t FrameSize() OVERRIDE; - size_t OutArgSize() OVERRIDE; - ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE; - ManagedRegister ReturnScratchRegister() const OVERRIDE; - uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; + void Next() override; // Override default behavior for o32. + size_t FrameSize() override; + size_t OutArgSize() override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ManagedRegister ReturnScratchRegister() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; // Mips does not need to extend small return types. - bool RequiresSmallResultTypeExtension() const OVERRIDE { + bool RequiresSmallResultTypeExtension() const override { return false; } protected: - size_t NumberOfOutgoingStackArgs() OVERRIDE; + size_t NumberOfOutgoingStackArgs() override; private: // Padding to ensure longs and doubles are not split in o32. 
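Note: ArtQuickJniCompileMethod() earlier in this change picks the PointerSize::k32 or PointerSize::k64 instantiation of the internal worker at runtime, based on whether the target ISA is 64-bit. The shape of that compile-time/run-time split, reduced to a standalone sketch with an invented ReferenceSlotSize worker:

    #include <cstddef>
    #include <iostream>

    enum class PointerSize : size_t { k32 = 4, k64 = 8 };

    // The real worker is templated so size arithmetic is resolved at compile time.
    template <PointerSize kPointerSize>
    size_t ReferenceSlotSize() {
      return static_cast<size_t>(kPointerSize);
    }

    // A thin runtime dispatcher chooses the instantiation from the target ISA.
    size_t ReferenceSlotSizeFor(bool is_64_bit_isa) {
      return is_64_bit_isa ? ReferenceSlotSize<PointerSize::k64>()
                           : ReferenceSlotSize<PointerSize::k32>();
    }

    int main() {
      std::cout << ReferenceSlotSizeFor(true) << " " << ReferenceSlotSizeFor(false) << "\n";
      return 0;
    }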
size_t padding_; - size_t use_fp_arg_registers_; + bool use_fp_arg_registers_; DISALLOW_COPY_AND_ASSIGN(MipsJniCallingConvention); }; diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc index a7012aefa8..2c297b3ce3 100644 --- a/compiler/jni/quick/mips64/calling_convention_mips64.cc +++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc @@ -18,6 +18,7 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" #include "handle_scope-inl.h" #include "utils/mips64/managed_register_mips64.h" @@ -109,7 +110,7 @@ bool Mips64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() { ManagedRegister Mips64ManagedRuntimeCallingConvention::CurrentParamRegister() { LOG(FATAL) << "Should not reach here"; - return ManagedRegister::NoRegister(); + UNREACHABLE(); } FrameOffset Mips64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h index faedaeff6c..d87f73a1ea 100644 --- a/compiler/jni/quick/mips64/calling_convention_mips64.h +++ b/compiler/jni/quick/mips64/calling_convention_mips64.h @@ -27,24 +27,24 @@ constexpr size_t kFramePointerSize = 8; static_assert(kFramePointerSize == static_cast<size_t>(PointerSize::k64), "Invalid frame pointer size"); -class Mips64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { +class Mips64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: Mips64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, PointerSize::k64) {} - ~Mips64ManagedRuntimeCallingConvention() OVERRIDE {} + ~Mips64ManagedRuntimeCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // Managed runtime calling convention - ManagedRegister MethodRegister() OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; - const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; + ManagedRegister MethodRegister() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + const ManagedRegisterEntrySpills& EntrySpills() override; private: ManagedRegisterEntrySpills entry_spills_; @@ -52,36 +52,36 @@ class Mips64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCalling DISALLOW_COPY_AND_ASSIGN(Mips64ManagedRuntimeCallingConvention); }; -class Mips64JniCallingConvention FINAL : public JniCallingConvention { +class Mips64JniCallingConvention final : public JniCallingConvention { public: Mips64JniCallingConvention(bool is_static, bool is_synchronized, bool is_critical_native, const char* shorty); - ~Mips64JniCallingConvention() OVERRIDE {} + ~Mips64JniCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister IntReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister IntReturnRegister() 
override; + ManagedRegister InterproceduralScratchRegister() override; // JNI calling convention - size_t FrameSize() OVERRIDE; - size_t OutArgSize() OVERRIDE; - ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE; - ManagedRegister ReturnScratchRegister() const OVERRIDE; - uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; + size_t FrameSize() override; + size_t OutArgSize() override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ManagedRegister ReturnScratchRegister() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; // Mips64 does not need to extend small return types. - bool RequiresSmallResultTypeExtension() const OVERRIDE { + bool RequiresSmallResultTypeExtension() const override { return false; } protected: - size_t NumberOfOutgoingStackArgs() OVERRIDE; + size_t NumberOfOutgoingStackArgs() override; private: DISALLOW_COPY_AND_ASSIGN(Mips64JniCallingConvention); diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index ad58e3820d..1f255e2bbd 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -18,6 +18,7 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" #include "handle_scope-inl.h" #include "utils/x86/managed_register_x86.h" @@ -257,7 +258,7 @@ bool X86JniCallingConvention::IsCurrentParamOnStack() { ManagedRegister X86JniCallingConvention::CurrentParamRegister() { LOG(FATAL) << "Should not reach here"; - return ManagedRegister::NoRegister(); + UNREACHABLE(); } FrameOffset X86JniCallingConvention::CurrentParamStackOffset() { diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h index be83cdaad0..d0c6198e77 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.h +++ b/compiler/jni/quick/x86/calling_convention_x86.h @@ -25,7 +25,7 @@ namespace x86 { constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k32); -class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { +class X86ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: X86ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, @@ -33,17 +33,17 @@ class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon shorty, PointerSize::k32), gpr_arg_count_(0) {} - ~X86ManagedRuntimeCallingConvention() OVERRIDE {} + ~X86ManagedRuntimeCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // Managed runtime calling convention - ManagedRegister MethodRegister() OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; - 
const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; + ManagedRegister MethodRegister() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + const ManagedRegisterEntrySpills& EntrySpills() override; private: int gpr_arg_count_; @@ -53,36 +53,36 @@ class X86ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingCon }; // Implements the x86 cdecl calling convention. -class X86JniCallingConvention FINAL : public JniCallingConvention { +class X86JniCallingConvention final : public JniCallingConvention { public: X86JniCallingConvention(bool is_static, bool is_synchronized, bool is_critical_native, const char* shorty); - ~X86JniCallingConvention() OVERRIDE {} + ~X86JniCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister IntReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister IntReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // JNI calling convention - size_t FrameSize() OVERRIDE; - size_t OutArgSize() OVERRIDE; - ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE; - ManagedRegister ReturnScratchRegister() const OVERRIDE; - uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; + size_t FrameSize() override; + size_t OutArgSize() override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ManagedRegister ReturnScratchRegister() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; // x86 needs to extend small return types. 
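Note: RequiresSmallResultTypeExtension() differs by ISA in these headers: the x86, x86-64 and arm64 conventions make the stub widen a sub-word return value to full register width, while AAPCS and MIPS hand back an already-extended value. The widening itself is plain integer conversion; a sketch of what the stub conceptually does for a signed byte result, with invented function names:

    #include <cassert>
    #include <cstdint>

    // Pretend the native side handed back a byte result in the low bits of a
    // register-sized value whose upper bits are unspecified.
    static uint64_t FakeRawRegisterValue() {
      return 0xDEADBEEF00000080ull;  // low byte is 0x80, i.e. -128 as int8_t
    }

    // When the convention requires it, the stub sign-extends the small type
    // before the managed caller ever looks at the register.
    static int32_t ExtendSignedByteResult(uint64_t raw) {
      return static_cast<int32_t>(static_cast<int8_t>(raw & 0xFF));
    }

    int main() {
      assert(ExtendSignedByteResult(FakeRawRegisterValue()) == -128);
      return 0;
    }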
- bool RequiresSmallResultTypeExtension() const OVERRIDE { + bool RequiresSmallResultTypeExtension() const override { return true; } protected: - size_t NumberOfOutgoingStackArgs() OVERRIDE; + size_t NumberOfOutgoingStackArgs() override; private: DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention); diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index e5e96d01fc..9e77d6b36c 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -18,6 +18,7 @@ #include <android-base/logging.h> +#include "arch/instruction_set.h" #include "base/bit_utils.h" #include "handle_scope-inl.h" #include "utils/x86_64/managed_register_x86_64.h" diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h index cdba334d81..dfab41b154 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h @@ -23,59 +23,59 @@ namespace art { namespace x86_64 { -class X86_64ManagedRuntimeCallingConvention FINAL : public ManagedRuntimeCallingConvention { +class X86_64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { public: X86_64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) : ManagedRuntimeCallingConvention(is_static, is_synchronized, shorty, PointerSize::k64) {} - ~X86_64ManagedRuntimeCallingConvention() OVERRIDE {} + ~X86_64ManagedRuntimeCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // Managed runtime calling convention - ManagedRegister MethodRegister() OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; - const ManagedRegisterEntrySpills& EntrySpills() OVERRIDE; + ManagedRegister MethodRegister() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + const ManagedRegisterEntrySpills& EntrySpills() override; private: ManagedRegisterEntrySpills entry_spills_; DISALLOW_COPY_AND_ASSIGN(X86_64ManagedRuntimeCallingConvention); }; -class X86_64JniCallingConvention FINAL : public JniCallingConvention { +class X86_64JniCallingConvention final : public JniCallingConvention { public: X86_64JniCallingConvention(bool is_static, bool is_synchronized, bool is_critical_native, const char* shorty); - ~X86_64JniCallingConvention() OVERRIDE {} + ~X86_64JniCallingConvention() override {} // Calling convention - ManagedRegister ReturnRegister() OVERRIDE; - ManagedRegister IntReturnRegister() OVERRIDE; - ManagedRegister InterproceduralScratchRegister() OVERRIDE; + ManagedRegister ReturnRegister() override; + ManagedRegister IntReturnRegister() override; + ManagedRegister InterproceduralScratchRegister() override; // JNI calling convention - size_t FrameSize() OVERRIDE; - size_t OutArgSize() OVERRIDE; - ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE; - ManagedRegister ReturnScratchRegister() const OVERRIDE; - uint32_t CoreSpillMask() const OVERRIDE; - uint32_t FpSpillMask() const 
OVERRIDE; - bool IsCurrentParamInRegister() OVERRIDE; - bool IsCurrentParamOnStack() OVERRIDE; - ManagedRegister CurrentParamRegister() OVERRIDE; - FrameOffset CurrentParamStackOffset() OVERRIDE; + size_t FrameSize() override; + size_t OutArgSize() override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ManagedRegister ReturnScratchRegister() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; // x86-64 needs to extend small return types. - bool RequiresSmallResultTypeExtension() const OVERRIDE { + bool RequiresSmallResultTypeExtension() const override { return true; } protected: - size_t NumberOfOutgoingStackArgs() OVERRIDE; + size_t NumberOfOutgoingStackArgs() override; private: DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention); diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc deleted file mode 100644 index 6e0286afac..0000000000 --- a/compiler/linker/arm/relative_patcher_arm_base.cc +++ /dev/null @@ -1,522 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_arm_base.h" - -#include "base/stl_util.h" -#include "compiled_method-inl.h" -#include "debug/method_debug_info.h" -#include "dex/dex_file_types.h" -#include "linker/linker_patch.h" -#include "linker/output_stream.h" -#include "oat.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class ArmBaseRelativePatcher::ThunkData { - public: - ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset) - : code_(std::move(code)), - offsets_(), - max_next_offset_(max_next_offset), - pending_offset_(0u) { - DCHECK(NeedsNextThunk()); // The data is constructed only when we expect to need the thunk. - } - - ThunkData(ThunkData&& src) = default; - - size_t CodeSize() const { - return code_.size(); - } - - ArrayRef<const uint8_t> GetCode() const { - return ArrayRef<const uint8_t>(code_); - } - - bool NeedsNextThunk() const { - return max_next_offset_ != 0u; - } - - uint32_t MaxNextOffset() const { - DCHECK(NeedsNextThunk()); - return max_next_offset_; - } - - void ClearMaxNextOffset() { - DCHECK(NeedsNextThunk()); - max_next_offset_ = 0u; - } - - void SetMaxNextOffset(uint32_t max_next_offset) { - DCHECK(!NeedsNextThunk()); - max_next_offset_ = max_next_offset; - } - - // Adjust the MaxNextOffset() down if needed to fit the code before the next thunk. - // Returns true if it was adjusted, false if the old value was kept. 
- bool MakeSpaceBefore(const ThunkData& next_thunk, size_t alignment) { - DCHECK(NeedsNextThunk()); - DCHECK(next_thunk.NeedsNextThunk()); - DCHECK_ALIGNED_PARAM(MaxNextOffset(), alignment); - DCHECK_ALIGNED_PARAM(next_thunk.MaxNextOffset(), alignment); - if (next_thunk.MaxNextOffset() - CodeSize() < MaxNextOffset()) { - max_next_offset_ = RoundDown(next_thunk.MaxNextOffset() - CodeSize(), alignment); - return true; - } else { - return false; - } - } - - uint32_t ReserveOffset(size_t offset) { - DCHECK(NeedsNextThunk()); - DCHECK_LE(offset, max_next_offset_); - max_next_offset_ = 0u; // The reserved offset should satisfy all pending references. - offsets_.push_back(offset); - return offset + CodeSize(); - } - - bool HasReservedOffset() const { - return !offsets_.empty(); - } - - uint32_t LastReservedOffset() const { - DCHECK(HasReservedOffset()); - return offsets_.back(); - } - - bool HasPendingOffset() const { - return pending_offset_ != offsets_.size(); - } - - uint32_t GetPendingOffset() const { - DCHECK(HasPendingOffset()); - return offsets_[pending_offset_]; - } - - void MarkPendingOffsetAsWritten() { - DCHECK(HasPendingOffset()); - ++pending_offset_; - } - - bool HasWrittenOffset() const { - return pending_offset_ != 0u; - } - - uint32_t LastWrittenOffset() const { - DCHECK(HasWrittenOffset()); - return offsets_[pending_offset_ - 1u]; - } - - size_t IndexOfFirstThunkAtOrAfter(uint32_t offset) const { - size_t number_of_thunks = NumberOfThunks(); - for (size_t i = 0; i != number_of_thunks; ++i) { - if (GetThunkOffset(i) >= offset) { - return i; - } - } - return number_of_thunks; - } - - size_t NumberOfThunks() const { - return offsets_.size(); - } - - uint32_t GetThunkOffset(size_t index) const { - DCHECK_LT(index, NumberOfThunks()); - return offsets_[index]; - } - - private: - std::vector<uint8_t> code_; // The code of the thunk. - std::vector<uint32_t> offsets_; // Offsets at which the thunk needs to be written. - uint32_t max_next_offset_; // The maximum offset at which the next thunk can be placed. - uint32_t pending_offset_; // The index of the next offset to write. -}; - -class ArmBaseRelativePatcher::PendingThunkComparator { - public: - bool operator()(const ThunkData* lhs, const ThunkData* rhs) const { - DCHECK(lhs->HasPendingOffset()); - DCHECK(rhs->HasPendingOffset()); - // The top of the heap is defined to contain the highest element and we want to pick - // the thunk with the smallest pending offset, so use the reverse ordering, i.e. ">". - return lhs->GetPendingOffset() > rhs->GetPendingOffset(); - } -}; - -uint32_t ArmBaseRelativePatcher::ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) { - return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); -} - -uint32_t ArmBaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - // For multi-oat compilations (boot image), ReserveSpaceEnd() is called for each oat file. - // Since we do not know here whether this is the last file or whether the next opportunity - // to place thunk will be soon enough, we need to reserve all needed thunks now. Code for - // subsequent oat files can still call back to them. 
- if (!unprocessed_method_call_patches_.empty()) { - ResolveMethodCalls(offset, MethodReference(nullptr, dex::kDexNoIndex)); - } - for (ThunkData* data : unreserved_thunks_) { - uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); - offset = data->ReserveOffset(thunk_offset); - } - unreserved_thunks_.clear(); - // We also need to delay initiating the pending_thunks_ until the call to WriteThunks(). - // Check that the `pending_thunks_.capacity()` indicates that no WriteThunks() has taken place. - DCHECK_EQ(pending_thunks_.capacity(), 0u); - return offset; -} - -uint32_t ArmBaseRelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (pending_thunks_.capacity() == 0u) { - if (thunks_.empty()) { - return offset; - } - // First call to WriteThunks(), prepare the thunks for writing. - pending_thunks_.reserve(thunks_.size()); - for (auto& entry : thunks_) { - ThunkData* data = &entry.second; - if (data->HasPendingOffset()) { - pending_thunks_.push_back(data); - } - } - std::make_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - } - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - while (!pending_thunks_.empty() && - pending_thunks_.front()->GetPendingOffset() == aligned_offset) { - // Write alignment bytes and code. - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && UNLIKELY(!WriteCodeAlignment(out, aligned_code_delta))) { - return 0u; - } - if (UNLIKELY(!WriteThunk(out, pending_thunks_.front()->GetCode()))) { - return 0u; - } - offset = aligned_offset + pending_thunks_.front()->CodeSize(); - // Mark the thunk as written at the pending offset and update the `pending_thunks_` heap. - std::pop_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - pending_thunks_.back()->MarkPendingOffsetAsWritten(); - if (pending_thunks_.back()->HasPendingOffset()) { - std::push_heap(pending_thunks_.begin(), pending_thunks_.end(), PendingThunkComparator()); - } else { - pending_thunks_.pop_back(); - } - aligned_offset = CompiledMethod::AlignCode(offset, instruction_set_); - } - DCHECK(pending_thunks_.empty() || pending_thunks_.front()->GetPendingOffset() > aligned_offset); - return offset; -} - -std::vector<debug::MethodDebugInfo> ArmBaseRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset) { - // For multi-oat compilation (boot image), `thunks_` records thunks for all oat files. - // To return debug info for the current oat file, we must ignore thunks before the - // `executable_offset` as they are in the previous oat files and this function must be - // called before reserving thunk positions for subsequent oat files. - size_t number_of_thunks = 0u; - for (auto&& entry : thunks_) { - const ThunkData& data = entry.second; - number_of_thunks += data.NumberOfThunks() - data.IndexOfFirstThunkAtOrAfter(executable_offset); - } - std::vector<debug::MethodDebugInfo> result; - result.reserve(number_of_thunks); - for (auto&& entry : thunks_) { - const ThunkKey& key = entry.first; - const ThunkData& data = entry.second; - size_t start = data.IndexOfFirstThunkAtOrAfter(executable_offset); - if (start == data.NumberOfThunks()) { - continue; - } - // Get the base name to use for the first occurrence of the thunk. 
- std::string base_name = GetThunkDebugName(key); - for (size_t i = start, num = data.NumberOfThunks(); i != num; ++i) { - debug::MethodDebugInfo info = {}; - if (i == 0u) { - info.custom_name = base_name; - } else { - // Add a disambiguating tag for subsequent identical thunks. Since the `thunks_` - // keeps records also for thunks in previous oat files, names based on the thunk - // index shall be unique across the whole multi-oat output. - info.custom_name = base_name + "_" + std::to_string(i); - } - info.isa = instruction_set_; - info.is_code_address_text_relative = true; - info.code_address = data.GetThunkOffset(i) - executable_offset; - info.code_size = data.CodeSize(); - result.push_back(std::move(info)); - } - } - return result; -} - -ArmBaseRelativePatcher::ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set) - : provider_(provider), - instruction_set_(instruction_set), - thunks_(), - unprocessed_method_call_patches_(), - method_call_thunk_(nullptr), - pending_thunks_() { -} - -ArmBaseRelativePatcher::~ArmBaseRelativePatcher() { - // All work done by member destructors. -} - -uint32_t ArmBaseRelativePatcher::ReserveSpaceInternal(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref, - uint32_t max_extra_space) { - // Adjust code size for extra space required by the subclass. - uint32_t max_code_size = compiled_method->GetQuickCode().size() + max_extra_space; - uint32_t code_offset; - uint32_t next_aligned_offset; - while (true) { - code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - next_aligned_offset = compiled_method->AlignCode(code_offset + max_code_size); - if (unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { - break; - } - ThunkData* thunk = unreserved_thunks_.front(); - if (thunk == method_call_thunk_) { - ResolveMethodCalls(code_offset, method_ref); - // This may have changed `method_call_thunk_` data, so re-check if we need to reserve. - if (unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset) { - break; - } - // We need to process the new `front()` whether it's still the `method_call_thunk_` or not. - thunk = unreserved_thunks_.front(); - } - unreserved_thunks_.pop_front(); - uint32_t thunk_offset = CompiledCode::AlignCode(offset, instruction_set_); - offset = thunk->ReserveOffset(thunk_offset); - if (thunk == method_call_thunk_) { - // All remaining method call patches will be handled by this thunk. - DCHECK(!unprocessed_method_call_patches_.empty()); - DCHECK_LE(thunk_offset - unprocessed_method_call_patches_.front().GetPatchOffset(), - MaxPositiveDisplacement(GetMethodCallKey())); - unprocessed_method_call_patches_.clear(); - } - } - - // Process patches and check that adding thunks for the current method did not push any - // thunks (previously existing or newly added) before `next_aligned_offset`. This is - // essentially a check that we never compile a method that's too big. The calls or branches - // from the method should be able to reach beyond the end of the method and over any pending - // thunks. (The number of different thunks should be relatively low and their code short.) 
- ProcessPatches(compiled_method, code_offset); - CHECK(unreserved_thunks_.empty() || - unreserved_thunks_.front()->MaxNextOffset() >= next_aligned_offset); - - return offset; -} - -uint32_t ArmBaseRelativePatcher::CalculateMethodCallDisplacement(uint32_t patch_offset, - uint32_t target_offset) { - DCHECK(method_call_thunk_ != nullptr); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); - uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); - // NOTE: With unsigned arithmetic we do mean to use && rather than || below. - if (displacement > max_positive_displacement && displacement < -max_negative_displacement) { - // Unwritten thunks have higher offsets, check if it's within range. - DCHECK(!method_call_thunk_->HasPendingOffset() || - method_call_thunk_->GetPendingOffset() > patch_offset); - if (method_call_thunk_->HasPendingOffset() && - method_call_thunk_->GetPendingOffset() - patch_offset <= max_positive_displacement) { - displacement = method_call_thunk_->GetPendingOffset() - patch_offset; - } else { - // We must have a previous thunk then. - DCHECK(method_call_thunk_->HasWrittenOffset()); - DCHECK_LT(method_call_thunk_->LastWrittenOffset(), patch_offset); - displacement = method_call_thunk_->LastWrittenOffset() - patch_offset; - DCHECK_GE(displacement, -max_negative_displacement); - } - } - return displacement; -} - -uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset) { - auto it = thunks_.find(key); - CHECK(it != thunks_.end()); - const ThunkData& data = it->second; - if (data.HasWrittenOffset()) { - uint32_t offset = data.LastWrittenOffset(); - DCHECK_LT(offset, patch_offset); - if (patch_offset - offset <= MaxNegativeDisplacement(key)) { - return offset; - } - } - DCHECK(data.HasPendingOffset()); - uint32_t offset = data.GetPendingOffset(); - DCHECK_GT(offset, patch_offset); - DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key)); - return offset; -} - -ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() { - return ThunkKey(ThunkType::kMethodCall); -} - -ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey( - const LinkerPatch& patch) { - DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); - return ThunkKey(ThunkType::kBakerReadBarrier, - patch.GetBakerCustomValue1(), - patch.GetBakerCustomValue2()); -} - -void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, - uint32_t code_offset) { - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - uint32_t patch_offset = code_offset + patch.LiteralOffset(); - ThunkKey key(static_cast<ThunkType>(-1)); - ThunkData* old_data = nullptr; - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - key = GetMethodCallKey(); - unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); - if (method_call_thunk_ == nullptr) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); - auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); - method_call_thunk_ = &it->second; - AddUnreservedThunk(method_call_thunk_); - } else { - old_data = method_call_thunk_; - } - } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - key = GetBakerThunkKey(patch); - auto lb = thunks_.lower_bound(key); - if (lb == thunks_.end() 
|| thunks_.key_comp()(key, lb->first)) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); - auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); - AddUnreservedThunk(&it->second); - } else { - old_data = &lb->second; - } - } - if (old_data != nullptr) { - // Shared path where an old thunk may need an update. - DCHECK(key.GetType() != static_cast<ThunkType>(-1)); - DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); - if (old_data->NeedsNextThunk()) { - // Patches for a method are ordered by literal offset, so if we still need to place - // this thunk for a previous patch, that thunk shall be in range for this patch. - DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key)); - } else { - if (!old_data->HasReservedOffset() || - patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) { - old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key)); - AddUnreservedThunk(old_data); - } - } - } - } -} - -void ArmBaseRelativePatcher::AddUnreservedThunk(ThunkData* data) { - DCHECK(data->NeedsNextThunk()); - size_t index = unreserved_thunks_.size(); - while (index != 0u && data->MaxNextOffset() < unreserved_thunks_[index - 1u]->MaxNextOffset()) { - --index; - } - unreserved_thunks_.insert(unreserved_thunks_.begin() + index, data); - // We may need to update the max next offset(s) if the thunk code would not fit. - size_t alignment = GetInstructionSetAlignment(instruction_set_); - if (index + 1u != unreserved_thunks_.size()) { - // Note: Ignore the return value as we need to process previous thunks regardless. - data->MakeSpaceBefore(*unreserved_thunks_[index + 1u], alignment); - } - // Make space for previous thunks. Once we find a pending thunk that does - // not need an adjustment, we can stop. - while (index != 0u && unreserved_thunks_[index - 1u]->MakeSpaceBefore(*data, alignment)) { - --index; - data = unreserved_thunks_[index]; - } -} - -void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, - MethodReference method_ref) { - DCHECK(!unreserved_thunks_.empty()); - DCHECK(!unprocessed_method_call_patches_.empty()); - DCHECK(method_call_thunk_ != nullptr); - uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); - uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); - // Process as many patches as possible, stop only on unresolved targets or calls too far back. - while (!unprocessed_method_call_patches_.empty()) { - MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); - uint32_t patch_offset = unprocessed_method_call_patches_.front().GetPatchOffset(); - DCHECK(!method_call_thunk_->HasReservedOffset() || - method_call_thunk_->LastReservedOffset() <= patch_offset); - if (!method_call_thunk_->HasReservedOffset() || - patch_offset - method_call_thunk_->LastReservedOffset() > max_negative_displacement) { - // No previous thunk in range, check if we can reach the target directly. 
- if (target_method == method_ref) { - DCHECK_GT(quick_code_offset, patch_offset); - if (quick_code_offset - patch_offset > max_positive_displacement) { - break; - } - } else { - auto result = provider_->FindMethodOffset(target_method); - if (!result.first) { - break; - } - uint32_t target_offset = result.second - CompiledCode::CodeDelta(instruction_set_); - if (target_offset >= patch_offset) { - DCHECK_LE(target_offset - patch_offset, max_positive_displacement); - } else if (patch_offset - target_offset > max_negative_displacement) { - break; - } - } - } - unprocessed_method_call_patches_.pop_front(); - } - if (!unprocessed_method_call_patches_.empty()) { - // Try to adjust the max next offset in `method_call_thunk_`. Do this conservatively only if - // the thunk shall be at the end of the `unreserved_thunks_` to avoid dealing with overlaps. - uint32_t new_max_next_offset = - unprocessed_method_call_patches_.front().GetPatchOffset() + max_positive_displacement; - if (new_max_next_offset > - unreserved_thunks_.back()->MaxNextOffset() + unreserved_thunks_.back()->CodeSize()) { - method_call_thunk_->ClearMaxNextOffset(); - method_call_thunk_->SetMaxNextOffset(new_max_next_offset); - if (method_call_thunk_ != unreserved_thunks_.back()) { - RemoveElement(unreserved_thunks_, method_call_thunk_); - unreserved_thunks_.push_back(method_call_thunk_); - } - } - } else { - // We have resolved all method calls, we do not need a new thunk anymore. - method_call_thunk_->ClearMaxNextOffset(); - RemoveElement(unreserved_thunks_, method_call_thunk_); - } -} - -inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, - const ThunkKey& key) { - return RoundDown(patch_offset + MaxPositiveDisplacement(key), - GetInstructionSetAlignment(instruction_set_)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h deleted file mode 100644 index ee09bf96b3..0000000000 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ -#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ - -#include <deque> -#include <vector> - -#include "base/safe_map.h" -#include "dex/method_reference.h" -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class ArmBaseRelativePatcher : public RelativePatcher { - public: - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - protected: - ArmBaseRelativePatcher(RelativePatcherTargetProvider* provider, - InstructionSet instruction_set); - ~ArmBaseRelativePatcher(); - - enum class ThunkType { - kMethodCall, // Method call thunk. - kBakerReadBarrier, // Baker read barrier. - }; - - class ThunkKey { - public: - explicit ThunkKey(ThunkType type, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) - : type_(type), custom_value1_(custom_value1), custom_value2_(custom_value2) { } - - ThunkType GetType() const { - return type_; - } - - uint32_t GetCustomValue1() const { - return custom_value1_; - } - - uint32_t GetCustomValue2() const { - return custom_value2_; - } - - private: - ThunkType type_; - uint32_t custom_value1_; - uint32_t custom_value2_; - }; - - class ThunkKeyCompare { - public: - bool operator()(const ThunkKey& lhs, const ThunkKey& rhs) const { - if (lhs.GetType() != rhs.GetType()) { - return lhs.GetType() < rhs.GetType(); - } - if (lhs.GetCustomValue1() != rhs.GetCustomValue1()) { - return lhs.GetCustomValue1() < rhs.GetCustomValue1(); - } - return lhs.GetCustomValue2() < rhs.GetCustomValue2(); - } - }; - - static ThunkKey GetMethodCallKey(); - static ThunkKey GetBakerThunkKey(const LinkerPatch& patch); - - uint32_t ReserveSpaceInternal(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref, - uint32_t max_extra_space); - uint32_t GetThunkTargetOffset(const ThunkKey& key, uint32_t patch_offset); - - uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, - uint32_t target_offset); - - virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; - virtual std::string GetThunkDebugName(const ThunkKey& key) = 0; - virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0; - virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0; - - private: - class ThunkData; - - void ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset); - void AddUnreservedThunk(ThunkData* data); - - void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); - - uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key); - - RelativePatcherTargetProvider* const provider_; - const InstructionSet instruction_set_; - - // The data for all thunks. - // SafeMap<> nodes don't move after being inserted, so we can use direct pointers to the data. - using ThunkMap = SafeMap<ThunkKey, ThunkData, ThunkKeyCompare>; - ThunkMap thunks_; - - // ReserveSpace() tracks unprocessed method call patches. These may be resolved later. 
- class UnprocessedMethodCallPatch { - public: - UnprocessedMethodCallPatch(uint32_t patch_offset, MethodReference target_method) - : patch_offset_(patch_offset), target_method_(target_method) { } - - uint32_t GetPatchOffset() const { - return patch_offset_; - } - - MethodReference GetTargetMethod() const { - return target_method_; - } - - private: - uint32_t patch_offset_; - MethodReference target_method_; - }; - std::deque<UnprocessedMethodCallPatch> unprocessed_method_call_patches_; - // Once we have compiled a method call thunk, cache pointer to the data. - ThunkData* method_call_thunk_; - - // Thunks - std::deque<ThunkData*> unreserved_thunks_; - - class PendingThunkComparator; - std::vector<ThunkData*> pending_thunks_; // Heap with the PendingThunkComparator. - - friend class Arm64RelativePatcherTest; - friend class Thumb2RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(ArmBaseRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_ARM_BASE_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc deleted file mode 100644 index 78755176e4..0000000000 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_thumb2.h" - -#include <sstream> - -#include "arch/arm/asm_support_arm.h" -#include "art_method.h" -#include "base/bit_utils.h" -#include "compiled_method.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "linker/linker_patch.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "read_barrier.h" -#include "utils/arm/assembler_arm_vixl.h" - -namespace art { -namespace linker { - -// PC displacement from patch location; Thumb2 PC is always at instruction address + 4. -static constexpr int32_t kPcDisplacement = 4; - -// Maximum positive and negative displacement for method call measured from the patch location. -// (Signed 25 bit displacement with the last bit 0 has range [-2^24, 2^24-2] measured from -// the Thumb2 PC pointing right after the BL, i.e. 4 bytes later than the patch location.) -constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; -constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; - -// Maximum positive and negative displacement for a conditional branch measured from the patch -// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured -// from the Thumb2 PC pointing right after the B.cond, i.e. 4 bytes later than the patch location.) 
-constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement; -constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement; - -Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider) - : ArmBaseRelativePatcher(provider, InstructionSet::kThumb2) { -} - -void Thumb2RelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 1u, 0u); - DCHECK_EQ(patch_offset & 1u, 0u); - DCHECK_EQ(target_offset & 1u, 1u); // Thumb2 mode bit. - uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - DCHECK_EQ(displacement & 1u, 0u); - DCHECK((displacement >> 24) == 0u || (displacement >> 24) == 255u); // 25-bit signed. - uint32_t signbit = (displacement >> 31) & 0x1; - uint32_t i1 = (displacement >> 23) & 0x1; - uint32_t i2 = (displacement >> 22) & 0x1; - uint32_t imm10 = (displacement >> 12) & 0x03ff; - uint32_t imm11 = (displacement >> 1) & 0x07ff; - uint32_t j1 = i1 ^ (signbit ^ 1); - uint32_t j2 = i2 ^ (signbit ^ 1); - uint32_t value = (signbit << 26) | (j1 << 13) | (j2 << 11) | (imm10 << 16) | imm11; - value |= 0xf000d000; // BL - - // Check that we're just overwriting an existing BL. - DCHECK_EQ(GetInsn32(code, literal_offset) & 0xf800d000, 0xf000d000); - // Write the new BL. - SetInsn32(code, literal_offset, value); -} - -void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t literal_offset = patch.LiteralOffset(); - uint32_t pc_literal_offset = patch.PcInsnOffset(); - uint32_t pc_base = patch_offset + (pc_literal_offset - literal_offset) + 4u /* PC adjustment */; - uint32_t diff = target_offset - pc_base; - - uint32_t insn = GetInsn32(code, literal_offset); - DCHECK_EQ(insn & 0xff7ff0ffu, 0xf2400000u); // MOVW/MOVT, unpatched (imm16 == 0). - uint32_t diff16 = ((insn & 0x00800000u) != 0u) ? (diff >> 16) : (diff & 0xffffu); - uint32_t imm4 = (diff16 >> 12) & 0xfu; - uint32_t imm = (diff16 >> 11) & 0x1u; - uint32_t imm3 = (diff16 >> 8) & 0x7u; - uint32_t imm8 = diff16 & 0xffu; - insn = (insn & 0xfbf08f00u) | (imm << 26) | (imm4 << 16) | (imm3 << 12) | imm8; - SetInsn32(code, literal_offset, insn); -} - -void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) { - DCHECK_ALIGNED(patch_offset, 2u); - uint32_t literal_offset = patch.LiteralOffset(); - DCHECK_ALIGNED(literal_offset, 2u); - DCHECK_LT(literal_offset, code->size()); - uint32_t insn = GetInsn32(code, literal_offset); - DCHECK_EQ(insn, 0xf0408000); // BNE +0 (unpatched) - ThunkKey key = GetBakerThunkKey(patch); - if (kIsDebugBuild) { - const uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - // Check that the next instruction matches the expected LDR. - switch (kind) { - case BakerReadBarrierKind::kField: { - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - if (width == BakerReadBarrierWidth::kWide) { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (immediate), encoding T3, with correct base_reg. 
- CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); - } else { - DCHECK_GE(code->size() - literal_offset, 6u); - uint32_t next_insn = GetInsn16(code, literal_offset + 4u); - // LDR (immediate), encoding T1, with correct base_reg. - CheckValidReg(next_insn & 0x7u); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); - } - break; - } - case BakerReadBarrierKind::kArray: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn32(code, literal_offset + 4u); - // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). - CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); - CheckValidReg(next_insn & 0xf); // Check index register - break; - } - case BakerReadBarrierKind::kGcRoot: { - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - if (width == BakerReadBarrierWidth::kWide) { - DCHECK_GE(literal_offset, 4u); - uint32_t prev_insn = GetInsn32(code, literal_offset - 4u); - // LDR (immediate), encoding T3, with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); - } else { - DCHECK_GE(literal_offset, 2u); - uint32_t prev_insn = GetInsn16(code, literal_offset - 2u); - // LDR (immediate), encoding T1, with correct root_reg. - const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); - } - break; - } - default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); - UNREACHABLE(); - } - } - uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); - DCHECK_ALIGNED(target_offset, 4u); - uint32_t disp = target_offset - (patch_offset + kPcDisplacement); - DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu); // 21-bit signed. - insn |= ((disp << (26 - 20)) & 0x04000000u) | // Shift bit 20 to 26, "S". - ((disp >> (19 - 11)) & 0x00000800u) | // Shift bit 19 to 13, "J1". - ((disp >> (18 - 13)) & 0x00002000u) | // Shift bit 18 to 11, "J2". - ((disp << (16 - 12)) & 0x003f0000u) | // Shift bits 12-17 to 16-25, "imm6". - ((disp >> (1 - 0)) & 0x000007ffu); // Shift bits 1-12 to 0-11, "imm11". - SetInsn32(code, literal_offset, insn); -} - -#define __ assembler.GetVIXLAssembler()-> - -static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, - vixl::aarch32::Register base_reg, - vixl::aarch32::MemOperand& lock_word, - vixl::aarch32::Label* slow_path, - int32_t raw_ldr_offset) { - using namespace vixl::aarch32; // NOLINT(build/namespaces) - // Load the lock word containing the rb_state. - __ Ldr(ip, lock_word); - // Given the numeric representation, it's enough to check the low bit of the rb_state. 
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); - __ B(ne, slow_path, /* is_far_target */ false); - __ Add(lr, lr, raw_ldr_offset); - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); - __ Bx(lr); // And return back to the function. - // Note: The fake dependency is unnecessary for the slow path. -} - -// Load the read barrier introspection entrypoint in register `entrypoint` -static void LoadReadBarrierMarkIntrospectionEntrypoint(arm::ArmVIXLAssembler& assembler, - vixl::aarch32::Register entrypoint) { - using vixl::aarch32::MemOperand; - using vixl::aarch32::ip; - // Thread Register. - const vixl::aarch32::Register tr = vixl::aarch32::r9; - - // The register where the read barrier introspection entrypoint is loaded - // is fixed: `Thumb2RelativePatcher::kBakerCcEntrypointRegister` (R4). - DCHECK_EQ(entrypoint.GetCode(), Thumb2RelativePatcher::kBakerCcEntrypointRegister); - // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip.GetCode(), 12u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); - __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); -} - -void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, - uint32_t encoded_data) { - using namespace vixl::aarch32; // NOLINT(build/namespaces) - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - switch (kind) { - case BakerReadBarrierKind::kField: { - // Check if the holder is gray and, if not, add fake dependency to the base register - // and return to the LDR instruction to load the reference. Otherwise, use introspection - // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) - // that performs further checks on the reference and marks it if needed. - Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); - CheckValidReg(holder_reg.GetCode()); - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - // If base_reg differs from holder_reg, the offset was too large and we must have - // emitted an explicit null check before the load. Otherwise, we need to null-check - // the holder as we do not necessarily do that check before going to the thunk. - vixl::aarch32::Label throw_npe; - if (holder_reg.Is(base_reg)) { - __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); - } - vixl::aarch32::Label slow_path; - MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) - ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); - __ Bind(&slow_path); - const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). 
*/ -1 + - raw_ldr_offset; - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - if (width == BakerReadBarrierWidth::kWide) { - MemOperand ldr_half_address(lr, ldr_offset + 2); - __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". - __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. - __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. - } else { - MemOperand ldr_address(lr, ldr_offset); - __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. - __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint - ep_reg, // for narrow LDR. - Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); - __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. - __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. - } - // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Bx(ep_reg); // Jump to the entrypoint. - if (holder_reg.Is(base_reg)) { - // Add null check slow path. The stack map is at the address pointed to by LR. - __ Bind(&throw_npe); - int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset)); - __ Bx(ip); - } - break; - } - case BakerReadBarrierKind::kArray: { - Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - vixl::aarch32::Label slow_path; - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); - DCHECK_LT(lock_word.GetOffsetImmediate(), 0); - const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); - __ Bind(&slow_path); - const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + - raw_ldr_offset; - MemOperand ldr_address(lr, ldr_offset + 2); - __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", - // i.e. Rm+32 because the scale in imm2 is 2. - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create - // a switch case target based on the index register. - __ Mov(ip, base_reg); // Move the base register to ip0. - __ Bx(ep_reg); // Jump to the entrypoint's array switch case. - break; - } - case BakerReadBarrierKind::kGcRoot: { - // Check if the reference needs to be marked and if so (i.e. not null, not marked yet - // and it does not have a forwarding address), call the correct introspection entrypoint; - // otherwise return the reference (or the extracted forwarding address). - // There is no gray bit check for GC roots. 
- Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(root_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip); - vixl::aarch32::Label return_label, not_marked, forwarding_address; - __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false); - MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); - __ Ldr(ip, lock_word); - __ Tst(ip, LockWord::kMarkBitStateMaskShifted); - __ B(eq, ¬_marked); - __ Bind(&return_label); - __ Bx(lr); - __ Bind(¬_marked); - static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, - "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " - " the highest bits and the 'forwarding address' state to have all bits set"); - __ Cmp(ip, Operand(0xc0000000)); - __ B(hs, &forwarding_address); - Register ep_reg(kBakerCcEntrypointRegister); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ep_reg); - // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister - // to art_quick_read_barrier_mark_introspection_gc_roots. - int32_t entrypoint_offset = (width == BakerReadBarrierWidth::kWide) - ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET - : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET; - __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); - __ Mov(ip, root_reg); - __ Bx(ep_reg); - __ Bind(&forwarding_address); - __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); - __ Bx(lr); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } -} - -std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - arm::ArmVIXLAssembler assembler(&allocator); - - switch (key.GetType()) { - case ThunkType::kMethodCall: - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
- assembler.LoadFromOffset( - arm::kLoadWord, - vixl::aarch32::pc, - vixl::aarch32::r0, - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - __ Bkpt(0); - break; - case ThunkType::kBakerReadBarrier: - CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1()); - break; - } - - assembler.FinalizeCode(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; -} - -std::string Thumb2RelativePatcher::GetThunkDebugName(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return "MethodCallThunk"; - - case ThunkType::kBakerReadBarrier: { - uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - std::ostringstream oss; - oss << "BakerReadBarrierThunk"; - switch (kind) { - case BakerReadBarrierKind::kField: - oss << "Field"; - if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { - oss << "Wide"; - } - oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) - << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); - break; - case BakerReadBarrierKind::kArray: - oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); - break; - case BakerReadBarrierKind::kGcRoot: - oss << "GcRoot"; - if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { - oss << "Wide"; - } - oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - break; - } - return oss.str(); - } - } -} - -#undef __ - -uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondPositiveDisplacement; - } -} - -uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondNegativeDisplacement; - } -} - -void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { - DCHECK_LE(offset + 4u, code->size()); - DCHECK_ALIGNED(offset, 2u); - uint8_t* addr = &(*code)[offset]; - addr[0] = (value >> 16) & 0xff; - addr[1] = (value >> 24) & 0xff; - addr[2] = (value >> 0) & 0xff; - addr[3] = (value >> 8) & 0xff; -} - -uint32_t Thumb2RelativePatcher::GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 4u, code.size()); - DCHECK_ALIGNED(offset, 2u); - const uint8_t* addr = &code[offset]; - return - (static_cast<uint32_t>(addr[0]) << 16) + - (static_cast<uint32_t>(addr[1]) << 24) + - (static_cast<uint32_t>(addr[2]) << 0)+ - (static_cast<uint32_t>(addr[3]) << 8); -} - -template <typename Vector> -uint32_t Thumb2RelativePatcher::GetInsn32(Vector* code, uint32_t offset) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - return GetInsn32(ArrayRef<const uint8_t>(*code), offset); -} - -uint32_t Thumb2RelativePatcher::GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset) { - 
DCHECK_LE(offset + 2u, code.size()); - DCHECK_ALIGNED(offset, 2u); - const uint8_t* addr = &code[offset]; - return (static_cast<uint32_t>(addr[0]) << 0) + (static_cast<uint32_t>(addr[1]) << 8); -} - -template <typename Vector> -uint32_t Thumb2RelativePatcher::GetInsn16(Vector* code, uint32_t offset) { - static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type"); - return GetInsn16(ArrayRef<const uint8_t>(*code), offset); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h deleted file mode 100644 index 68386c00f4..0000000000 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ -#define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ - -#include "arch/arm/registers_arm.h" -#include "base/array_ref.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" -#include "linker/arm/relative_patcher_arm_base.h" - -namespace art { - -namespace arm { -class ArmVIXLAssembler; -} // namespace arm - -namespace linker { - -class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { - public: - static constexpr uint32_t kBakerCcEntrypointRegister = 4u; - - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, - uint32_t holder_reg, - bool narrow) { - CheckValidReg(base_reg); - CheckValidReg(holder_reg); - DCHECK(!narrow || base_reg < 8u) << base_reg; - BakerReadBarrierWidth width = - narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg) | - BakerReadBarrierWidthField::Encode(width); - } - - static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { - CheckValidReg(base_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | - BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); - } - - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { - CheckValidReg(root_reg); - DCHECK(!narrow || root_reg < 8u) << root_reg; - BakerReadBarrierWidth width = - narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | - BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg) | - BakerReadBarrierWidthField::Encode(width); - } - - explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); - - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - - protected: - std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - std::string GetThunkDebugName(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; - uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; - - private: - static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; - - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. - kLast = kGcRoot - }; - - enum class BakerReadBarrierWidth : uint8_t { - kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). - kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). - kLast = kNarrow - }; - - static constexpr size_t kBitsForBakerReadBarrierKind = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); - static constexpr size_t kBitsForRegister = 4u; - using BakerReadBarrierKindField = - BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; - using BakerReadBarrierFirstRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; - using BakerReadBarrierSecondRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; - static constexpr size_t kBitsForBakerReadBarrierWidth = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); - using BakerReadBarrierWidthField = BitField<BakerReadBarrierWidth, - kBitsForBakerReadBarrierKind + 2 * kBitsForRegister, - kBitsForBakerReadBarrierWidth>; - - static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister) << reg; - } - - void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); - - void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); - static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Vector> - static uint32_t GetInsn32(Vector* code, uint32_t offset); - - static uint32_t GetInsn16(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Vector> - static uint32_t GetInsn16(Vector* code, uint32_t offset); - - friend class Thumb2RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(Thumb2RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc deleted file mode 100644 index 2c22a352c2..0000000000 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ /dev/null @@ -1,1287 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open 
Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm/relative_patcher_thumb2.h" - -#include "base/casts.h" -#include "linker/relative_patcher_test.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class Thumb2RelativePatcherTest : public RelativePatcherTest { - public: - Thumb2RelativePatcherTest() : RelativePatcherTest(InstructionSet::kThumb2, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kNopRawCode[]; - static const ArrayRef<const uint8_t> kNopCode; - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - static const uint32_t kPcInsnOffset; - - // The PC in Thumb mode is 4 bytes after the instruction location. - static constexpr uint32_t kPcAdjustment = 4u; - - // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800u; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; - - // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; - static constexpr uint32_t kBlMinusMax = 0xf400d000u; - - // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. - static constexpr uint32_t kBneWPlus0 = 0xf0408000u; - - // LDR immediate, 16-bit, encoding T1. Bits 6-10 are imm5, 0-2 are Rt, 3-5 are Rn. - static constexpr uint32_t kLdrInsn = 0x6800u; - - // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. - static constexpr uint32_t kLdrWInsn = 0xf8d00000u; - - // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. - static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; - - // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. - static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; - - // NOP instructions. 
- static constexpr uint32_t kNopInsn = 0xbf00u; - static constexpr uint32_t kNopWInsn = 0xf3af8000u; - - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - if (IsUint<16>(insn)) { - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - }; - static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } else { - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn >> 16), - static_cast<uint8_t>(insn >> 24), - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - } - - void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { - InsertInsn(code, code->size(), insn); - } - - std::vector<uint8_t> GenNops(size_t num_nops) { - std::vector<uint8_t> result; - result.reserve(num_nops * 2u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - return result; - } - - std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { - std::vector<uint8_t> raw_code; - size_t number_of_16_bit_insns = - std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); - raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); - for (uint32_t insn : insns) { - PushBackInsn(&raw_code, insn); - } - return raw_code; - } - - uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { - if (!IsAligned<2u>(bne_offset)) { - LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; - return 0xffffffffu; // Fails code diff later. - } - if (!IsAligned<2u>(target_offset)) { - LOG(ERROR) << "Unaligned target_offset: " << target_offset; - return 0xffffffffu; // Fails code diff later. - } - uint32_t diff = target_offset - bne_offset - kPcAdjustment; - DCHECK_ALIGNED(diff, 2u); - if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { - LOG(ERROR) << "Target out of range: " << diff; - return 0xffffffffu; // Fails code diff later. - } - return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 - | (((diff >> 12) & 0x3fu) << 16) // imm6 - | (((diff >> 18) & 1) << 13) // J1 - | (((diff >> 19) & 1) << 11) // J2 - | (((diff >> 20) & 1) << 26); // S - } - - bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, - const ArrayRef<const LinkerPatch>& method1_patches, - const ArrayRef<const uint8_t>& method3_code, - const ArrayRef<const LinkerPatch>& method3_patches, - uint32_t distance_without_thunks) { - CHECK_EQ(distance_without_thunks % kArmAlignment, 0u); - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - - // We want to put the method3 at a very precise offset. - const uint32_t method3_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(method3_offset, kArmAlignment); - - // Calculate size of method2 so that we put method3 at the correct place. 
- const uint32_t method1_end = method1_offset + method1_code.size(); - const uint32_t method2_offset = - method1_end + CodeAlignmentSize(method1_end) + sizeof(OatQuickMethodHeader); - const uint32_t method2_size = (method3_offset - sizeof(OatQuickMethodHeader) - method2_offset); - std::vector<uint8_t> method2_raw_code(method2_size); - ArrayRef<const uint8_t> method2_code(method2_raw_code); - AddCompiledMethod(MethodRef(2u), method2_code); - - AddCompiledMethod(MethodRef(3u), method3_code, method3_patches); - - Link(); - - // Check assumptions. - CHECK_EQ(GetMethodOffset(1), method1_offset); - CHECK_EQ(GetMethodOffset(2), method2_offset); - auto result3 = method_offset_map_.FindMethodOffset(MethodRef(3)); - CHECK(result3.first); - // There may be a thunk before method2. - if (result3.second == method3_offset + 1 /* thumb mode */) { - return false; // No thunk. - } else { - uint32_t thunk_end = - CompiledCode::AlignCode(method3_offset - sizeof(OatQuickMethodHeader), - InstructionSet::kThumb2) + - MethodCallThunkSize(); - uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); - CHECK_EQ(result3.second, header_offset + sizeof(OatQuickMethodHeader) + 1 /* thumb mode */); - return true; // Thunk present. - } - } - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - CHECK_NE(result.second & 1u, 0u); - return result.second - 1 /* thumb mode */; - } - - std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); - return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t MethodCallThunkSize() { - return CompileMethodCallThunk().size(); - } - - bool CheckThunk(uint32_t thunk_offset) { - const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); - if (output_.size() < thunk_offset + expected_code.size()) { - LOG(ERROR) << "output_.size() == " << output_.size() << " < " - << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); - return false; - } - ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == ArrayRef<const uint8_t>(expected_code)) { - return true; - } - // Log failure info. 
- DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); - return false; - } - - std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { - std::vector<uint8_t> result; - result.reserve(num_nops * 2u + 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - PushBackInsn(&result, bl); - return result; - } - - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, - uint32_t holder_reg, - bool narrow) { - const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg, narrow)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg, bool narrow) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, narrow)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t GetOutputInsn32(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 4u); - return (static_cast<uint32_t>(output_[offset]) << 16) | - (static_cast<uint32_t>(output_[offset + 1]) << 24) | - (static_cast<uint32_t>(output_[offset + 2]) << 0) | - (static_cast<uint32_t>(output_[offset + 3]) << 8); - } - - uint16_t GetOutputInsn16(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 2u); - return (static_cast<uint32_t>(output_[offset]) << 0) | - (static_cast<uint32_t>(output_[offset + 1]) << 8); - } - - void TestBakerFieldWide(uint32_t offset, uint32_t ref_reg); - void TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg); -}; - -const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { - 0x00, 0xf0, 0x00, 0xf8 -}; - -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t Thumb2RelativePatcherTest::kNopRawCode[] = { - 0x00, 0xbf -}; - -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kNopCode(kNopRawCode); - -const uint8_t Thumb2RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x40, 0xf2, 0x00, 0x00, // MOVW r0, #0 (placeholder) - 0xc0, 0xf2, 0x00, 0x00, // MOVT r0, #0 (placeholder) - 0x78, 0x44, // ADD r0, pc -}; -const ArrayRef<const uint8_t> Thumb2RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); -const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; - -void Thumb2RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - const LinkerPatch patches[] = { - 
LinkerPatch::StringBssEntryPatch(0u, nullptr, kPcInsnOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(4u, nullptr, kPcInsnOffset, kStringIndex), - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t pc_base_offset = method1_offset + kPcInsnOffset + 4u /* PC adjustment */; - uint32_t diff = target_offset - pc_base_offset; - // Distribute the bits of the diff between the MOVW and MOVT: - uint32_t diffw = diff & 0xffffu; - uint32_t difft = diff >> 16; - uint32_t movw = 0xf2400000u | // MOVW r0, #0 (placeholder), - ((diffw & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, - ((diffw & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, - ((diffw & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, - ((diffw & 0x00ffu)); // keep imm8 at bits 0-7. - uint32_t movt = 0xf2c00000u | // MOVT r0, #0 (placeholder), - ((difft & 0xf000u) << (16 - 12)) | // move imm4 from bits 12-15 to bits 16-19, - ((difft & 0x0800u) << (26 - 11)) | // move imm from bit 11 to bit 26, - ((difft & 0x0700u) << (12 - 8)) | // move imm3 from bits 8-10 to bits 12-14, - ((difft & 0x00ffu)); // keep imm8 at bits 0-7. - const uint8_t expected_code[] = { - static_cast<uint8_t>(movw >> 16), static_cast<uint8_t>(movw >> 24), - static_cast<uint8_t>(movw >> 0), static_cast<uint8_t>(movw >> 8), - static_cast<uint8_t>(movt >> 16), static_cast<uint8_t>(movt >> 24), - static_cast<uint8_t>(movt >> 0), static_cast<uint8_t>(movt >> 8), - 0x78, 0x44, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallSelf) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xff, 0xf7, 0xfe, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOther) { - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - const LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + 4u /* PC adjustment */); - ASSERT_EQ(diff_after & 1u, 0u); - ASSERT_LT(diff_after >> 1, 1u << 8); // Simple encoding, (diff_after >> 1) fits into 8 bits. 
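The CallSelf/CallOther expectations above rely on the short-range BL forms: for small forward or backward displacements only the low eight bits of the second halfword vary, so the tests simply OR `(diff >> 1) & 0xff` into `kBlPlus0` or `kBlMinus256`. A minimal standalone sketch of that simple case and of the halfword byte order used by `InsertInsn` (the helper names `EncodeShortBl` and `EmitInsn32` are illustrative, not part of the deleted file):

#include <cassert>
#include <cstdint>

namespace {

// Short-range Thumb2 BL encoding as used by the deleted CallSelf/CallOther
// expectations: the fixed bits come from kBlPlus0/kBlMinus256 and only the
// low 8 bits of the immediate vary.
constexpr uint32_t kBlPlus0 = 0xf000f800u;     // BL with zero displacement.
constexpr uint32_t kBlMinus256 = 0xf7ffff00u;  // BL with displacement -256.
constexpr uint32_t kPcAdjustment = 4u;         // Thumb PC = BL location + 4.

uint32_t EncodeShortBl(uint32_t patch_offset, uint32_t target_offset) {
  uint32_t diff = target_offset - (patch_offset + kPcAdjustment);
  uint32_t base = (static_cast<int32_t>(diff) < 0) ? kBlMinus256 : kBlPlus0;
  return base | ((diff >> 1) & 0xffu);
}

// 32-bit Thumb2 instructions are stored as two little-endian halfwords,
// mirroring InsertInsn() above.
void EmitInsn32(uint8_t out[4], uint32_t insn) {
  out[0] = static_cast<uint8_t>(insn >> 16);
  out[1] = static_cast<uint8_t>(insn >> 24);
  out[2] = static_cast<uint8_t>(insn);
  out[3] = static_cast<uint8_t>(insn >> 8);
}

}  // namespace

int main() {
  // BL to self: diff = -4, encoding kBlMinus256 | 0xfe, bytes ff f7 fe ff,
  // which is exactly the CallSelf expectation.
  uint8_t bytes[4];
  EmitInsn32(bytes, EncodeShortBl(/* patch_offset */ 0u, /* target_offset */ 0u));
  assert(bytes[0] == 0xff && bytes[1] == 0xf7 && bytes[2] == 0xfe && bytes[3] == 0xff);
  return 0;
}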
- static const uint8_t method1_expected_code[] = { - 0x00, 0xf0, static_cast<uint8_t>(diff_after >> 1), 0xf8 - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + 4u /* PC adjustment */); - ASSERT_EQ(diff_before & 1u, 0u); - ASSERT_GE(diff_before, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0. - auto method2_expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff_before >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t diff = kTrampolineOffset - (method1_offset + 4u); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). - auto expected_code = GenNopsAndBl(0u, kBlMinus256 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { - constexpr uint32_t missing_method_index = 1024u; - auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. - ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first); - - // Check linked code. - uint32_t method3_offset = GetMethodOffset(3u); - uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), - InstructionSet::kThumb2); - uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. - auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. 
- ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), - }; - - constexpr uint32_t max_positive_disp = 16 * MB - 2u + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + max_positive_disp); - ASSERT_FALSE(thunk_in_gap); // There should be no thunk. - - // Check linked code. - auto expected_code = GenNopsAndBl(3u, kBlPlusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { - auto method3_raw_code = GenNopsAndBl(2u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be no thunk. - - // Check linked code. - auto expected_code = GenNopsAndBl(2u, kBlMinusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(2u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), - }; - - constexpr uint32_t just_over_max_positive_disp = 16 * MB + 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + just_over_max_positive_disp); - ASSERT_TRUE(thunk_in_gap); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method3_offset = GetMethodOffset(3u); - ASSERT_TRUE(IsAligned<kArmAlignment>(method3_offset)); - uint32_t method3_header_offset = method3_offset - sizeof(OatQuickMethodHeader); - uint32_t thunk_size = MethodCallThunkSize(); - uint32_t thunk_offset = RoundDown(method3_header_offset - thunk_size, kArmAlignment); - DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), - method3_header_offset); - ASSERT_TRUE(IsAligned<kArmAlignment>(thunk_offset)); - uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, 16 * MB - (1u << 9)); // Simple encoding, unknown bits fit into the low 8 bits. 
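The AlmostTooFar/JustTooFar tests above and below probe the roughly 16 MiB reach of a Thumb2 BL, measured from the patch location with the 4-byte PC adjustment. A small sketch of that range check using the same boundary values (the `BlCanReach` helper is illustrative, not the deleted patcher's interface):

#include <cassert>
#include <cstdint>

namespace {

constexpr uint32_t MB = 1024u * 1024u;
constexpr uint32_t kPcAdjustment = 4u;  // Thumb PC = BL location + 4.
// Signed 25-bit displacement with the lowest bit zero: [-2^24, 2^24 - 2],
// measured from the PC.
constexpr int32_t kMaxBlPositive = 16 * static_cast<int32_t>(MB) - 2;
constexpr int32_t kMinBlNegative = -16 * static_cast<int32_t>(MB);

bool BlCanReach(uint32_t bl_offset, uint32_t target_offset) {
  int64_t disp = static_cast<int64_t>(target_offset) -
                 static_cast<int64_t>(bl_offset + kPcAdjustment);
  return disp >= kMinBlNegative && disp <= kMaxBlPositive;
}

}  // namespace

int main() {
  // Mirrors the deleted tests: 16 MiB - 2 + 4 from the BL is still reachable,
  // 16 MiB + 4 is one instruction too far and forces a thunk.
  assert(BlCanReach(0u, 16 * MB - 2u + 4u));
  assert(!BlCanReach(0u, 16 * MB + 4u));
  // Backward: 16 MiB - 4 from the BL is reachable, 16 MiB + 2 - 4 is not.
  assert(BlCanReach(16 * MB - 4u, 0u));
  assert(!BlCanReach(16 * MB + 2u - 4u, 0u));
  return 0;
}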
- auto expected_code = GenNopsAndBl(2u, 0xf3ffd700 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - CheckThunk(thunk_offset); -} - -TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { - auto method3_raw_code = GenNopsAndBl(3u, kBlPlus0); - constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. - ArrayRef<const uint8_t> method3_code(method3_raw_code); - ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - const LinkerPatch method3_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 16 * MB + 2 - 4u /* PC adjustment */; - bool thunk_in_gap = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - method3_code, - ArrayRef<const LinkerPatch>(method3_patches), - just_over_max_negative_disp - bl_offset_in_method3); - ASSERT_FALSE(thunk_in_gap); // There should be a thunk but it should be after the method2. - - // Check linked code. - uint32_t method3_offset = GetMethodOffset(3u); - uint32_t thunk_offset = CompiledCode::AlignCode(method3_offset + method3_code.size(), - InstructionSet::kThumb2); - uint32_t diff = thunk_offset - (method3_offset + bl_offset_in_method3 + 4u /* PC adjustment */); - ASSERT_EQ(diff & 1u, 0u); - ASSERT_LT(diff >> 1, 1u << 8); // Simple encoding, (diff >> 1) fits into 8 bits. - auto expected_code = GenNopsAndBl(3u, kBlPlus0 | ((diff >> 1) & 0xffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(3u), ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry1) { - TestStringBssEntry(0x00ff0000u, 0x00fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry2) { - TestStringBssEntry(0x02ff0000u, 0x05fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry3) { - TestStringBssEntry(0x08ff0000u, 0x08fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringBssEntry4) { - TestStringBssEntry(0xd0ff0000u, 0x60fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference1) { - TestStringReference(0x00ff00fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference2) { - TestStringReference(0x02ff05fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference3) { - TestStringReference(0x08ff08fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -TEST_F(Thumb2RelativePatcherTest, StringReference4) { - TestStringReference(0xd0ff60fcu); - ASSERT_LT(GetMethodOffset(1u), 0xfcu); -} - -void Thumb2RelativePatcherTest::TestBakerFieldWide(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
- }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 4 * KB); - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base_reg, holder_reg, /* narrow */ false); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); - const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = - CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ false); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check uses the correct register, i.e. holder_reg. - if (holder_reg < 8) { - ASSERT_GE(output_.size() - gray_check_offset, 2u); - ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - gray_check_offset +=2u; - } else { - ASSERT_GE(output_.size() - gray_check_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - gray_check_offset += 6u; - } - } - // Verify that the lock word for gray bit check is loaded from the holder address. - ASSERT_GE(output_.size() - gray_check_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - const uint32_t load_lock_word = - kLdrWInsn | - (holder_reg << 16) | - (/* IP */ 12 << 12) | - mirror::Object::MonitorOffset().Uint32Value(); - ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). 
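The `tst_gray_bit_without_offset` expression above packs a rotation amount into the i:imm3:imm8 fields of a T32 modified immediate so that the TST mask lands on the lock word's read barrier state bit. A standalone sketch of the same packing; the shift value 28 in the example is only an assumption for illustration, the deleted test takes it from `LockWord::kReadBarrierStateShift`:

#include <cassert>
#include <cstdint>

namespace {

// Packs the rotation amount into the i:imm3:imm8<7> fields of a T32 modified
// immediate, the same way the deleted expectation builds its TST encoding.
uint32_t TstGrayBit(uint32_t rn, uint32_t read_barrier_state_shift) {
  uint32_t ror_shift = 7u + (32u - read_barrier_state_shift);
  return 0xf0100f00u | (rn << 16)
      | (((ror_shift >> 4) & 1u) << 26)   // i
      | (((ror_shift >> 1) & 7u) << 12)   // imm3
      | ((ror_shift & 1u) << 7);          // imm8<7>, ROR('1':imm8<7:0>, ror_shift)
}

}  // namespace

int main() {
  // Rn = IP (r12) and an assumed read barrier state shift of 28 give
  // ror_shift = 11, i.e. i = 0, imm3 = 0b101, imm8<7> = 1.
  uint32_t insn = TstGrayBit(/* rn */ 12u, /* read_barrier_state_shift */ 28u);
  assert(insn == 0xf01c5f80u);
  return 0;
}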
- EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } - } -} - -void Thumb2RelativePatcherTest::TestBakerFieldNarrow(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 32u); - constexpr size_t kMethodCodeSize = 6u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - if (base_reg >= 8u) { - continue; - } - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base_reg, holder_reg, /* narrow */ true); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - if (base_reg >= 8u) { - continue; - } - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrInsn | (offset << (6 - 2)) | (base_reg << 3) | ref_reg; - const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = - CompileBakerOffsetThunk(base_reg, holder_reg, /* narrow */ true); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check uses the correct register, i.e. holder_reg. 
- if (holder_reg < 8) { - ASSERT_GE(output_.size() - gray_check_offset, 2u); - ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - gray_check_offset +=2u; - } else { - ASSERT_GE(output_.size() - gray_check_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - gray_check_offset += 6u; - } - } - // Verify that the lock word for gray bit check is loaded from the holder address. - ASSERT_GE(output_.size() - gray_check_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - const uint32_t load_lock_word = - kLdrWInsn | - (holder_reg << 16) | - (/* IP */ 12 << 12) | - mirror::Object::MonitorOffset().Uint32Value(); - ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). - EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } - } -} - -#define TEST_BAKER_FIELD_WIDE(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffsetWide##offset##_##ref_reg) { \ - TestBakerFieldWide(offset, ref_reg); \ - } - -TEST_BAKER_FIELD_WIDE(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD_WIDE(/* offset */ 8, /* ref_reg */ 3) -TEST_BAKER_FIELD_WIDE(/* offset */ 28, /* ref_reg */ 7) -TEST_BAKER_FIELD_WIDE(/* offset */ 0xffc, /* ref_reg */ 11) - -#define TEST_BAKER_FIELD_NARROW(offset, ref_reg) \ - TEST_F(Thumb2RelativePatcherTest, \ - BakerOffsetNarrow##offset##_##ref_reg) { \ - TestBakerFieldNarrow(offset, ref_reg); \ - } - -TEST_BAKER_FIELD_NARROW(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD_NARROW(/* offset */ 8, /* ref_reg */ 3) -TEST_BAKER_FIELD_NARROW(/* offset */ 28, /* ref_reg */ 7) - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { - // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
- constexpr uint32_t kLiteralOffset1 = 6u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - constexpr uint32_t kLiteralOffset2 = 4; - static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), - "PC for BNE must be aligned."); - - // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch - // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
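The offsets above hinge on the reach of a BNE.W (B<cond>.W, encoding T3): a 21-bit signed displacement with the low bit clear gives [-2^20, 2^20 - 2] bytes from the Thumb PC, i.e. from the branch location plus 4. A small sketch of that arithmetic for the forward case (the `MaxForwardThunkOffset` helper is illustrative only):

#include <cassert>
#include <cstdint>

namespace {

constexpr uint32_t kPcAdjustment = 4u;
// B<cond>.W, encoding T3: signed 21-bit displacement with the low bit clear,
// i.e. [-2^20, 2^20 - 2] from the Thumb PC.
constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u;

// Farthest offset a forward BNE at `literal_offset` inside a method placed at
// `method_offset` can branch to.
uint32_t MaxForwardThunkOffset(uint32_t method_offset, uint32_t literal_offset) {
  return method_offset + literal_offset + kPcAdjustment + kMaxBcondPositiveDisplacement;
}

}  // namespace

int main() {
  // Mirrors expected_thunk_offset above: a BNE at literal offset 6 can reach
  // a thunk at 6 + 4 + (1 MiB - 2) = 1048584 bytes past the method start.
  assert(MaxForwardThunkOffset(/* method_offset */ 0u, /* literal_offset */ 6u) == 1048584u);
  // The deleted test static_asserts that this lands on a kArmAlignment (8)
  // boundary so the thunk can be placed exactly there.
  assert(MaxForwardThunkOffset(0u, 6u) % 8u == 0u);
  return 0;
}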
- size_t thunk_size = - CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); - size_t filler2_size = - 1 * MB - (kLiteralOffset2 + kPcAdjustment) - - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; - const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; - const std::vector<uint8_t> expected_code1 = - RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { - // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction - // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 4u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - Link(); - - const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); - const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { - // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded - // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. 
- // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 6u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - /* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - constexpr uint32_t expected_thunk_offset = - kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); - static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); - size_t filler1_size = expected_thunk_offset - - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - constexpr uint32_t kReachableFromOffset2 = 4; - constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; - static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), - "PC for BNE must be aligned."); - - // If not for the extra NOP, this would allow reaching the thunk from the BNE - // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take - // PC adjustment into account. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = - CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0, /* narrow */ false).size(); - size_t filler2_size = - 1 * MB - (kReachableFromOffset2 + kPcAdjustment) - - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. 
- const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; - const uint32_t bne_last = - BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); - const std::vector<uint8_t> expected_code1 = - RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = - RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerArray) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - auto ldr = [](uint32_t base_reg) { - uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; - uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; - return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. - uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the lock word for gray bit check is loaded from the correct address - // before the base_reg which points to the array data. 
- ASSERT_GE(output_.size() - thunk_offset, - 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; - ASSERT_LT(offset, 0); - ASSERT_GT(offset, -256); - const uint32_t load_lock_word = - kLdrNegativeOffset | - (-offset & 0xffu) | - (base_reg << 16) | - (/* IP */ 12 << 12); - EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); - // Verify the gray bit check. - DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. - uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); - const uint32_t tst_gray_bit_without_offset = - 0xf0100f00 | (/* IP */ 12 << 16) - | (((ror_shift >> 4) & 1) << 26) // i - | (((ror_shift >> 1) & 7) << 12) // imm3 - | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). - EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); - EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE - // Verify the fake dependency. - const uint32_t fake_dependency = - 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) - (/* IP */ 12) | // Rm = IP - (base_reg << 16) | // Rn = base_reg - (base_reg << 8); // Rd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootWide) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - 8, 9, 10, 11, // IP, SP, LR and PC are reserved. - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 4u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); - const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ false)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); - const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ false); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check uses the correct register, i.e. root_reg. - if (root_reg < 8) { - ASSERT_GE(output_.size() - thunk_offset, 2u); - ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - } else { - ASSERT_GE(output_.size() - thunk_offset, 6u); - ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); - ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ - } - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootNarrow) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. - // Not appplicable to high registers. - }; - constexpr size_t kMethodCodeSize = 6u; - constexpr size_t kLiteralOffset = 2u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; - const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg, /* narrow */ true)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); - uint32_t ldr = kLdrInsn | (/* offset */ 8 << (6 - 2)) | (/* base_reg */ 0 << 3) | root_reg; - const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg, /* narrow */ true); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. - ASSERT_GE(output_.size() - thunk_offset, 2u); - ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); - } -} - -TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { - // Test 1MiB of patches to the same thunk to stress-test different large offsets. - // (The low bits are not that important but the location of the high bits is easy to get wrong.) - std::vector<uint8_t> code; - code.reserve(1 * MB); - const size_t num_patches = 1 * MB / 8u; - std::vector<LinkerPatch> patches; - patches.reserve(num_patches); - const uint32_t ldr = - kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); - uint32_t encoded_data = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0, /* narrow */ false); - for (size_t i = 0; i != num_patches; ++i) { - PushBackInsn(&code, ldr); - PushBackInsn(&code, kBneWPlus0); - patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); - } - ASSERT_EQ(1 * MB, code.size()); - ASSERT_EQ(num_patches, patches.size()); - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - // The thunk is right after the method code. - DCHECK_ALIGNED(1 * MB, kArmAlignment); - std::vector<uint8_t> expected_code; - for (size_t i = 0; i != num_patches; ++i) { - PushBackInsn(&expected_code, ldr); - PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB)); - patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); - } - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) { - // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` - // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily - // hold when we're reserving thunks of different sizes. This test exposes the situation - // by using Baker thunks and a method call thunk. - - // Add a method call patch that can reach to method 1 offset + 16MiB. 
- uint32_t method_idx = 0u; - constexpr size_t kMethodCallLiteralOffset = 2u; - constexpr uint32_t kMissingMethodIdx = 2u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), - }; - ArrayRef<const uint8_t> code1(raw_code1); - ++method_idx; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); - - // Skip kMissingMethodIdx. - ++method_idx; - ASSERT_EQ(kMissingMethodIdx, method_idx); - // Add a method with the right size that the method code for the next one starts 1MiB - // after code for method 1. - size_t filler_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> filler_code = GenNops(filler_size / 2u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB - // before the currently scheduled MaxNextOffset() for the method call thunk. - for (uint32_t i = 0; i != 14; ++i) { - filler_size = 1 * MB - sizeof(OatQuickMethodHeader); - filler_code = GenNops(filler_size / 2u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - } - - // Add 2 Baker GC root patches to the last method, one that would allow the thunk at - // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and the - // second that needs it kArmAlignment after that. Given the size of the GC root thunk - // is more than the space required by the method call thunk plus kArmAlignment, - // this pushes the first GC root thunk's pending MaxNextOffset() before the method call - // thunk's pending MaxNextOffset() which needs to be adjusted. - ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment, - CompileBakerGcRootThunk(/* root_reg */ 0, /* narrow */ false).size()); - static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8"); - constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment; - constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment; - // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`. - const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12); - const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12); - const std::vector<uint8_t> last_method_raw_code = RawCode({ - kNopInsn, // Padding before first GC root read barrier. - ldr1, kBneWPlus0, // First GC root LDR with read barrier. - ldr2, kBneWPlus0, // Second GC root LDR with read barrier. - }); - uint32_t encoded_data1 = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1, /* narrow */ false); - uint32_t encoded_data2 = - Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2, /* narrow */ false); - const LinkerPatch last_method_patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), - ArrayRef<const uint8_t>(last_method_raw_code), - ArrayRef<const LinkerPatch>(last_method_patches)); - - // The main purpose of the test is to check that Link() does not cause a crash. 
- Link(); - - ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc deleted file mode 100644 index 52a07965b9..0000000000 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ /dev/null @@ -1,683 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm64/relative_patcher_arm64.h" - -#include "arch/arm64/asm_support_arm64.h" -#include "arch/arm64/instruction_set_features_arm64.h" -#include "art_method.h" -#include "base/bit_utils.h" -#include "compiled_method-inl.h" -#include "driver/compiler_driver.h" -#include "entrypoints/quick/quick_entrypoints_enum.h" -#include "heap_poisoning.h" -#include "linker/linker_patch.h" -#include "linker/output_stream.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat.h" -#include "oat_quick_method_header.h" -#include "read_barrier.h" -#include "utils/arm64/assembler_arm64.h" - -namespace art { -namespace linker { - -namespace { - -// Maximum positive and negative displacement for method call measured from the patch location. -// (Signed 28 bit displacement with the last two bits 0 has range [-2^27, 2^27-4] measured from -// the ARM64 PC pointing to the BL.) -constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 27) - 4u; -constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 27); - -// Maximum positive and negative displacement for a conditional branch measured from the patch -// location. (Signed 21 bit displacement with the last two bits 0 has range [-2^20, 2^20-4] -// measured from the ARM64 PC pointing to the B.cond.) -constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 4u; -constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20); - -// The ADRP thunk for erratum 843419 is 2 instructions, i.e. 8 bytes. 
-constexpr uint32_t kAdrpThunkSize = 8u; - -inline bool IsAdrpPatch(const LinkerPatch& patch) { - switch (patch.GetType()) { - case LinkerPatch::Type::kCall: - case LinkerPatch::Type::kCallRelative: - case LinkerPatch::Type::kBakerReadBarrierBranch: - return false; - case LinkerPatch::Type::kMethodRelative: - case LinkerPatch::Type::kMethodBssEntry: - case LinkerPatch::Type::kTypeRelative: - case LinkerPatch::Type::kTypeClassTable: - case LinkerPatch::Type::kTypeBssEntry: - case LinkerPatch::Type::kStringRelative: - case LinkerPatch::Type::kStringInternTable: - case LinkerPatch::Type::kStringBssEntry: - return patch.LiteralOffset() == patch.PcInsnOffset(); - } -} - -inline uint32_t MaxExtraSpace(size_t num_adrp, size_t code_size) { - if (num_adrp == 0u) { - return 0u; - } - uint32_t alignment_bytes = - CompiledMethod::AlignCode(code_size, InstructionSet::kArm64) - code_size; - return kAdrpThunkSize * num_adrp + alignment_bytes; -} - -} // anonymous namespace - -Arm64RelativePatcher::Arm64RelativePatcher(RelativePatcherTargetProvider* provider, - const Arm64InstructionSetFeatures* features) - : ArmBaseRelativePatcher(provider, InstructionSet::kArm64), - fix_cortex_a53_843419_(features->NeedFixCortexA53_843419()), - reserved_adrp_thunks_(0u), - processed_adrp_thunks_(0u) { - if (fix_cortex_a53_843419_) { - adrp_thunk_locations_.reserve(16u); - current_method_thunks_.reserve(16u * kAdrpThunkSize); - } -} - -uint32_t Arm64RelativePatcher::ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) { - if (!fix_cortex_a53_843419_) { - DCHECK(adrp_thunk_locations_.empty()); - return ReserveSpaceInternal(offset, compiled_method, method_ref, 0u); - } - - // Add thunks for previous method if any. - if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { - size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; - offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) + - kAdrpThunkSize * num_adrp_thunks; - reserved_adrp_thunks_ = adrp_thunk_locations_.size(); - } - - // Count the number of ADRP insns as the upper bound on the number of thunks needed - // and use it to reserve space for other linker patches. - size_t num_adrp = 0u; - DCHECK(compiled_method != nullptr); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (IsAdrpPatch(patch)) { - ++num_adrp; - } - } - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); - uint32_t max_extra_space = MaxExtraSpace(num_adrp, code.size()); - offset = ReserveSpaceInternal(offset, compiled_method, method_ref, max_extra_space); - if (num_adrp == 0u) { - return offset; - } - - // Now that we have the actual offset where the code will be placed, locate the ADRP insns - // that actually require the thunk. 
- uint32_t quick_code_offset = compiled_method->AlignCode(offset + sizeof(OatQuickMethodHeader)); - uint32_t thunk_offset = compiled_method->AlignCode(quick_code_offset + code.size()); - DCHECK(compiled_method != nullptr); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (IsAdrpPatch(patch)) { - uint32_t patch_offset = quick_code_offset + patch.LiteralOffset(); - if (NeedsErratum843419Thunk(code, patch.LiteralOffset(), patch_offset)) { - adrp_thunk_locations_.emplace_back(patch_offset, thunk_offset); - thunk_offset += kAdrpThunkSize; - } - } - } - return offset; -} - -uint32_t Arm64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { - if (!fix_cortex_a53_843419_) { - DCHECK(adrp_thunk_locations_.empty()); - } else { - // Add thunks for the last method if any. - if (reserved_adrp_thunks_ != adrp_thunk_locations_.size()) { - size_t num_adrp_thunks = adrp_thunk_locations_.size() - reserved_adrp_thunks_; - offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64) + - kAdrpThunkSize * num_adrp_thunks; - reserved_adrp_thunks_ = adrp_thunk_locations_.size(); - } - } - return ArmBaseRelativePatcher::ReserveSpaceEnd(offset); -} - -uint32_t Arm64RelativePatcher::WriteThunks(OutputStream* out, uint32_t offset) { - if (fix_cortex_a53_843419_) { - if (!current_method_thunks_.empty()) { - uint32_t aligned_offset = CompiledMethod::AlignCode(offset, InstructionSet::kArm64); - if (kIsDebugBuild) { - CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); - size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; - CHECK_LE(num_thunks, processed_adrp_thunks_); - for (size_t i = 0u; i != num_thunks; ++i) { - const auto& entry = adrp_thunk_locations_[processed_adrp_thunks_ - num_thunks + i]; - CHECK_EQ(entry.second, aligned_offset + i * kAdrpThunkSize); - } - } - uint32_t aligned_code_delta = aligned_offset - offset; - if (aligned_code_delta != 0u && !WriteCodeAlignment(out, aligned_code_delta)) { - return 0u; - } - if (!WriteMiscThunk(out, ArrayRef<const uint8_t>(current_method_thunks_))) { - return 0u; - } - offset = aligned_offset + current_method_thunks_.size(); - current_method_thunks_.clear(); - } - } - return ArmBaseRelativePatcher::WriteThunks(out, offset); -} - -void Arm64RelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, uint32_t - target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - DCHECK_EQ(literal_offset & 3u, 0u); - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t displacement = CalculateMethodCallDisplacement(patch_offset, target_offset & ~1u); - DCHECK_EQ(displacement & 3u, 0u); - DCHECK((displacement >> 27) == 0u || (displacement >> 27) == 31u); // 28-bit signed. - uint32_t insn = (displacement & 0x0fffffffu) >> 2; - insn |= 0x94000000; // BL - - // Check that we're just overwriting an existing BL. - DCHECK_EQ(GetInsn(code, literal_offset) & 0xfc000000u, 0x94000000u); - // Write the new BL. 
- SetInsn(code, literal_offset, insn); -} - -void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_EQ(patch_offset & 3u, 0u); - DCHECK_EQ(target_offset & 3u, 0u); - uint32_t literal_offset = patch.LiteralOffset(); - uint32_t insn = GetInsn(code, literal_offset); - uint32_t pc_insn_offset = patch.PcInsnOffset(); - uint32_t disp = target_offset - ((patch_offset - literal_offset + pc_insn_offset) & ~0xfffu); - bool wide = (insn & 0x40000000) != 0; - uint32_t shift = wide ? 3u : 2u; - if (literal_offset == pc_insn_offset) { - // Check it's an ADRP with imm == 0 (unset). - DCHECK_EQ((insn & 0xffffffe0u), 0x90000000u) - << literal_offset << ", " << pc_insn_offset << ", 0x" << std::hex << insn; - if (fix_cortex_a53_843419_ && processed_adrp_thunks_ != adrp_thunk_locations_.size() && - adrp_thunk_locations_[processed_adrp_thunks_].first == patch_offset) { - DCHECK(NeedsErratum843419Thunk(ArrayRef<const uint8_t>(*code), - literal_offset, patch_offset)); - uint32_t thunk_offset = adrp_thunk_locations_[processed_adrp_thunks_].second; - uint32_t adrp_disp = target_offset - (thunk_offset & ~0xfffu); - uint32_t adrp = PatchAdrp(insn, adrp_disp); - - uint32_t out_disp = thunk_offset - patch_offset; - DCHECK_EQ(out_disp & 3u, 0u); - DCHECK((out_disp >> 27) == 0u || (out_disp >> 27) == 31u); // 28-bit signed. - insn = (out_disp & 0x0fffffffu) >> shift; - insn |= 0x14000000; // B <thunk> - - uint32_t back_disp = -out_disp; - DCHECK_EQ(back_disp & 3u, 0u); - DCHECK((back_disp >> 27) == 0u || (back_disp >> 27) == 31u); // 28-bit signed. - uint32_t b_back = (back_disp & 0x0fffffffu) >> 2; - b_back |= 0x14000000; // B <back> - size_t thunks_code_offset = current_method_thunks_.size(); - current_method_thunks_.resize(thunks_code_offset + kAdrpThunkSize); - SetInsn(&current_method_thunks_, thunks_code_offset, adrp); - SetInsn(&current_method_thunks_, thunks_code_offset + 4u, b_back); - static_assert(kAdrpThunkSize == 2 * 4u, "thunk has 2 instructions"); - - processed_adrp_thunks_ += 1u; - } else { - insn = PatchAdrp(insn, disp); - } - // Write the new ADRP (or B to the erratum 843419 thunk). - SetInsn(code, literal_offset, insn); - } else { - if ((insn & 0xfffffc00) == 0x91000000) { - // ADD immediate, 64-bit with imm12 == 0 (unset). - if (!kEmitCompilerReadBarrier) { - DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || - patch.GetType() == LinkerPatch::Type::kTypeRelative || - patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType(); - } else { - // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry. - DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative || - patch.GetType() == LinkerPatch::Type::kTypeRelative || - patch.GetType() == LinkerPatch::Type::kStringRelative || - patch.GetType() == LinkerPatch::Type::kTypeBssEntry || - patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType(); - } - shift = 0u; // No shift for ADD. - } else { - // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
- DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry || - patch.GetType() == LinkerPatch::Type::kTypeClassTable || - patch.GetType() == LinkerPatch::Type::kTypeBssEntry || - patch.GetType() == LinkerPatch::Type::kStringInternTable || - patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType(); - DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn; - } - if (kIsDebugBuild) { - uint32_t adrp = GetInsn(code, pc_insn_offset); - if ((adrp & 0x9f000000u) != 0x90000000u) { - CHECK(fix_cortex_a53_843419_); - CHECK_EQ(adrp & 0xfc000000u, 0x14000000u); // B <thunk> - CHECK_ALIGNED(current_method_thunks_.size(), kAdrpThunkSize); - size_t num_thunks = current_method_thunks_.size() / kAdrpThunkSize; - CHECK_LE(num_thunks, processed_adrp_thunks_); - uint32_t b_offset = patch_offset - literal_offset + pc_insn_offset; - for (size_t i = processed_adrp_thunks_ - num_thunks; ; ++i) { - CHECK_NE(i, processed_adrp_thunks_); - if (adrp_thunk_locations_[i].first == b_offset) { - size_t idx = num_thunks - (processed_adrp_thunks_ - i); - adrp = GetInsn(&current_method_thunks_, idx * kAdrpThunkSize); - break; - } - } - } - CHECK_EQ(adrp & 0x9f00001fu, // Check that pc_insn_offset points - 0x90000000 | ((insn >> 5) & 0x1fu)); // to ADRP with matching register. - } - uint32_t imm12 = (disp & 0xfffu) >> shift; - insn = (insn & ~(0xfffu << 10)) | (imm12 << 10); - SetInsn(code, literal_offset, insn); - } -} - -void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) { - DCHECK_ALIGNED(patch_offset, 4u); - uint32_t literal_offset = patch.LiteralOffset(); - DCHECK_ALIGNED(literal_offset, 4u); - DCHECK_LT(literal_offset, code->size()); - uint32_t insn = GetInsn(code, literal_offset); - DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched) - ThunkKey key = GetBakerThunkKey(patch); - if (kIsDebugBuild) { - const uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - // Check that the next instruction matches the expected LDR. - switch (kind) { - case BakerReadBarrierKind::kField: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn(code, literal_offset + 4u); - // LDR (immediate) with correct base_reg. - CheckValidReg(next_insn & 0x1fu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); - break; - } - case BakerReadBarrierKind::kArray: { - DCHECK_GE(code->size() - literal_offset, 8u); - uint32_t next_insn = GetInsn(code, literal_offset + 4u); - // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), - // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. - CheckValidReg(next_insn & 0x1fu); // Check destination register. - const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); - CheckValidReg((next_insn >> 16) & 0x1f); // Check index register - break; - } - case BakerReadBarrierKind::kGcRoot: { - DCHECK_GE(literal_offset, 4u); - uint32_t prev_insn = GetInsn(code, literal_offset - 4u); - // LDR (immediate) with correct root_reg.
- const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); - CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } - } - uint32_t target_offset = GetThunkTargetOffset(key, patch_offset); - DCHECK_ALIGNED(target_offset, 4u); - uint32_t disp = target_offset - patch_offset; - DCHECK((disp >> 20) == 0u || (disp >> 20) == 4095u); // 21-bit signed. - insn |= (disp << (5 - 2)) & 0x00ffffe0u; // Shift bits 2-20 to 5-23. - SetInsn(code, literal_offset, insn); -} - -#define __ assembler.GetVIXLAssembler()-> - -static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, - vixl::aarch64::Register base_reg, - vixl::aarch64::MemOperand& lock_word, - vixl::aarch64::Label* slow_path) { - using namespace vixl::aarch64; // NOLINT(build/namespaces) - // Load the lock word containing the rb_state. - __ Ldr(ip0.W(), lock_word); - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); - static_assert( - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, - "Field and array LDR offsets must be the same to reuse the same code."); - // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); - __ Br(lr); // And return back to the function. - // Note: The fake dependency is unnecessary for the slow path. -} - -// Load the read barrier introspection entrypoint in register `entrypoint`. -static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, - vixl::aarch64::Register entrypoint) { - using vixl::aarch64::MemOperand; - using vixl::aarch64::ip0; - // Thread Register. - const vixl::aarch64::Register tr = vixl::aarch64::x19; - - // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. - DCHECK_EQ(ip0.GetCode(), 16u); - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); - __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); -} - -void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, - uint32_t encoded_data) { - using namespace vixl::aarch64; // NOLINT(build/namespaces) - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - switch (kind) { - case BakerReadBarrierKind::kField: { - // Check if the holder is gray and, if not, add fake dependency to the base register - // and return to the LDR instruction to load the reference. Otherwise, use introspection - // to load the reference and call the entrypoint (in IP1) that performs further checks - // on the reference and marks it if needed. 
- auto base_reg = - Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - auto holder_reg = - Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); - CheckValidReg(holder_reg.GetCode()); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - // If base_reg differs from holder_reg, the offset was too large and we must have - // emitted an explicit null check before the load. Otherwise, we need to null-check - // the holder as we do not necessarily do that check before going to the thunk. - vixl::aarch64::Label throw_npe; - if (holder_reg.Is(base_reg)) { - __ Cbz(holder_reg.W(), &throw_npe); - } - vixl::aarch64::Label slow_path; - MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); - __ Bind(&slow_path); - MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); - __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. - __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. - // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. - __ Br(ip1); // Jump to the entrypoint. - if (holder_reg.Is(base_reg)) { - // Add null check slow path. The stack map is at the address pointed to by LR. - __ Bind(&throw_npe); - int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value(); - __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset)); - __ Br(ip0); - } - break; - } - case BakerReadBarrierKind::kArray: { - auto base_reg = - Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(base_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - vixl::aarch64::Label slow_path; - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); - DCHECK_LT(lock_word.GetOffset(), 0); - EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); - __ Bind(&slow_path); - MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); - __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). - __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create - // a switch case target based on the index register. - __ Mov(ip0, base_reg); // Move the base register to ip0. - __ Br(ip1); // Jump to the entrypoint's array switch case. - break; - } - case BakerReadBarrierKind::kGcRoot: { - // Check if the reference needs to be marked and if so (i.e. not null, not marked yet - // and it does not have a forwarding address), call the correct introspection entrypoint; - // otherwise return the reference (or the extracted forwarding address). - // There is no gray bit check for GC roots. 
- auto root_reg = - Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); - CheckValidReg(root_reg.GetCode()); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); - temps.Exclude(ip0, ip1); - vixl::aarch64::Label return_label, not_marked, forwarding_address; - __ Cbz(root_reg, &return_label); - MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); - __ Ldr(ip0.W(), lock_word); - __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, ¬_marked); - __ Bind(&return_label); - __ Br(lr); - __ Bind(¬_marked); - __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); - __ B(&forwarding_address, mi); - LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); - // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to - // art_quick_read_barrier_mark_introspection_gc_roots. - __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); - __ Mov(ip0.W(), root_reg); - __ Br(ip1); - __ Bind(&forwarding_address); - __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); - __ Br(lr); - break; - } - default: - LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); - UNREACHABLE(); - } -} - -std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - arm64::Arm64Assembler assembler(&allocator); - - switch (key.GetType()) { - case ThunkType::kMethodCall: { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. - Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( - kArm64PointerSize).Int32Value()); - assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); - break; - } - case ThunkType::kBakerReadBarrier: { - CompileBakerReadBarrierThunk(assembler, key.GetCustomValue1()); - break; - } - } - - // Ensure we emit the literal pool. 
- assembler.FinalizeCode(); - std::vector<uint8_t> thunk_code(assembler.CodeSize()); - MemoryRegion code(thunk_code.data(), thunk_code.size()); - assembler.FinalizeInstructions(code); - return thunk_code; -} - -std::string Arm64RelativePatcher::GetThunkDebugName(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return "MethodCallThunk"; - - case ThunkType::kBakerReadBarrier: { - uint32_t encoded_data = key.GetCustomValue1(); - BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); - std::ostringstream oss; - oss << "BakerReadBarrierThunk"; - switch (kind) { - case BakerReadBarrierKind::kField: - oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) - << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); - break; - case BakerReadBarrierKind::kArray: - oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - break; - case BakerReadBarrierKind::kGcRoot: - oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); - DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); - break; - } - return oss.str(); - } - } -} - -#undef __ - -uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondPositiveDisplacement; - } -} - -uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { - switch (key.GetType()) { - case ThunkType::kMethodCall: - return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrier: - return kMaxBcondNegativeDisplacement; - } -} - -uint32_t Arm64RelativePatcher::PatchAdrp(uint32_t adrp, uint32_t disp) { - return (adrp & 0x9f00001fu) | // Clear offset bits, keep ADRP with destination reg. - // Bottom 12 bits are ignored, the next 2 lowest bits are encoded in bits 29-30. - ((disp & 0x00003000u) << (29 - 12)) | - // The next 16 bits are encoded in bits 5-22. - ((disp & 0xffffc000u) >> (12 + 2 - 5)) | - // Since the target_offset is based on the beginning of the oat file and the - // image space precedes the oat file, the target_offset into image space will - // be negative yet passed as uint32_t. Therefore we limit the displacement - // to +-2GiB (rather than the maximim +-4GiB) and determine the sign bit from - // the highest bit of the displacement. This is encoded in bit 23. - ((disp & 0x80000000u) >> (31 - 23)); -} - -bool Arm64RelativePatcher::NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, - uint32_t literal_offset, - uint32_t patch_offset) { - DCHECK_EQ(patch_offset & 0x3u, 0u); - if ((patch_offset & 0xff8) == 0xff8) { // ...ff8 or ...ffc - uint32_t adrp = GetInsn(code, literal_offset); - DCHECK_EQ(adrp & 0x9f000000, 0x90000000); - uint32_t next_offset = patch_offset + 4u; - uint32_t next_insn = GetInsn(code, literal_offset + 4u); - - // Below we avoid patching sequences where the adrp is followed by a load which can easily - // be proved to be aligned. - - // First check if the next insn is the LDR using the result of the ADRP. - // LDR <Wt>, [<Xn>, #pimm], where <Xn> == ADRP destination reg. 
- if ((next_insn & 0xffc00000) == 0xb9400000 && - (((next_insn >> 5) ^ adrp) & 0x1f) == 0) { - return false; - } - - // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result - // of the ADRP for an ADD immediate, check for that as well. We generalize a bit - // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination - // or stores the result to a different register. - if ((next_insn & 0x1f000000) == 0x11000000 && - ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) { - return false; - } - - // LDR <Wt>, <label> is always aligned and thus it doesn't cause boundary crossing. - if ((next_insn & 0xff000000) == 0x18000000) { - return false; - } - - // LDR <Xt>, <label> is aligned iff the pc + displacement is a multiple of 8. - if ((next_insn & 0xff000000) == 0x58000000) { - bool is_aligned_load = (((next_offset >> 2) ^ (next_insn >> 5)) & 1) == 0; - return !is_aligned_load; - } - - // LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned loads, as SP is - // guaranteed to be 128-bits aligned and <pimm> is multiple of the load size. - if ((next_insn & 0xbfc003e0) == 0xb94003e0) { - return false; - } - return true; - } - return false; -} - -void Arm64RelativePatcher::SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { - DCHECK_LE(offset + 4u, code->size()); - DCHECK_EQ(offset & 3u, 0u); - uint8_t* addr = &(*code)[offset]; - addr[0] = (value >> 0) & 0xff; - addr[1] = (value >> 8) & 0xff; - addr[2] = (value >> 16) & 0xff; - addr[3] = (value >> 24) & 0xff; -} - -uint32_t Arm64RelativePatcher::GetInsn(ArrayRef<const uint8_t> code, uint32_t offset) { - DCHECK_LE(offset + 4u, code.size()); - DCHECK_EQ(offset & 3u, 0u); - const uint8_t* addr = &code[offset]; - return - (static_cast<uint32_t>(addr[0]) << 0) + - (static_cast<uint32_t>(addr[1]) << 8) + - (static_cast<uint32_t>(addr[2]) << 16)+ - (static_cast<uint32_t>(addr[3]) << 24); -} - -template <typename Alloc> -uint32_t Arm64RelativePatcher::GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset) { - return GetInsn(ArrayRef<const uint8_t>(*code), offset); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h deleted file mode 100644 index 8ba59976e7..0000000000 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ -#define ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ - -#include "base/array_ref.h" -#include "base/bit_field.h" -#include "base/bit_utils.h" -#include "linker/arm/relative_patcher_arm_base.h" - -namespace art { - -namespace arm64 { -class Arm64Assembler; -} // namespace arm64 - -namespace linker { - -class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { - public: - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { - CheckValidReg(base_reg); - CheckValidReg(holder_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(holder_reg); - } - - static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { - CheckValidReg(base_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | - BakerReadBarrierFirstRegField::Encode(base_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); - } - - static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { - CheckValidReg(root_reg); - return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | - BakerReadBarrierFirstRegField::Encode(root_reg) | - BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); - } - - Arm64RelativePatcher(RelativePatcherTargetProvider* provider, - const Arm64InstructionSetFeatures* features); - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - - protected: - std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - std::string GetThunkDebugName(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; - uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; - - private: - static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; - - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kArray, // Array get with index in register. - kGcRoot, // GC root load. 
- kLast = kGcRoot - }; - - static constexpr size_t kBitsForBakerReadBarrierKind = - MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); - static constexpr size_t kBitsForRegister = 5u; - using BakerReadBarrierKindField = - BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; - using BakerReadBarrierFirstRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; - using BakerReadBarrierSecondRegField = - BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; - - static void CheckValidReg(uint32_t reg) { - DCHECK(reg < 30u && reg != 16u && reg != 17u) << reg; - } - - void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); - - static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); - - static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, - uint32_t patch_offset); - void SetInsn(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); - static uint32_t GetInsn(ArrayRef<const uint8_t> code, uint32_t offset); - - template <typename Alloc> - static uint32_t GetInsn(std::vector<uint8_t, Alloc>* code, uint32_t offset); - - const bool fix_cortex_a53_843419_; - // Map original patch_offset to thunk offset. - std::vector<std::pair<uint32_t, uint32_t>> adrp_thunk_locations_; - size_t reserved_adrp_thunks_; - size_t processed_adrp_thunks_; - std::vector<uint8_t> current_method_thunks_; - - friend class Arm64RelativePatcherTest; - - DISALLOW_COPY_AND_ASSIGN(Arm64RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ARM64_RELATIVE_PATCHER_ARM64_H_ diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc deleted file mode 100644 index 05459a2a82..0000000000 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ /dev/null @@ -1,1364 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/arm64/relative_patcher_arm64.h" - -#include "base/casts.h" -#include "linker/relative_patcher_test.h" -#include "lock_word.h" -#include "mirror/array-inl.h" -#include "mirror/object.h" -#include "oat_quick_method_header.h" - -namespace art { -namespace linker { - -class Arm64RelativePatcherTest : public RelativePatcherTest { - public: - explicit Arm64RelativePatcherTest(const std::string& variant) - : RelativePatcherTest(InstructionSet::kArm64, variant) { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kNopRawCode[]; - static const ArrayRef<const uint8_t> kNopCode; - - // NOP instruction. - static constexpr uint32_t kNopInsn = 0xd503201f; - - // All branches can be created from kBlPlus0 or kBPlus0 by adding the low 26 bits. - static constexpr uint32_t kBlPlus0 = 0x94000000u; - static constexpr uint32_t kBPlus0 = 0x14000000u; - - // Special BL values. 
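For reference, the EncodeBakerReadBarrier*Data() helpers in the deleted header above pack their arguments as a kind field (2 bits, from MinimumBitsToStore(kGcRoot)), a first register (5 bits) and a second register (5 bits), with 31 (sp/zr) marking an unused slot. A minimal stand-alone sketch of that packing; the Encode() helper and names below are mine, not ART's:

#include <cassert>
#include <cstdint>

enum class Kind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2 };
constexpr uint32_t kInvalidReg = 31u;  // sp/zr, i.e. "no register".

constexpr uint32_t Encode(Kind kind, uint32_t first_reg, uint32_t second_reg) {
  // Kind in bits [0,2), first reg in bits [2,7), second reg in bits [7,12).
  return static_cast<uint32_t>(kind) | (first_reg << 2) | (second_reg << 7);
}

int main() {
  // Field load with base register x1 and holder register x2.
  uint32_t field = Encode(Kind::kField, /* base_reg */ 1u, /* holder_reg */ 2u);
  assert((field & 3u) == 0u);            // kind == kField
  assert(((field >> 2) & 0x1fu) == 1u);  // base_reg
  assert(((field >> 7) & 0x1fu) == 2u);  // holder_reg
  // GC root load into w3; the second register slot is unused.
  uint32_t root = Encode(Kind::kGcRoot, /* root_reg */ 3u, kInvalidReg);
  assert((root & 3u) == 2u);
  return 0;
}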
- static constexpr uint32_t kBlPlusMax = 0x95ffffffu; - static constexpr uint32_t kBlMinusMax = 0x96000000u; - - // LDR immediate, 32-bit, unsigned offset. - static constexpr uint32_t kLdrWInsn = 0xb9400000u; - - // LDR register, 32-bit, LSL #2. - static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u; - - // LDUR, 32-bit. - static constexpr uint32_t kLdurWInsn = 0xb8400000u; - - // ADD/ADDS/SUB/SUBS immediate, 64-bit. - static constexpr uint32_t kAddXInsn = 0x91000000u; - static constexpr uint32_t kAddsXInsn = 0xb1000000u; - static constexpr uint32_t kSubXInsn = 0xd1000000u; - static constexpr uint32_t kSubsXInsn = 0xf1000000u; - - // LDUR x2, [sp, #4], i.e. unaligned load crossing 64-bit boundary (assuming aligned sp). - static constexpr uint32_t kLdurInsn = 0xf840405fu; - - // LDR w12, <label> and LDR x12, <label>. Bits 5-23 contain label displacement in 4-byte units. - static constexpr uint32_t kLdrWPcRelInsn = 0x1800000cu; - static constexpr uint32_t kLdrXPcRelInsn = 0x5800000cu; - - // LDR w13, [SP, #<pimm>] and LDR x13, [SP, #<pimm>]. Bits 10-21 contain displacement from SP - // in units of 4-bytes (for 32-bit load) or 8-bytes (for 64-bit load). - static constexpr uint32_t kLdrWSpRelInsn = 0xb94003edu; - static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; - - // CBNZ x17, +0. Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. - static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u; - - void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { - CHECK_LE(pos, code->size()); - const uint8_t insn_code[] = { - static_cast<uint8_t>(insn), - static_cast<uint8_t>(insn >> 8), - static_cast<uint8_t>(insn >> 16), - static_cast<uint8_t>(insn >> 24), - }; - static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); - code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); - } - - void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { - InsertInsn(code, code->size(), insn); - } - - std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { - std::vector<uint8_t> raw_code; - raw_code.reserve(insns.size() * 4u); - for (uint32_t insn : insns) { - PushBackInsn(&raw_code, insn); - } - return raw_code; - } - - uint32_t Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, - const ArrayRef<const LinkerPatch>& method1_patches, - const ArrayRef<const uint8_t>& last_method_code, - const ArrayRef<const LinkerPatch>& last_method_patches, - uint32_t distance_without_thunks) { - CHECK_EQ(distance_without_thunks % kArm64Alignment, 0u); - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - AddCompiledMethod(MethodRef(1u), method1_code, method1_patches); - const uint32_t gap_start = method1_offset + method1_code.size(); - - // We want to put the method3 at a very precise offset. - const uint32_t last_method_offset = method1_offset + distance_without_thunks; - CHECK_ALIGNED(last_method_offset, kArm64Alignment); - const uint32_t gap_end = last_method_offset - sizeof(OatQuickMethodHeader); - - // Fill the gap with intermediate methods in chunks of 2MiB and the first in [2MiB, 4MiB). - // (This allows deduplicating the small chunks to avoid using 256MiB of memory for +-128MiB - // offsets by this test. Making the first chunk bigger makes it easy to give all intermediate - // methods the same alignment of the end, so the thunk insertion adds a predictable size as - // long as it's after the first chunk.) 
- uint32_t method_idx = 2u; - constexpr uint32_t kSmallChunkSize = 2 * MB; - std::vector<uint8_t> gap_code; - uint32_t gap_size = gap_end - gap_start; - uint32_t num_small_chunks = std::max(gap_size / kSmallChunkSize, 1u) - 1u; - uint32_t chunk_start = gap_start; - uint32_t chunk_size = gap_size - num_small_chunks * kSmallChunkSize; - for (uint32_t i = 0; i <= num_small_chunks; ++i) { // num_small_chunks+1 iterations. - uint32_t chunk_code_size = - chunk_size - CodeAlignmentSize(chunk_start) - sizeof(OatQuickMethodHeader); - gap_code.resize(chunk_code_size, 0u); - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(gap_code)); - method_idx += 1u; - chunk_start += chunk_size; - chunk_size = kSmallChunkSize; // For all but the first chunk. - DCHECK_EQ(CodeAlignmentSize(gap_end), CodeAlignmentSize(chunk_start)); - } - - // Add the last method and link - AddCompiledMethod(MethodRef(method_idx), last_method_code, last_method_patches); - Link(); - - // Check assumptions. - CHECK_EQ(GetMethodOffset(1), method1_offset); - auto last_result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(last_result.first); - // There may be a thunk before method2. - if (last_result.second != last_method_offset) { - // Thunk present. Check that there's only one. - uint32_t thunk_end = - CompiledCode::AlignCode(gap_end, InstructionSet::kArm64) + MethodCallThunkSize(); - uint32_t header_offset = thunk_end + CodeAlignmentSize(thunk_end); - CHECK_EQ(last_result.second, header_offset + sizeof(OatQuickMethodHeader)); - } - return method_idx; - } - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - CHECK_ALIGNED(result.second, 4u); - return result.second; - } - - std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t MethodCallThunkSize() { - return CompileMethodCallThunk().size(); - } - - bool CheckThunk(uint32_t thunk_offset) { - const std::vector<uint8_t> expected_code = CompileMethodCallThunk(); - if (output_.size() < thunk_offset + expected_code.size()) { - LOG(ERROR) << "output_.size() == " << output_.size() << " < " - << "thunk_offset + expected_code.size() == " << (thunk_offset + expected_code.size()); - return false; - } - ArrayRef<const uint8_t> linked_code(&output_[thunk_offset], expected_code.size()); - if (linked_code == ArrayRef<const uint8_t>(expected_code)) { - return true; - } - // Log failure info. 
- DumpDiff(ArrayRef<const uint8_t>(expected_code), linked_code); - return false; - } - - std::vector<uint8_t> GenNops(size_t num_nops) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - return result; - } - - std::vector<uint8_t> GenNopsAndBl(size_t num_nops, uint32_t bl) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 4u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - PushBackInsn(&result, bl); - return result; - } - - std::vector<uint8_t> GenNopsAndAdrpAndUse(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset, - uint32_t use_insn) { - std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 8u); - for (size_t i = 0; i != num_nops; ++i) { - PushBackInsn(&result, kNopInsn); - } - CHECK_ALIGNED(method_offset, 4u); - CHECK_ALIGNED(target_offset, 4u); - uint32_t adrp_offset = method_offset + num_nops * 4u; - uint32_t disp = target_offset - (adrp_offset & ~0xfffu); - if (use_insn == kLdrWInsn) { - DCHECK_ALIGNED(disp, 1u << 2); - use_insn |= 1 | // LDR x1, [x0, #(imm12 << 2)] - ((disp & 0xfffu) << (10 - 2)); // imm12 = ((disp & 0xfffu) >> 2) is at bit 10. - } else if (use_insn == kAddXInsn) { - use_insn |= 1 | // ADD x1, x0, #imm - (disp & 0xfffu) << 10; // imm12 = (disp & 0xfffu) is at bit 10. - } else { - LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn; - } - uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) - ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, - ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, - // We take the sign bit from the disp, limiting disp to +- 2GiB. - ((disp & 0x80000000) >> (31 - 23)); // sign bit in immhi is at bit 23. - PushBackInsn(&result, adrp); - PushBackInsn(&result, use_insn); - return result; - } - - std::vector<uint8_t> GenNopsAndAdrpLdr(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset) { - return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kLdrWInsn); - } - - void TestNopsAdrpLdr(size_t num_nops, uint32_t bss_begin, uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - const LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::StringBssEntryPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t target_offset = bss_begin_ + string_entry_offset; - auto expected_code = GenNopsAndAdrpLdr(num_nops, method1_offset, target_offset); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - std::vector<uint8_t> GenNopsAndAdrpAdd(size_t num_nops, - uint32_t method_offset, - uint32_t target_offset) { - return GenNopsAndAdrpAndUse(num_nops, method_offset, target_offset, kAddXInsn); - } - - void TestNopsAdrpAdd(size_t num_nops, uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. 
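For context, GenNopsAndAdrpAndUse() above and PatchAdrp() in the deleted patcher both split a page displacement into the ADRP's immlo (instruction bits 29-30, from displacement bits 12-13) and immhi (bits 5-23, from displacement bits 14-31), taking the sign from bit 31 and thereby limiting the reach to +-2GiB. A minimal round-trip sketch of that packing; helper names are mine:

#include <cassert>
#include <cstdint>

constexpr uint32_t EncodeAdrp(uint32_t rd, uint32_t disp) {
  return 0x90000000u | rd |
         ((disp & 0x00003000u) << (29 - 12)) |  // immlo <- disp bits 12-13.
         ((disp & 0xffffc000u) >> (14 - 5)) |   // immhi <- disp bits 14-31.
         ((disp & 0x80000000u) >> (31 - 23));   // Sign bit into immhi bit 23.
}

int64_t DecodeAdrpDisp(uint32_t insn) {
  uint32_t immlo = (insn >> 29) & 0x3u;
  uint32_t immhi = (insn >> 5) & 0x7ffffu;    // 19 bits.
  int64_t imm21 = static_cast<int64_t>((immhi << 2) | immlo);
  if (imm21 & (1 << 20)) imm21 -= (1 << 21);  // Sign-extend 21 bits.
  return imm21 * 4096;                        // Page units -> bytes.
}

int main() {
  // Positive, page-aligned displacement round-trips exactly.
  assert(DecodeAdrpDisp(EncodeAdrp(/* rd */ 0u, 0x12345000u)) == 0x12345000);
  // Negative displacement: -1MiB given as a 32-bit two's complement value.
  assert(DecodeAdrpDisp(EncodeAdrp(/* rd */ 0u, 0xfff00000u)) == -0x100000);
  return 0;
}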
- const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::RelativeStringPatch(num_nops * 4u + 4u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - auto expected_code = GenNopsAndAdrpAdd(num_nops, method1_offset, string_offset); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - void PrepareNopsAdrpInsn2Ldr(size_t num_nops, - uint32_t insn2, - uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - auto code = GenNopsAndAdrpLdr(num_nops, 0u, 0u); // Unpatched. - InsertInsn(&code, num_nops * 4u + 4u, insn2); - const LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::StringBssEntryPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - } - - void PrepareNopsAdrpInsn2Add(size_t num_nops, uint32_t insn2, uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - auto code = GenNopsAndAdrpAdd(num_nops, 0u, 0u); // Unpatched. - InsertInsn(&code, num_nops * 4u + 4u, insn2); - const LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(num_nops * 4u , nullptr, num_nops * 4u, kStringIndex), - LinkerPatch::RelativeStringPatch(num_nops * 4u + 8u, nullptr, num_nops * 4u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), - ArrayRef<const uint8_t>(code), - ArrayRef<const LinkerPatch>(patches)); - Link(); - } - - void TestNopsAdrpInsn2AndUse(size_t num_nops, - uint32_t insn2, - uint32_t target_offset, - uint32_t use_insn) { - uint32_t method1_offset = GetMethodOffset(1u); - auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn); - InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - } - - void TestNopsAdrpInsn2AndUseHasThunk(size_t num_nops, - uint32_t insn2, - uint32_t target_offset, - uint32_t use_insn) { - uint32_t method1_offset = GetMethodOffset(1u); - CHECK(!compiled_method_refs_.empty()); - CHECK_EQ(compiled_method_refs_[0].index, 1u); - CHECK_EQ(compiled_method_refs_.size(), compiled_methods_.size()); - uint32_t method1_size = compiled_methods_[0]->GetQuickCode().size(); - uint32_t thunk_offset = - CompiledCode::AlignCode(method1_offset + method1_size, InstructionSet::kArm64); - uint32_t b_diff = thunk_offset - (method1_offset + num_nops * 4u); - CHECK_ALIGNED(b_diff, 4u); - ASSERT_LT(b_diff, 128 * MB); - uint32_t b_out = kBPlus0 + ((b_diff >> 2) & 0x03ffffffu); - uint32_t b_in = kBPlus0 + ((-b_diff >> 2) & 0x03ffffffu); - - auto expected_code = GenNopsAndAdrpAndUse(num_nops, method1_offset, target_offset, use_insn); - InsertInsn(&expected_code, num_nops * 4u + 4u, insn2); - // Replace adrp with bl. 
- expected_code.erase(expected_code.begin() + num_nops * 4u, - expected_code.begin() + num_nops * 4u + 4u); - InsertInsn(&expected_code, num_nops * 4u, b_out); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - - auto expected_thunk_code = GenNopsAndAdrpLdr(0u, thunk_offset, target_offset); - ASSERT_EQ(expected_thunk_code.size(), 8u); - expected_thunk_code.erase(expected_thunk_code.begin() + 4u, expected_thunk_code.begin() + 8u); - InsertInsn(&expected_thunk_code, 4u, b_in); - ASSERT_EQ(expected_thunk_code.size(), 8u); - - uint32_t thunk_size = MethodCallThunkSize(); - ASSERT_EQ(thunk_offset + thunk_size, output_.size()); - ASSERT_EQ(thunk_size, expected_thunk_code.size()); - ArrayRef<const uint8_t> thunk_code(&output_[thunk_offset], thunk_size); - if (ArrayRef<const uint8_t>(expected_thunk_code) != thunk_code) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk_code), thunk_code); - FAIL(); - } - } - - void TestAdrpInsn2Ldr(uint32_t insn2, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - ASSERT_LT(method1_offset, adrp_offset); - CHECK_ALIGNED(adrp_offset, 4u); - uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Ldr(num_nops, insn2, bss_begin, string_entry_offset); - uint32_t target_offset = bss_begin_ + string_entry_offset; - if (has_thunk) { - TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, target_offset, kLdrWInsn); - } else { - TestNopsAdrpInsn2AndUse(num_nops, insn2, target_offset, kLdrWInsn); - } - ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. - } - - void TestAdrpLdurLdr(uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - TestAdrpInsn2Ldr(kLdurInsn, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpLdrPcRelLdr(uint32_t pcrel_ldr_insn, - int32_t pcrel_disp, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - ASSERT_LT(pcrel_disp, 0x100000); - ASSERT_GE(pcrel_disp, -0x100000); - ASSERT_EQ(pcrel_disp & 0x3, 0); - uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpLdrSpRelLdr(uint32_t sprel_ldr_insn, - uint32_t sprel_disp_in_load_units, - uint32_t adrp_offset, - bool has_thunk, - uint32_t bss_begin, - uint32_t string_entry_offset) { - ASSERT_LT(sprel_disp_in_load_units, 0x1000u); - uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Ldr(insn2, adrp_offset, has_thunk, bss_begin, string_entry_offset); - } - - void TestAdrpInsn2Add(uint32_t insn2, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - uint32_t method1_offset = - kTrampolineSize + CodeAlignmentSize(kTrampolineSize) + sizeof(OatQuickMethodHeader); - ASSERT_LT(method1_offset, adrp_offset); - CHECK_ALIGNED(adrp_offset, 4u); - uint32_t num_nops = (adrp_offset - method1_offset) / 4u; - PrepareNopsAdrpInsn2Add(num_nops, insn2, string_offset); - if (has_thunk) { - TestNopsAdrpInsn2AndUseHasThunk(num_nops, insn2, string_offset, kAddXInsn); - } else { - TestNopsAdrpInsn2AndUse(num_nops, insn2, string_offset, kAddXInsn); - } - ASSERT_EQ(method1_offset, GetMethodOffset(1u)); // If this fails, num_nops is wrong. 
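The b_out/b_in values above, like PatchCall() in the deleted patcher, place a signed 26-bit word offset in the low bits of B/BL, so the byte displacement is (disp >> 2) masked to 26 bits and the reachable range is -128MiB to +128MiB - 4 (the kBlMinusMax/kBlPlusMax constants). A small encode/decode sketch; helper names are mine:

#include <cassert>
#include <cstdint>

constexpr uint32_t kBlOpcode = 0x94000000u;

constexpr uint32_t EncodeBl(int32_t byte_disp) {
  // byte_disp is assumed 4-byte aligned, as all A64 code is.
  return kBlOpcode | ((static_cast<uint32_t>(byte_disp) >> 2) & 0x03ffffffu);
}

int64_t DecodeBlDisp(uint32_t insn) {
  int64_t imm26 = insn & 0x03ffffffu;
  if (imm26 & (1 << 25)) imm26 -= (1 << 26);  // Sign-extend 26 bits.
  return imm26 * 4;                           // Word offset -> byte offset.
}

int main() {
  assert(DecodeBlDisp(EncodeBl(0x1000)) == 0x1000);    // Forward branch.
  assert(DecodeBlDisp(EncodeBl(-0x1000)) == -0x1000);  // Backward branch.
  assert(DecodeBlDisp(0x95ffffffu) == 128 * 1024 * 1024 - 4);  // kBlPlusMax.
  assert(DecodeBlDisp(0x96000000u) == -(128 * 1024 * 1024));   // kBlMinusMax.
  return 0;
}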
- } - - void TestAdrpLdurAdd(uint32_t adrp_offset, bool has_thunk, uint32_t string_offset) { - TestAdrpInsn2Add(kLdurInsn, adrp_offset, has_thunk, string_offset); - } - - void TestAdrpLdrPcRelAdd(uint32_t pcrel_ldr_insn, - int32_t pcrel_disp, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - ASSERT_LT(pcrel_disp, 0x100000); - ASSERT_GE(pcrel_disp, -0x100000); - ASSERT_EQ(pcrel_disp & 0x3, 0); - uint32_t insn2 = pcrel_ldr_insn | (((static_cast<uint32_t>(pcrel_disp) >> 2) & 0x7ffffu) << 5); - TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); - } - - void TestAdrpLdrSpRelAdd(uint32_t sprel_ldr_insn, - uint32_t sprel_disp_in_load_units, - uint32_t adrp_offset, - bool has_thunk, - uint32_t string_offset) { - ASSERT_LT(sprel_disp_in_load_units, 0x1000u); - uint32_t insn2 = sprel_ldr_insn | ((sprel_disp_in_load_units & 0xfffu) << 10); - TestAdrpInsn2Add(insn2, adrp_offset, has_thunk, string_offset); - } - - std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { - const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { - LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( - 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); - ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); - return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); - } - - uint32_t GetOutputInsn(uint32_t offset) { - CHECK_LE(offset, output_.size()); - CHECK_GE(output_.size() - offset, 4u); - return (static_cast<uint32_t>(output_[offset]) << 0) | - (static_cast<uint32_t>(output_[offset + 1]) << 8) | - (static_cast<uint32_t>(output_[offset + 2]) << 16) | - (static_cast<uint32_t>(output_[offset + 3]) << 24); - } - - void TestBakerField(uint32_t offset, uint32_t ref_reg); -}; - -const uint8_t Arm64RelativePatcherTest::kCallRawCode[] = { - 0x00, 0x00, 0x00, 0x94 -}; - -const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t Arm64RelativePatcherTest::kNopRawCode[] = { - 0x1f, 0x20, 0x03, 0xd5 -}; - -const ArrayRef<const uint8_t> Arm64RelativePatcherTest::kNopCode(kNopRawCode); - -class Arm64RelativePatcherTestDefault : public Arm64RelativePatcherTest { - public: - Arm64RelativePatcherTestDefault() : Arm64RelativePatcherTest("default") { } -}; - -class Arm64RelativePatcherTestDenver64 : public Arm64RelativePatcherTest { - public: - Arm64RelativePatcherTestDenver64() : Arm64RelativePatcherTest("denver64") { } -}; - -TEST_F(Arm64RelativePatcherTestDefault, CallSelf) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - const std::vector<uint8_t> expected_code = RawCode({kBlPlus0}); - 
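As a reminder of the byte order assumed by RawCode(), GetOutputInsn() and the kCallRawCode/kNopRawCode arrays above: A64 instructions are stored little-endian, so {0x00, 0x00, 0x00, 0x94} is BL +0 (0x94000000) and {0x1f, 0x20, 0x03, 0xd5} is NOP (0xd503201f). A minimal sketch of that serialization; helper names are mine:

#include <cassert>
#include <cstdint>
#include <vector>

void PushInsn(std::vector<uint8_t>* code, uint32_t insn) {
  for (int shift = 0; shift != 32; shift += 8) {
    code->push_back(static_cast<uint8_t>(insn >> shift));  // LSB first.
  }
}

uint32_t ReadInsn(const std::vector<uint8_t>& code, size_t offset) {
  return static_cast<uint32_t>(code[offset]) |
         (static_cast<uint32_t>(code[offset + 1]) << 8) |
         (static_cast<uint32_t>(code[offset + 2]) << 16) |
         (static_cast<uint32_t>(code[offset + 3]) << 24);
}

int main() {
  std::vector<uint8_t> code;
  PushInsn(&code, 0x94000000u);  // BL +0
  PushInsn(&code, 0xd503201fu);  // NOP
  assert(code[3] == 0x94 && code[4] == 0x1f);
  assert(ReadInsn(code, 0) == 0x94000000u && ReadInsn(code, 4) == 0xd503201fu);
  return 0;
}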
EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOther) { - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - const LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - method1_offset; - CHECK_ALIGNED(diff_after, 4u); - ASSERT_LT(diff_after >> 2, 1u << 8); // Simple encoding, (diff_after >> 2) fits into 8 bits. - const std::vector<uint8_t> method1_expected_code = RawCode({kBlPlus0 + (diff_after >> 2)}); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - method2_offset; - CHECK_ALIGNED(diff_before, 4u); - ASSERT_GE(diff_before, -1u << 27); - auto method2_expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff_before >> 2) & 0x03ffffffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallTrampoline) { - const LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t diff = kTrampolineOffset - method1_offset; - ASSERT_EQ(diff & 1u, 0u); - ASSERT_GE(diff, -1u << 9); // Simple encoding, -256 <= (diff >> 1) < 0 (checked as unsigned). - auto expected_code = GenNopsAndBl(0u, kBlPlus0 | ((diff >> 2) & 0x03ffffffu)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallTrampolineTooFar) { - constexpr uint32_t missing_method_index = 1024u; - auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, missing_method_index), - }; - - constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; - uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - just_over_max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, - last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); - ASSERT_FALSE(method_offset_map_.FindMethodOffset(MethodRef(missing_method_index)).first); - - // Check linked code. 
- uint32_t thunk_offset = - CompiledCode::AlignCode(last_method_offset + last_method_code.size(), InstructionSet::kArm64); - uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), - }; - - constexpr uint32_t max_positive_disp = 128 * MB - 4u; - uint32_t last_method_idx = Create2MethodsWithGap(method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + max_positive_disp); - ASSERT_EQ(expected_last_method_idx, last_method_idx); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset + bl_offset_in_method1 + max_positive_disp, last_method_offset); - - // Check linked code. - auto expected_code = GenNopsAndBl(1u, kBlPlusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherAlmostTooFarBefore) { - auto last_method_raw_code = GenNopsAndBl(0u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 0u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), - }; - - constexpr uint32_t max_negative_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap(kNopCode, - ArrayRef<const LinkerPatch>(), - last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, last_method_offset + bl_offset_in_last_method - max_negative_disp); - - // Check linked code. - auto expected_code = GenNopsAndBl(0u, kBlMinusMax); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarAfter) { - auto method1_raw_code = GenNopsAndBl(0u, kBlPlus0); - constexpr uint32_t bl_offset_in_method1 = 0u * 4u; // After NOPs. - ArrayRef<const uint8_t> method1_code(method1_raw_code); - ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - uint32_t expected_last_method_idx = 65; // Based on 2MiB chunks in Create2MethodsWithGap(). 
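// Why 128 MiB: BL's imm26 is a signed count of 4-byte words, so the reachable
// displacements are [-2^25 * 4, (2^25 - 1) * 4], i.e. [-128 MiB, +128 MiB - 4].
// That is where max_positive_disp and max_negative_disp in the neighbouring tests
// come from; the "JustTooFar" variants go exactly one word past the limit.
static_assert((1u << 25) * 4u == 128u * 1024u * 1024u, "BL reaches at most +/-128 MiB");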
- const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, expected_last_method_idx), - }; - - constexpr uint32_t just_over_max_positive_disp = 128 * MB; - uint32_t last_method_idx = Create2MethodsWithGap( - method1_code, - ArrayRef<const LinkerPatch>(method1_patches), - kNopCode, - ArrayRef<const LinkerPatch>(), - bl_offset_in_method1 + just_over_max_positive_disp); - ASSERT_EQ(expected_last_method_idx, last_method_idx); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_TRUE(IsAligned<kArm64Alignment>(last_method_offset)); - uint32_t last_method_header_offset = last_method_offset - sizeof(OatQuickMethodHeader); - uint32_t thunk_size = MethodCallThunkSize(); - uint32_t thunk_offset = RoundDown(last_method_header_offset - thunk_size, kArm64Alignment); - DCHECK_EQ(thunk_offset + thunk_size + CodeAlignmentSize(thunk_offset + thunk_size), - last_method_header_offset); - uint32_t diff = thunk_offset - (method1_offset + bl_offset_in_method1); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(0u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); - CheckThunk(thunk_offset); -} - -TEST_F(Arm64RelativePatcherTestDefault, CallOtherJustTooFarBefore) { - auto last_method_raw_code = GenNopsAndBl(1u, kBlPlus0); - constexpr uint32_t bl_offset_in_last_method = 1u * 4u; // After NOPs. - ArrayRef<const uint8_t> last_method_code(last_method_raw_code); - ASSERT_EQ(bl_offset_in_last_method + 4u, last_method_code.size()); - const LinkerPatch last_method_patches[] = { - LinkerPatch::RelativeCodePatch(bl_offset_in_last_method, nullptr, 1u), - }; - - constexpr uint32_t just_over_max_negative_disp = 128 * MB + 4; - uint32_t last_method_idx = Create2MethodsWithGap( - kNopCode, ArrayRef<const LinkerPatch>(), last_method_code, - ArrayRef<const LinkerPatch>(last_method_patches), - just_over_max_negative_disp - bl_offset_in_last_method); - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(last_method_idx); - ASSERT_EQ(method1_offset, - last_method_offset + bl_offset_in_last_method - just_over_max_negative_disp); - - // Check linked code. 
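// As with the unreachable trampoline above, an out-of-range *backward* call gets its
// thunk appended after the last method's code. Contrast this with
// CallOtherJustTooFarAfter, where the thunk had to be squeezed in just below the
// unreachable callee's OatQuickMethodHeader (RoundDown from the header, with any
// alignment padding accounted for) so that the forward BL could still reach it.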
- uint32_t thunk_offset = - CompiledCode::AlignCode(last_method_offset + last_method_code.size(), InstructionSet::kArm64); - uint32_t diff = thunk_offset - (last_method_offset + bl_offset_in_last_method); - CHECK_ALIGNED(diff, 4u); - ASSERT_LT(diff, 128 * MB); - auto expected_code = GenNopsAndBl(1u, kBlPlus0 | (diff >> 2)); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(last_method_idx), - ArrayRef<const uint8_t>(expected_code))); - EXPECT_TRUE(CheckThunk(thunk_offset)); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry1) { - TestNopsAdrpLdr(0u, 0x12345678u, 0x1234u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry2) { - TestNopsAdrpLdr(0u, -0x12345678u, 0x4444u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry3) { - TestNopsAdrpLdr(0u, 0x12345000u, 0x3ffcu); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry4) { - TestNopsAdrpLdr(0u, 0x12345000u, 0x4000u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference1) { - TestNopsAdrpAdd(0u, 0x12345678u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference2) { - TestNopsAdrpAdd(0u, -0x12345678u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference3) { - TestNopsAdrpAdd(0u, 0x12345000u); -} - -TEST_F(Arm64RelativePatcherTestDefault, StringReference4) { - TestNopsAdrpAdd(0u, 0x12345ffcu); -} - -#define TEST_FOR_OFFSETS(test, disp1, disp2) \ - test(0xff4u, disp1) test(0xff8u, disp1) test(0xffcu, disp1) test(0x1000u, disp1) \ - test(0xff4u, disp2) test(0xff8u, disp2) test(0xffcu, disp2) test(0x1000u, disp2) - -#define DEFAULT_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpLdurLdr(adrp_offset, has_thunk, 0x12345678u, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_LDUR_LDR_TEST, 0x1234, 0x1238) - -#define DENVER64_LDUR_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, StringBssEntry ## adrp_offset ## Ldur ## disp) { \ - TestAdrpLdurLdr(adrp_offset, false, 0x12345678u, disp); \ - } - -TEST_FOR_OFFSETS(DENVER64_LDUR_LDR_TEST, 0x1234, 0x1238) - -// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. -#define LDRW_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WPcRel ## disp) { \ - TestAdrpLdrPcRelLdr(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRW_PCREL_LDR_TEST, 0x1234, 0x1238) - -// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. -#define LDRX_PCREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XPcRel ## disp) { \ - bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ - TestAdrpLdrPcRelLdr(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRX_PCREL_LDR_TEST, 0x1234, 0x1238) - -// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. 
-#define LDRW_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## WSpRel ## disp) { \ - TestAdrpLdrSpRelLdr(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRW_SPREL_LDR_TEST, 0, 4) - -#define LDRX_SPREL_LDR_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringBssEntry ## adrp_offset ## XSpRel ## disp) { \ - TestAdrpLdrSpRelLdr(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u, 0x1234u); \ - } - -TEST_FOR_OFFSETS(LDRX_SPREL_LDR_TEST, 0, 8) - -#define DEFAULT_LDUR_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## Ldur ## disp) { \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpLdurAdd(adrp_offset, has_thunk, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_LDUR_ADD_TEST, 0x12345678, 0xffffc840) - -#define DENVER64_LDUR_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDenver64, StringReference ## adrp_offset ## Ldur ## disp) { \ - TestAdrpLdurAdd(adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DENVER64_LDUR_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_SUBX3X2_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubX3X2 ## disp) { \ - /* SUB unrelated to "ADRP x0, addr". */ \ - uint32_t sub = kSubXInsn | (100 << 10) | (2u << 5) | 3u; /* SUB x3, x2, #100 */ \ - TestAdrpInsn2Add(sub, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_SUBX3X2_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_SUBSX3X0_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## SubsX3X0 ## disp) { \ - /* SUBS that uses the result of "ADRP x0, addr". */ \ - uint32_t subs = kSubsXInsn | (100 << 10) | (0u << 5) | 3u; /* SUBS x3, x0, #100 */ \ - TestAdrpInsn2Add(subs, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_SUBSX3X0_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_ADDX0X0_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddX0X0 ## disp) { \ - /* ADD that uses the result register of "ADRP x0, addr" as both source and destination. */ \ - uint32_t add = kSubXInsn | (100 << 10) | (0u << 5) | 0u; /* ADD x0, x0, #100 */ \ - TestAdrpInsn2Add(add, adrp_offset, false, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_ADDX0X0_ADD_TEST, 0x12345678, 0xffffc840) - -#define DEFAULT_ADDSX0X2_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## AddsX0X2 ## disp) { \ - /* ADDS that does not use the result of "ADRP x0, addr" but overwrites that register. */ \ - uint32_t adds = kAddsXInsn | (100 << 10) | (2u << 5) | 0u; /* ADDS x0, x2, #100 */ \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu); \ - TestAdrpInsn2Add(adds, adrp_offset, has_thunk, disp); \ - } - -TEST_FOR_OFFSETS(DEFAULT_ADDSX0X2_ADD_TEST, 0x12345678, 0xffffc840) - -// LDR <Wt>, <label> is always aligned. We should never have to use a fixup. -#define LDRW_PCREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WPcRel ## disp) { \ - TestAdrpLdrPcRelAdd(kLdrWPcRelInsn, disp, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRW_PCREL_ADD_TEST, 0x1234, 0x1238) - -// LDR <Xt>, <label> is aligned when offset + displacement is a multiple of 8. 
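// Taken together, the has_thunk arguments in the macros above and below spell out
// when these tests expect an ADRP fixup thunk (default CPU variant only; the
// denver64 tests never expect one): only when the ADRP lands at offset 0xff8 or
// 0xffc of a 4 KiB page *and* the next instruction is one the workaround cannot
// treat as safe, e.g. an LDUR off the ADRP result, an LDR <Xt>, <label> with an
// unaligned target, or an ADDS that clobbers the ADRP result without reading it.
// LDR <Wt>, <label>, SP-relative loads, and ADD/SUB(S) forms that either use the
// ADRP result or leave it untouched are exempt. This mirrors the Cortex-A53 ADRP
// erratum (843419) workaround; the authoritative predicate lives in
// Arm64RelativePatcher, and this summary is reconstructed from the expectations
// alone.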
-#define LDRX_PCREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XPcRel ## disp) { \ - bool unaligned = !IsAligned<8u>((adrp_offset) + 4u + static_cast<uint32_t>(disp)); \ - bool has_thunk = ((adrp_offset) == 0xff8u || (adrp_offset) == 0xffcu) && unaligned; \ - TestAdrpLdrPcRelAdd(kLdrXPcRelInsn, disp, adrp_offset, has_thunk, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRX_PCREL_ADD_TEST, 0x1234, 0x1238) - -// LDR <Wt>, [SP, #<pimm>] and LDR <Xt>, [SP, #<pimm>] are always aligned. No fixup needed. -#define LDRW_SPREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## WSpRel ## disp) { \ - TestAdrpLdrSpRelAdd(kLdrWSpRelInsn, (disp) >> 2, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) - -#define LDRX_SPREL_ADD_TEST(adrp_offset, disp) \ - TEST_F(Arm64RelativePatcherTestDefault, StringReference ## adrp_offset ## XSpRel ## disp) { \ - TestAdrpLdrSpRelAdd(kLdrXSpRelInsn, (disp) >> 3, adrp_offset, false, 0x12345678u); \ - } - -TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) - -void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - DCHECK_ALIGNED(offset, 4u); - DCHECK_LT(offset, 16 * KB); - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; - const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - } - Link(); - - // All thunks are at the end. 
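// What the verification loop below expects at the start of each Baker field thunk
// (a readable restatement, registers shown symbolically, not original code):
//   cbz   w<holder>, <null case>              // only checked when holder_reg == base_reg
//   ldr   wip0, [x<holder>, #monitor_offset]  // load the lock word
//   tbnz  wip0, #LockWord::kReadBarrierStateShift, <mark path>
//   ...                                       // one instruction not checked here
//   add   x<base>, x<base>, xip0, lsr #32     // fake dependency on the lock word load
// The masks below match the A64 encodings: CBZ (W) is 0x34000000, TBNZ with a bit
// number below 32 is 0x37000000, and ADD Xd, Xn, Xm, LSR #32 is 0x8b408000.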
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - for (uint32_t holder_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; - const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - ASSERT_TRUE( - CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - size_t gray_check_offset = thunk_offset; - if (holder_reg == base_reg) { - // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. - ASSERT_GE(output_.size() - gray_check_offset, 4u); - ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); - gray_check_offset +=4u; - } - // Verify that the lock word for gray bit check is loaded from the holder address. - static constexpr size_t kGrayCheckInsns = 5; - ASSERT_GE(output_.size() - gray_check_offset, 4u * kGrayCheckInsns); - const uint32_t load_lock_word = - kLdrWInsn | - (mirror::Object::MonitorOffset().Uint32Value() << (10 - 2)) | - (holder_reg << 5) | - /* ip0 */ 16; - EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); - // Verify the gray bit check. - const uint32_t check_gray_bit_without_offset = - 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu); - // Verify the fake dependency. - const uint32_t fake_dependency = - 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 - (/* ip0 */ 16 << 16) | // Xm = ip0 - (base_reg << 5) | // Xn = base_reg - base_reg; // Xd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn(gray_check_offset + 12u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } - } -} - -#define TEST_BAKER_FIELD(offset, ref_reg) \ - TEST_F(Arm64RelativePatcherTestDefault, \ - BakerOffset##offset##_##ref_reg) { \ - TestBakerField(offset, ref_reg); \ - } - -TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15) -TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29) - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { - // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
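// "Maximum distance" is the CB(N)Z range: the branch encodes a signed 19-bit word
// offset in bits [23:5], i.e. at most 1 MiB - 4 forward and 1 MiB backward. That is
// why the fillers below are sized to 1 MiB and why the extreme encodings checked at
// the end are kCbnzIP1Plus0Insn | 0x007fffe0 (imm19 = +0x3ffff words) and
// kCbnzIP1Plus0Insn | 0x00800000 (imm19 = -0x40000 words). A sketch of the field
// arithmetic (hypothetical helper, illustration only):
const auto cbnz_imm19_field = [](uint32_t byte_offset) {
  // byte_offset is a multiple of 4; the word count lands in bits [23:5].
  return (byte_offset << (5 - 2)) & 0x00ffffe0u;
};
(void)cbnz_imm19_field;  // The checks below inline the same `<< (5 - 2)` shift.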
- constexpr uint32_t kLiteralOffset1 = 4; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - // Allow reaching the thunk from the very beginning of a method 1MiB away. Backward branch - // reaches the full 1MiB. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). - size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); - size_t filler2_size = - 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - constexpr uint32_t kLiteralOffset2 = 0; - const std::vector<uint8_t> raw_code2 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - uint32_t first_method_offset = GetMethodOffset(1u); - uint32_t last_method_offset = GetMethodOffset(5u); - EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); - - const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; - const uint32_t cbnz_max_backward = kCbnzIP1Plus0Insn | 0x00800000; - const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({cbnz_max_backward, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { - // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction - // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
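// With the CBNZ at literal offset 0 (one instruction earlier than in the test
// above), a thunk placed after the roughly 1 MiB filler would no longer be within
// the +1 MiB - 4 branch range, so the thunk has to be emitted between this method
// and the filler; the expected branch offset below is simply method 1's code size
// rounded up to kArm64Alignment.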
- constexpr uint32_t kLiteralOffset1 = 0; - const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - Link(); - - const uint32_t cbnz_offset = RoundUp(raw_code1.size(), kArm64Alignment) - kLiteralOffset1; - const uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - const std::vector<uint8_t> expected_code1 = RawCode({cbnz, kLdrWInsn, kNopInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { - // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded - // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. - // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. - constexpr uint32_t kLiteralOffset1 = 4; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code1(raw_code1); - uint32_t encoded_data = - Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); - const LinkerPatch patches1[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), - }; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); - - // Allow thunk at 1MiB offset from the start of the method above. Literal offset being 4 - // allows the branch to reach that thunk. - size_t filler1_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment); - std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 4u); - ArrayRef<const uint8_t> filler1_code(raw_filler1_code); - AddCompiledMethod(MethodRef(2u), filler1_code); - - // Enforce thunk reservation with a tiny method. - AddCompiledMethod(MethodRef(3u), kNopCode); - - // If not for the extra NOP, this would allow reaching the thunk from the very beginning - // of a method 1MiB away. Backward branch reaches the full 1MiB. Things to subtract: - // - thunk size and method 3 pre-header, rounded up (padding in between if needed) - // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) - // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). 
- size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); - size_t filler2_size = - 1 * MB - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArm64Alignment) - - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 4u); - ArrayRef<const uint8_t> filler2_code(raw_filler2_code); - AddCompiledMethod(MethodRef(4u), filler2_code); - - // Extra NOP compared to BakerOffsetThunkInTheMiddle. - constexpr uint32_t kLiteralOffset2 = 4; - const std::vector<uint8_t> raw_code2 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); - ArrayRef<const uint8_t> code2(raw_code2); - const LinkerPatch patches2[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), - }; - AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); - - Link(); - - const uint32_t cbnz_max_forward = kCbnzIP1Plus0Insn | 0x007fffe0; - const uint32_t cbnz_last_offset = RoundUp(raw_code2.size(), kArm64Alignment) - kLiteralOffset2; - const uint32_t cbnz_last = kCbnzIP1Plus0Insn | (cbnz_last_offset << (5 - 2)); - const std::vector<uint8_t> expected_code1 = RawCode({kNopInsn, cbnz_max_forward, kLdrWInsn}); - const std::vector<uint8_t> expected_code2 = RawCode({kNopInsn, cbnz_last, kLdrWInsn}); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); - ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerArray) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - auto ldr = [](uint32_t base_reg) { - uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; - uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; - return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg; - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 0u; - uint32_t method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
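// For the array thunks checked below, base_reg points at the array *data*, so the
// lock word has to be reached with a negative offset back to the object's monitor
// field; that is why an LDUR (unscaled, signed 9-bit immediate in bits [20:12]) is
// expected instead of a scaled LDR. With the usual 32-bit-reference layout (monitor
// at +4, array data at +12) that offset would be -8, but the test recomputes it from
// MonitorOffset() and DataOffset() rather than hard-coding it. The gray-bit TBNZ and
// the LSR #32 fake dependency that follow are the same pattern as in the field
// thunks above.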
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t base_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the lock word for gray bit check is loaded from the correct address - // before the base_reg which points to the array data. - static constexpr size_t kGrayCheckInsns = 5; - ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns); - int32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); - int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; - ASSERT_LT(offset, 0); - const uint32_t load_lock_word = - kLdurWInsn | - ((offset & 0x1ffu) << 12) | - (base_reg << 5) | - /* ip0 */ 16; - EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset)); - // Verify the gray bit check. - const uint32_t check_gray_bit_without_offset = - 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu); - // Verify the fake dependency. - const uint32_t fake_dependency = - 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 - (/* ip0 */ 16 << 16) | // Xm = ip0 - (base_reg << 5) | // Xn = base_reg - base_reg; // Xd = base_reg - EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u)); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) { - uint32_t valid_regs[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - // LR and SP/ZR are reserved. - }; - constexpr size_t kMethodCodeSize = 8u; - constexpr size_t kLiteralOffset = 4u; - uint32_t method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; - const std::vector<uint8_t> raw_code = RawCode({ldr, kCbnzIP1Plus0Insn}); - ASSERT_EQ(kMethodCodeSize, raw_code.size()); - ArrayRef<const uint8_t> code(raw_code); - const LinkerPatch patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch( - kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), - }; - AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); - } - Link(); - - // All thunks are at the end. 
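// Unlike the field and array cases above, the GC root sequence places the CBNZ
// *after* the root LDR (kLiteralOffset is 4u here), since the read barrier only
// needs the already-loaded root value. The loop below checks just the thunk's fast
// path: a CBZ on the root register (a null root needs no marking), matched through
// the mask 0xff00001f, which keeps the CBZ opcode byte and the Rt field.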
- uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); - method_idx = 0u; - for (uint32_t root_reg : valid_regs) { - ++method_idx; - uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); - uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (/* offset */ 8 << (10 - 2)) | (/* base_reg */ 0 << 5) | root_reg; - const std::vector<uint8_t> expected_code = RawCode({ldr, cbnz}); - ASSERT_EQ(kMethodCodeSize, expected_code.size()); - EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); - - std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); - ASSERT_GT(output_.size(), thunk_offset); - ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); - ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, - expected_thunk.size()); - if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { - DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); - ASSERT_TRUE(false); - } - - // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. - ASSERT_GE(output_.size() - thunk_offset, 4u); - ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); - // Do not check the rest of the implementation. - - // The next thunk follows on the next aligned offset. - thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); - } -} - -TEST_F(Arm64RelativePatcherTestDefault, BakerAndMethodCallInteraction) { - // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());` - // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily - // hold when we're reserving thunks of different sizes. This test exposes the situation - // by using Baker thunks and a method call thunk. - - // Add a method call patch that can reach to method 1 offset + 128MiB. - uint32_t method_idx = 0u; - constexpr size_t kMethodCallLiteralOffset = 4u; - constexpr uint32_t kMissingMethodIdx = 2u; - const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0}); - const LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u), - }; - ArrayRef<const uint8_t> code1(raw_code1); - ++method_idx; - AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches)); - - // Skip kMissingMethodIdx. - ++method_idx; - ASSERT_EQ(kMissingMethodIdx, method_idx); - // Add a method with the right size that the method code for the next one starts 1MiB - // after code for method 1. - size_t filler_size = - 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArm64Alignment) - - sizeof(OatQuickMethodHeader); - std::vector<uint8_t> filler_code = GenNops(filler_size / 4u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - // Add 126 methods with 1MiB code+header, making the code for the next method start 1MiB - // before the currently scheduled MaxNextOffset() for the method call thunk. - for (uint32_t i = 0; i != 126; ++i) { - filler_size = 1 * MB - sizeof(OatQuickMethodHeader); - filler_code = GenNops(filler_size / 4u); - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code)); - } - - // Add 2 Baker GC root patches to the last method, one that would allow the thunk at - // 1MiB + kArm64Alignment, i.e. 
kArm64Alignment after the method call thunk, and the - // second that needs it kArm64Alignment after that. Given the size of the GC root thunk - // is more than the space required by the method call thunk plus kArm64Alignment, - // this pushes the first GC root thunk's pending MaxNextOffset() before the method call - // thunk's pending MaxNextOffset() which needs to be adjusted. - ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArm64Alignment) + kArm64Alignment, - CompileBakerGcRootThunk(/* root_reg */ 0).size()); - static_assert(kArm64Alignment == 16, "Code below assumes kArm64Alignment == 16"); - constexpr size_t kBakerLiteralOffset1 = 4u + kArm64Alignment; - constexpr size_t kBakerLiteralOffset2 = 4u + 2 * kArm64Alignment; - // Use offset = 0, base_reg = 0, the LDR is simply `kLdrWInsn | root_reg`. - const uint32_t ldr1 = kLdrWInsn | /* root_reg */ 1; - const uint32_t ldr2 = kLdrWInsn | /* root_reg */ 2; - const std::vector<uint8_t> last_method_raw_code = RawCode({ - kNopInsn, kNopInsn, kNopInsn, kNopInsn, // Padding before first GC root read barrier. - ldr1, kCbnzIP1Plus0Insn, // First GC root LDR with read barrier. - kNopInsn, kNopInsn, // Padding before second GC root read barrier. - ldr2, kCbnzIP1Plus0Insn, // Second GC root LDR with read barrier. - }); - uint32_t encoded_data1 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); - uint32_t encoded_data2 = Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); - const LinkerPatch last_method_patches[] = { - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), - LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), - }; - ++method_idx; - AddCompiledMethod(MethodRef(method_idx), - ArrayRef<const uint8_t>(last_method_raw_code), - ArrayRef<const LinkerPatch>(last_method_patches)); - - // The main purpose of the test is to check that Link() does not cause a crash. - Link(); - - ASSERT_EQ(127 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/buffered_output_stream.cc b/compiler/linker/buffered_output_stream.cc deleted file mode 100644 index 07066b76ac..0000000000 --- a/compiler/linker/buffered_output_stream.cc +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "buffered_output_stream.h" - -#include <string.h> - -namespace art { -namespace linker { - -BufferedOutputStream::BufferedOutputStream(std::unique_ptr<OutputStream> out) - : OutputStream(out->GetLocation()), // Before out is moved to out_. 
- out_(std::move(out)), - used_(0) {} - -BufferedOutputStream::~BufferedOutputStream() { - FlushBuffer(); -} - -bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) { - if (byte_count > kBufferSize) { - if (!FlushBuffer()) { - return false; - } - return out_->WriteFully(buffer, byte_count); - } - if (used_ + byte_count > kBufferSize) { - if (!FlushBuffer()) { - return false; - } - } - const uint8_t* src = reinterpret_cast<const uint8_t*>(buffer); - memcpy(&buffer_[used_], src, byte_count); - used_ += byte_count; - return true; -} - -bool BufferedOutputStream::Flush() { - return FlushBuffer() && out_->Flush(); -} - -bool BufferedOutputStream::FlushBuffer() { - bool success = true; - if (used_ > 0) { - success = out_->WriteFully(&buffer_[0], used_); - used_ = 0; - } - return success; -} - -off_t BufferedOutputStream::Seek(off_t offset, Whence whence) { - if (!FlushBuffer()) { - return -1; - } - return out_->Seek(offset, whence); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/buffered_output_stream.h b/compiler/linker/buffered_output_stream.h deleted file mode 100644 index 66994e82a1..0000000000 --- a/compiler/linker/buffered_output_stream.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_ -#define ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_ - -#include <memory> - -#include "output_stream.h" - -#include "globals.h" - -namespace art { -namespace linker { - -class BufferedOutputStream FINAL : public OutputStream { - public: - explicit BufferedOutputStream(std::unique_ptr<OutputStream> out); - - ~BufferedOutputStream() OVERRIDE; - - bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE; - - off_t Seek(off_t offset, Whence whence) OVERRIDE; - - bool Flush() OVERRIDE; - - private: - static const size_t kBufferSize = 8 * KB; - - bool FlushBuffer(); - - std::unique_ptr<OutputStream> const out_; - uint8_t buffer_[kBufferSize]; - size_t used_; - - DISALLOW_COPY_AND_ASSIGN(BufferedOutputStream); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_BUFFERED_OUTPUT_STREAM_H_ diff --git a/compiler/linker/elf_builder.h b/compiler/linker/elf_builder.h deleted file mode 100644 index a5f60992ca..0000000000 --- a/compiler/linker/elf_builder.h +++ /dev/null @@ -1,1050 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ELF_BUILDER_H_ -#define ART_COMPILER_LINKER_ELF_BUILDER_H_ - -#include <vector> - -#include "arch/instruction_set.h" -#include "arch/mips/instruction_set_features_mips.h" -#include "base/array_ref.h" -#include "base/bit_utils.h" -#include "base/casts.h" -#include "base/leb128.h" -#include "base/unix_file/fd_file.h" -#include "elf_utils.h" -#include "linker/error_delaying_output_stream.h" - -namespace art { -namespace linker { - -// Writes ELF file. -// -// The basic layout of the elf file: -// Elf_Ehdr - The ELF header. -// Elf_Phdr[] - Program headers for the linker. -// .note.gnu.build-id - Optional build ID section (SHA-1 digest). -// .rodata - Oat metadata. -// .text - Compiled code. -// .bss - Zero-initialized writeable section. -// .dex - Reserved NOBITS space for dex-related data. -// .MIPS.abiflags - MIPS specific section. -// .dynstr - Names for .dynsym. -// .dynsym - A few oat-specific dynamic symbols. -// .hash - Hash-table for .dynsym. -// .dynamic - Tags which let the linker locate .dynsym. -// .strtab - Names for .symtab. -// .symtab - Debug symbols. -// .eh_frame - Unwind information (CFI). -// .eh_frame_hdr - Index of .eh_frame. -// .debug_frame - Unwind information (CFI). -// .debug_frame.oat_patches - Addresses for relocation. -// .debug_info - Debug information. -// .debug_info.oat_patches - Addresses for relocation. -// .debug_abbrev - Decoding information for .debug_info. -// .debug_str - Strings for .debug_info. -// .debug_line - Line number tables. -// .debug_line.oat_patches - Addresses for relocation. -// .text.oat_patches - Addresses for relocation. -// .shstrtab - Names of ELF sections. -// Elf_Shdr[] - Section headers. -// -// Some section are optional (the debug sections in particular). -// -// We try write the section data directly into the file without much -// in-memory buffering. This means we generally write sections based on the -// dependency order (e.g. .dynamic points to .dynsym which points to .text). -// -// In the cases where we need to buffer, we write the larger section first -// and buffer the smaller one (e.g. .strtab is bigger than .symtab). -// -// The debug sections are written last for easier stripping. -// -template <typename ElfTypes> -class ElfBuilder FINAL { - public: - static constexpr size_t kMaxProgramHeaders = 16; - // SHA-1 digest. Not using SHA_DIGEST_LENGTH from openssl/sha.h to avoid - // spreading this header dependency for just this single constant. - static constexpr size_t kBuildIdLen = 20; - - using Elf_Addr = typename ElfTypes::Addr; - using Elf_Off = typename ElfTypes::Off; - using Elf_Word = typename ElfTypes::Word; - using Elf_Sword = typename ElfTypes::Sword; - using Elf_Ehdr = typename ElfTypes::Ehdr; - using Elf_Shdr = typename ElfTypes::Shdr; - using Elf_Sym = typename ElfTypes::Sym; - using Elf_Phdr = typename ElfTypes::Phdr; - using Elf_Dyn = typename ElfTypes::Dyn; - - // Base class of all sections. 
- class Section : public OutputStream { - public: - Section(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word type, - Elf_Word flags, - const Section* link, - Elf_Word info, - Elf_Word align, - Elf_Word entsize) - : OutputStream(name), - owner_(owner), - header_(), - section_index_(0), - name_(name), - link_(link), - phdr_flags_(PF_R), - phdr_type_(0) { - DCHECK_GE(align, 1u); - header_.sh_type = type; - header_.sh_flags = flags; - header_.sh_info = info; - header_.sh_addralign = align; - header_.sh_entsize = entsize; - } - - // Allocate chunk of virtual memory for this section from the owning ElfBuilder. - // This must be done at the start for all SHF_ALLOC sections (i.e. mmaped by linker). - // It is fine to allocate section but never call Start/End() (e.g. the .bss section). - void AllocateVirtualMemory(Elf_Word size) { - AllocateVirtualMemory(owner_->virtual_address_, size); - } - - void AllocateVirtualMemory(Elf_Addr addr, Elf_Word size) { - CHECK_NE(header_.sh_flags & SHF_ALLOC, 0u); - Elf_Word align = AddSection(); - CHECK_EQ(header_.sh_addr, 0u); - header_.sh_addr = RoundUp(addr, align); - CHECK(header_.sh_size == 0u || header_.sh_size == size); - header_.sh_size = size; - CHECK_LE(owner_->virtual_address_, header_.sh_addr); - owner_->virtual_address_ = header_.sh_addr + header_.sh_size; - } - - // Start writing file data of this section. - void Start() { - CHECK(owner_->current_section_ == nullptr); - Elf_Word align = AddSection(); - CHECK_EQ(header_.sh_offset, 0u); - header_.sh_offset = owner_->AlignFileOffset(align); - owner_->current_section_ = this; - } - - // Finish writing file data of this section. - void End() { - CHECK(owner_->current_section_ == this); - Elf_Word position = GetPosition(); - CHECK(header_.sh_size == 0u || header_.sh_size == position); - header_.sh_size = position; - owner_->current_section_ = nullptr; - } - - // Get the number of bytes written so far. - // Only valid while writing the section. - Elf_Word GetPosition() const { - CHECK(owner_->current_section_ == this); - off_t file_offset = owner_->stream_.Seek(0, kSeekCurrent); - DCHECK_GE(file_offset, (off_t)header_.sh_offset); - return file_offset - header_.sh_offset; - } - - // Get the location of this section in virtual memory. - Elf_Addr GetAddress() const { - DCHECK_NE(header_.sh_flags & SHF_ALLOC, 0u); - DCHECK_NE(header_.sh_addr, 0u); - return header_.sh_addr; - } - - // This function always succeeds to simplify code. - // Use builder's Good() to check the actual status. - bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE { - CHECK(owner_->current_section_ == this); - return owner_->stream_.WriteFully(buffer, byte_count); - } - - // This function always succeeds to simplify code. - // Use builder's Good() to check the actual status. - off_t Seek(off_t offset, Whence whence) OVERRIDE { - // Forward the seek as-is and trust the caller to use it reasonably. - return owner_->stream_.Seek(offset, whence); - } - - // This function flushes the output and returns whether it succeeded. - // If there was a previous failure, this does nothing and returns false, i.e. failed. - bool Flush() OVERRIDE { - return owner_->stream_.Flush(); - } - - Elf_Word GetSectionIndex() const { - DCHECK_NE(section_index_, 0u); - return section_index_; - } - - // Returns true if this section has been added. - bool Exists() const { - return section_index_ != 0; - } - - protected: - // Add this section to the list of generated ELF sections (if not there already). 
- // It also ensures the alignment is sufficient to generate valid program headers, - // since that depends on the previous section. It returns the required alignment. - Elf_Word AddSection() { - if (section_index_ == 0) { - std::vector<Section*>& sections = owner_->sections_; - Elf_Word last = sections.empty() ? PF_R : sections.back()->phdr_flags_; - if (phdr_flags_ != last) { - header_.sh_addralign = kPageSize; // Page-align if R/W/X flags changed. - } - sections.push_back(this); - section_index_ = sections.size(); // First ELF section has index 1. - } - return owner_->write_program_headers_ ? header_.sh_addralign : 1; - } - - ElfBuilder<ElfTypes>* owner_; - Elf_Shdr header_; - Elf_Word section_index_; - const std::string name_; - const Section* const link_; - Elf_Word phdr_flags_; - Elf_Word phdr_type_; - - friend class ElfBuilder; - - DISALLOW_COPY_AND_ASSIGN(Section); - }; - - class CachedSection : public Section { - public: - CachedSection(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word type, - Elf_Word flags, - const Section* link, - Elf_Word info, - Elf_Word align, - Elf_Word entsize) - : Section(owner, name, type, flags, link, info, align, entsize), cache_() { } - - Elf_Word Add(const void* data, size_t length) { - Elf_Word offset = cache_.size(); - const uint8_t* d = reinterpret_cast<const uint8_t*>(data); - cache_.insert(cache_.end(), d, d + length); - return offset; - } - - Elf_Word GetCacheSize() { - return cache_.size(); - } - - void Write() { - this->WriteFully(cache_.data(), cache_.size()); - cache_.clear(); - cache_.shrink_to_fit(); - } - - void WriteCachedSection() { - this->Start(); - Write(); - this->End(); - } - - private: - std::vector<uint8_t> cache_; - }; - - // Writer of .dynstr section. - class CachedStringSection FINAL : public CachedSection { - public: - CachedStringSection(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word flags, - Elf_Word align) - : CachedSection(owner, - name, - SHT_STRTAB, - flags, - /* link */ nullptr, - /* info */ 0, - align, - /* entsize */ 0) { } - - Elf_Word Add(const std::string& name) { - if (CachedSection::GetCacheSize() == 0u) { - DCHECK(name.empty()); - } - return CachedSection::Add(name.c_str(), name.length() + 1); - } - }; - - // Writer of .strtab and .shstrtab sections. - class StringSection FINAL : public Section { - public: - StringSection(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word flags, - Elf_Word align) - : Section(owner, - name, - SHT_STRTAB, - flags, - /* link */ nullptr, - /* info */ 0, - align, - /* entsize */ 0), - current_offset_(0), - last_offset_(0) { - } - - Elf_Word Write(const std::string& name) { - if (current_offset_ == 0) { - DCHECK(name.empty()); - } else if (name == last_name_) { - return last_offset_; // Very simple string de-duplication. - } - last_name_ = name; - last_offset_ = current_offset_; - this->WriteFully(name.c_str(), name.length() + 1); - current_offset_ += name.length() + 1; - return last_offset_; - } - - private: - Elf_Word current_offset_; - std::string last_name_; - Elf_Word last_offset_; - }; - - // Writer of .dynsym and .symtab sections. - class SymbolSection FINAL : public Section { - public: - SymbolSection(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word type, - Elf_Word flags, - Section* strtab) - : Section(owner, - name, - type, - flags, - strtab, - /* info */ 1, - sizeof(Elf_Off), - sizeof(Elf_Sym)) { - syms_.push_back(Elf_Sym()); // The symbol table always has to start with NULL symbol. 
- } - - // Buffer symbol for this section. It will be written later. - // If the symbol's section is null, it will be considered absolute (SHN_ABS). - // (we use this in JIT to reference code which is stored outside the debug ELF file) - void Add(Elf_Word name, - const Section* section, - Elf_Addr addr, - Elf_Word size, - uint8_t binding, - uint8_t type) { - Elf_Word section_index; - if (section != nullptr) { - DCHECK_LE(section->GetAddress(), addr); - DCHECK_LE(addr, section->GetAddress() + section->header_.sh_size); - section_index = section->GetSectionIndex(); - } else { - section_index = static_cast<Elf_Word>(SHN_ABS); - } - Add(name, section_index, addr, size, binding, type); - } - - // Buffer symbol for this section. It will be written later. - void Add(Elf_Word name, - Elf_Word section_index, - Elf_Addr addr, - Elf_Word size, - uint8_t binding, - uint8_t type) { - Elf_Sym sym = Elf_Sym(); - sym.st_name = name; - sym.st_value = addr; - sym.st_size = size; - sym.st_other = 0; - sym.st_shndx = section_index; - sym.st_info = (binding << 4) + (type & 0xf); - syms_.push_back(sym); - - // The sh_info file must be set to index one-past the last local symbol. - if (binding == STB_LOCAL) { - this->header_.sh_info = syms_.size(); - } - } - - Elf_Word GetCacheSize() { return syms_.size() * sizeof(Elf_Sym); } - - void WriteCachedSection() { - this->Start(); - this->WriteFully(syms_.data(), syms_.size() * sizeof(Elf_Sym)); - this->End(); - } - - private: - std::vector<Elf_Sym> syms_; // Buffered/cached content of the whole section. - }; - - class AbiflagsSection FINAL : public Section { - public: - // Section with Mips abiflag info. - static constexpr uint8_t MIPS_AFL_REG_NONE = 0; // no registers - static constexpr uint8_t MIPS_AFL_REG_32 = 1; // 32-bit registers - static constexpr uint8_t MIPS_AFL_REG_64 = 2; // 64-bit registers - static constexpr uint32_t MIPS_AFL_FLAGS1_ODDSPREG = 1; // Uses odd single-prec fp regs - static constexpr uint8_t MIPS_ABI_FP_DOUBLE = 1; // -mdouble-float - static constexpr uint8_t MIPS_ABI_FP_XX = 5; // -mfpxx - static constexpr uint8_t MIPS_ABI_FP_64A = 7; // -mips32r* -mfp64 -mno-odd-spreg - - AbiflagsSection(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word type, - Elf_Word flags, - const Section* link, - Elf_Word info, - Elf_Word align, - Elf_Word entsize, - InstructionSet isa, - const InstructionSetFeatures* features) - : Section(owner, name, type, flags, link, info, align, entsize) { - if (isa == InstructionSet::kMips || isa == InstructionSet::kMips64) { - bool fpu32 = false; // assume mips64 values - uint8_t isa_rev = 6; // assume mips64 values - if (isa == InstructionSet::kMips) { - // adjust for mips32 values - fpu32 = features->AsMipsInstructionSetFeatures()->Is32BitFloatingPoint(); - isa_rev = features->AsMipsInstructionSetFeatures()->IsR6() - ? 6 - : features->AsMipsInstructionSetFeatures()->IsMipsIsaRevGreaterThanEqual2() - ? (fpu32 ? 2 : 5) - : 1; - } - abiflags_.version = 0; // version of flags structure - abiflags_.isa_level = (isa == InstructionSet::kMips) ? 32 : 64; - abiflags_.isa_rev = isa_rev; - abiflags_.gpr_size = (isa == InstructionSet::kMips) ? MIPS_AFL_REG_32 : MIPS_AFL_REG_64; - abiflags_.cpr1_size = fpu32 ? MIPS_AFL_REG_32 : MIPS_AFL_REG_64; - abiflags_.cpr2_size = MIPS_AFL_REG_NONE; - // Set the fp_abi to MIPS_ABI_FP_64A for mips32 with 64-bit FPUs (ie: mips32 R5 and R6). - // Otherwise set to MIPS_ABI_FP_DOUBLE. - abiflags_.fp_abi = - (isa == InstructionSet::kMips && !fpu32) ? 
MIPS_ABI_FP_64A : MIPS_ABI_FP_DOUBLE; - abiflags_.isa_ext = 0; - abiflags_.ases = 0; - // To keep the code simple, we are not using odd FP reg for single floats for both - // mips32 and mips64 ART. Therefore we are not setting the MIPS_AFL_FLAGS1_ODDSPREG bit. - abiflags_.flags1 = 0; - abiflags_.flags2 = 0; - } - } - - Elf_Word GetSize() const { - return sizeof(abiflags_); - } - - void Write() { - this->WriteFully(&abiflags_, sizeof(abiflags_)); - } - - private: - struct { - uint16_t version; // version of this structure - uint8_t isa_level, isa_rev, gpr_size, cpr1_size, cpr2_size; - uint8_t fp_abi; - uint32_t isa_ext, ases, flags1, flags2; - } abiflags_; - }; - - class BuildIdSection FINAL : public Section { - public: - BuildIdSection(ElfBuilder<ElfTypes>* owner, - const std::string& name, - Elf_Word type, - Elf_Word flags, - const Section* link, - Elf_Word info, - Elf_Word align, - Elf_Word entsize) - : Section(owner, name, type, flags, link, info, align, entsize), - digest_start_(-1) { - } - - Elf_Word GetSize() { - return 16 + kBuildIdLen; - } - - void Write() { - // The size fields are 32-bit on both 32-bit and 64-bit systems, confirmed - // with the 64-bit linker and libbfd code. The size of name and desc must - // be a multiple of 4 and it currently is. - this->WriteUint32(4); // namesz. - this->WriteUint32(kBuildIdLen); // descsz. - this->WriteUint32(3); // type = NT_GNU_BUILD_ID. - this->WriteFully("GNU", 4); // name. - digest_start_ = this->Seek(0, kSeekCurrent); - static_assert(kBuildIdLen % 4 == 0, "expecting a mutliple of 4 for build ID length"); - this->WriteFully(std::string(kBuildIdLen, '\0').c_str(), kBuildIdLen); // desc. - DCHECK_EQ(this->GetPosition(), GetSize()); - } - - off_t GetDigestStart() { - CHECK_GT(digest_start_, 0); - return digest_start_; - } - - private: - bool WriteUint32(uint32_t v) { - return this->WriteFully(&v, sizeof(v)); - } - - // File offset where the build ID digest starts. - // Populated with zeros first, then updated with the actual value as the - // very last thing in the output file creation. 
- off_t digest_start_; - }; - - ElfBuilder(InstructionSet isa, const InstructionSetFeatures* features, OutputStream* output) - : isa_(isa), - features_(features), - stream_(output), - rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), - text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0), - bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), - dex_(this, ".dex", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), - dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize), - dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_), - hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)), - dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)), - eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0), - eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0), - strtab_(this, ".strtab", 0, 1), - symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_), - debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0), - debug_info_(this, ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0), - debug_line_(this, ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0), - shstrtab_(this, ".shstrtab", 0, 1), - abiflags_(this, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, nullptr, 0, kPageSize, 0, - isa, features), - build_id_(this, ".note.gnu.build-id", SHT_NOTE, SHF_ALLOC, nullptr, 0, 4, 0), - current_section_(nullptr), - started_(false), - write_program_headers_(false), - loaded_size_(0u), - virtual_address_(0) { - text_.phdr_flags_ = PF_R | PF_X; - bss_.phdr_flags_ = PF_R | PF_W; - dex_.phdr_flags_ = PF_R; - dynamic_.phdr_flags_ = PF_R | PF_W; - dynamic_.phdr_type_ = PT_DYNAMIC; - eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME; - abiflags_.phdr_type_ = PT_MIPS_ABIFLAGS; - build_id_.phdr_type_ = PT_NOTE; - } - ~ElfBuilder() {} - - InstructionSet GetIsa() { return isa_; } - BuildIdSection* GetBuildId() { return &build_id_; } - Section* GetRoData() { return &rodata_; } - Section* GetText() { return &text_; } - Section* GetBss() { return &bss_; } - Section* GetDex() { return &dex_; } - StringSection* GetStrTab() { return &strtab_; } - SymbolSection* GetSymTab() { return &symtab_; } - Section* GetEhFrame() { return &eh_frame_; } - Section* GetEhFrameHdr() { return &eh_frame_hdr_; } - Section* GetDebugFrame() { return &debug_frame_; } - Section* GetDebugInfo() { return &debug_info_; } - Section* GetDebugLine() { return &debug_line_; } - - // Encode patch locations as LEB128 list of deltas between consecutive addresses. - // (exposed publicly for tests) - static void EncodeOatPatches(const ArrayRef<const uintptr_t>& locations, - std::vector<uint8_t>* buffer) { - buffer->reserve(buffer->size() + locations.size() * 2); // guess 2 bytes per ULEB128. - uintptr_t address = 0; // relative to start of section. 
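// Worked example (illustration, not in the original source): locations
// { 0x10, 0x18, 0x100 } are stored as the deltas 0x10, 0x08, 0xe8, each ULEB128
// encoded, giving the bytes { 0x10, 0x08, 0xe8, 0x01 }; the last delta (232) takes
// two bytes because ULEB128 carries seven payload bits per byte plus a
// continuation bit.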
- for (uintptr_t location : locations) { - DCHECK_GE(location, address) << "Patch locations are not in sorted order"; - EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address)); - address = location; - } - } - - void WritePatches(const char* name, const ArrayRef<const uintptr_t>& patch_locations) { - std::vector<uint8_t> buffer; - EncodeOatPatches(patch_locations, &buffer); - std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0)); - s->Start(); - s->WriteFully(buffer.data(), buffer.size()); - s->End(); - other_sections_.push_back(std::move(s)); - } - - void WriteSection(const char* name, const std::vector<uint8_t>* buffer) { - std::unique_ptr<Section> s(new Section(this, name, SHT_PROGBITS, 0, nullptr, 0, 1, 0)); - s->Start(); - s->WriteFully(buffer->data(), buffer->size()); - s->End(); - other_sections_.push_back(std::move(s)); - } - - // Reserve space for ELF header and program headers. - // We do not know the number of headers until later, so - // it is easiest to just reserve a fixed amount of space. - // Program headers are required for loading by the linker. - // It is possible to omit them for ELF files used for debugging. - void Start(bool write_program_headers = true) { - int size = sizeof(Elf_Ehdr); - if (write_program_headers) { - size += sizeof(Elf_Phdr) * kMaxProgramHeaders; - } - stream_.Seek(size, kSeekSet); - started_ = true; - virtual_address_ += size; - write_program_headers_ = write_program_headers; - } - - void End() { - DCHECK(started_); - - // Note: loaded_size_ == 0 for tests that don't write .rodata, .text, .bss, - // .dynstr, dynsym, .hash and .dynamic. These tests should not read loaded_size_. - // TODO: Either refactor the .eh_frame creation so that it counts towards loaded_size_, - // or remove all support for .eh_frame. (The currently unused .eh_frame counts towards - // the virtual_address_ but we don't consider it for loaded_size_.) - CHECK(loaded_size_ == 0 || loaded_size_ == RoundUp(virtual_address_, kPageSize)) - << loaded_size_ << " " << virtual_address_; - - // Write section names and finish the section headers. - shstrtab_.Start(); - shstrtab_.Write(""); - for (auto* section : sections_) { - section->header_.sh_name = shstrtab_.Write(section->name_); - if (section->link_ != nullptr) { - section->header_.sh_link = section->link_->GetSectionIndex(); - } - if (section->header_.sh_offset == 0) { - section->header_.sh_type = SHT_NOBITS; - } - } - shstrtab_.End(); - - // Write section headers at the end of the ELF file. - std::vector<Elf_Shdr> shdrs; - shdrs.reserve(1u + sections_.size()); - shdrs.push_back(Elf_Shdr()); // NULL at index 0. - for (auto* section : sections_) { - shdrs.push_back(section->header_); - } - Elf_Off section_headers_offset; - section_headers_offset = AlignFileOffset(sizeof(Elf_Off)); - stream_.WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0])); - - // Flush everything else before writing the program headers. This should prevent - // the OS from reordering writes, so that we don't end up with valid headers - // and partially written data if we suddenly lose power, for example. - stream_.Flush(); - - // The main ELF header. - Elf_Ehdr elf_header = MakeElfHeader(isa_, features_); - elf_header.e_shoff = section_headers_offset; - elf_header.e_shnum = shdrs.size(); - elf_header.e_shstrndx = shstrtab_.GetSectionIndex(); - - // Program headers (i.e. mmap instructions). 
- std::vector<Elf_Phdr> phdrs; - if (write_program_headers_) { - phdrs = MakeProgramHeaders(); - CHECK_LE(phdrs.size(), kMaxProgramHeaders); - elf_header.e_phoff = sizeof(Elf_Ehdr); - elf_header.e_phnum = phdrs.size(); - } - - stream_.Seek(0, kSeekSet); - stream_.WriteFully(&elf_header, sizeof(elf_header)); - stream_.WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0])); - stream_.Flush(); - } - - // The running program does not have access to section headers - // and the loader is not supposed to use them either. - // The dynamic sections therefore replicates some of the layout - // information like the address and size of .rodata and .text. - // It also contains other metadata like the SONAME. - // The .dynamic section is found using the PT_DYNAMIC program header. - void PrepareDynamicSection(const std::string& elf_file_path, - Elf_Word rodata_size, - Elf_Word text_size, - Elf_Word bss_size, - Elf_Word bss_methods_offset, - Elf_Word bss_roots_offset, - Elf_Word dex_size) { - std::string soname(elf_file_path); - size_t directory_separator_pos = soname.rfind('/'); - if (directory_separator_pos != std::string::npos) { - soname = soname.substr(directory_separator_pos + 1); - } - - // Allocate all pre-dynamic sections. - rodata_.AllocateVirtualMemory(rodata_size); - text_.AllocateVirtualMemory(text_size); - if (bss_size != 0) { - bss_.AllocateVirtualMemory(bss_size); - } - if (dex_size != 0) { - dex_.AllocateVirtualMemory(dex_size); - } - if (isa_ == InstructionSet::kMips || isa_ == InstructionSet::kMips64) { - abiflags_.AllocateVirtualMemory(abiflags_.GetSize()); - } - - // Cache .dynstr, .dynsym and .hash data. - dynstr_.Add(""); // dynstr should start with empty string. - Elf_Word oatdata = dynstr_.Add("oatdata"); - dynsym_.Add(oatdata, &rodata_, rodata_.GetAddress(), rodata_size, STB_GLOBAL, STT_OBJECT); - if (text_size != 0u) { - // The runtime does not care about the size of this symbol (it uses the "lastword" symbol). - // We use size 0 (meaning "unknown size" in ELF) to prevent overlap with the debug symbols. - Elf_Word oatexec = dynstr_.Add("oatexec"); - dynsym_.Add(oatexec, &text_, text_.GetAddress(), /* size */ 0, STB_GLOBAL, STT_OBJECT); - Elf_Word oatlastword = dynstr_.Add("oatlastword"); - Elf_Word oatlastword_address = text_.GetAddress() + text_size - 4; - dynsym_.Add(oatlastword, &text_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); - } else if (rodata_size != 0) { - // rodata_ can be size 0 for dwarf_test. - Elf_Word oatlastword = dynstr_.Add("oatlastword"); - Elf_Word oatlastword_address = rodata_.GetAddress() + rodata_size - 4; - dynsym_.Add(oatlastword, &rodata_, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); - } - DCHECK_LE(bss_roots_offset, bss_size); - if (bss_size != 0u) { - Elf_Word oatbss = dynstr_.Add("oatbss"); - dynsym_.Add(oatbss, &bss_, bss_.GetAddress(), bss_roots_offset, STB_GLOBAL, STT_OBJECT); - DCHECK_LE(bss_methods_offset, bss_roots_offset); - DCHECK_LE(bss_roots_offset, bss_size); - // Add a symbol marking the start of the methods part of the .bss, if not empty. - if (bss_methods_offset != bss_roots_offset) { - Elf_Word bss_methods_address = bss_.GetAddress() + bss_methods_offset; - Elf_Word bss_methods_size = bss_roots_offset - bss_methods_offset; - Elf_Word oatbssroots = dynstr_.Add("oatbssmethods"); - dynsym_.Add( - oatbssroots, &bss_, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT); - } - // Add a symbol marking the start of the GC roots part of the .bss, if not empty. 
- if (bss_roots_offset != bss_size) { - Elf_Word bss_roots_address = bss_.GetAddress() + bss_roots_offset; - Elf_Word bss_roots_size = bss_size - bss_roots_offset; - Elf_Word oatbssroots = dynstr_.Add("oatbssroots"); - dynsym_.Add( - oatbssroots, &bss_, bss_roots_address, bss_roots_size, STB_GLOBAL, STT_OBJECT); - } - Elf_Word oatbsslastword = dynstr_.Add("oatbsslastword"); - Elf_Word bsslastword_address = bss_.GetAddress() + bss_size - 4; - dynsym_.Add(oatbsslastword, &bss_, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT); - } - if (dex_size != 0u) { - Elf_Word oatdex = dynstr_.Add("oatdex"); - dynsym_.Add(oatdex, &dex_, dex_.GetAddress(), /* size */ 0, STB_GLOBAL, STT_OBJECT); - Elf_Word oatdexlastword = dynstr_.Add("oatdexlastword"); - Elf_Word oatdexlastword_address = dex_.GetAddress() + dex_size - 4; - dynsym_.Add(oatdexlastword, &dex_, oatdexlastword_address, 4, STB_GLOBAL, STT_OBJECT); - } - - Elf_Word soname_offset = dynstr_.Add(soname); - - // We do not really need a hash-table since there is so few entries. - // However, the hash-table is the only way the linker can actually - // determine the number of symbols in .dynsym so it is required. - int count = dynsym_.GetCacheSize() / sizeof(Elf_Sym); // Includes NULL. - std::vector<Elf_Word> hash; - hash.push_back(1); // Number of buckets. - hash.push_back(count); // Number of chains. - // Buckets. Having just one makes it linear search. - hash.push_back(1); // Point to first non-NULL symbol. - // Chains. This creates linked list of symbols. - hash.push_back(0); // Dummy entry for the NULL symbol. - for (int i = 1; i < count - 1; i++) { - hash.push_back(i + 1); // Each symbol points to the next one. - } - hash.push_back(0); // Last symbol terminates the chain. - hash_.Add(hash.data(), hash.size() * sizeof(hash[0])); - - // Allocate all remaining sections. - dynstr_.AllocateVirtualMemory(dynstr_.GetCacheSize()); - dynsym_.AllocateVirtualMemory(dynsym_.GetCacheSize()); - hash_.AllocateVirtualMemory(hash_.GetCacheSize()); - - Elf_Dyn dyns[] = { - { DT_HASH, { hash_.GetAddress() } }, - { DT_STRTAB, { dynstr_.GetAddress() } }, - { DT_SYMTAB, { dynsym_.GetAddress() } }, - { DT_SYMENT, { sizeof(Elf_Sym) } }, - { DT_STRSZ, { dynstr_.GetCacheSize() } }, - { DT_SONAME, { soname_offset } }, - { DT_NULL, { 0 } }, - }; - dynamic_.Add(&dyns, sizeof(dyns)); - dynamic_.AllocateVirtualMemory(dynamic_.GetCacheSize()); - - loaded_size_ = RoundUp(virtual_address_, kPageSize); - } - - void WriteDynamicSection() { - dynstr_.WriteCachedSection(); - dynsym_.WriteCachedSection(); - hash_.WriteCachedSection(); - dynamic_.WriteCachedSection(); - } - - Elf_Word GetLoadedSize() { - CHECK_NE(loaded_size_, 0u); - return loaded_size_; - } - - void WriteMIPSabiflagsSection() { - abiflags_.Start(); - abiflags_.Write(); - abiflags_.End(); - } - - void WriteBuildIdSection() { - build_id_.Start(); - build_id_.Write(); - build_id_.End(); - } - - void WriteBuildId(uint8_t build_id[kBuildIdLen]) { - stream_.Seek(build_id_.GetDigestStart(), kSeekSet); - stream_.WriteFully(build_id, kBuildIdLen); - } - - // Returns true if all writes and seeks on the output stream succeeded. - bool Good() { - return stream_.Good(); - } - - // Returns the builder's internal stream. 
- OutputStream* GetStream() { - return &stream_; - } - - off_t AlignFileOffset(size_t alignment) { - return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet); - } - - private: - static Elf_Ehdr MakeElfHeader(InstructionSet isa, const InstructionSetFeatures* features) { - Elf_Ehdr elf_header = Elf_Ehdr(); - switch (isa) { - case InstructionSet::kArm: - // Fall through. - case InstructionSet::kThumb2: { - elf_header.e_machine = EM_ARM; - elf_header.e_flags = EF_ARM_EABI_VER5; - break; - } - case InstructionSet::kArm64: { - elf_header.e_machine = EM_AARCH64; - elf_header.e_flags = 0; - break; - } - case InstructionSet::kX86: { - elf_header.e_machine = EM_386; - elf_header.e_flags = 0; - break; - } - case InstructionSet::kX86_64: { - elf_header.e_machine = EM_X86_64; - elf_header.e_flags = 0; - break; - } - case InstructionSet::kMips: { - elf_header.e_machine = EM_MIPS; - elf_header.e_flags = (EF_MIPS_NOREORDER | - EF_MIPS_PIC | - EF_MIPS_CPIC | - EF_MIPS_ABI_O32 | - (features->AsMipsInstructionSetFeatures()->IsR6() - ? EF_MIPS_ARCH_32R6 - : EF_MIPS_ARCH_32R2)); - break; - } - case InstructionSet::kMips64: { - elf_header.e_machine = EM_MIPS; - elf_header.e_flags = (EF_MIPS_NOREORDER | - EF_MIPS_PIC | - EF_MIPS_CPIC | - EF_MIPS_ARCH_64R6); - break; - } - case InstructionSet::kNone: { - LOG(FATAL) << "No instruction set"; - break; - } - default: { - LOG(FATAL) << "Unknown instruction set " << isa; - } - } - - elf_header.e_ident[EI_MAG0] = ELFMAG0; - elf_header.e_ident[EI_MAG1] = ELFMAG1; - elf_header.e_ident[EI_MAG2] = ELFMAG2; - elf_header.e_ident[EI_MAG3] = ELFMAG3; - elf_header.e_ident[EI_CLASS] = (sizeof(Elf_Addr) == sizeof(Elf32_Addr)) - ? ELFCLASS32 : ELFCLASS64; - elf_header.e_ident[EI_DATA] = ELFDATA2LSB; - elf_header.e_ident[EI_VERSION] = EV_CURRENT; - elf_header.e_ident[EI_OSABI] = ELFOSABI_LINUX; - elf_header.e_ident[EI_ABIVERSION] = 0; - elf_header.e_type = ET_DYN; - elf_header.e_version = 1; - elf_header.e_entry = 0; - elf_header.e_ehsize = sizeof(Elf_Ehdr); - elf_header.e_phentsize = sizeof(Elf_Phdr); - elf_header.e_shentsize = sizeof(Elf_Shdr); - return elf_header; - } - - // Create program headers based on written sections. - std::vector<Elf_Phdr> MakeProgramHeaders() { - CHECK(!sections_.empty()); - std::vector<Elf_Phdr> phdrs; - { - // The program headers must start with PT_PHDR which is used in - // loaded process to determine the number of program headers. - Elf_Phdr phdr = Elf_Phdr(); - phdr.p_type = PT_PHDR; - phdr.p_flags = PF_R; - phdr.p_offset = phdr.p_vaddr = phdr.p_paddr = sizeof(Elf_Ehdr); - phdr.p_filesz = phdr.p_memsz = 0; // We need to fill this later. - phdr.p_align = sizeof(Elf_Off); - phdrs.push_back(phdr); - // Tell the linker to mmap the start of file to memory. - Elf_Phdr load = Elf_Phdr(); - load.p_type = PT_LOAD; - load.p_flags = PF_R; - load.p_offset = load.p_vaddr = load.p_paddr = 0; - load.p_filesz = load.p_memsz = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders; - load.p_align = kPageSize; - phdrs.push_back(load); - } - // Create program headers for sections. - for (auto* section : sections_) { - const Elf_Shdr& shdr = section->header_; - if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) { - DCHECK(shdr.sh_addr != 0u) << "Allocate virtual memory for the section"; - // PT_LOAD tells the linker to mmap part of the file. - // The linker can only mmap page-aligned sections. - // Single PT_LOAD may contain several ELF sections. 
- Elf_Phdr& prev = phdrs.back(); - Elf_Phdr load = Elf_Phdr(); - load.p_type = PT_LOAD; - load.p_flags = section->phdr_flags_; - load.p_offset = shdr.sh_offset; - load.p_vaddr = load.p_paddr = shdr.sh_addr; - load.p_filesz = (shdr.sh_type != SHT_NOBITS ? shdr.sh_size : 0u); - load.p_memsz = shdr.sh_size; - load.p_align = shdr.sh_addralign; - if (prev.p_type == load.p_type && - prev.p_flags == load.p_flags && - prev.p_filesz == prev.p_memsz && // Do not merge .bss - load.p_filesz == load.p_memsz) { // Do not merge .bss - // Merge this PT_LOAD with the previous one. - Elf_Word size = shdr.sh_offset + shdr.sh_size - prev.p_offset; - prev.p_filesz = size; - prev.p_memsz = size; - } else { - // If we are adding new load, it must be aligned. - CHECK_EQ(shdr.sh_addralign, (Elf_Word)kPageSize); - phdrs.push_back(load); - } - } - } - for (auto* section : sections_) { - const Elf_Shdr& shdr = section->header_; - if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) { - // Other PT_* types allow the program to locate interesting - // parts of memory at runtime. They must overlap with PT_LOAD. - if (section->phdr_type_ != 0) { - Elf_Phdr phdr = Elf_Phdr(); - phdr.p_type = section->phdr_type_; - phdr.p_flags = section->phdr_flags_; - phdr.p_offset = shdr.sh_offset; - phdr.p_vaddr = phdr.p_paddr = shdr.sh_addr; - phdr.p_filesz = phdr.p_memsz = shdr.sh_size; - phdr.p_align = shdr.sh_addralign; - phdrs.push_back(phdr); - } - } - } - // Set the size of the initial PT_PHDR. - CHECK_EQ(phdrs[0].p_type, (Elf_Word)PT_PHDR); - phdrs[0].p_filesz = phdrs[0].p_memsz = phdrs.size() * sizeof(Elf_Phdr); - - return phdrs; - } - - InstructionSet isa_; - const InstructionSetFeatures* features_; - - ErrorDelayingOutputStream stream_; - - Section rodata_; - Section text_; - Section bss_; - Section dex_; - CachedStringSection dynstr_; - SymbolSection dynsym_; - CachedSection hash_; - CachedSection dynamic_; - Section eh_frame_; - Section eh_frame_hdr_; - StringSection strtab_; - SymbolSection symtab_; - Section debug_frame_; - Section debug_info_; - Section debug_line_; - StringSection shstrtab_; - AbiflagsSection abiflags_; - BuildIdSection build_id_; - std::vector<std::unique_ptr<Section>> other_sections_; - - // List of used section in the order in which they were written. - std::vector<Section*> sections_; - Section* current_section_; // The section which is currently being written. - - bool started_; - bool write_program_headers_; - - // The size of the memory taken by the ELF file when loaded. - size_t loaded_size_; - - // Used for allocation of virtual address space. - Elf_Addr virtual_address_; - - DISALLOW_COPY_AND_ASSIGN(ElfBuilder); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ELF_BUILDER_H_ diff --git a/compiler/linker/error_delaying_output_stream.h b/compiler/linker/error_delaying_output_stream.h deleted file mode 100644 index 659f1dc093..0000000000 --- a/compiler/linker/error_delaying_output_stream.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_ -#define ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_ - -#include "output_stream.h" - -#include <android-base/logging.h> - -#include "base/macros.h" - -namespace art { -namespace linker { - -// OutputStream wrapper that delays reporting an error until Flush(). -class ErrorDelayingOutputStream FINAL : public OutputStream { - public: - explicit ErrorDelayingOutputStream(OutputStream* output) - : OutputStream(output->GetLocation()), - output_(output), - output_good_(true), - output_offset_(0) { } - - // This function always succeeds to simplify code. - // Use Good() to check the actual status of the output stream. - bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE { - if (output_good_) { - if (!output_->WriteFully(buffer, byte_count)) { - PLOG(ERROR) << "Failed to write " << byte_count - << " bytes to " << GetLocation() << " at offset " << output_offset_; - output_good_ = false; - } - } - output_offset_ += byte_count; - return true; - } - - // This function always succeeds to simplify code. - // Use Good() to check the actual status of the output stream. - off_t Seek(off_t offset, Whence whence) OVERRIDE { - // We keep shadow copy of the offset so that we return - // the expected value even if the output stream failed. - off_t new_offset; - switch (whence) { - case kSeekSet: - new_offset = offset; - break; - case kSeekCurrent: - new_offset = output_offset_ + offset; - break; - default: - LOG(FATAL) << "Unsupported seek type: " << whence; - UNREACHABLE(); - } - if (output_good_) { - off_t actual_offset = output_->Seek(offset, whence); - if (actual_offset == static_cast<off_t>(-1)) { - PLOG(ERROR) << "Failed to seek in " << GetLocation() << ". Offset=" << offset - << " whence=" << whence << " new_offset=" << new_offset; - output_good_ = false; - } - DCHECK_EQ(actual_offset, new_offset); - } - output_offset_ = new_offset; - return new_offset; - } - - // Flush the output and return whether all operations have succeeded. - // Do nothing if we already have a pending error. - bool Flush() OVERRIDE { - if (output_good_) { - output_good_ = output_->Flush(); - } - return output_good_; - } - - // Check (without flushing) whether all operations have succeeded so far. - bool Good() const { - return output_good_; - } - - private: - OutputStream* output_; - bool output_good_; // True if all writes to output succeeded. - off_t output_offset_; // Keep track of the current position in the stream. -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_ERROR_DELAYING_OUTPUT_STREAM_H_ diff --git a/compiler/linker/file_output_stream.cc b/compiler/linker/file_output_stream.cc deleted file mode 100644 index 477846ec65..0000000000 --- a/compiler/linker/file_output_stream.cc +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "file_output_stream.h" - -#include <sys/types.h> -#include <unistd.h> - -#include "base/unix_file/fd_file.h" - -namespace art { -namespace linker { - -FileOutputStream::FileOutputStream(File* file) : OutputStream(file->GetPath()), file_(file) {} - -bool FileOutputStream::WriteFully(const void* buffer, size_t byte_count) { - return file_->WriteFully(buffer, byte_count); -} - -off_t FileOutputStream::Seek(off_t offset, Whence whence) { - return lseek(file_->Fd(), offset, static_cast<int>(whence)); -} - -bool FileOutputStream::Flush() { - return file_->Flush() == 0; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/file_output_stream.h b/compiler/linker/file_output_stream.h deleted file mode 100644 index deb051fca4..0000000000 --- a/compiler/linker/file_output_stream.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_ -#define ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_ - -#include "base/os.h" - -#include "output_stream.h" - -namespace art { -namespace linker { - -class FileOutputStream FINAL : public OutputStream { - public: - explicit FileOutputStream(File* file); - - ~FileOutputStream() OVERRIDE {} - - bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE; - - off_t Seek(off_t offset, Whence whence) OVERRIDE; - - bool Flush() OVERRIDE; - - private: - File* const file_; - - DISALLOW_COPY_AND_ASSIGN(FileOutputStream); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_FILE_OUTPUT_STREAM_H_ diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h index 77d689d4db..f9e3930f56 100644 --- a/compiler/linker/linker_patch.h +++ b/compiler/linker/linker_patch.h @@ -40,20 +40,39 @@ class LinkerPatch { // which is ridiculous given we have only a handful of values here. If we // choose to squeeze the Type into fewer than 8 bits, we'll have to declare // patch_type_ as an uintN_t and do explicit static_cast<>s. + // + // Note: Actual patching is instruction_set-dependent. enum class Type : uint8_t { - kMethodRelative, // NOTE: Actual patching is instruction_set-dependent. - kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kCall, - kCallRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. - kTypeClassTable, // NOTE: Actual patching is instruction_set-dependent. - kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kStringRelative, // NOTE: Actual patching is instruction_set-dependent. - kStringInternTable, // NOTE: Actual patching is instruction_set-dependent. - kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. - kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent. 
+ kIntrinsicReference, // Boot image reference for an intrinsic, see IntrinsicObjects. + kDataBimgRelRo, + kMethodRelative, + kMethodBssEntry, + kCallRelative, + kTypeRelative, + kTypeBssEntry, + kStringRelative, + kStringBssEntry, + kBakerReadBarrierBranch, }; + static LinkerPatch IntrinsicReferencePatch(size_t literal_offset, + uint32_t pc_insn_offset, + uint32_t intrinsic_data) { + LinkerPatch patch(literal_offset, Type::kIntrinsicReference, /* target_dex_file= */ nullptr); + patch.intrinsic_data_ = intrinsic_data; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + + static LinkerPatch DataBimgRelRoPatch(size_t literal_offset, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + LinkerPatch patch(literal_offset, Type::kDataBimgRelRo, /* target_dex_file= */ nullptr); + patch.boot_image_offset_ = boot_image_offset; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch RelativeMethodPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -74,14 +93,6 @@ class LinkerPatch { return patch; } - static LinkerPatch CodePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t target_method_idx) { - LinkerPatch patch(literal_offset, Type::kCall, target_dex_file); - patch.method_idx_ = target_method_idx; - return patch; - } - static LinkerPatch RelativeCodePatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t target_method_idx) { @@ -100,16 +111,6 @@ class LinkerPatch { return patch; } - static LinkerPatch TypeClassTablePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t target_type_idx) { - LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file); - patch.type_idx_ = target_type_idx; - patch.pc_insn_offset_ = pc_insn_offset; - return patch; - } - static LinkerPatch TypeBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -130,16 +131,6 @@ class LinkerPatch { return patch; } - static LinkerPatch StringInternTablePatch(size_t literal_offset, - const DexFile* target_dex_file, - uint32_t pc_insn_offset, - uint32_t target_string_idx) { - LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file); - patch.string_idx_ = target_string_idx; - patch.pc_insn_offset_ = pc_insn_offset; - return patch; - } - static LinkerPatch StringBssEntryPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, @@ -153,7 +144,9 @@ class LinkerPatch { static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset, uint32_t custom_value1 = 0u, uint32_t custom_value2 = 0u) { - LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr); + LinkerPatch patch(literal_offset, + Type::kBakerReadBarrierBranch, + /* target_dex_file= */ nullptr); patch.baker_custom_value1_ = custom_value1; patch.baker_custom_value2_ = custom_value2; return patch; @@ -170,68 +163,55 @@ class LinkerPatch { return patch_type_; } - bool IsPcRelative() const { - switch (GetType()) { - case Type::kMethodRelative: - case Type::kMethodBssEntry: - case Type::kCallRelative: - case Type::kTypeRelative: - case Type::kTypeClassTable: - case Type::kTypeBssEntry: - case Type::kStringRelative: - case Type::kStringInternTable: - case Type::kStringBssEntry: - case Type::kBakerReadBarrierBranch: - return true; - default: - return false; - } + uint32_t IntrinsicData() const { + DCHECK(patch_type_ == Type::kIntrinsicReference); + return intrinsic_data_; + } + + uint32_t 
BootImageOffset() const { + DCHECK(patch_type_ == Type::kDataBimgRelRo); + return boot_image_offset_; } MethodReference TargetMethod() const { DCHECK(patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || - patch_type_ == Type::kCall || patch_type_ == Type::kCallRelative); return MethodReference(target_dex_file_, method_idx_); } const DexFile* TargetTypeDexFile() const { DCHECK(patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry); return target_dex_file_; } dex::TypeIndex TargetTypeIndex() const { DCHECK(patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry); return dex::TypeIndex(type_idx_); } const DexFile* TargetStringDexFile() const { DCHECK(patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return target_dex_file_; } dex::StringIndex TargetStringIndex() const { DCHECK(patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return dex::StringIndex(string_idx_); } uint32_t PcInsnOffset() const { - DCHECK(patch_type_ == Type::kMethodRelative || + DCHECK(patch_type_ == Type::kIntrinsicReference || + patch_type_ == Type::kDataBimgRelRo || + patch_type_ == Type::kMethodRelative || patch_type_ == Type::kMethodBssEntry || patch_type_ == Type::kTypeRelative || - patch_type_ == Type::kTypeClassTable || patch_type_ == Type::kTypeBssEntry || patch_type_ == Type::kStringRelative || - patch_type_ == Type::kStringInternTable || patch_type_ == Type::kStringBssEntry); return pc_insn_offset_; } @@ -263,14 +243,17 @@ class LinkerPatch { uint32_t literal_offset_ : 24; // Method code size up to 16MiB. Type patch_type_ : 8; union { - uint32_t cmp1_; // Used for relational operators. - uint32_t method_idx_; // Method index for Call/Method patches. - uint32_t type_idx_; // Type index for Type patches. - uint32_t string_idx_; // String index for String patches. + uint32_t cmp1_; // Used for relational operators. + uint32_t boot_image_offset_; // Data to write to the .data.bimg.rel.ro entry. + uint32_t method_idx_; // Method index for Call/Method patches. + uint32_t type_idx_; // Type index for Type patches. + uint32_t string_idx_; // String index for String patches. + uint32_t intrinsic_data_; // Data for IntrinsicObjects. 
uint32_t baker_custom_value1_; static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators"); + static_assert(sizeof(intrinsic_data_) == sizeof(cmp1_), "needed by relational operators"); static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators"); }; union { diff --git a/compiler/linker/linker_patch_test.cc b/compiler/linker/linker_patch_test.cc index e87dc8de6b..997418c4f7 100644 --- a/compiler/linker/linker_patch_test.cc +++ b/compiler/linker/linker_patch_test.cc @@ -25,10 +25,14 @@ TEST(LinkerPatch, LinkerPatchOperators) { const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1); const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2); LinkerPatch patches[] = { + LinkerPatch::IntrinsicReferencePatch(16u, 3000u, 1000u), + LinkerPatch::IntrinsicReferencePatch(16u, 3001u, 1000u), + LinkerPatch::IntrinsicReferencePatch(16u, 3000u, 1001u), + LinkerPatch::IntrinsicReferencePatch(16u, 3001u, 1001u), LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1000u), LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1000u), LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1001u), - LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Index 7. LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1000u), LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u), LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u), @@ -41,10 +45,6 @@ TEST(LinkerPatch, LinkerPatchOperators) { LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3001u, 1000u), LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3000u, 1001u), LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3001u, 1001u), - LinkerPatch::CodePatch(16u, dex_file1, 1000u), - LinkerPatch::CodePatch(16u, dex_file1, 1001u), - LinkerPatch::CodePatch(16u, dex_file2, 1000u), - LinkerPatch::CodePatch(16u, dex_file2, 1001u), LinkerPatch::RelativeCodePatch(16u, dex_file1, 1000u), LinkerPatch::RelativeCodePatch(16u, dex_file1, 1001u), LinkerPatch::RelativeCodePatch(16u, dex_file2, 1000u), @@ -86,6 +86,10 @@ TEST(LinkerPatch, LinkerPatchOperators) { LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u), LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u), + LinkerPatch::IntrinsicReferencePatch(32u, 3000u, 1000u), + LinkerPatch::IntrinsicReferencePatch(32u, 3001u, 1000u), + LinkerPatch::IntrinsicReferencePatch(32u, 3000u, 1001u), + LinkerPatch::IntrinsicReferencePatch(32u, 3001u, 1001u), LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1000u), LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1000u), LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1001u), @@ -102,10 +106,6 @@ TEST(LinkerPatch, LinkerPatchOperators) { LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3001u, 1000u), LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3000u, 1001u), LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3001u, 1001u), - LinkerPatch::CodePatch(32u, dex_file1, 1000u), - LinkerPatch::CodePatch(32u, dex_file1, 1001u), - LinkerPatch::CodePatch(32u, dex_file2, 1000u), - LinkerPatch::CodePatch(32u, dex_file2, 1001u), LinkerPatch::RelativeCodePatch(32u, dex_file1, 1000u), LinkerPatch::RelativeCodePatch(32u, dex_file1, 1001u), LinkerPatch::RelativeCodePatch(32u, dex_file2, 1000u), @@ -147,20 +147,20 @@ 
TEST(LinkerPatch, LinkerPatchOperators) { LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u), LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u), - LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Same as patch at index 3. + LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u), // Same as patch at index 7. }; constexpr size_t last_index = arraysize(patches) - 1u; for (size_t i = 0; i != arraysize(patches); ++i) { for (size_t j = 0; j != arraysize(patches); ++j) { - bool expected = (i != last_index ? i : 3u) == (j != last_index ? j : 3u); + bool expected = (i != last_index ? i : 7u) == (j != last_index ? j : 7u); EXPECT_EQ(expected, patches[i] == patches[j]) << i << " " << j; } } for (size_t i = 0; i != arraysize(patches); ++i) { for (size_t j = 0; j != arraysize(patches); ++j) { - bool expected = (i != last_index ? i : 3u) < (j != last_index ? j : 3u); + bool expected = (i != last_index ? i : 7u) < (j != last_index ? j : 7u); EXPECT_EQ(expected, patches[i] < patches[j]) << i << " " << j; } } diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc deleted file mode 100644 index 69e0846cb7..0000000000 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/mips/relative_patcher_mips.h" - -#include "compiled_method.h" -#include "debug/method_debug_info.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -uint32_t MipsRelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t MipsRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t MipsRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -void MipsRelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS"; -} - -void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - bool high_patch = ((*code)[literal_offset + 0] == 0x34) && ((*code)[literal_offset + 1] == 0x12); - - // Perform basic sanity checks. 
- if (high_patch) { - if (is_r6) { - // auipc reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); - DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); - } else { - // lui reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00); - DCHECK_EQ((*code)[literal_offset + 3], 0x3C); - } - } else { - // instr reg(s), offset_low - CHECK_EQ((*code)[literal_offset + 0], 0x78); - CHECK_EQ((*code)[literal_offset + 1], 0x56); - } - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". - - if (high_patch) { - // lui reg, offset_high / auipc reg, offset_high - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); - } else { - // instr reg(s), offset_low - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8); - } -} - -void MipsRelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -std::vector<debug::MethodDebugInfo> MipsRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h deleted file mode 100644 index 5714a7d1b0..0000000000 --- a/compiler/linker/mips/relative_patcher_mips.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ -#define ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ - -#include "arch/mips/instruction_set_features_mips.h" -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class MipsRelativePatcher FINAL : public RelativePatcher { - public: - explicit MipsRelativePatcher(const MipsInstructionSetFeatures* features) - : is_r6(features->IsR6()) {} - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - private: - bool is_r6; - - DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_MIPS_RELATIVE_PATCHER_MIPS_H_ diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc deleted file mode 100644 index 629fdd535d..0000000000 --- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips/relative_patcher_mips.h" -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class Mips32r6RelativePatcherTest : public RelativePatcherTest { - public: - Mips32r6RelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips, "mips32r6") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t Mips32r6RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x8E, // lw s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetHigh = 0; // At auipc. -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetLow1 = 4; // At addiu. -const uint32_t Mips32r6RelativePatcherTest::kLiteralOffsetLow2 = 8; // At lw. -const uint32_t Mips32r6RelativePatcherTest::kAnchorOffset = 0; // At auipc (where PC+0 points). -const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in addiu/lw. 
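The removed MIPS patchers, and the test expectations above, split a 32-bit PC-relative offset into a high half for lui/auipc and a low half consumed by an instruction that sign-extends its 16-bit immediate (addiu/lw). The adjustment diff += (diff & 0x8000) << 1 pre-compensates for that sign extension: when bit 15 of the offset is set, the recombined low half comes out negative, so the high half must be one larger. A minimal standalone C++ sketch of the arithmetic (not taken from the deleted sources; the offset value is an arbitrary example):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t diff = 0x12348888u;                         // example PC-relative offset
  // Pre-compensate for the sign extension of the low half, as in the removed
  // patchers: if bit 15 is set, bump the high half by one.
  const uint32_t adjusted = diff + ((diff & 0x8000u) << 1);  // 0x12358888

  const int32_t high = static_cast<int32_t>(adjusted >> 16);  // immediate for lui/auipc
  const int32_t low = static_cast<int16_t>(diff & 0xFFFFu);   // sign-extended by addiu/lw

  // Recombining the two halves reproduces the original 32-bit offset.
  assert(static_cast<uint32_t>((high << 16) + low) == diff);
  return 0;
}

With bit 15 clear (say 0x12341234) the adjustment is a no-op and the high half is simply the top 16 bits of the offset.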
- - const uint8_t expected_code[] = { - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x8E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void Mips32r6RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -TEST_F(Mips32r6RelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -TEST_F(Mips32r6RelativePatcherTest, StringReference) { - TestStringReference(/* string_offset*/ 0x87651234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc deleted file mode 100644 index d876c76daa..0000000000 --- a/compiler/linker/mips/relative_patcher_mips_test.cc +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips/relative_patcher_mips.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class MipsRelativePatcherTest : public RelativePatcherTest { - public: - MipsRelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips, "mips32r2") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t MipsRelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x00, 0x00, 0x10, 0x04, // nal - 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 - 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x8E, // lw s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t MipsRelativePatcherTest::kLiteralOffsetHigh = 4; // At lui. -const uint32_t MipsRelativePatcherTest::kLiteralOffsetLow1 = 12; // At addiu. -const uint32_t MipsRelativePatcherTest::kLiteralOffsetLow2 = 16; // At lw. -const uint32_t MipsRelativePatcherTest::kAnchorOffset = 8; // At addu (where PC+0 points). -const ArrayRef<const uint8_t> MipsRelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in addiu/lw. 
- - const uint8_t expected_code[] = { - 0x00, 0x00, 0x10, 0x04, - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C, - 0x21, 0x90, 0x5F, 0x02, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x8E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void MipsRelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::RelativeStringPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); -} - -TEST_F(MipsRelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -TEST_F(MipsRelativePatcherTest, StringReference) { - TestStringReference(/* string_offset*/ 0x87651234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.cc b/compiler/linker/mips64/relative_patcher_mips64.cc deleted file mode 100644 index aae5746278..0000000000 --- a/compiler/linker/mips64/relative_patcher_mips64.cc +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/mips64/relative_patcher_mips64.h" - -#include "compiled_method.h" -#include "debug/method_debug_info.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -uint32_t Mips64RelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t Mips64RelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. 
-} - -uint32_t Mips64RelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -void Mips64RelativePatcher::PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) { - UNIMPLEMENTED(FATAL) << "PatchCall unimplemented on MIPS64"; -} - -void Mips64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - bool high_patch = ((*code)[literal_offset + 0] == 0x34) && ((*code)[literal_offset + 1] == 0x12); - - // Perform basic sanity checks. - if (high_patch) { - // auipc reg, offset_high - DCHECK_EQ(((*code)[literal_offset + 2] & 0x1F), 0x1E); - DCHECK_EQ(((*code)[literal_offset + 3] & 0xFC), 0xEC); - } else { - // instr reg(s), offset_low - CHECK_EQ((*code)[literal_offset + 0], 0x78); - CHECK_EQ((*code)[literal_offset + 1], 0x56); - } - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - // Note that a combination of auipc with an instruction that adds a sign-extended - // 16-bit immediate operand (e.g. ld) provides a PC-relative range of - // PC-0x80000000 to PC+0x7FFF7FFF on MIPS64, that is, short of 2GB on one end - // by 32KB. - diff += (diff & 0x8000) << 1; // Account for sign extension in "instr reg(s), offset_low". - - if (high_patch) { - // auipc reg, offset_high - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 24); - } else { - // instr reg(s), offset_low - (*code)[literal_offset + 0] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1] = static_cast<uint8_t>(diff >> 8); - } -} - -void Mips64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -std::vector<debug::MethodDebugInfo> Mips64RelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/mips64/relative_patcher_mips64.h b/compiler/linker/mips64/relative_patcher_mips64.h deleted file mode 100644 index 183bbedb39..0000000000 --- a/compiler/linker/mips64/relative_patcher_mips64.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ -#define ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ - -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class Mips64RelativePatcher FINAL : public RelativePatcher { - public: - Mips64RelativePatcher() {} - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - private: - DISALLOW_COPY_AND_ASSIGN(Mips64RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_MIPS64_RELATIVE_PATCHER_MIPS64_H_ diff --git a/compiler/linker/mips64/relative_patcher_mips64_test.cc b/compiler/linker/mips64/relative_patcher_mips64_test.cc deleted file mode 100644 index a02f5005e8..0000000000 --- a/compiler/linker/mips64/relative_patcher_mips64_test.cc +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "linker/mips64/relative_patcher_mips64.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class Mips64RelativePatcherTest : public RelativePatcherTest { - public: - Mips64RelativePatcherTest() : RelativePatcherTest(InstructionSet::kMips64, "default") {} - - protected: - static const uint8_t kUnpatchedPcRelativeRawCode[]; - static const uint8_t kUnpatchedPcRelativeCallRawCode[]; - static const uint32_t kLiteralOffsetHigh; - static const uint32_t kLiteralOffsetLow1; - static const uint32_t kLiteralOffsetLow2; - static const uint32_t kAnchorOffset; - static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } - - void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); - void TestStringBssEntry(uint32_t bss_begin, uint32_t string_entry_offset); - void TestStringReference(uint32_t string_offset); -}; - -const uint8_t Mips64RelativePatcherTest::kUnpatchedPcRelativeRawCode[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x66, // daddiu s2, s2, low(diff); placeholder = 0x5678 - 0x78, 0x56, 0x52, 0x9E, // lwu s2, (low(diff))(s2) ; placeholder = 0x5678 -}; -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetHigh = 0; // At auipc. -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetLow1 = 4; // At daddiu. -const uint32_t Mips64RelativePatcherTest::kLiteralOffsetLow2 = 8; // At lwu. -const uint32_t Mips64RelativePatcherTest::kAnchorOffset = 0; // At auipc (where PC+0 points). -const ArrayRef<const uint8_t> Mips64RelativePatcherTest::kUnpatchedPcRelativeCode( - kUnpatchedPcRelativeRawCode); - -void Mips64RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, - uint32_t target_offset) { - AddCompiledMethod(MethodRef(1u), kUnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - - uint32_t diff = target_offset - (result.second + kAnchorOffset); - diff += (diff & 0x8000) << 1; // Account for sign extension in daddiu/lwu. 
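The `diff += (diff & 0x8000) << 1` fixup used here (and, above, in the deleted Mips64RelativePatcher::PatchPcRelativeReference) compensates for daddiu/lwu sign-extending their 16-bit immediate: whenever bit 15 of the low half is set, the value auipc loads into the high half must be one larger. A minimal, self-contained sketch of that identity follows; the test values and the main() harness are illustrative only and not part of the ART sources.

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      for (uint32_t diff : {0x00001234u, 0x00018000u, 0xFFFF8000u}) {
        // The patcher's fixup: bump the high half whenever the low half will be
        // sign-extended to a negative value by daddiu/lwu.
        uint32_t adjusted = diff + ((diff & 0x8000u) << 1);
        int32_t hi = static_cast<int32_t>(adjusted) >> 16;   // written into auipc
        int32_t lo = static_cast<int16_t>(diff & 0xFFFFu);   // sign-extended by daddiu/lwu
        // auipc adds (hi << 16) to the anchor PC; the low instruction then adds lo.
        assert(hi * 0x10000 + lo == static_cast<int32_t>(diff));
      }
      return 0;
    }

This is also the source of the range noted in the deleted patcher's comment: with the high half capped at 0x7FFF and the low half at 0x7FFF, the reachable window is PC-0x80000000 to PC+0x7FFF7FFF, 32KB short of 2GB on the positive side.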
- - const uint8_t expected_code[] = { - static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x66, - static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x9E, - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -void Mips64RelativePatcherTest::TestStringBssEntry(uint32_t bss_begin, - uint32_t string_entry_offset) { - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, string_entry_offset); - bss_begin_ = bss_begin; - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kLiteralOffsetHigh, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow1, nullptr, kAnchorOffset, kStringIndex), - LinkerPatch::StringBssEntryPatch(kLiteralOffsetLow2, nullptr, kAnchorOffset, kStringIndex) - }; - CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), bss_begin_ + string_entry_offset); -} - -TEST_F(Mips64RelativePatcherTest, StringBssEntry) { - TestStringBssEntry(/* bss_begin */ 0x12345678, /* string_entry_offset */ 0x1234); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/output_stream.h b/compiler/linker/output_stream.h deleted file mode 100644 index 5310e2fa41..0000000000 --- a/compiler/linker/output_stream.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_OUTPUT_STREAM_H_ -#define ART_COMPILER_LINKER_OUTPUT_STREAM_H_ - -#include <ostream> -#include <string> - -#include "base/macros.h" - -namespace art { -namespace linker { - -enum Whence { - kSeekSet = SEEK_SET, - kSeekCurrent = SEEK_CUR, - kSeekEnd = SEEK_END, -}; -std::ostream& operator<<(std::ostream& os, const Whence& rhs); - -class OutputStream { - public: - explicit OutputStream(const std::string& location) : location_(location) {} - - virtual ~OutputStream() {} - - const std::string& GetLocation() const { - return location_; - } - - virtual bool WriteFully(const void* buffer, size_t byte_count) = 0; - - virtual off_t Seek(off_t offset, Whence whence) = 0; - - /* - * Flushes the stream. Returns whether the operation was successful. - * - * An OutputStream may delay reporting errors from WriteFully() or - * Seek(). In that case, Flush() shall report any pending error. - */ - virtual bool Flush() = 0; - - private: - const std::string location_; - - DISALLOW_COPY_AND_ASSIGN(OutputStream); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_OUTPUT_STREAM_H_ diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc index f93ea7a709..00231b1240 100644 --- a/compiler/linker/output_stream_test.cc +++ b/compiler/linker/output_stream_test.cc @@ -14,15 +14,14 @@ * limitations under the License. 
*/ -#include "file_output_stream.h" -#include "vector_output_stream.h" - #include <android-base/logging.h> #include "base/macros.h" #include "base/unix_file/fd_file.h" -#include "buffered_output_stream.h" #include "common_runtime_test.h" +#include "stream/buffered_output_stream.h" +#include "stream/file_output_stream.h" +#include "stream/vector_output_stream.h" namespace art { namespace linker { @@ -106,20 +105,20 @@ TEST_F(OutputStreamTest, BufferedFlush) { CheckingOutputStream() : OutputStream("dummy"), flush_called(false) { } - ~CheckingOutputStream() OVERRIDE {} + ~CheckingOutputStream() override {} bool WriteFully(const void* buffer ATTRIBUTE_UNUSED, - size_t byte_count ATTRIBUTE_UNUSED) OVERRIDE { + size_t byte_count ATTRIBUTE_UNUSED) override { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } - off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) OVERRIDE { + off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) override { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } - bool Flush() OVERRIDE { + bool Flush() override { flush_called = true; return true; } diff --git a/compiler/linker/relative_patcher.cc b/compiler/linker/relative_patcher.cc deleted file mode 100644 index 13877f8f12..0000000000 --- a/compiler/linker/relative_patcher.cc +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/relative_patcher.h" - -#include "debug/method_debug_info.h" -#ifdef ART_ENABLE_CODEGEN_arm -#include "linker/arm/relative_patcher_thumb2.h" -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 -#include "linker/arm64/relative_patcher_arm64.h" -#endif -#ifdef ART_ENABLE_CODEGEN_mips -#include "linker/mips/relative_patcher_mips.h" -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 -#include "linker/mips64/relative_patcher_mips64.h" -#endif -#ifdef ART_ENABLE_CODEGEN_x86 -#include "linker/x86/relative_patcher_x86.h" -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 -#include "linker/x86_64/relative_patcher_x86_64.h" -#endif -#include "output_stream.h" - -namespace art { -namespace linker { - -std::unique_ptr<RelativePatcher> RelativePatcher::Create( - InstructionSet instruction_set, - const InstructionSetFeatures* features, - RelativePatcherTargetProvider* provider) { - class RelativePatcherNone FINAL : public RelativePatcher { - public: - RelativePatcherNone() { } - - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE { - return offset; // No space reserved; no patches expected. - } - - uint32_t WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) OVERRIDE { - return offset; // No thunks added; no patches expected. 
- } - - void PatchCall(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - uint32_t literal_offset ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative call patch."; - } - - void PatchPcRelativeReference(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED, - uint32_t target_offset ATTRIBUTE_UNUSED) OVERRIDE { - LOG(FATAL) << "Unexpected relative dex cache array patch."; - } - - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unexpected baker read barrier branch patch."; - } - - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) OVERRIDE { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. - } - - private: - DISALLOW_COPY_AND_ASSIGN(RelativePatcherNone); - }; - - UNUSED(features); - UNUSED(provider); - switch (instruction_set) { -#ifdef ART_ENABLE_CODEGEN_x86 - case InstructionSet::kX86: - return std::unique_ptr<RelativePatcher>(new X86RelativePatcher()); -#endif -#ifdef ART_ENABLE_CODEGEN_x86_64 - case InstructionSet::kX86_64: - return std::unique_ptr<RelativePatcher>(new X86_64RelativePatcher()); -#endif -#ifdef ART_ENABLE_CODEGEN_arm - case InstructionSet::kArm: - // Fall through: we generate Thumb2 code for "arm". - case InstructionSet::kThumb2: - return std::unique_ptr<RelativePatcher>(new Thumb2RelativePatcher(provider)); -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 - case InstructionSet::kArm64: - return std::unique_ptr<RelativePatcher>( - new Arm64RelativePatcher(provider, features->AsArm64InstructionSetFeatures())); -#endif -#ifdef ART_ENABLE_CODEGEN_mips - case InstructionSet::kMips: - return std::unique_ptr<RelativePatcher>( - new MipsRelativePatcher(features->AsMipsInstructionSetFeatures())); -#endif -#ifdef ART_ENABLE_CODEGEN_mips64 - case InstructionSet::kMips64: - return std::unique_ptr<RelativePatcher>(new Mips64RelativePatcher()); -#endif - default: - return std::unique_ptr<RelativePatcher>(new RelativePatcherNone); - } -} - -bool RelativePatcher::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) { - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - DCHECK_LE(aligned_code_delta, sizeof(kPadding)); - if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) { - return false; - } - size_code_alignment_ += aligned_code_delta; - return true; -} - -bool RelativePatcher::WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { - if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { - return false; - } - size_relative_call_thunks_ += thunk.size(); - return true; -} - -bool RelativePatcher::WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk) { - if (UNLIKELY(!out->WriteFully(thunk.data(), thunk.size()))) { - return false; - } - size_misc_thunks_ += thunk.size(); - return true; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h deleted file mode 100644 index b58e3dffbd..0000000000 --- a/compiler/linker/relative_patcher.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file 
except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ -#define ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ - -#include <vector> - -#include "arch/instruction_set.h" -#include "arch/instruction_set_features.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "dex/method_reference.h" - -namespace art { - -class CompiledMethod; - -namespace debug { -struct MethodDebugInfo; -} // namespace debug - -namespace linker { - -class LinkerPatch; -class OutputStream; - -/** - * @class RelativePatcherTargetProvider - * @brief Interface for providing method offsets for relative call targets. - */ -class RelativePatcherTargetProvider { - public: - /** - * Find the offset of the target method of a relative call if known. - * - * The process of assigning target method offsets includes calls to the relative patcher's - * ReserveSpace() which in turn can use FindMethodOffset() to determine if a method already - * has an offset assigned and, if so, what's that offset. If the offset has not yet been - * assigned or if it's too far for the particular architecture's relative call, - * ReserveSpace() may need to allocate space for a special dispatch thunk. - * - * @param ref the target method of the relative call. - * @return true in the first element of the pair if the method was found, false otherwise; - * if found, the second element specifies the offset. - */ - virtual std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) = 0; - - protected: - virtual ~RelativePatcherTargetProvider() { } -}; - -/** - * @class RelativePatcher - * @brief Interface for architecture-specific link-time patching of PC-relative references. - */ -class RelativePatcher { - public: - static std::unique_ptr<RelativePatcher> Create( - InstructionSet instruction_set, const InstructionSetFeatures* features, - RelativePatcherTargetProvider* provider); - - virtual ~RelativePatcher() { } - - uint32_t CodeAlignmentSize() const { - return size_code_alignment_; - } - - uint32_t RelativeCallThunksSize() const { - return size_relative_call_thunks_; - } - - uint32_t MiscThunksSize() const { - return size_misc_thunks_; - } - - // Reserve space for thunks if needed before a method, return adjusted offset. - virtual uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) = 0; - - // Reserve space for thunks if needed after the last method, return adjusted offset. - // The caller may use this method to preemptively force thunk space reservation and - // then resume reservation for more methods. This is useful when there is a gap in - // the .text segment, for example when going to the next oat file for multi-image. - virtual uint32_t ReserveSpaceEnd(uint32_t offset) = 0; - - // Write relative call thunks if needed, return adjusted offset. Returns 0 on write failure. - virtual uint32_t WriteThunks(OutputStream* out, uint32_t offset) = 0; - - // Patch method code. The input displacement is relative to the patched location, - // the patcher may need to adjust it if the correct base is different. 
- virtual void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) = 0; - - // Patch a reference to a dex cache location. - virtual void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) = 0; - - // Patch a branch to a Baker read barrier thunk. - virtual void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) = 0; - - virtual std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo( - uint32_t executable_offset) = 0; - - protected: - RelativePatcher() - : size_code_alignment_(0u), - size_relative_call_thunks_(0u), - size_misc_thunks_(0u) { - } - - bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta); - bool WriteThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); - bool WriteMiscThunk(OutputStream* out, const ArrayRef<const uint8_t>& thunk); - - private: - uint32_t size_code_alignment_; - uint32_t size_relative_call_thunks_; - uint32_t size_misc_thunks_; - - DISALLOW_COPY_AND_ASSIGN(RelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_H_ diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h deleted file mode 100644 index d21f2795b9..0000000000 --- a/compiler/linker/relative_patcher_test.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ -#define ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ - -#include "arch/instruction_set.h" -#include "arch/instruction_set_features.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "compiled_method-inl.h" -#include "dex/verification_results.h" -#include "dex/method_reference.h" -#include "dex/string_reference.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_options.h" -#include "globals.h" -#include "gtest/gtest.h" -#include "linker/relative_patcher.h" -#include "oat.h" -#include "oat_quick_method_header.h" -#include "vector_output_stream.h" - -namespace art { -namespace linker { - -// Base class providing infrastructure for architecture-specific tests. 
-class RelativePatcherTest : public testing::Test { - protected: - RelativePatcherTest(InstructionSet instruction_set, const std::string& variant) - : compiler_options_(), - verification_results_(&compiler_options_), - driver_(&compiler_options_, - &verification_results_, - Compiler::kQuick, - instruction_set, - /* instruction_set_features*/ nullptr, - /* image_classes */ nullptr, - /* compiled_classes */ nullptr, - /* compiled_methods */ nullptr, - /* thread_count */ 1u, - /* swap_fd */ -1, - /* profile_compilation_info */ nullptr), - error_msg_(), - instruction_set_(instruction_set), - features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), - method_offset_map_(), - patcher_(RelativePatcher::Create(instruction_set, features_.get(), &method_offset_map_)), - bss_begin_(0u), - compiled_method_refs_(), - compiled_methods_(), - patched_code_(), - output_(), - out_("test output stream", &output_) { - CHECK(error_msg_.empty()) << instruction_set << "/" << variant; - patched_code_.reserve(16 * KB); - } - - MethodReference MethodRef(uint32_t method_idx) { - CHECK_NE(method_idx, 0u); - return MethodReference(nullptr, method_idx); - } - - void AddCompiledMethod( - MethodReference method_ref, - const ArrayRef<const uint8_t>& code, - const ArrayRef<const LinkerPatch>& patches = ArrayRef<const LinkerPatch>()) { - compiled_method_refs_.push_back(method_ref); - compiled_methods_.emplace_back(new CompiledMethod( - &driver_, - instruction_set_, - code, - /* frame_size_in_bytes */ 0u, - /* core_spill_mask */ 0u, - /* fp_spill_mask */ 0u, - /* method_info */ ArrayRef<const uint8_t>(), - /* vmap_table */ ArrayRef<const uint8_t>(), - /* cfi_info */ ArrayRef<const uint8_t>(), - patches)); - } - - uint32_t CodeAlignmentSize(uint32_t header_offset_to_align) { - // We want to align the code rather than the preheader. - uint32_t unaligned_code_offset = header_offset_to_align + sizeof(OatQuickMethodHeader); - uint32_t aligned_code_offset = - CompiledMethod::AlignCode(unaligned_code_offset, instruction_set_); - return aligned_code_offset - unaligned_code_offset; - } - - void Link() { - // Reserve space. - static_assert(kTrampolineOffset == 0u, "Unexpected trampoline offset."); - uint32_t offset = kTrampolineSize; - size_t idx = 0u; - for (auto& compiled_method : compiled_methods_) { - offset = patcher_->ReserveSpace(offset, compiled_method.get(), compiled_method_refs_[idx]); - - uint32_t alignment_size = CodeAlignmentSize(offset); - offset += alignment_size; - - offset += sizeof(OatQuickMethodHeader); - uint32_t quick_code_offset = offset + compiled_method->CodeDelta(); - const auto code = compiled_method->GetQuickCode(); - offset += code.size(); - - method_offset_map_.map.Put(compiled_method_refs_[idx], quick_code_offset); - ++idx; - } - offset = patcher_->ReserveSpaceEnd(offset); - uint32_t output_size = offset; - output_.reserve(output_size); - - // Write data. 
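// Note: Link() works in two passes. The loop above only assigns offsets:
// ReserveSpace() lets the patcher budget for thunks before each method, the
// code (rather than the method header) is aligned, and the resulting quick-code
// offset is recorded in method_offset_map_ for later lookups. The loop below
// then emits the actual bytes in the same order (WriteThunks(), alignment
// padding, a dummy OatQuickMethodHeader, then the code with every LinkerPatch
// applied), so the trailing CHECKs can verify that both passes agreed on the
// total output size.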
- DCHECK(output_.empty()); - uint8_t dummy_trampoline[kTrampolineSize]; - memset(dummy_trampoline, 0, sizeof(dummy_trampoline)); - out_.WriteFully(dummy_trampoline, kTrampolineSize); - offset = kTrampolineSize; - static const uint8_t kPadding[] = { - 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u - }; - uint8_t dummy_header[sizeof(OatQuickMethodHeader)]; - memset(dummy_header, 0, sizeof(dummy_header)); - for (auto& compiled_method : compiled_methods_) { - offset = patcher_->WriteThunks(&out_, offset); - - uint32_t alignment_size = CodeAlignmentSize(offset); - CHECK_LE(alignment_size, sizeof(kPadding)); - out_.WriteFully(kPadding, alignment_size); - offset += alignment_size; - - out_.WriteFully(dummy_header, sizeof(OatQuickMethodHeader)); - offset += sizeof(OatQuickMethodHeader); - ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); - if (!compiled_method->GetPatches().empty()) { - patched_code_.assign(code.begin(), code.end()); - code = ArrayRef<const uint8_t>(patched_code_); - for (const LinkerPatch& patch : compiled_method->GetPatches()) { - if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - auto result = method_offset_map_.FindMethodOffset(patch.TargetMethod()); - uint32_t target_offset = - result.first ? result.second : kTrampolineOffset + compiled_method->CodeDelta(); - patcher_->PatchCall(&patched_code_, patch.LiteralOffset(), - offset + patch.LiteralOffset(), target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { - uint32_t target_offset = - bss_begin_ + string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); - patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset + patch.LiteralOffset(), - target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kStringRelative) { - uint32_t target_offset = - string_index_to_offset_map_.Get(patch.TargetStringIndex().index_); - patcher_->PatchPcRelativeReference(&patched_code_, - patch, - offset + patch.LiteralOffset(), - target_offset); - } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - patcher_->PatchBakerReadBarrierBranch(&patched_code_, - patch, - offset + patch.LiteralOffset()); - } else { - LOG(FATAL) << "Bad patch type. " << patch.GetType(); - UNREACHABLE(); - } - } - } - out_.WriteFully(&code[0], code.size()); - offset += code.size(); - } - offset = patcher_->WriteThunks(&out_, offset); - CHECK_EQ(offset, output_size); - CHECK_EQ(output_.size(), output_size); - } - - bool CheckLinkedMethod(MethodReference method_ref, const ArrayRef<const uint8_t>& expected_code) { - // Sanity check: original code size must match linked_code.size(). - size_t idx = 0u; - for (auto ref : compiled_method_refs_) { - if (ref == method_ref) { - break; - } - ++idx; - } - CHECK_NE(idx, compiled_method_refs_.size()); - CHECK_EQ(compiled_methods_[idx]->GetQuickCode().size(), expected_code.size()); - - auto result = method_offset_map_.FindMethodOffset(method_ref); - CHECK(result.first); // Must have been linked. - size_t offset = result.second - compiled_methods_[idx]->CodeDelta(); - CHECK_LT(offset, output_.size()); - CHECK_LE(offset + expected_code.size(), output_.size()); - ArrayRef<const uint8_t> linked_code(&output_[offset], expected_code.size()); - if (linked_code == expected_code) { - return true; - } - // Log failure info. 
- DumpDiff(expected_code, linked_code); - return false; - } - - void DumpDiff(const ArrayRef<const uint8_t>& expected_code, - const ArrayRef<const uint8_t>& linked_code) { - std::ostringstream expected_hex; - std::ostringstream linked_hex; - std::ostringstream diff_indicator; - static const char digits[] = "0123456789abcdef"; - bool found_diff = false; - for (size_t i = 0; i != expected_code.size(); ++i) { - expected_hex << " " << digits[expected_code[i] >> 4] << digits[expected_code[i] & 0xf]; - linked_hex << " " << digits[linked_code[i] >> 4] << digits[linked_code[i] & 0xf]; - if (!found_diff) { - found_diff = (expected_code[i] != linked_code[i]); - diff_indicator << (found_diff ? " ^^" : " "); - } - } - CHECK(found_diff); - std::string expected_hex_str = expected_hex.str(); - std::string linked_hex_str = linked_hex.str(); - std::string diff_indicator_str = diff_indicator.str(); - if (diff_indicator_str.length() > 60) { - CHECK_EQ(diff_indicator_str.length() % 3u, 0u); - size_t remove = diff_indicator_str.length() / 3 - 5; - std::ostringstream oss; - oss << "[stripped " << remove << "]"; - std::string replacement = oss.str(); - expected_hex_str.replace(0u, remove * 3u, replacement); - linked_hex_str.replace(0u, remove * 3u, replacement); - diff_indicator_str.replace(0u, remove * 3u, replacement); - } - LOG(ERROR) << "diff expected_code linked_code"; - LOG(ERROR) << "<" << expected_hex_str; - LOG(ERROR) << ">" << linked_hex_str; - LOG(ERROR) << " " << diff_indicator_str; - } - - // Map method reference to assinged offset. - // Wrap the map in a class implementing RelativePatcherTargetProvider. - class MethodOffsetMap FINAL : public RelativePatcherTargetProvider { - public: - std::pair<bool, uint32_t> FindMethodOffset(MethodReference ref) OVERRIDE { - auto it = map.find(ref); - if (it == map.end()) { - return std::pair<bool, uint32_t>(false, 0u); - } else { - return std::pair<bool, uint32_t>(true, it->second); - } - } - SafeMap<MethodReference, uint32_t> map; - }; - - static const uint32_t kTrampolineSize = 4u; - static const uint32_t kTrampolineOffset = 0u; - - CompilerOptions compiler_options_; - VerificationResults verification_results_; - CompilerDriver driver_; // Needed for constructing CompiledMethod. - std::string error_msg_; - InstructionSet instruction_set_; - std::unique_ptr<const InstructionSetFeatures> features_; - MethodOffsetMap method_offset_map_; - std::unique_ptr<RelativePatcher> patcher_; - uint32_t bss_begin_; - SafeMap<uint32_t, uint32_t> string_index_to_offset_map_; - std::vector<MethodReference> compiled_method_refs_; - std::vector<std::unique_ptr<CompiledMethod>> compiled_methods_; - std::vector<uint8_t> patched_code_; - std::vector<uint8_t> output_; - VectorOutputStream out_; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_RELATIVE_PATCHER_TEST_H_ diff --git a/compiler/linker/vector_output_stream.cc b/compiler/linker/vector_output_stream.cc deleted file mode 100644 index f2cae5b1d5..0000000000 --- a/compiler/linker/vector_output_stream.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "vector_output_stream.h" - -#include <android-base/logging.h> - -namespace art { -namespace linker { - -VectorOutputStream::VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector) - : OutputStream(location), offset_(vector->size()), vector_(vector) {} - -off_t VectorOutputStream::Seek(off_t offset, Whence whence) { - CHECK(whence == kSeekSet || whence == kSeekCurrent || whence == kSeekEnd) << whence; - off_t new_offset = 0; - switch (whence) { - case kSeekSet: { - new_offset = offset; - break; - } - case kSeekCurrent: { - new_offset = offset_ + offset; - break; - } - case kSeekEnd: { - new_offset = vector_->size() + offset; - break; - } - } - EnsureCapacity(new_offset); - offset_ = new_offset; - return offset_; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/vector_output_stream.h b/compiler/linker/vector_output_stream.h deleted file mode 100644 index 92caf596ab..0000000000 --- a/compiler/linker/vector_output_stream.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_ -#define ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_ - -#include "output_stream.h" - -#include <string.h> -#include <string> -#include <vector> - -namespace art { -namespace linker { - -class VectorOutputStream FINAL : public OutputStream { - public: - VectorOutputStream(const std::string& location, std::vector<uint8_t>* vector); - - ~VectorOutputStream() OVERRIDE {} - - bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE { - if (static_cast<size_t>(offset_) == vector_->size()) { - const uint8_t* start = reinterpret_cast<const uint8_t*>(buffer); - vector_->insert(vector_->end(), &start[0], &start[byte_count]); - offset_ += byte_count; - } else { - off_t new_offset = offset_ + byte_count; - EnsureCapacity(new_offset); - memcpy(&(*vector_)[offset_], buffer, byte_count); - offset_ = new_offset; - } - return true; - } - - off_t Seek(off_t offset, Whence whence) OVERRIDE; - - bool Flush() OVERRIDE { - return true; - } - - private: - void EnsureCapacity(off_t new_offset) { - if (new_offset > static_cast<off_t>(vector_->size())) { - vector_->resize(new_offset); - } - } - - off_t offset_; - std::vector<uint8_t>* const vector_; - - DISALLOW_COPY_AND_ASSIGN(VectorOutputStream); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_VECTOR_OUTPUT_STREAM_H_ diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc deleted file mode 100644 index cdd2cef13a..0000000000 --- a/compiler/linker/x86/relative_patcher_x86.cc +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86.h" - -#include "compiled_method.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -void X86RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - uint32_t anchor_literal_offset = patch.PcInsnOffset(); - uint32_t literal_offset = patch.LiteralOffset(); - - // Check that the anchor points to pop in a "call +0; pop <reg>" sequence. - DCHECK_GE(anchor_literal_offset, 5u); - DCHECK_LT(anchor_literal_offset, code->size()); - DCHECK_EQ((*code)[anchor_literal_offset - 5u], 0xe8u); - DCHECK_EQ((*code)[anchor_literal_offset - 4u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 3u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 2u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset - 1u], 0x00u); - DCHECK_EQ((*code)[anchor_literal_offset] & 0xf8u, 0x58u); - - // Check that the patched data contains kDummy32BitOffset. - // Must match X86Mir2Lir::kDummy32BitOffset and CodeGeneratorX86_64::kDummy32BitOffset. 
- constexpr int kDummy32BitOffset = 256; - DCHECK_LE(literal_offset, code->size()); - DCHECK_EQ((*code)[literal_offset + 0u], static_cast<uint8_t>(kDummy32BitOffset >> 0)); - DCHECK_EQ((*code)[literal_offset + 1u], static_cast<uint8_t>(kDummy32BitOffset >> 8)); - DCHECK_EQ((*code)[literal_offset + 2u], static_cast<uint8_t>(kDummy32BitOffset >> 16)); - DCHECK_EQ((*code)[literal_offset + 3u], static_cast<uint8_t>(kDummy32BitOffset >> 24)); - - // Apply patch. - uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset; - (*code)[literal_offset + 0u] = static_cast<uint8_t>(diff >> 0); - (*code)[literal_offset + 1u] = static_cast<uint8_t>(diff >> 8); - (*code)[literal_offset + 2u] = static_cast<uint8_t>(diff >> 16); - (*code)[literal_offset + 3u] = static_cast<uint8_t>(diff >> 24); -} - -void X86RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86.h b/compiler/linker/x86/relative_patcher_x86.h deleted file mode 100644 index 63a8338722..0000000000 --- a/compiler/linker/x86/relative_patcher_x86.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ -#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ - -#include "linker/x86/relative_patcher_x86_base.h" - -namespace art { -namespace linker { - -class X86RelativePatcher FINAL : public X86BaseRelativePatcher { - public: - X86RelativePatcher() { } - - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_base.cc b/compiler/linker/x86/relative_patcher_x86_base.cc deleted file mode 100644 index 6a9690d768..0000000000 --- a/compiler/linker/x86/relative_patcher_x86_base.cc +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86_base.h" - -#include "debug/method_debug_info.h" - -namespace art { -namespace linker { - -uint32_t X86BaseRelativePatcher::ReserveSpace( - uint32_t offset, - const CompiledMethod* compiled_method ATTRIBUTE_UNUSED, - MethodReference method_ref ATTRIBUTE_UNUSED) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t X86BaseRelativePatcher::ReserveSpaceEnd(uint32_t offset) { - return offset; // No space reserved; no limit on relative call distance. -} - -uint32_t X86BaseRelativePatcher::WriteThunks(OutputStream* out ATTRIBUTE_UNUSED, uint32_t offset) { - return offset; // No thunks added; no limit on relative call distance. -} - -std::vector<debug::MethodDebugInfo> X86BaseRelativePatcher::GenerateThunkDebugInfo( - uint32_t executable_offset ATTRIBUTE_UNUSED) { - return std::vector<debug::MethodDebugInfo>(); // No thunks added. -} - -void X86BaseRelativePatcher::PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(literal_offset + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[literal_offset])[0] = displacement; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86/relative_patcher_x86_base.h b/compiler/linker/x86/relative_patcher_x86_base.h deleted file mode 100644 index 6097345657..0000000000 --- a/compiler/linker/x86/relative_patcher_x86_base.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ -#define ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ - -#include "linker/relative_patcher.h" - -namespace art { -namespace linker { - -class X86BaseRelativePatcher : public RelativePatcher { - public: - uint32_t ReserveSpace(uint32_t offset, - const CompiledMethod* compiled_method, - MethodReference method_ref) OVERRIDE; - uint32_t ReserveSpaceEnd(uint32_t offset) OVERRIDE; - uint32_t WriteThunks(OutputStream* out, uint32_t offset) OVERRIDE; - void PatchCall(std::vector<uint8_t>* code, - uint32_t literal_offset, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - std::vector<debug::MethodDebugInfo> GenerateThunkDebugInfo(uint32_t executable_offset) OVERRIDE; - - protected: - X86BaseRelativePatcher() { } - - // PC displacement from patch location; the base address of x86/x86-64 relative - // calls and x86-64 RIP-relative addressing is the PC of the next instruction and - // the patch location is 4 bytes earlier. 
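With the kPcDisplacement constant declared just below, the PatchCall() above amounts to storing target_offset - patch_offset - 4 as the little-endian rel32 of an E8 call, since the rel32 is relative to the next instruction and the patched field starts 4 bytes before it. A small sketch of that arithmetic; the 0x1000 offsets and the helper name are illustrative, not part of the deleted source.

    #include <cstdint>
    #include <cstdio>

    // Sketch of the deleted X86BaseRelativePatcher::PatchCall() arithmetic.
    static int32_t CallRel32(uint32_t patch_offset, uint32_t target_offset) {
      // Unsigned subtraction wraps as intended; the base PC is 4 bytes past the field.
      return static_cast<int32_t>(target_offset - patch_offset - 4u);
    }

    int main() {
      // A method at (illustrative) offset 0x1000 calling itself: the E8 opcode is
      // at 0x1000, its rel32 field at 0x1001, and the call target is 0x1000.
      int rel = CallRel32(/* patch_offset= */ 0x1001u, /* target_offset= */ 0x1000u);
      std::printf("%d\n", rel);  // Prints -5, i.e. the fb ff ff ff bytes below.
      return 0;
    }

The CallSelf, CallOther and CallTrampoline tests in the deleted test files further down exercise exactly this formula, the self-call case producing the 0xe8 0xfb 0xff 0xff 0xff encoding.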
- static constexpr int32_t kPcDisplacement = 4; - - private: - DISALLOW_COPY_AND_ASSIGN(X86BaseRelativePatcher); -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_RELATIVE_PATCHER_X86_BASE_H_ diff --git a/compiler/linker/x86/relative_patcher_x86_test.cc b/compiler/linker/x86/relative_patcher_x86_test.cc deleted file mode 100644 index b855dec91d..0000000000 --- a/compiler/linker/x86/relative_patcher_x86_test.cc +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86/relative_patcher_x86.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class X86RelativePatcherTest : public RelativePatcherTest { - public: - X86RelativePatcherTest() : RelativePatcherTest(InstructionSet::kX86, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } -}; - -const uint8_t X86RelativePatcherTest::kCallRawCode[] = { - 0xe8, 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86RelativePatcherTest::kCallCode(kCallRawCode); - -TEST_F(X86RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xe8, 0xfb, 0xff, 0xff, 0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method1_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_after), - static_cast<uint8_t>(diff_after >> 8), - static_cast<uint8_t>(diff_after >> 16), - static_cast<uint8_t>(diff_after >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method2_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_before), - static_cast<uint8_t>(diff_before >> 8), - static_cast<uint8_t>(diff_before >> 16), - 
static_cast<uint8_t>(diff_before >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(X86RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1)); - ASSERT_TRUE(result.first); - uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); - static const uint8_t expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, StringBssEntry) { - bss_begin_ = 0x12345678; - constexpr size_t kStringEntryOffset = 0x1234; - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); - static const uint8_t raw_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8b, 0x83, 0x00, 0x01, 0x00, 0x00, // mov eax, [ebx + 256 (kDummy32BitValue)] - }; - constexpr uint32_t anchor_offset = 5u; // After call +0. - ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + anchor_offset); - static const uint8_t expected_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8b, 0x83, // mov eax, [ebx + diff] - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86RelativePatcherTest, StringReference) { - constexpr uint32_t kStringIndex = 1u; - constexpr uint32_t kStringOffset = 0x12345678; - string_index_to_offset_map_.Put(kStringIndex, kStringOffset); - static const uint8_t raw_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8d, 0x83, 0x00, 0x01, 0x00, 0x00, // lea eax, [ebx + 256 (kDummy32BitValue)] - }; - constexpr uint32_t anchor_offset = 5u; // After call +0. 
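// On 32-bit x86 there is no pc-relative addressing, so the code above first
// materializes its own address: "call +0" pushes the address of the next
// instruction (the pop at anchor_offset 5) and "pop ebx" leaves that address in
// ebx. The 256 (0x00000100) placeholder in "mov eax, [ebx + 256]" must therefore
// be patched to target - (method_start + anchor_offset), which is exactly the
// diff value computed and encoded little-endian into expected_code below.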
- ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch(code.size() - 4u, nullptr, anchor_offset, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kStringOffset - (result.second + anchor_offset); - static const uint8_t expected_code[] = { - 0xe8, 0x00, 0x00, 0x00, 0x00, // call +0 - 0x5b, // pop ebx - 0x8d, 0x83, // lea eax, [ebx + diff] - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc deleted file mode 100644 index 9633564999..0000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64.cc +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86_64/relative_patcher_x86_64.h" - -#include "compiled_method.h" -#include "linker/linker_patch.h" - -namespace art { -namespace linker { - -void X86_64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) { - DCHECK_LE(patch.LiteralOffset() + 4u, code->size()); - // Unsigned arithmetic with its well-defined overflow behavior is just fine here. - uint32_t displacement = target_offset - patch_offset; - displacement -= kPcDisplacement; // The base PC is at the end of the 4-byte patch. - - typedef __attribute__((__aligned__(1))) int32_t unaligned_int32_t; - reinterpret_cast<unaligned_int32_t*>(&(*code)[patch.LiteralOffset()])[0] = displacement; -} - -void X86_64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED, - const LinkerPatch& patch ATTRIBUTE_UNUSED, - uint32_t patch_offset ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; -} - -} // namespace linker -} // namespace art diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.h b/compiler/linker/x86_64/relative_patcher_x86_64.h deleted file mode 100644 index 4f3ec498cb..0000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ -#define ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ - -#include "linker/x86/relative_patcher_x86_base.h" - -namespace art { -namespace linker { - -class X86_64RelativePatcher FINAL : public X86BaseRelativePatcher { - public: - X86_64RelativePatcher() { } - - void PatchPcRelativeReference(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset, - uint32_t target_offset) OVERRIDE; - void PatchBakerReadBarrierBranch(std::vector<uint8_t>* code, - const LinkerPatch& patch, - uint32_t patch_offset) OVERRIDE; -}; - -} // namespace linker -} // namespace art - -#endif // ART_COMPILER_LINKER_X86_64_RELATIVE_PATCHER_X86_64_H_ diff --git a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc b/compiler/linker/x86_64/relative_patcher_x86_64_test.cc deleted file mode 100644 index 6baa92de36..0000000000 --- a/compiler/linker/x86_64/relative_patcher_x86_64_test.cc +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "linker/x86_64/relative_patcher_x86_64.h" - -#include "linker/relative_patcher_test.h" - -namespace art { -namespace linker { - -class X86_64RelativePatcherTest : public RelativePatcherTest { - public: - X86_64RelativePatcherTest() : RelativePatcherTest(InstructionSet::kX86_64, "default") { } - - protected: - static const uint8_t kCallRawCode[]; - static const ArrayRef<const uint8_t> kCallCode; - static const uint8_t kDexCacheLoadRawCode[]; - static const ArrayRef<const uint8_t> kDexCacheLoadCode; - static const uint8_t kStringReferenceRawCode[]; - static const ArrayRef<const uint8_t> kStringReferenceCode; - - uint32_t GetMethodOffset(uint32_t method_idx) { - auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); - CHECK(result.first); - return result.second; - } -}; - -const uint8_t X86_64RelativePatcherTest::kCallRawCode[] = { - 0xe8, 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kCallCode(kCallRawCode); - -const uint8_t X86_64RelativePatcherTest::kDexCacheLoadRawCode[] = { - 0x8b, 0x05, // mov eax, [rip + <offset>] - 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kDexCacheLoadCode( - kDexCacheLoadRawCode); - -const uint8_t X86_64RelativePatcherTest::kStringReferenceRawCode[] = { - 0x8d, 0x05, // lea eax, [rip + <offset>] - 0x00, 0x01, 0x00, 0x00 -}; - -const ArrayRef<const uint8_t> X86_64RelativePatcherTest::kStringReferenceCode( - kStringReferenceRawCode); - -TEST_F(X86_64RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - static const uint8_t expected_code[] = { - 0xe8, 0xfb, 0xff, 0xff, 
0xff - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 1u), - }; - AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); - Link(); - - uint32_t method1_offset = GetMethodOffset(1u); - uint32_t method2_offset = GetMethodOffset(2u); - uint32_t diff_after = method2_offset - (method1_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method1_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_after), - static_cast<uint8_t>(diff_after >> 8), - static_cast<uint8_t>(diff_after >> 16), - static_cast<uint8_t>(diff_after >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(method1_expected_code))); - uint32_t diff_before = method1_offset - (method2_offset + kCallCode.size() /* PC adjustment */); - static const uint8_t method2_expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff_before), - static_cast<uint8_t>(diff_before >> 8), - static_cast<uint8_t>(diff_before >> 16), - static_cast<uint8_t>(diff_before >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(2u), ArrayRef<const uint8_t>(method2_expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { - LinkerPatch::RelativeCodePatch(kCallCode.size() - 4u, nullptr, 2u), - }; - AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kTrampolineOffset - (result.second + kCallCode.size()); - static const uint8_t expected_code[] = { - 0xe8, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, StringBssEntry) { - bss_begin_ = 0x12345678; - constexpr size_t kStringEntryOffset = 0x1234; - constexpr uint32_t kStringIndex = 1u; - string_index_to_offset_map_.Put(kStringIndex, kStringEntryOffset); - LinkerPatch patches[] = { - LinkerPatch::StringBssEntryPatch(kDexCacheLoadCode.size() - 4u, nullptr, 0u, kStringIndex), - }; - AddCompiledMethod(MethodRef(1u), kDexCacheLoadCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = bss_begin_ + kStringEntryOffset - (result.second + kDexCacheLoadCode.size()); - static const uint8_t expected_code[] = { - 0x8b, 0x05, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -TEST_F(X86_64RelativePatcherTest, StringReference) { - constexpr uint32_t kStringIndex = 1u; - constexpr uint32_t kStringOffset = 0x12345678; - string_index_to_offset_map_.Put(kStringIndex, kStringOffset); - LinkerPatch patches[] = { - LinkerPatch::RelativeStringPatch( - kStringReferenceCode.size() - 4u, nullptr, 0u, kStringIndex), - }; - 
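// Here the reference is RIP-relative ("lea eax, [rip + disp32]"), and RIP points
// past the end of the 6-byte instruction. Since the disp32 occupies its last 4
// bytes, the deleted patcher's target_offset - patch_offset - kPcDisplacement is
// the same as target - (method_start + instruction size), which is why the
// expected diff below is kStringOffset - (result.second + kStringReferenceCode.size()).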
AddCompiledMethod(MethodRef(1u), kStringReferenceCode, ArrayRef<const LinkerPatch>(patches)); - Link(); - - auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); - ASSERT_TRUE(result.first); - uint32_t diff = kStringOffset - (result.second + kStringReferenceCode.size()); - static const uint8_t expected_code[] = { - 0x8d, 0x05, - static_cast<uint8_t>(diff), - static_cast<uint8_t>(diff >> 8), - static_cast<uint8_t>(diff >> 16), - static_cast<uint8_t>(diff >> 24) - }; - EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); -} - -} // namespace linker -} // namespace art diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index d9df23fd47..a5f78cafe0 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -68,7 +68,7 @@ bool HBasicBlockBuilder::CreateBranchTargets() { // places where the program might fall through into/out of the a block and // where TryBoundary instructions will be inserted later. Other edges which // enter/exit the try blocks are a result of branches/switches. - for (const DexFile::TryItem& try_item : code_item_accessor_.TryItems()) { + for (const dex::TryItem& try_item : code_item_accessor_.TryItems()) { uint32_t dex_pc_start = try_item.start_addr_; uint32_t dex_pc_end = dex_pc_start + try_item.insn_count_; MaybeCreateBlockAt(dex_pc_start); @@ -222,9 +222,9 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { } // Returns the TryItem stored for `block` or nullptr if there is no info for it. -static const DexFile::TryItem* GetTryItem( +static const dex::TryItem* GetTryItem( HBasicBlock* block, - const ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) { + const ScopedArenaSafeMap<uint32_t, const dex::TryItem*>& try_block_info) { auto iterator = try_block_info.find(block->GetBlockId()); return (iterator == try_block_info.end()) ? nullptr : iterator->second; } @@ -235,7 +235,7 @@ static const DexFile::TryItem* GetTryItem( // for a handler. static void LinkToCatchBlocks(HTryBoundary* try_boundary, const CodeItemDataAccessor& accessor, - const DexFile::TryItem* try_item, + const dex::TryItem* try_item, const ScopedArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) { for (CatchHandlerIterator it(accessor.GetCatchHandlerData(try_item->handler_off_)); it.HasNext(); @@ -279,7 +279,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // Keep a map of all try blocks and their respective TryItems. We do not use // the block's pointer but rather its id to ensure deterministic iteration. - ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info( + ScopedArenaSafeMap<uint32_t, const dex::TryItem*> try_block_info( std::less<uint32_t>(), local_allocator_->Adapter(kArenaAllocGraphBuilder)); // Obtain TryItem information for blocks with throwing instructions, and split @@ -295,7 +295,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // loop for synchronized blocks. if (ContainsElement(throwing_blocks_, block)) { // Try to find a TryItem covering the block. - const DexFile::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc()); + const dex::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc()); if (try_item != nullptr) { // Block throwing and in a TryItem. Store the try block information. 
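For reference, the TryItem lookup the block builder leans on above reduces to a dex-pc range check. A minimal sketch with a stand-in struct; the field names mirror the start_addr_/insn_count_/handler_off_ members used in the hunk, everything else is invented:

#include <cstdint>
#include <vector>

// Simplified stand-in for dex::TryItem: a half-open range of dex pcs plus an
// offset into the encoded catch handler list.
struct TryItemLite {
  uint32_t start_addr;
  uint32_t insn_count;
  uint32_t handler_off;
};

// Returns the item covering `dex_pc`, or nullptr, matching the contract of
// FindTryItem as it is used above (the real accessor walks the code item).
const TryItemLite* FindCoveringTryItem(const std::vector<TryItemLite>& items,
                                       uint32_t dex_pc) {
  for (const TryItemLite& item : items) {
    if (dex_pc >= item.start_addr && dex_pc < item.start_addr + item.insn_count) {
      return &item;
    }
  }
  return nullptr;
}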
try_block_info.Put(block->GetBlockId(), try_item); @@ -315,8 +315,16 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { CatchHandlerIterator iterator(handlers_ptr); for (; iterator.HasNext(); iterator.Next()) { uint32_t address = iterator.GetHandlerAddress(); - if (catch_blocks.find(address) != catch_blocks.end()) { + auto existing = catch_blocks.find(address); + if (existing != catch_blocks.end()) { // Catch block already processed. + TryCatchInformation* info = existing->second->GetTryCatchInformation(); + if (iterator.GetHandlerTypeIndex() != info->GetCatchTypeIndex()) { + // The handler is for multiple types. We could record all the types, but + // doing class resolution here isn't ideal, and it's unclear whether wasting + // the space in TryCatchInformation is worth it. + info->SetInvalidTypeIndex(); + } continue; } @@ -337,7 +345,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { catch_blocks.Put(address, catch_block); catch_block->SetTryCatchInformation( - new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_)); + new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_)); } handlers_ptr = iterator.EndDataPointer(); } @@ -348,7 +356,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // that all predecessors are relinked to. This preserves loop headers (b/23895756). for (const auto& entry : try_block_info) { uint32_t block_id = entry.first; - const DexFile::TryItem* try_item = entry.second; + const dex::TryItem* try_item = entry.second; HBasicBlock* try_block = graph_->GetBlocks()[block_id]; for (HBasicBlock* predecessor : try_block->GetPredecessors()) { if (GetTryItem(predecessor, try_block_info) != try_item) { @@ -367,7 +375,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() { // the successor is not in the same TryItem. for (const auto& entry : try_block_info) { uint32_t block_id = entry.first; - const DexFile::TryItem* try_item = entry.second; + const dex::TryItem* try_item = entry.second; HBasicBlock* try_block = graph_->GetBlocks()[block_id]; // NOTE: Do not use iterators because SplitEdge would invalidate them. for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) { @@ -415,7 +423,7 @@ void HBasicBlockBuilder::BuildIntrinsic() { // Create blocks. HBasicBlock* entry_block = new (allocator_) HBasicBlock(graph_, kNoDexPc); HBasicBlock* exit_block = new (allocator_) HBasicBlock(graph_, kNoDexPc); - HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc */ kNoDexPc, /* store_dex_pc */ 0u); + HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc= */ kNoDexPc, /* store_dex_pc= */ 0u); // Add blocks to the graph. graph_->AddBlock(entry_block); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index d893cc88c4..e35d50220e 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -388,10 +388,10 @@ class MonotonicValueRange : public ValueRange { return induction_variable_->GetBlock(); } - MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; } + MonotonicValueRange* AsMonotonicValueRange() override { return this; } // If it's certain that this value range fits in other_range. - bool FitsIn(ValueRange* other_range) const OVERRIDE { + bool FitsIn(ValueRange* other_range) const override { if (other_range == nullptr) { return true; } @@ -402,7 +402,7 @@ class MonotonicValueRange : public ValueRange { // Try to narrow this MonotonicValueRange given another range. 
// Ideally it will return a normal ValueRange. But due to // possible overflow/underflow, that may not be possible. - ValueRange* Narrow(ValueRange* range) OVERRIDE { + ValueRange* Narrow(ValueRange* range) override { if (range == nullptr) { return this; } @@ -530,7 +530,7 @@ class BCEVisitor : public HGraphVisitor { induction_range_(induction_analysis), next_(nullptr) {} - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { DCHECK(!IsAddedBlock(block)); first_index_bounds_check_map_.clear(); // Visit phis and instructions using a safe iterator. The iteration protects @@ -820,7 +820,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { + void VisitBoundsCheck(HBoundsCheck* bounds_check) override { HBasicBlock* block = bounds_check->GetBlock(); HInstruction* index = bounds_check->InputAt(0); HInstruction* array_length = bounds_check->InputAt(1); @@ -845,8 +845,10 @@ class BCEVisitor : public HGraphVisitor { // make one more attempt to get a constant in the array range. ValueRange* existing_range = LookupValueRange(array_length, block); if (existing_range != nullptr && - existing_range->IsConstantValueRange()) { - ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower()); + existing_range->IsConstantValueRange() && + existing_range->GetLower().GetConstant() > 0) { + ValueBound constant_upper(nullptr, existing_range->GetLower().GetConstant() - 1); + ValueRange constant_array_range(&allocator_, lower, constant_upper); if (index_range->FitsIn(&constant_array_range)) { ReplaceInstruction(bounds_check, index); return; @@ -945,7 +947,7 @@ class BCEVisitor : public HGraphVisitor { return true; } - void VisitPhi(HPhi* phi) OVERRIDE { + void VisitPhi(HPhi* phi) override { if (phi->IsLoopHeaderPhi() && (phi->GetType() == DataType::Type::kInt32) && HasSameInputAtBackEdges(phi)) { @@ -992,14 +994,14 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitIf(HIf* instruction) OVERRIDE { + void VisitIf(HIf* instruction) override { if (instruction->InputAt(0)->IsCondition()) { HCondition* cond = instruction->InputAt(0)->AsCondition(); HandleIf(instruction, cond->GetLeft(), cond->GetRight(), cond->GetCondition()); } } - void VisitAdd(HAdd* add) OVERRIDE { + void VisitAdd(HAdd* add) override { HInstruction* right = add->GetRight(); if (right->IsIntConstant()) { ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock()); @@ -1013,7 +1015,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitSub(HSub* sub) OVERRIDE { + void VisitSub(HSub* sub) override { HInstruction* left = sub->GetLeft(); HInstruction* right = sub->GetRight(); if (right->IsIntConstant()) { @@ -1115,19 +1117,19 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitDiv(HDiv* div) OVERRIDE { + void VisitDiv(HDiv* div) override { FindAndHandlePartialArrayLength(div); } - void VisitShr(HShr* shr) OVERRIDE { + void VisitShr(HShr* shr) override { FindAndHandlePartialArrayLength(shr); } - void VisitUShr(HUShr* ushr) OVERRIDE { + void VisitUShr(HUShr* ushr) override { FindAndHandlePartialArrayLength(ushr); } - void VisitAnd(HAnd* instruction) OVERRIDE { + void VisitAnd(HAnd* instruction) override { if (instruction->GetRight()->IsIntConstant()) { int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue(); if (constant > 0) { @@ -1142,7 +1144,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitRem(HRem* instruction) OVERRIDE { + void VisitRem(HRem* 
instruction) override { HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); @@ -1202,7 +1204,7 @@ class BCEVisitor : public HGraphVisitor { } } - void VisitNewArray(HNewArray* new_array) OVERRIDE { + void VisitNewArray(HNewArray* new_array) override { HInstruction* len = new_array->GetLength(); if (!len->IsIntConstant()) { HInstruction *left; @@ -1240,7 +1242,7 @@ class BCEVisitor : public HGraphVisitor { * has occurred (see AddCompareWithDeoptimization()), since in those cases it would be * unsafe to hoist array references across their deoptimization instruction inside a loop. */ - void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + void VisitArrayGet(HArrayGet* array_get) override { if (!has_dom_based_dynamic_bce_ && array_get->IsInLoop()) { HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation(); if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) && @@ -1634,7 +1636,7 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetAllocator()) HEqual(array, GetGraph()->GetNullConstant()); - InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true); + InsertDeoptInLoop(loop, block, cond, /* is_null_check= */ true); ReplaceInstruction(check, array); return true; } @@ -1938,9 +1940,9 @@ class BCEVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(BCEVisitor); }; -void BoundsCheckElimination::Run() { +bool BoundsCheckElimination::Run() { if (!graph_->HasBoundsChecks()) { - return; + return false; } // Reverse post order guarantees a node's dominators are visited first. @@ -1968,6 +1970,8 @@ void BoundsCheckElimination::Run() { // Perform cleanup. visitor.Finish(); + + return true; } } // namespace art diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index 79c67a8c7a..ef08877daa 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -34,7 +34,7 @@ class BoundsCheckElimination : public HOptimization { side_effects_(side_effects), induction_analysis_(induction_analysis) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kBoundsCheckEliminationPassName = "BCE"; diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index 1523478613..5927d681b2 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public OptimizingUnitTest { void RunBCE() { graph_->BuildDominatorTree(); - InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run(); + InstructionSimplifier(graph_, /* codegen= */ nullptr).Run(); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); @@ -598,9 +598,10 @@ static HInstruction* BuildSSAGraph3(HGraph* graph, entry->AddSuccessor(block); // We pass a bogus constant for the class to avoid mocking one. HInstruction* new_array = new (allocator) HNewArray( - constant_10, - constant_10, - 0); + /* cls= */ constant_10, + /* length= */ constant_10, + /* dex_pc= */ 0, + /* component_size_shift= */ 0); block->AddInstruction(new_array); block->AddInstruction(new (allocator) HGoto()); @@ -977,7 +978,11 @@ TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) { graph_->AddBlock(block); entry->AddSuccessor(block); // We pass a bogus constant for the class to avoid mocking one. 
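The strengthened constant-range case in VisitBoundsCheck above only fires for a positive constant array length and then checks that the proven index range fits in [0, length - 1]. The decision it encodes is roughly the following illustrative helper, assuming the index lower bound is at least zero as in the surrounding BCE code:

#include <cstdint>

// If the index's proven range fits entirely inside [0, length - 1], the
// HBoundsCheck cannot throw and can be replaced by the index itself.
bool FitsInConstantLengthArray(int32_t index_lo, int32_t index_hi, int32_t length) {
  if (length <= 0) {
    return false;  // a non-positive constant length can never absorb the check
  }
  return index_lo >= 0 && index_hi <= length - 1;
}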
- HInstruction* new_array = new (GetAllocator()) HNewArray(constant_10, constant_10, 0); + HInstruction* new_array = new (GetAllocator()) HNewArray( + /* cls= */ constant_10, + /* length= */ constant_10, + /* dex_pc= */ 0, + /* component_size_shift= */ 0); block->AddInstruction(new_array); block->AddInstruction(new (GetAllocator()) HGoto()); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index a1a5692ef6..64aa1b9358 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -21,6 +21,7 @@ #include "base/bit_vector-inl.h" #include "base/logging.h" #include "block_builder.h" +#include "code_generator.h" #include "data_type-inl.h" #include "dex/verified_method.h" #include "driver/compiler_options.h" @@ -40,7 +41,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, const CodeItemDebugInfoAccessor& accessor, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, ArrayRef<const uint8_t> interpreter_metadata, @@ -50,7 +50,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, code_item_accessor_(accessor), dex_compilation_unit_(dex_compilation_unit), outer_compilation_unit_(outer_compilation_unit), - compiler_driver_(driver), code_generator_(code_generator), compilation_stats_(compiler_stats), interpreter_metadata_(interpreter_metadata), @@ -67,19 +66,18 @@ HGraphBuilder::HGraphBuilder(HGraph* graph, code_item_accessor_(accessor), dex_compilation_unit_(dex_compilation_unit), outer_compilation_unit_(nullptr), - compiler_driver_(nullptr), code_generator_(nullptr), compilation_stats_(nullptr), handles_(handles), return_type_(return_type) {} bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { - if (compiler_driver_ == nullptr) { - // Note that the compiler driver is null when unit testing. + if (code_generator_ == nullptr) { + // Note that the codegen is null when unit testing. 
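Several passes in this change (BoundsCheckElimination above, CHAGuardOptimization below) switch HOptimization::Run() from void to bool so that a pass reports whether it actually touched the graph. A toy model of that contract; all names here are invented, only the bool-returning shape comes from the diff:

#include <utility>
#include <vector>

class Pass {
 public:
  virtual ~Pass() {}
  virtual bool Run() = 0;  // true iff the pass modified the IR
};

class PassPipeline {
 public:
  explicit PassPipeline(std::vector<Pass*> passes) : passes_(std::move(passes)) {}

  // Aggregates the per-pass results, mirroring how an early "nothing to do"
  // exit (e.g. a graph with no bounds checks) now returns false.
  bool RunAll() {
    bool changed = false;
    for (Pass* pass : passes_) {
      changed |= pass->Run();
    }
    return changed;
  }

 private:
  std::vector<Pass*> passes_;
};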
return false; } - const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions(); + const CompilerOptions& compiler_options = code_generator_->GetCompilerOptions(); CompilerFilter::Filter compiler_filter = compiler_options.GetCompilerFilter(); if (compiler_filter == CompilerFilter::kEverything) { return false; @@ -131,7 +129,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() { return_type_, dex_compilation_unit_, outer_compilation_unit_, - compiler_driver_, code_generator_, interpreter_metadata_, compilation_stats_, @@ -203,7 +200,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) { return_type_, dex_compilation_unit_, outer_compilation_unit_, - compiler_driver_, code_generator_, interpreter_metadata_, compilation_stats_, diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 5a1914ce08..6152740324 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -22,7 +22,6 @@ #include "dex/code_item_accessors.h" #include "dex/dex_file-inl.h" #include "dex/dex_file.h" -#include "driver/compiler_driver.h" #include "nodes.h" namespace art { @@ -38,7 +37,6 @@ class HGraphBuilder : public ValueObject { const CodeItemDebugInfoAccessor& accessor, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, ArrayRef<const uint8_t> interpreter_metadata, @@ -70,7 +68,6 @@ class HGraphBuilder : public ValueObject { // The compilation unit of the enclosing method being compiled. const DexCompilationUnit* const outer_compilation_unit_; - CompilerDriver* const compiler_driver_; CodeGenerator* const code_generator_; OptimizingCompilerStats* const compilation_stats_; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index 3addaeecd9..c6232ef661 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -44,9 +44,9 @@ class CHAGuardVisitor : HGraphVisitor { GetGraph()->SetNumberOfCHAGuards(0); } - void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE; + void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) override; - void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + void VisitBasicBlock(HBasicBlock* block) override; private: void RemoveGuard(HShouldDeoptimizeFlag* flag); @@ -241,14 +241,15 @@ void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { GetGraph()->IncrementNumberOfCHAGuards(); } -void CHAGuardOptimization::Run() { +bool CHAGuardOptimization::Run() { if (graph_->GetNumberOfCHAGuards() == 0) { - return; + return false; } CHAGuardVisitor visitor(graph_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { visitor.VisitBasicBlock(block); } + return true; } } // namespace art diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h index f14e07bd6c..440d51a969 100644 --- a/compiler/optimizing/cha_guard_optimization.h +++ b/compiler/optimizing/cha_guard_optimization.h @@ -30,7 +30,7 @@ class CHAGuardOptimization : public HOptimization { const char* name = kCHAGuardOptimizationPassName) : HOptimization(graph, name) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization"; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 6abda9b302..2bbb570c8d 100644 --- 
a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -49,8 +49,9 @@ #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "dex/verified_method.h" -#include "driver/compiler_driver.h" #include "graph_visualizer.h" +#include "image.h" +#include "gc/space/image_space.h" #include "intern_table.h" #include "intrinsics.h" #include "mirror/array-inl.h" @@ -61,15 +62,13 @@ #include "parallel_move_resolver.h" #include "scoped_thread_state_change-inl.h" #include "ssa_liveness_analysis.h" +#include "stack_map.h" #include "stack_map_stream.h" #include "thread-current-inl.h" #include "utils/assembler.h" namespace art { -// If true, we record the static and direct invokes in the invoke infos. -static constexpr bool kEnableDexLayoutOptimizations = false; - // Return whether a location is consistent with a type. static bool CheckType(DataType::Type type, Location location) { if (location.IsFpuRegister() @@ -197,7 +196,7 @@ class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllo return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); } - void EmitJitRoots(Handle<mirror::ObjectArray<mirror::Object>> roots) + void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) REQUIRES_SHARED(Locks::mutator_lock_); private: @@ -230,29 +229,31 @@ class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllo }; void CodeGenerator::CodeGenerationData::EmitJitRoots( - Handle<mirror::ObjectArray<mirror::Object>> roots) { - DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots()); + /*out*/std::vector<Handle<mirror::Object>>* roots) { + DCHECK(roots->empty()); + roots->reserve(GetNumberOfJitRoots()); ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); size_t index = 0; for (auto& entry : jit_string_roots_) { // Update the `roots` with the string, and replace the address temporarily // stored to the index in the table. uint64_t address = entry.second; - roots->Set(index, reinterpret_cast<StackReference<mirror::String>*>(address)->AsMirrorPtr()); - DCHECK(roots->Get(index) != nullptr); + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsString()); entry.second = index; // Ensure the string is strongly interned. This is a requirement on how the JIT // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong( - reinterpret_cast<mirror::String*>(roots->Get(index))); + class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); ++index; } for (auto& entry : jit_class_roots_) { // Update the `roots` with the class, and replace the address temporarily // stored to the index in the table. uint64_t address = entry.second; - roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr()); - DCHECK(roots->Get(index) != nullptr); + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsClass()); entry.second = index; ++index; } @@ -390,6 +391,11 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { HGraphVisitor* instruction_visitor = GetInstructionVisitor(); DCHECK_EQ(current_block_index_, 0u); + GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 
0 : frame_size_, + core_spill_mask_, + fpu_spill_mask_, + GetGraph()->GetNumberOfVRegs()); + size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_)); @@ -407,7 +413,7 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // This ensures that we have correct native line mapping for all native instructions. // It is necessary to make stepping over a statement work. Otherwise, any initial // instructions (e.g. moves) would be assumed to be the start of next statement. - MaybeRecordNativeDebugInfo(nullptr /* instruction */, block->GetDexPc()); + MaybeRecordNativeDebugInfo(/* instruction= */ nullptr, block->GetDexPc()); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* current = it.Current(); if (current->HasEnvironment()) { @@ -432,6 +438,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { // Finalize instructions in assember; Finalize(allocator); + + GetStackMapStream()->EndMethod(); } void CodeGenerator::Finalize(CodeAllocator* allocator) { @@ -447,6 +455,18 @@ void CodeGenerator::EmitLinkerPatches( // No linker patches by default. } +bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { + // Code generators that create patches requiring thunk compilation should override this function. + return false; +} + +void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, + /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, + /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { + // Code generators that create patches requiring thunk compilation should override this function. + LOG(FATAL) << "Unexpected call to EmitThunkCode()."; +} + void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, size_t maximum_safepoint_spill_size, size_t number_of_out_slots, @@ -501,7 +521,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( locations->AddTemp(visitor->GetMethodLocation()); break; } - } else { + } else if (!invoke->IsInvokePolymorphic()) { locations->AddTemp(visitor->GetMethodLocation()); } } @@ -529,6 +549,7 @@ void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall( case kVirtual: case kInterface: case kPolymorphic: + case kCustom: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -557,6 +578,7 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok entrypoint = kQuickInvokeInterfaceTrampolineWithAccessCheck; break; case kPolymorphic: + case kCustom: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -564,11 +586,19 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok } void CodeGenerator::GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke) { - MoveConstant(invoke->GetLocations()->GetTemp(0), static_cast<int32_t>(invoke->GetType())); + // invoke-polymorphic does not use a temporary to convey any additional information (e.g. a + // method index) since it requires multiple info from the instruction (registers A, B, H). Not + // using the reservation has no effect on the registers used in the runtime call. 
QuickEntrypointEnum entrypoint = kQuickInvokePolymorphic; InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); } +void CodeGenerator::GenerateInvokeCustomCall(HInvokeCustom* invoke) { + MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetCallSiteIndex()); + QuickEntrypointEnum entrypoint = kQuickInvokeCustom; + InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); +} + void CodeGenerator::CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -708,20 +738,99 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) { DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); + DCHECK(!cls->MustGenerateClinitCheck()); LocationSummary* locations = cls->GetLocations(); MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_); if (cls->NeedsAccessCheck()) { - CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>(); - InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc()); - } else if (cls->MustGenerateClinitCheck()) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); - InvokeRuntime(kQuickInitializeStaticStorage, cls, cls->GetDexPc()); + CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>(); + InvokeRuntime(kQuickResolveTypeAndVerifyAccess, cls, cls->GetDexPc()); } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); - InvokeRuntime(kQuickInitializeType, cls, cls->GetDexPc()); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + InvokeRuntime(kQuickResolveType, cls, cls->GetDexPc()); } } +void CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary( + HLoadMethodHandle* method_handle, + Location runtime_proto_index_location, + Location runtime_return_location) { + DCHECK_EQ(method_handle->InputCount(), 1u); + LocationSummary* locations = + new (method_handle->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( + method_handle, LocationSummary::kCallOnMainOnly); + locations->SetInAt(0, Location::NoLocation()); + locations->AddTemp(runtime_proto_index_location); + locations->SetOut(runtime_return_location); +} + +void CodeGenerator::GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle) { + LocationSummary* locations = method_handle->GetLocations(); + MoveConstant(locations->GetTemp(0), method_handle->GetMethodHandleIndex()); + CheckEntrypointTypes<kQuickResolveMethodHandle, void*, uint32_t>(); + InvokeRuntime(kQuickResolveMethodHandle, method_handle, method_handle->GetDexPc()); +} + +void CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary( + HLoadMethodType* method_type, + Location runtime_proto_index_location, + Location runtime_return_location) { + DCHECK_EQ(method_type->InputCount(), 1u); + LocationSummary* locations = + new (method_type->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( + method_type, LocationSummary::kCallOnMainOnly); + locations->SetInAt(0, Location::NoLocation()); + locations->AddTemp(runtime_proto_index_location); + locations->SetOut(runtime_return_location); +} + +void CodeGenerator::GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type) { + LocationSummary* locations = method_type->GetLocations(); + MoveConstant(locations->GetTemp(0), method_type->GetProtoIndex().index_); + CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>(); + InvokeRuntime(kQuickResolveMethodType, method_type, method_type->GetDexPc()); +} + +static 
uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) { + Runtime* runtime = Runtime::Current(); + DCHECK(runtime->IsAotCompiler()); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = + runtime->GetHeap()->GetBootImageSpaces(); + // Check that the `object` is in the expected section of one of the boot image files. + DCHECK(std::any_of(boot_image_spaces.begin(), + boot_image_spaces.end(), + [object, section](gc::space::ImageSpace* space) { + uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return space->GetImageHeader().GetImageSection(section).Contains(offset); + })); + uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin()); + uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin; + return dchecked_integral_cast<uint32_t>(offset); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo); + ObjPtr<mirror::Class> klass = load_class->GetClass().Get(); + DCHECK(klass != nullptr); + return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects); +} + +// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable. +uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS { + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo); + ObjPtr<mirror::String> string = load_string->GetString().Get(); + DCHECK(string != nullptr); + return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects); +} + +uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo); + ArtMethod* method = invoke->GetResolvedMethod(); + DCHECK(method != nullptr); + return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods); +} + void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { // The DCHECKS below check that a register is not specified twice in // the summary. 
The out location can overlap with an input, so we need @@ -771,53 +880,45 @@ void CodeGenerator::AllocateLocations(HInstruction* instruction) { } std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, - InstructionSet instruction_set, - const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) { ArenaAllocator* allocator = graph->GetAllocator(); - switch (instruction_set) { + switch (compiler_options.GetInstructionSet()) { #ifdef ART_ENABLE_CODEGEN_arm case InstructionSet::kArm: case InstructionSet::kThumb2: { return std::unique_ptr<CodeGenerator>( - new (allocator) arm::CodeGeneratorARMVIXL( - graph, *isa_features.AsArmInstructionSetFeatures(), compiler_options, stats)); + new (allocator) arm::CodeGeneratorARMVIXL(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { return std::unique_ptr<CodeGenerator>( - new (allocator) arm64::CodeGeneratorARM64( - graph, *isa_features.AsArm64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips case InstructionSet::kMips: { return std::unique_ptr<CodeGenerator>( - new (allocator) mips::CodeGeneratorMIPS( - graph, *isa_features.AsMipsInstructionSetFeatures(), compiler_options, stats)); + new (allocator) mips::CodeGeneratorMIPS(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 case InstructionSet::kMips64: { return std::unique_ptr<CodeGenerator>( - new (allocator) mips64::CodeGeneratorMIPS64( - graph, *isa_features.AsMips64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) mips64::CodeGeneratorMIPS64(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( - new (allocator) x86::CodeGeneratorX86( - graph, *isa_features.AsX86InstructionSetFeatures(), compiler_options, stats)); + new (allocator) x86::CodeGeneratorX86(graph, compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { return std::unique_ptr<CodeGenerator>( - new (allocator) x86_64::CodeGeneratorX86_64( - graph, *isa_features.AsX86_64InstructionSetFeatures(), compiler_options, stats)); + new (allocator) x86_64::CodeGeneratorX86_64(graph, compiler_options, stats)); } #endif default: @@ -861,15 +962,6 @@ CodeGenerator::CodeGenerator(HGraph* graph, CodeGenerator::~CodeGenerator() {} -void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, - size_t* method_info_size) { - DCHECK(stack_map_size != nullptr); - DCHECK(method_info_size != nullptr); - StackMapStream* stack_map_stream = GetStackMapStream(); - *stack_map_size = stack_map_stream->PrepareForFillIn(); - *method_info_size = stack_map_stream->ComputeMethodInfoSize(); -} - size_t CodeGenerator::GetNumberOfJitRoots() const { DCHECK(code_generation_data_ != nullptr); return code_generation_data_->GetNumberOfJitRoots(); @@ -880,11 +972,10 @@ static void CheckCovers(uint32_t dex_pc, const CodeInfo& code_info, const ArenaVector<HSuspendCheck*>& loop_headers, ArenaVector<size_t>* covered) { - CodeInfoEncoding encoding = code_info.ExtractEncoding(); for (size_t i = 0; i < loop_headers.size(); ++i) { if (loop_headers[i]->GetDexPc() == dex_pc) { if (graph.IsCompilingOsr()) { - DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid()); + DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc).IsValid()); } ++(*covered)[i]; } 
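The GetBootImageOffset helpers introduced above come down to pointer arithmetic against the boot image base plus a sanity check that the object really lies inside an image section. A self-contained sketch of that computation; ImageRange is a stand-in type, not ART's ImageSpace/ImageHeader API:

#include <cassert>
#include <cstdint>

// An object that lives inside a memory-mapped image is identified by its byte
// offset from the image base.
struct ImageRange {
  uintptr_t begin;
  uintptr_t size;
  bool Contains(uintptr_t offset) const { return offset < size; }
};

uint32_t BootImageOffsetOf(const void* object, const ImageRange& image) {
  uintptr_t address = reinterpret_cast<uintptr_t>(object);
  uintptr_t offset = address - image.begin;
  assert(image.Contains(offset) && "object must live inside the boot image");
  return static_cast<uint32_t>(offset);
}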
@@ -895,7 +986,7 @@ static void CheckCovers(uint32_t dex_pc, // dex branch instructions. static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, const CodeInfo& code_info, - const DexFile::CodeItem& code_item) { + const dex::CodeItem& code_item) { if (graph.HasTryCatch()) { // One can write loops through try/catch, which we do not support for OSR anyway. return; @@ -937,20 +1028,18 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, } } -void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region, - MemoryRegion method_info_region, - const DexFile::CodeItem* code_item_for_osr_check) { - StackMapStream* stack_map_stream = GetStackMapStream(); - stack_map_stream->FillInCodeInfo(stack_map_region); - stack_map_stream->FillInMethodInfo(method_info_region); - if (kIsDebugBuild && code_item_for_osr_check != nullptr) { - CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), *code_item_for_osr_check); +ScopedArenaVector<uint8_t> CodeGenerator::BuildStackMaps(const dex::CodeItem* code_item) { + ScopedArenaVector<uint8_t> stack_map = GetStackMapStream()->Encode(); + if (kIsDebugBuild && code_item != nullptr) { + CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map.data()), *code_item); } + return stack_map; } void CodeGenerator::RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path) { + SlowPathCode* slow_path, + bool native_debug_info) { if (instruction != nullptr) { // The code generated for some type conversions // may call the runtime, thus normally requiring a subsequent @@ -981,7 +1070,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register // mapping (i.e. start of basic block or start of slow path). - stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0); + stack_map_stream->BeginStackMapEntry(dex_pc, native_pc); stack_map_stream->EndStackMapEntry(); return; } @@ -995,7 +1084,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, // call). Therefore register_mask contains both callee-save and caller-save // registers that hold objects. We must remove the spilled caller-save from the // mask, since they will be overwritten by the callee. - uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true); + uint32_t spills = GetSlowPathSpills(locations, /* core_registers= */ true); register_mask &= ~spills; } else { // The register mask must be a subset of callee-save registers. @@ -1015,37 +1104,28 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, outer_dex_pc = outer_environment->GetDexPc(); outer_environment_size = outer_environment->Size(); } + + HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); + bool osr = + instruction->IsSuspendCheck() && + (info != nullptr) && + graph_->IsCompilingOsr() && + (inlining_depth == 0); + StackMap::Kind kind = native_debug_info + ? StackMap::Kind::Debug + : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default); stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, locations->GetStackMask(), - outer_environment_size, - inlining_depth); + kind); EmitEnvironment(environment, slow_path); - // Record invoke info, the common case for the trampoline is super and static invokes. Only - // record these to reduce oat file size. 
- if (kEnableDexLayoutOptimizations) { - if (instruction->IsInvokeStaticOrDirect()) { - HInvoke* const invoke = instruction->AsInvokeStaticOrDirect(); - DCHECK(environment != nullptr); - stack_map_stream->AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex()); - } - } stack_map_stream->EndStackMapEntry(); - HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); - if (instruction->IsSuspendCheck() && - (info != nullptr) && - graph_->IsCompilingOsr() && - (inlining_depth == 0)) { + if (osr) { DCHECK_EQ(info->GetSuspendCheck(), instruction); - // We duplicate the stack map as a marker that this stack map can be an OSR entry. - // Duplicating it avoids having the runtime recognize and skip an OSR stack map. DCHECK(info->IsIrreducible()); - stack_map_stream->BeginStackMapEntry( - dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0); - EmitEnvironment(instruction->GetEnvironment(), slow_path); - stack_map_stream->EndStackMapEntry(); + DCHECK(environment != nullptr); if (kIsDebugBuild) { for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* in_environment = environment->GetInstructionAt(i); @@ -1062,14 +1142,6 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } } } - } else if (kIsDebugBuild) { - // Ensure stack maps are unique, by checking that the native pc in the stack map - // last emitted is different than the native pc of the stack map just emitted. - size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps(); - if (number_of_stack_maps > 1) { - DCHECK_NE(stack_map_stream->GetStackMap(number_of_stack_maps - 1).native_pc_code_offset, - stack_map_stream->GetStackMap(number_of_stack_maps - 2).native_pc_code_offset); - } } } @@ -1080,8 +1152,7 @@ bool CodeGenerator::HasStackMapAtCurrentPc() { if (count == 0) { return false; } - CodeOffset native_pc_offset = stack_map_stream->GetStackMap(count - 1).native_pc_code_offset; - return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc); + return stack_map_stream->GetStackMapNativePcOffset(count - 1) == pc; } void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, @@ -1092,12 +1163,11 @@ void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, // Ensure that we do not collide with the stack map of the previous instruction. GenerateNop(); } - RecordPcInfo(instruction, dex_pc, slow_path); + RecordPcInfo(instruction, dex_pc, slow_path, /* native_debug_info= */ true); } } void CodeGenerator::RecordCatchBlockInfo() { - ArenaAllocator* allocator = graph_->GetAllocator(); StackMapStream* stack_map_stream = GetStackMapStream(); for (HBasicBlock* block : *block_order_) { @@ -1107,30 +1177,23 @@ void CodeGenerator::RecordCatchBlockInfo() { uint32_t dex_pc = block->GetDexPc(); uint32_t num_vregs = graph_->GetNumberOfVRegs(); - uint32_t inlining_depth = 0; // Inlining of catch blocks is not supported at the moment. uint32_t native_pc = GetAddressOf(block); - uint32_t register_mask = 0; // Not used. - - // The stack mask is not used, so we leave it empty. 
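The RecordPcInfo rewrite above replaces the old "duplicate the stack map as an OSR marker" trick with an explicit kind. A compact restatement of that selection with an illustrative enum; Catch entries get their own kind in RecordCatchBlockInfo:

#include <cstdint>

enum class StackMapKind { Default, OSR, Debug, Catch };

StackMapKind SelectStackMapKind(bool native_debug_info,
                                bool is_suspend_check_in_loop,
                                bool compiling_osr,
                                uint32_t inlining_depth) {
  if (native_debug_info) {
    return StackMapKind::Debug;  // extra map emitted only for native debugging
  }
  bool osr = is_suspend_check_in_loop && compiling_osr && inlining_depth == 0;
  return osr ? StackMapKind::OSR : StackMapKind::Default;
}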
- ArenaBitVector* stack_mask = - ArenaBitVector::Create(allocator, 0, /* expandable */ true, kArenaAllocCodeGenerator); stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, - register_mask, - stack_mask, - num_vregs, - inlining_depth); + /* register_mask= */ 0, + /* sp_mask= */ nullptr, + StackMap::Kind::Catch); HInstruction* current_phi = block->GetFirstPhi(); for (size_t vreg = 0; vreg < num_vregs; ++vreg) { - while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { - HInstruction* next_phi = current_phi->GetNext(); - DCHECK(next_phi == nullptr || - current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) - << "Phis need to be sorted by vreg number to keep this a linear-time loop."; - current_phi = next_phi; - } + while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) { + HInstruction* next_phi = current_phi->GetNext(); + DCHECK(next_phi == nullptr || + current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber()) + << "Phis need to be sorted by vreg number to keep this a linear-time loop."; + current_phi = next_phi; + } if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); @@ -1190,50 +1253,45 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo continue; } + using Kind = DexRegisterLocation::Kind; Location location = environment->GetLocationAt(i); switch (location.GetKind()) { case Location::kConstant: { DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, High32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kConstant, High32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value)); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, 0); } else { DCHECK(current->IsFloatConstant()) << current->DebugName(); int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue()); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value); } break; } case Location::kStackSlot: { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex()); break; } case 
Location::kDoubleStackSlot: { + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex()); stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); + Kind::kInStack, location.GetHighStackIndex(kVRegSize)); ++i; DCHECK_LT(i, environment_size); break; @@ -1243,17 +1301,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); if (current->GetType() == DataType::Type::kInt64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, id); if (current->GetType() == DataType::Type::kInt64) { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1265,18 +1322,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); if (current->GetType() == DataType::Type::kFloat64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInStack, offset + kVRegSize); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, id); if (current->GetType() == DataType::Type::kFloat64) { - stack_map_stream->AddDexRegisterEntry( - DexRegisterLocation::Kind::kInFpuRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1289,16 +1344,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, low); } if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); ++i; } else { - 
stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high); + stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, high); ++i; } DCHECK_LT(i, environment_size); @@ -1310,15 +1365,15 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, low); } if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high); - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset); } else { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high); + stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, high); } ++i; DCHECK_LT(i, environment_size); @@ -1326,7 +1381,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } case Location::kInvalid: { - stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + stack_map_stream->AddDexRegisterEntry(Kind::kNone, 0); break; } @@ -1341,37 +1396,12 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) { - HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves(); - - return (first_next_not_move != nullptr) - && first_next_not_move->CanDoImplicitNullCheckOn(null_check->InputAt(0)); + return null_check->IsEmittedAtUseSite(); } void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) { - if (!compiler_options_.GetImplicitNullChecks()) { - return; - } - - // If we are from a static path don't record the pc as we can't throw NPE. - // NB: having the checks here makes the code much less verbose in the arch - // specific code generators. - if (instr->IsStaticFieldSet() || instr->IsStaticFieldGet()) { - return; - } - - if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) { - return; - } - - // Find the first previous instruction which is not a move. - HInstruction* first_prev_not_move = instr->GetPreviousDisregardingMoves(); - - // If the instruction is a null check it means that `instr` is the first user - // and needs to record the pc. - if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) { - HNullCheck* null_check = first_prev_not_move->AsNullCheck(); - // TODO: The parallel moves modify the environment. Their changes need to be - // reverted otherwise the stack maps at the throw point will not be correct. + HNullCheck* null_check = instr->GetImplicitNullCheck(); + if (null_check != nullptr) { RecordPcInfo(null_check, null_check->GetDexPc()); } } @@ -1461,7 +1491,12 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint, << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString(); } else { - DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || + // 'CanTriggerGC' side effect is used to restrict optimization of instructions which depend + // on GC (e.g. 
IntermediateAddress) - to ensure they are not alive across GC points. However + // if execution never returns to the compiled code from a GC point this restriction is + // unnecessary - in particular for fatal slow paths which might trigger GC. + DCHECK((slow_path->IsFatal() && !instruction->GetLocations()->WillCall()) || + instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || // When (non-Baker) read barriers are enabled, some instructions // use a slow path to emit a read barrier, which does not trigger // GC. @@ -1519,7 +1554,7 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. if (locations->RegisterContainsObject(i)) { @@ -1531,7 +1566,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo stack_offset += codegen->SaveCoreRegister(stack_offset, i); } - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -1543,14 +1578,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); stack_offset += codegen->RestoreCoreRegister(stack_offset, i); } - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -1612,28 +1647,21 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { } void CodeGenerator::EmitJitRoots(uint8_t* code, - Handle<mirror::ObjectArray<mirror::Object>> roots, - const uint8_t* roots_data) { + const uint8_t* roots_data, + /*out*/std::vector<Handle<mirror::Object>>* roots) { code_generation_data_->EmitJitRoots(roots); EmitJitRootPatches(code, roots_data); } -QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass) { - ScopedObjectAccess soa(Thread::Current()); - if (array_klass == nullptr) { - // This can only happen for non-primitive arrays, as primitive arrays can always - // be resolved. 
- return kQuickAllocArrayResolved32; - } - - switch (array_klass->GetComponentSize()) { - case 1: return kQuickAllocArrayResolved8; - case 2: return kQuickAllocArrayResolved16; - case 4: return kQuickAllocArrayResolved32; - case 8: return kQuickAllocArrayResolved64; +QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(HNewArray* new_array) { + switch (new_array->GetComponentSizeShift()) { + case 0: return kQuickAllocArrayResolved8; + case 1: return kQuickAllocArrayResolved16; + case 2: return kQuickAllocArrayResolved32; + case 3: return kQuickAllocArrayResolved64; } LOG(FATAL) << "Unreachable"; - return kQuickAllocArrayResolved; + UNREACHABLE(); } } // namespace art diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index f784a1a857..f70ecb612d 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -21,20 +21,20 @@ #include "arch/instruction_set_features.h" #include "base/arena_containers.h" #include "base/arena_object.h" +#include "base/array_ref.h" #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "base/globals.h" +#include "base/memory_region.h" #include "dex/string_reference.h" #include "dex/type_reference.h" -#include "globals.h" #include "graph_visualizer.h" #include "locations.h" -#include "memory_region.h" #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" #include "stack.h" -#include "stack_map.h" #include "utils/label.h" namespace art { @@ -59,7 +59,6 @@ static constexpr ReadBarrierOption kCompilerReadBarrierOption = class Assembler; class CodeGenerator; -class CompilerDriver; class CompilerOptions; class StackMapStream; class ParallelMoveResolver; @@ -74,6 +73,7 @@ class CodeAllocator { virtual ~CodeAllocator() {} virtual uint8_t* Allocate(size_t size) = 0; + virtual ArrayRef<const uint8_t> GetMemory() const = 0; private: DISALLOW_COPY_AND_ASSIGN(CodeAllocator); @@ -187,8 +187,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Compiles the graph to executable instructions. void Compile(CodeAllocator* allocator); static std::unique_ptr<CodeGenerator> Create(HGraph* graph, - InstructionSet instruction_set, - const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGenerator(); @@ -210,6 +208,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void Initialize() = 0; virtual void Finalize(CodeAllocator* allocator); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); + virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; + virtual void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; @@ -318,7 +320,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } // Record native to dex mapping for a suspend point. Required by runtime. - void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + void RecordPcInfo(HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr, + bool native_debug_info = false); // Check whether we have already recorded mapping at this PC. 
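GetArrayAllocationEntrypoint above now keys off HNewArray::GetComponentSizeShift() instead of resolving the array class. The mapping is simply log2 of the element size to entrypoint width, as sketched here with the entrypoint names reproduced as strings for illustration:

#include <cstdint>

const char* ArrayAllocEntrypointName(uint32_t component_size_shift) {
  switch (component_size_shift) {
    case 0: return "AllocArrayResolved8";   // 1-byte elements (boolean[], byte[])
    case 1: return "AllocArrayResolved16";  // 2-byte elements (char[], short[])
    case 2: return "AllocArrayResolved32";  // 4-byte elements (int[], float[])
    case 3: return "AllocArrayResolved64";  // 8-byte elements (long[], double[])
    default: return nullptr;                // no other element widths occur
  }
}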
bool HasStackMapAtCurrentPc(); // Record extra stack maps if we support native debugging. @@ -344,17 +349,14 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void AddSlowPath(SlowPathCode* slow_path); - void BuildStackMaps(MemoryRegion stack_map_region, - MemoryRegion method_info_region, - const DexFile::CodeItem* code_item_for_osr_check); - void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size); + ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check); size_t GetNumberOfJitRoots() const; // Fills the `literals` array with literals collected during code generation. // Also emits literal patches. void EmitJitRoots(uint8_t* code, - Handle<mirror::ObjectArray<mirror::Object>> roots, - const uint8_t* roots_data) + const uint8_t* roots_data, + /*out*/std::vector<Handle<mirror::Object>>* roots) REQUIRES_SHARED(Locks::mutator_lock_); bool IsLeafMethod() const { @@ -438,6 +440,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: return false; + case TypeCheckKind::kBitstringCheck: + return true; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); @@ -535,10 +539,13 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void GenerateInvokeStaticOrDirectRuntimeCall( HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); + void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke); + void GenerateInvokeCustomCall(HInvokeCustom* invoke); + void CreateUnresolvedFieldLocationSummary( HInstruction* field_access, DataType::Type field_type, @@ -556,6 +563,20 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { Location runtime_return_location); void GenerateLoadClassRuntimeCall(HLoadClass* cls); + static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle, + Location runtime_handle_index_location, + Location runtime_return_location); + void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle); + + static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type, + Location runtime_type_index_location, + Location runtime_return_location); + void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type); + + uint32_t GetBootImageOffset(HLoadClass* load_class); + uint32_t GetBootImageOffset(HLoadString* load_string); + uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke); + static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke); void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; } @@ -600,7 +621,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // otherwise return a fall-back info that should be used instead. virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) = 0; + ArtMethod* method) = 0; // Generate a call to a static or direct method. 
virtual void GenerateStaticOrDirectCall( @@ -614,7 +635,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void GenerateNop() = 0; - static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass); + static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); protected: // Patch info used for recording locations of required linker patches and their targets, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 60f8f98757..3086882678 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -27,10 +27,10 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_arm64.h" -#include "linker/arm64/relative_patcher_arm64.h" #include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -64,12 +64,11 @@ using helpers::DRegisterFrom; using helpers::FPRegisterFrom; using helpers::HeapOperand; using helpers::HeapOperandFrom; -using helpers::InputCPURegisterAt; using helpers::InputCPURegisterOrZeroRegAt; using helpers::InputFPRegisterAt; using helpers::InputOperandAt; using helpers::InputRegisterAt; -using helpers::Int64ConstantFrom; +using helpers::Int64FromLocation; using helpers::IsConstantZeroBitPattern; using helpers::LocationFrom; using helpers::OperandFromMemOperand; @@ -90,25 +89,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle // offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions. -// For the Baker read barrier implementation using link-generated thunks we need to split +// For the Baker read barrier implementation using link-time generated thunks we need to split // the offset explicitly. constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; -// Flags controlling the use of link-time generated thunks for Baker read barriers. -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; - -// Some instructions have special requirements for a temporary, for example -// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require -// temp that's not an R0 (to avoid an extra move) and Baker read barrier field -// loads with large offsets need a fixed register to limit the number of link-time -// thunks we generate. For these and similar cases, we want to reserve a specific -// register that's neither callee-save nor an argument register. We choose x15. 
-inline Location FixedTempLocation() { - return Location::RegisterLocation(x15.GetCode()); -} - inline Condition ARM64Condition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; @@ -165,6 +149,16 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return return ARM64ReturnLocation(return_type); } +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), + RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), + DataType::Type::kReference).GetCode()); + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value() @@ -174,8 +168,8 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, LocationSummary* locations, int64_t spill_offset, bool is_save) { - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills, codegen->GetNumberOfCoreRegisters(), fp_spills, @@ -218,7 +212,7 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { @@ -230,7 +224,7 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar stack_offset += kXRegSizeInBytes; } - const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -240,20 +234,20 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar SaveRestoreLiveRegistersHelper(codegen, locations, - codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */); + codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true); } void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { SaveRestoreLiveRegistersHelper(codegen, locations, - codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */); + codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false); } class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -279,9 +273,9 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); @@ -291,16 +285,16 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); @@ -308,35 +302,41 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { public: - LoadClassSlowPathARM64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeARM64(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at) + : SlowPathCodeARM64(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void 
EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType; - arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); + arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -349,18 +349,12 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; } + const char* GetDescription() const override { return "LoadClassSlowPathARM64"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. 
- const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); }; @@ -369,7 +363,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); @@ -390,7 +384,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } + const char* GetDescription() const override { return "LoadStringSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); @@ -400,7 +394,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -414,9 +408,9 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; } + const char* GetDescription() const override { return "NullCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); @@ -427,7 +421,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeARM64(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); @@ -451,7 +445,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; } private: // If not null, the block to branch to after the suspend check. 
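// Illustrative sketch (not ART code) of the decision the rewritten LoadClassSlowPathARM64 a few
// hunks above now makes at emission time, replacing the removed do_clinit_/dex_pc_ fields. The
// instruction is either an HLoadClass or an HClinitCheck; names below are for this sketch only.
#include <vector>

enum class SlowPathCall { kResolveType, kInitializeStaticStorage };

std::vector<SlowPathCall> PlanLoadClassSlowPath(bool is_load_class,
                                                bool must_resolve_type_on_slow_path,
                                                bool must_generate_clinit_check) {
  std::vector<SlowPathCall> calls;
  const bool must_resolve_type = is_load_class && must_resolve_type_on_slow_path;
  const bool must_do_clinit = !is_load_class || must_generate_clinit_check;
  if (must_resolve_type) {
    // The type-resolution entrypoint leaves the resolved class in the return register, which
    // doubles as the argument register for the following call.
    calls.push_back(SlowPathCall::kResolveType);
  }
  if (must_do_clinit) {
    calls.push_back(SlowPathCall::kInitializeStaticStorage);
  }
  return calls;  // at least one of the two calls is expected to be emitted
}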
@@ -468,7 +462,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal) : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() @@ -509,8 +503,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -523,7 +517,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -535,7 +529,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); @@ -545,7 +539,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { public: explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -576,7 +570,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; } + const char* GetDescription() const override { return "ArraySetSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); @@ -605,503 +599,6 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { } } -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 { - protected: - ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - // No need to save live registers; it's taken care of by the - // entrypoint. 
Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - DCHECK_NE(ref_.reg(), LR); - DCHECK_NE(ref_.reg(), WSP); - DCHECK_NE(ref_.reg(), WZR); - // IP0 is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK_NE(ref_.reg(), IP0); - DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg(); - // "Compact" slow path, saving two moves. - // - // Instead of using the standard runtime calling convention (input - // and output in W0): - // - // W0 <- ref - // W0 <- ReadBarrierMark(W0) - // ref <- W0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blr(XRegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg()); - // This runtime call does not require a stack map. - arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if it is already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { - public: - ReadBarrierMarkSlowPathARM64(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ B(GetExitLabel()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 -// below for that). -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
-class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - // Temporary register `temp_`, used to store the lock word, must - // not be IP0 nor IP1, as we may use them to emit the reference - // load (in the call to GenerateRawReferenceLoad below), and we - // need the lock word to still be in `temp_` after the reference - // load. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - DCHECK_NE(LocationFrom(temp_).reg(), IP1); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. 
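// Illustrative sketch (not ART code) of the Baker check the comment block above describes:
// the lock word is loaded before the reference, the two loads are ordered by an address
// dependency (the ADD with LSR #32 below) rather than a barrier, and a single TBZ on the read
// barrier state bit skips the mark call on the fast path. The shift value is an assumption.
#include <cstdint>

constexpr int kReadBarrierStateShiftForSketch = 28;  // assumed bit position of the RB state bit

template <typename Ref, typename MarkFn>
Ref BakerLoadSketch(uint32_t lock_word, Ref ref, MarkFn mark) {
  const bool is_gray = ((lock_word >> kReadBarrierStateShiftForSketch) & 1u) != 0u;
  // mark() stands in for the per-register ReadBarrierMarkRegX entrypoint and may return a
  // forwarded (to-space) reference; on the fast path the loaded reference is used as-is.
  return is_gray ? mark(ref) : ref;
}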
- - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`. - Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never -// tries to update `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. 
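// Illustrative sketch (not ART code) of the "mark, then update the holder's field" step the
// comment above describes: after marking, a single strong compare-and-set writes the forwarded
// reference back, and a failure (another thread already stored something else) is tolerated.
#include <atomic>

template <typename T>
void WriteBackMarkedReference(std::atomic<T*>& field, T* old_ref, T* marked_ref) {
  if (marked_ref == old_ref) {
    return;  // the reference was not forwarded; nothing to write back
  }
  T* expected = old_ref;
  // Relaxed ordering mirrors the LDXR/STXR loop in the removed slow path; on failure the field
  // already holds a different (newer) value and is left untouched.
  field.compare_exchange_strong(expected, marked_ref,
                                std::memory_order_relaxed,
                                std::memory_order_relaxed);
}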
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - : public ReadBarrierMarkSlowPathBaseARM64 { - public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire, - Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - use_load_acquire_(use_load_acquire), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - Register ref_reg = WRegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(obj_.IsW()); - DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, 0u); - DCHECK_EQ(use_load_acquire_, false); - // The location of the offset of the marked reference field within `obj_`. - Location field_offset = index_; - DCHECK(field_offset.IsRegister()) << field_offset; - - // Temporary register `temp_`, used to store the lock word, must - // not be IP0 nor IP1, as we may use them to emit the reference - // load (in the call to GenerateRawReferenceLoad below), and we - // need the lock word to still be in `temp_` after the reference - // load. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - DCHECK_NE(LocationFrom(temp_).reg(), IP1); - - __ Bind(GetEntryLabel()); - - // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); - // } - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). 
- // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->GenerateRawReferenceLoad(instruction_, - ref_, - obj_, - offset_, - index_, - scale_factor_, - /* needs_null_check */ false, - use_load_acquire_); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel()); - - // Save the old value of the reference before marking it. - // Note that we cannot use IP to save the old reference, as IP is - // used internally by the ReadBarrierMarkRegX entry point, and we - // need the old reference after the call to that entry point. - DCHECK_NE(LocationFrom(temp_).reg(), IP0); - __ Mov(temp_.W(), ref_reg); - - GenerateReadBarrierMarkRuntimeCall(codegen); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // LDXR/CMP/BNE sequence of instructions in the compare-and-set - // (CAS) operation below would abort the CAS, leaving the field - // as-is. - __ Cmp(temp_.W(), ref_reg); - __ B(eq, GetExitLabel()); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. - - MacroAssembler* masm = arm64_codegen->GetVIXLAssembler(); - UseScratchRegisterScope temps(masm); - - // Convenience aliases. - Register base = obj_.W(); - Register offset = XRegisterFrom(field_offset); - Register expected = temp_.W(); - Register value = ref_reg; - Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. - Register tmp_value = temps.AcquireW(); // Value in memory. - - __ Add(tmp_ptr, base.X(), Operand(offset)); - - if (kPoisonHeapReferences) { - arm64_codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. 
- } else { - arm64_codegen->GetAssembler()->PoisonHeapReference(value); - } - } - - // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - - vixl::aarch64::Label loop_head, comparison_failed, exit_loop; - __ Bind(&loop_head); - __ Ldxr(tmp_value, MemOperand(tmp_ptr)); - __ Cmp(tmp_value, expected); - __ B(&comparison_failed, ne); - __ Stxr(tmp_value, value, MemOperand(tmp_ptr)); - __ Cbnz(tmp_value, &loop_head); - __ B(&exit_loop); - __ Bind(&comparison_failed); - __ Clrex(); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - arm64_codegen->GetAssembler()->UnpoisonHeapReference(value); - } - } - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - const Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - size_t scale_factor_; - // Is a null check required? - bool needs_null_check_; - // Should this reference load use Load-Acquire semantics? - bool use_load_acquire_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. - const Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64); -}; - // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: @@ -1131,7 +628,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; @@ -1257,7 +754,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; } + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { @@ -1297,7 +794,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); @@ -1334,7 +831,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; } private: const Location out_; @@ -1373,7 +870,6 @@ Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { } CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, 
OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1389,8 +885,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator()), - isa_features_(isa_features), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), @@ -1401,11 +897,14 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } @@ -1420,10 +919,86 @@ void CodeGeneratorARM64::EmitJumpTables() { void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { EmitJumpTables(); + + // Emit JIT baker read barrier slow paths. + DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); + for (auto& entry : jit_baker_read_barrier_slow_paths_) { + uint32_t encoded_data = entry.first; + vixl::aarch64::Label* slow_path_entry = &entry.second.label; + __ Bind(slow_path_entry); + CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr); + } + // Ensure we emit the literal pool. __ FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. + if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 4u); + + auto GetInsn = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 4u); + return + (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8) + + (static_cast<uint32_t>(code[offset + 2]) << 16)+ + (static_cast<uint32_t>(code[offset + 3]) << 24); + }; + + const uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: + case BakerReadBarrierKind::kAcquire: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + if (kind == BakerReadBarrierKind::kField) { + // LDR (immediate) with correct base_reg. 
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); + } else { + DCHECK(kind == BakerReadBarrierKind::kAcquire); + // LDAR with correct base_reg. + CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn(literal_offset - 4u); + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + // Usually LDR (immediate) with correct root_reg but + // we may have a "MOV marked, old_value" for UnsafeCASObject. + if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV? + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR? + } + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { @@ -1543,7 +1118,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() { } } - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void CodeGeneratorARM64::GenerateFrameExit() { @@ -1600,8 +1175,24 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_ if (value_can_be_null) { __ Cbz(value, &done); } + // Load the address of the card table into `card`. __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value())); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the STRB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp.X())); if (value_can_be_null) { __ Bind(&done); @@ -1615,6 +1206,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { // mr : Runtime reserved. // ip1 : VIXL core temp. // ip0 : VIXL core temp. + // x18 : Platform register. // // Blocked fp registers: // d31 : VIXL fp temp. 
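// Illustrative sketch (not ART code) of the card-marking trick documented in the MarkGCCard
// comments added above: the card table base is biased so that its least-significant byte equals
// the dirty-card value, letting one register serve both as the table address and as the byte
// that gets stored. The shift and dirty value below are assumptions for this sketch.
#include <cstdint>

constexpr unsigned kCardShiftForSketch = 7;    // assumed: one card byte per 128-byte heap region
constexpr uint8_t kCardDirtyForSketch = 0x70;  // assumed dirty marker, equal to the base's low byte

void MarkCardSketch(uint8_t* biased_card_table, uintptr_t object_address) {
  uint8_t* card = biased_card_table + (object_address >> kCardShiftForSketch);
  // The generated STRB stores the low byte of the `card` register itself, which by construction
  // of the biased base is the dirty value, so no separate immediate load is needed.
  *card = kCardDirtyForSketch;
}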
@@ -1623,6 +1215,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const { while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; } + blocked_core_registers_[X18] = true; CPURegList reserved_fp_registers = vixl_reserved_fp_registers; while (!reserved_fp_registers.IsEmpty()) { @@ -1672,6 +1265,10 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg stream << DRegister(reg); } +const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures(); +} + void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { if (constant->IsIntConstant()) { __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); @@ -2128,6 +1725,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, vixl::aarch64::Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + } + // Compare the bitstring bits to `path_to_root`. + __ Cmp(temp, path_to_root); +} + void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; @@ -2224,18 +1841,12 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation() && - !field_info.IsVolatile()) { - // If link-time thunks for the Baker read barrier are enabled, for AOT - // non-volatile loads we need a temporary only if the offset is too big. - if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { - locations->AddTemp(FixedTempLocation()); - } - } else { - locations->AddTemp(Location::RequiresRegister()); + // We need a temporary register for the read barrier load in + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() + // only if the field is volatile or the offset is too big. + if (field_info.IsVolatile() || + field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(FixedTempLocation()); } } locations->SetInAt(0, Location::RequiresRegister()); @@ -2277,7 +1888,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, base, offset, maybe_temp, - /* needs_null_check */ true, + /* needs_null_check= */ true, field_info.IsVolatile()); } else { // General case. @@ -2286,7 +1897,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, // CodeGeneratorARM64::LoadAcquire call. 
// NB: LoadAcquire will record the pc info if needed. codegen_->LoadAcquire( - instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true); + instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true); } else { // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); @@ -2341,7 +1952,7 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, if (field_info.IsVolatile()) { codegen_->StoreRelease( - instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true); + instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true); } else { // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); @@ -2383,6 +1994,9 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { // all & reg_bits - 1. __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); } + } else if (instr->IsMin() || instr->IsMax()) { + __ Cmp(lhs, rhs); + __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt); } else { DCHECK(instr->IsXor()); __ Eor(dst, lhs, rhs); @@ -2398,6 +2012,10 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { __ Fadd(dst, lhs, rhs); } else if (instr->IsSub()) { __ Fsub(dst, lhs, rhs); + } else if (instr->IsMin()) { + __ Fmin(dst, lhs, rhs); + } else if (instr->IsMax()) { + __ Fmax(dst, lhs, rhs); } else { LOG(FATAL) << "Unexpected floating-point binary operation"; } @@ -2618,7 +2236,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( HIntermediateAddressIndex* instruction) { Register index_reg = InputRegisterAt(instruction, 0); - uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2)); + uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2)); uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); if (shift == 0) { @@ -2691,21 +2309,21 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation() && - instruction->GetIndex()->IsConstant()) { + if (instruction->GetIndex()->IsConstant()) { // Array loads with constant index are treated as field loads. - // If link-time thunks for the Baker read barrier are enabled, for AOT - // constant index loads we need a temporary only if the offset is too big. + // We need a temporary register for the read barrier load in + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() + // only if the offset is too big. 
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); offset += index << DataType::SizeShift(DataType::Type::kReference); if (offset >= kReferenceLoadMinFarOffset) { locations->AddTemp(FixedTempLocation()); } - } else { + } else if (!instruction->GetArray()->IsIntermediateAddress()) { + // We need a non-scratch temporary for the array data pointer in + // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no + // intermediate address. locations->AddTemp(Location::RequiresRegister()); } } @@ -2735,11 +2353,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); - // The read barrier instrumentation of object ArrayGet instructions + // The non-Baker read barrier instrumentation of object ArrayGet instructions // does not support the HIntermediateAddress instruction. DCHECK(!((type == DataType::Type::kReference) && instruction->GetArray()->IsIntermediateAddress() && - kEmitCompilerReadBarrier)); + kEmitCompilerReadBarrier && + !kUseBakerReadBarrier)); if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. @@ -2747,8 +2366,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { + DCHECK(!instruction->GetArray()->IsIntermediateAddress()); // Array load with a constant index can be treated as a field load. - offset += Int64ConstantFrom(index) << DataType::SizeShift(type); + offset += Int64FromLocation(index) << DataType::SizeShift(type); Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -2756,12 +2376,11 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { obj.W(), offset, maybe_temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); } else { - Register temp = WRegisterFrom(locations->GetTemp(0)); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); + instruction, out, obj.W(), offset, index, /* needs_null_check= */ false); } } else { // General case. @@ -2793,14 +2412,14 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { "Expecting 0=compressed, 1=uncompressed"); __ Tbnz(length.W(), 0, &uncompressed_load); __ Ldrb(Register(OutputCPURegister(instruction)), - HeapOperand(obj, offset + Int64ConstantFrom(index))); + HeapOperand(obj, offset + Int64FromLocation(index))); __ B(&done); __ Bind(&uncompressed_load); __ Ldrh(Register(OutputCPURegister(instruction)), - HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); + HeapOperand(obj, offset + (Int64FromLocation(index) << 1))); __ Bind(&done); } else { - offset += Int64ConstantFrom(index) << DataType::SizeShift(type); + offset += Int64FromLocation(index) << DataType::SizeShift(type); source = HeapOperand(obj, offset); } } else { @@ -2810,8 +2429,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. 
if (kIsDebugBuild) { - HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); - DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); + HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); + DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); } temp = obj; } else { @@ -2913,7 +2532,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { if (!needs_write_barrier) { DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { UseScratchRegisterScope temps(masm); @@ -2923,8 +2542,8 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { // input instruction has done it already. See the comment in // `TryExtractArrayAccessAddress()`. if (kIsDebugBuild) { - HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress(); - DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); + HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); + DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); } temp = array; } else { @@ -2951,7 +2570,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); + offset += Int64FromLocation(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { destination = HeapOperand(temp, @@ -3093,12 +2712,14 @@ void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. 
- SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } @@ -3260,61 +2881,30 @@ FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) #undef DEFINE_CONDITION_VISITORS #undef FOR_EACH_CONDITION_INSTRUCTION -void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); +void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); - int64_t imm = Int64FromConstant(second.GetConstant()); - DCHECK(imm == 1 || imm == -1); - if (instruction->IsRem()) { - __ Mov(out, 0); + if (abs_imm == 2) { + int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte; + __ Add(out, dividend, Operand(dividend, LSR, bits - 1)); } else { - if (imm == 1) { - __ Mov(out, dividend); - } else { - __ Neg(out, dividend); - } - } -} - -void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - - LocationSummary* locations = instruction->GetLocations(); - Location second = locations->InAt(1); - DCHECK(second.IsConstant()); - - Register out = OutputRegister(instruction); - Register dividend = InputRegisterAt(instruction, 0); - int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireSameSizeAs(out); - - if (instruction->IsDiv()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); __ Add(temp, dividend, abs_imm - 1); __ Cmp(dividend, 0); __ Csel(out, temp, dividend, lt); - if (imm > 0) { - __ Asr(out, out, ctz_imm); - } else { - __ Neg(out, Operand(out, ASR, ctz_imm)); - } + } + + int ctz_imm = CTZ(abs_imm); + if (imm > 0) { + __ Asr(out, out, ctz_imm); } else { - int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 
32 : 64; - __ Asr(temp, dividend, bits - 1); - __ Lsr(temp, temp, bits - ctz_imm); - __ Add(out, dividend, temp); - __ And(out, out, abs_imm - 1); - __ Sub(out, out, temp); + __ Neg(out, Operand(out, ASR, ctz_imm)); } } @@ -3335,7 +2925,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati int64_t magic; int shift; CalculateMagicAndShiftForDivRem( - imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift); + imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift); UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireSameSizeAs(out); @@ -3370,39 +2960,34 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati } } -void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { - DCHECK(instruction->IsDiv() || instruction->IsRem()); - DataType::Type type = instruction->GetResultType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); +void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); - LocationSummary* locations = instruction->GetLocations(); - Register out = OutputRegister(instruction); - Location second = locations->InAt(1); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + return; + } - if (second.IsConstant()) { - int64_t imm = Int64FromConstant(second.GetConstant()); + if (IsPowerOfTwo(AbsOrMin(imm))) { + GenerateIntDivForPower2Denom(instruction); + } else { + // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier. + DCHECK(imm < -2 || imm > 2) << imm; + GenerateDivRemWithAnyConstant(instruction); + } +} - if (imm == 0) { - // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
- } else if (imm == 1 || imm == -1) { - DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(AbsOrMin(imm))) { - DivRemByPowerOfTwo(instruction); - } else { - DCHECK(imm <= -2 || imm >= 2); - GenerateDivRemWithAnyConstant(instruction); - } +void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) { + DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) + << instruction->GetResultType(); + + if (instruction->GetLocations()->InAt(1).IsConstant()) { + GenerateIntDivForConstDenom(instruction); } else { + Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); Register divisor = InputRegisterAt(instruction, 1); - if (instruction->IsDiv()) { - __ Sdiv(out, dividend, divisor); - } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireSameSizeAs(out); - __ Sdiv(temp, dividend, divisor); - __ Msub(out, temp, divisor, dividend); - } + __ Sdiv(out, dividend, divisor); } } @@ -3434,7 +3019,7 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: - GenerateDivRemIntegral(div); + GenerateIntDiv(div); break; case DataType::Type::kFloat32: @@ -3462,11 +3047,11 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction if (!DataType::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; - return; + UNREACHABLE(); } if (value.IsConstant()) { - int64_t divisor = Int64ConstantFrom(value); + int64_t divisor = Int64FromLocation(value); if (divisor == 0) { __ B(slow_path->GetEntryLabel()); } else { @@ -3531,7 +3116,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -3681,7 +3266,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { false_target = nullptr; } - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -3700,9 +3285,9 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeARM64* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -3865,6 +3450,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -3873,7 +3460,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
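GenerateIntDivForPower2Denom in the hunks above biases a negative dividend by |imm| - 1 before the arithmetic shift so the shift truncates toward zero, then negates the result when the divisor is negative (the Add/Cmp/Csel followed by Asr or Neg-of-Asr). A hedged C++ model of that computation; the helper name is ours, it uses the GCC/Clang __builtin_ctzll, it assumes arithmetic right shift of signed values, and it excludes imm == INT64_MIN as a simplification relative to the AbsOrMin handling in the real code:

#include <cassert>
#include <cstdint>

// Truncating division by a power-of-two constant `imm`, with |imm| >= 2.
int64_t DivByPowerOfTwo(int64_t dividend, int64_t imm) {
  uint64_t abs_imm = static_cast<uint64_t>(imm < 0 ? -imm : imm);
  assert(abs_imm >= 2 && (abs_imm & (abs_imm - 1)) == 0);
  int ctz = __builtin_ctzll(abs_imm);                          // log2(|imm|)
  // Add/Cmp/Csel part: bias negative dividends so the shift rounds toward zero.
  int64_t biased = dividend < 0 ? dividend + static_cast<int64_t>(abs_imm) - 1 : dividend;
  int64_t quotient = biased >> ctz;                            // Asr
  return imm > 0 ? quotient : -quotient;                       // Neg(..., ASR ctz)
}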
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -3886,7 +3479,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -4032,7 +3627,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -4064,7 +3659,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -4072,6 +3667,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Cset(out, eq); + if (zero.IsLinked()) { + __ B(&done); + } + break; + } } if (zero.IsLinked()) { @@ -4094,7 +3706,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 
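The new kBitstringCheck cases above pass the path-to-root value and mask in as constants and compare them against the loaded class in GenerateBitstringTypeCheckCompare, which is not part of this hunk. The sketch below only shows the general mask-and-compare idea behind bitstring subtype checks; the field and encoding are made up for illustration and are not ART's actual layout:

#include <cstdint>

struct FakeClass {
  uint32_t type_check_bits;  // hypothetical per-class path-to-root encoding
};

// Subtype test as a single AND plus compare: the candidate is a subtype of the
// target iff its bitstring, masked to the target's prefix, equals the target's
// path-to-root value. No walk over the superclass chain is needed.
bool BitstringInstanceOf(const FakeClass* klass,
                         uint32_t target_path_to_root,
                         uint32_t target_prefix_mask) {
  return (klass->type_check_bits & target_prefix_mask) == target_path_to_root;
}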
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4104,7 +3722,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = InputRegisterAt(instruction, 0); - Register cls = InputRegisterAt(instruction, 1); + Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Register() + : InputRegisterAt(instruction, 1); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); DCHECK_GE(num_temps, 1u); DCHECK_LE(num_temps, 3u); @@ -4285,6 +3905,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { __ B(ne, &start_loop); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ B(ne, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); @@ -4318,7 +3952,7 @@ void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { @@ -4388,7 +4022,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4424,7 +4058,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -4455,21 +4089,32 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - // Load method address from literal pool. - __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = GetBootImageOffset(invoke); + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { - // Add ADRP with its PC-relative DexCache access patch. + // Add ADRP with its PC-relative .bss entry patch. 
MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); - // Add LDR with its PC-relative DexCache access patch. + // Add LDR with its PC-relative .bss entry patch. vixl::aarch64::Label* ldr_label = NewMethodBssEntryPatch(target_method, adrp_label); EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + // Load method address from literal pool. + __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -4556,7 +4201,30 @@ void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); +} + +void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( @@ -4602,9 +4270,18 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { - baker_read_barrier_patches_.emplace_back(custom_data); - return &baker_read_barrier_patches_.back().label; +void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) { + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. + if (Runtime::Current()->UseJitCompilation()) { + auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); + vixl::aarch64::Label* slow_path_entry = &it->second.label; + __ cbnz(mr, slow_path_entry); + } else { + baker_read_barrier_patches_.emplace_back(custom_data); + vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label; + __ bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 
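The NewBootImageRelRoPatch calls above pair an ADRP, which forms the 4 KiB page address of the patched target from the current PC, with a 32-bit LDR at the page offset; as the comment notes, 32 bits suffice because the boot image lives in the low 4GiB. A small sketch of that addressing arithmetic, with made-up names and no claim to match the linker's patch format:

#include <cstdint>
#include <cstring>

// ADRP Xd, target: page of the PC plus a signed page delta supplied by the patch.
uint64_t AdrpAddress(uint64_t pc, int64_t page_delta) {
  return (pc & ~UINT64_C(0xfff)) + (static_cast<uint64_t>(page_delta) << 12);
}

// LDR Wt, [Xn, #page_offset]: read the 32-bit .data.bimg.rel.ro entry on that page.
uint32_t LoadRelRoEntry(const uint8_t* page, uint32_t page_offset) {
  uint32_t entry;
  std::memcpy(&entry, page + page_offset, sizeof(entry));
  return entry;
}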
+ } } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( @@ -4631,7 +4308,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLitera ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( @@ -4639,7 +4316,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, @@ -4669,6 +4346,55 @@ void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_la __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); } +void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg, + uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + // Add ADRP with its PC-relative type patch. + vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference); + EmitAdrpPlaceholder(adrp_label, reg.X()); + // Add ADD with its PC-relative type patch. + vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label); + EmitAddPlaceholder(add_label, reg.X(), reg.X()); + } else if (GetCompilerOptions().GetCompilePic()) { + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference); + EmitAdrpPlaceholder(adrp_label, reg.X()); + // Add LDR with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label); + EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X()); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address))); + } +} + +void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + // Add ADRP with its PC-relative type patch. + vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx); + EmitAdrpPlaceholder(adrp_label, argument.X()); + // Add ADD with its PC-relative type patch. 
+ vixl::aarch64::Label* add_label = + NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label); + EmitAddPlaceholder(add_label, argument.X(), argument.X()); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, @@ -4681,6 +4407,15 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4690,6 +4425,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin type_bss_entry_patches_.size() + boot_image_string_patches_.size() + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -4699,12 +4435,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4719,6 +4457,44 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + Arm64Assembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. 
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + } + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + } + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. + assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { return uint32_literals_.GetOrCreate( value, @@ -4737,7 +4513,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); return; } @@ -4750,12 +4526,12 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); return; } @@ -4767,7 +4543,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { DCHECK(!codegen_->IsLeafMethod()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( @@ -4779,14 +4555,14 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -4822,13 +4598,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. 
- RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); - DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), - RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), - DataType::Type::kReference).GetCode()); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -4841,7 +4611,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -4859,12 +4629,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + /* fixup_label= */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -4880,31 +4650,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative type patch. - const DexFile& dex_file = cls->GetDexFile(); - dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative type patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index))); - if (masked_hash != 0) { - __ Sub(out.W(), out.W(), Operand(masked_hash)); - } break; } case HLoadClass::LoadKind::kBssEntry: { @@ -4914,29 +4669,36 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA vixl::aarch64::Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its PC-relative Class patch. + // Add LDR with its PC-relative Class .bss entry patch. vixl::aarch64::Label* ldr_label = codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(cls, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + read_barrier_option); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); - GenerateGcRootFieldLoad(cls, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + out.X(), + /* offset= */ 0, + /* fixup_label= */ nullptr, + read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -4948,8 +4710,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA bool do_clinit = cls->MustGenerateClinitCheck(); if (generate_null_check || do_clinit) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), do_clinit); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Cbz(out, slow_path->GetEntryLabel()); @@ -4959,10 +4721,30 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA } else { __ Bind(slow_path->GetExitLabel()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } } +void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* 
load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + static MemOperand GetExceptionTlsAddress() { return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); } @@ -4989,14 +4771,14 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -5014,13 +4796,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); - DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), - RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), - DataType::Type::kReference).GetCode()); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -5048,23 +4824,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - // Add ADRP with its PC-relative String patch. - const DexFile& dex_file = load->GetDexFile(); - const dex::StringIndex string_index = load->GetStringIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); + // Add ADRP with its PC-relative .data.bimg.rel.ro patch. + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); - // Add LDR with its PC-relative String patch. + // Add LDR with its PC-relative .data.bimg.rel.ro patch. vixl::aarch64::Label* ldr_label = - codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); return; } @@ -5072,38 +4840,43 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD // Add ADRP with its PC-relative String .bss entry patch. 
const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register temp = XRegisterFrom(out_loc); vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its .bss entry String patch. + // Add LDR with its PC-relative String .bss entry patch. vixl::aarch64::Label* ldr_label = codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ - GenerateGcRootFieldLoad(load, - out_loc, - temp, - /* offset placeholder */ 0u, - ldr_label, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + temp, + /* offset placeholder */ 0u, + ldr_label, + kCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); __ Cbz(out.X(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); return; } case HLoadString::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), load->GetStringIndex(), load->GetString())); - GenerateGcRootFieldLoad(load, - out_loc, - out.X(), - /* offset */ 0, - /* fixup_label */ nullptr, - kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad(load, + out_loc, + out.X(), + /* offset= */ 0, + /* fixup_label= */ nullptr, + kCompilerReadBarrierOption); return; } default: @@ -5116,7 +4889,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { @@ -5144,7 +4917,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitMul(HMul* mul) { @@ -5235,50 +5008,25 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(LocationFrom(kArtMethodRegister)); - } else { - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - Location temp = instruction->GetLocations()->GetTemp(0); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); - __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString))); - __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value())); - - { - // Ensure the pc position is recorded immediately after the `blr` instruction. - ExactAssemblyScope eas(GetVIXLAssembler(), - kInstructionSize, - CodeBufferCheckScope::kExactSize); - __ blr(lr); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { @@ -5319,7 +5067,7 @@ void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { return; } { - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. + // Ensure that between load and RecordPcInfo there are no pools emitted. 
EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); Location obj = instruction->GetLocations()->InAt(0); __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); } @@ -5433,13 +5181,75 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { } } +void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; + + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + + if (abs_imm == 2) { + __ Cmp(dividend, 0); + __ And(out, dividend, 1); + __ Csneg(out, out, out, ge); + } else { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + + __ Negs(temp, dividend); + __ And(out, dividend, abs_imm - 1); + __ And(temp, temp, abs_imm - 1); + __ Csneg(out, out, temp, mi); + } +} + +void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) { + int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); + + if (imm == 0) { + // Do not generate anything. + // DivZeroCheck would prevent any code to be executed. + return; + } + + if (IsPowerOfTwo(AbsOrMin(imm))) { + // Cases imm == -1 or imm == 1 are handled in constant folding by + // InstructionWithAbsorbingInputSimplifier. + // If the cases have survived till code generation they are handled in + // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0). + // The correct code is generated for them, just more instructions. + GenerateIntRemForPower2Denom(instruction); + } else { + DCHECK(imm < -2 || imm > 2) << imm; + GenerateDivRemWithAnyConstant(instruction); + } +} + +void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) { + DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) + << instruction->GetResultType(); + + if (instruction->GetLocations()->InAt(1).IsConstant()) { + GenerateIntRemForConstDenom(instruction); + } else { + Register out = OutputRegister(instruction); + Register dividend = InputRegisterAt(instruction, 0); + Register divisor = InputRegisterAt(instruction, 1); + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(out); + __ Sdiv(temp, dividend, divisor); + __ Msub(out, temp, divisor, dividend); + } +} + void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { DataType::Type type = rem->GetResultType(); switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: { - GenerateDivRemIntegral(rem); + GenerateIntRem(rem); break; } @@ -5462,6 +5272,62 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +void LocationsBuilderARM64::VisitMin(HMin* min) { + HandleBinaryOp(min); +} + +void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { + HandleBinaryOp(min); +} + +void LocationsBuilderARM64::VisitMax(HMax* max) { + HandleBinaryOp(max); +} + +void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { + HandleBinaryOp(max); +} + +void LocationsBuilderARM64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0,
Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + Register in_reg = InputRegisterAt(abs, 0); + Register out_reg = OutputRegister(abs); + __ Cmp(in_reg, Operand(0)); + __ Cneg(out_reg, in_reg, lt); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FPRegister in_reg = InputFPRegisterAt(abs, 0); + FPRegister out_reg = OutputFPRegister(abs); + __ Fabs(out_reg, in_reg); + break; + } + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } @@ -5635,7 +5501,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction return; } GenerateSuspendCheck(instruction, nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { @@ -5848,8 +5714,8 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( out_reg, offset, maybe_temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -5889,8 +5755,8 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( obj_reg, offset, maybe_temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -5905,7 +5771,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( +void CodeGeneratorARM64::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, Register obj, @@ -5919,77 +5785,39 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // We use link-time generated thunks for the slow path. That thunk - // checks the reference and jumps to the entrypoint if needed. - // - // lr = &return_address; - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
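GenerateIntRemForPower2Denom in the hunk above (Negs/And/Csneg, or Cmp/And/Csneg for |imm| == 2) masks both the dividend and its negation and then picks the result whose sign matches the dividend, so the divisor's sign never matters. A C++ model of what that sequence computes; the function name is ours:

#include <cassert>
#include <cstdint>

// Truncating remainder for a power-of-two divisor magnitude `abs_imm`.
int64_t RemByPowerOfTwo(int64_t dividend, uint64_t abs_imm) {
  assert(abs_imm != 0 && (abs_imm & (abs_imm - 1)) == 0);
  uint64_t mask = abs_imm - 1;
  uint64_t pos = static_cast<uint64_t>(dividend) & mask;    // And(out, dividend, mask)
  uint64_t neg = -static_cast<uint64_t>(dividend) & mask;   // Negs(temp, dividend); And(temp, temp, mask)
  // Csneg: keep the masked dividend when it is non-negative, otherwise return
  // minus the masked negation, so the remainder carries the dividend's sign.
  return dividend >= 0 ? static_cast<int64_t>(pos) : -static_cast<int64_t>(neg);
}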
- // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto gc_root_thunk<root_reg>(lr) - // } - // return_address: - - UseScratchRegisterScope temps(GetVIXLAssembler()); - DCHECK(temps.IsAvailable(ip0)); - DCHECK(temps.IsAvailable(ip1)); - temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); - vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data); - - EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - if (fixup_label != nullptr) { - __ Bind(fixup_label); - } - static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, - "GC root LDR must be 2 instruction (8B) before the return address label."); - __ ldr(root_reg, MemOperand(obj.X(), offset)); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - __ Bind(&return_address); - } else { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will - // be loaded by the slow path code. - SlowPathCodeARM64* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); - codegen_->AddSlowPath(slow_path); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - if (fixup_label == nullptr) { - __ Ldr(root_reg, MemOperand(obj, offset)); - } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); - } - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + + ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + if (fixup_label != nullptr) { + __ bind(fixup_label); } + static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 instructions (8B) before the return address label."); + __ ldr(root_reg, MemOperand(obj.X(), offset)); + EmitBakerReadBarrierCbnz(custom_data); + __ bind(&return_address); } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -5997,10 +5825,10 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); + EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -6008,108 +5836,134 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); + EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); +} + +void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier( + vixl::aarch64::Register marked, + vixl::aarch64::Register old_value) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode()); + + ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 instructions (8B) before the return address label."); + __ mov(marked, old_value); + EmitBakerReadBarrierCbnz(custom_data); + __ bind(&return_address); } void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, - Register obj, - uint32_t offset, - Location maybe_temp, + vixl::aarch64::Register obj, + const vixl::aarch64::MemOperand& src, bool needs_null_check, bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !use_load_acquire && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. 
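The rewritten GC-root and field loads above all share the Baker read-barrier shape spelled out in the comments: do the plain load, then a single CBNZ on the marking register that diverts to a shared thunk only while the GC is marking. A high-level C++ sketch of that control flow; the names and the callback are ours, not ART's:

#include <functional>

// Fast path of a Baker-style read barrier: the load itself is unconditional and
// the slow path (marking / forwarding the reference) is only taken when the
// marking flag is set, mirroring the `if (mr) goto thunk` comments above.
void* LoadWithBakerBarrier(void** slot,
                           bool is_gc_marking,
                           const std::function<void*(void*)>& mark_slow_path) {
  void* ref = *slot;             // original reference load (LDR)
  if (is_gc_marking) {           // CBNZ mr, <thunk>
    ref = mark_slow_path(ref);   // thunk checks the holder, marks, and may return a new ref
  }
  return ref;
}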
- // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto field_thunk<holder_reg, base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. - // HeapReference<mirror::Object> reference = *(obj+offset); - // gray_return_address: - - DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); - Register base = obj; - if (offset >= kReferenceLoadMinFarOffset) { - DCHECK(maybe_temp.IsRegister()); - base = WRegisterFrom(maybe_temp); - static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); - __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); - offset &= (kReferenceLoadMinFarOffset - 1u); - } - UseScratchRegisterScope temps(GetVIXLAssembler()); - DCHECK(temps.IsAvailable(ip0)); - DCHECK(temps.IsAvailable(ip1)); - temps.Exclude(ip0, ip1); - uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), - obj.GetCode()); - vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. + // + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: - { - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - __ ldr(ref_reg, MemOperand(base.X(), offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); + DCHECK(src.GetAddrMode() == vixl::aarch64::Offset); + DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>)); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = use_load_acquire + ? 
EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode()) + : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode()); + + { + ExactAssemblyScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + EmitBakerReadBarrierCbnz(custom_data); + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + if (use_load_acquire) { + DCHECK_EQ(src.GetOffset(), 0); + __ ldar(ref_reg, src); + } else { + __ ldr(ref_reg, src); } - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); - return; + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses + // macro instructions disallowed in ExactAssemblyScope. + if (kPoisonHeapReferences) { + __ neg(ref_reg, Operand(ref_reg)); + } + __ bind(&return_address); } - - // /* HeapReference<Object> */ ref = *(obj + offset) - Register temp = WRegisterFrom(maybe_temp); - Location no_index = Location::NoLocation(); - size_t no_scale_factor = 0u; - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - offset, - no_index, - no_scale_factor, - temp, - needs_null_check, - use_load_acquire); + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1)); } -void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, +void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location maybe_temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register base = obj; + if (use_load_acquire) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + __ Add(base, obj, offset); + offset = 0u; + } else if (offset >= kReferenceLoadMinFarOffset) { + DCHECK(maybe_temp.IsRegister()); + base = WRegisterFrom(maybe_temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + MemOperand src(base.X(), offset); + GenerateFieldLoadWithBakerReadBarrier( + instruction, ref, obj, src, needs_null_check, use_load_acquire); +} + +void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction, Location ref, Register obj, uint32_t data_offset, Location index, - Register temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -6119,267 +5973,72 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); size_t scale_factor = DataType::SizeShift(DataType::Type::kReference); - if (kBakerReadBarrierLinkTimeThunksEnableForArrays && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. 
Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto array_thunk<base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. - // HeapReference<mirror::Object> reference = data[index]; - // gray_return_address: - - DCHECK(index.IsValid()); - Register index_reg = RegisterFrom(index, DataType::Type::kInt32); - Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - DCHECK(temps.IsAvailable(ip0)); - DCHECK(temps.IsAvailable(ip1)); - temps.Exclude(ip0, ip1); - uint32_t custom_data = - linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); - vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - - __ Add(temp.X(), obj.X(), Operand(data_offset)); - { - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); - } - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); - return; - } - - // Array cells are never volatile variables, therefore array loads - // never use Load-Acquire instructions on ARM64. - const bool use_load_acquire = false; - - // /* HeapReference<Object> */ ref = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier(instruction, - ref, - obj, - data_offset, - index, - scale_factor, - temp, - needs_null_check, - use_load_acquire); -} - -void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - Register temp, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // If we are emitting an array load, we should not be using a - // Load Acquire instruction. In other words: - // `instruction->IsArrayGet()` => `!use_load_acquire`. - DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. 
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // } - - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will be loaded by the slow path code. - SlowPathCodeARM64* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp); - AddSlowPath(slow_path); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: just load the reference. - GenerateRawReferenceLoad( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); -} - -void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - Location field_offset, - Register temp, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // If we are emitting an array load, we should not be using a - // Load Acquire instruction. In other words: - // `instruction->IsArrayGet()` => `!use_load_acquire`. - DCHECK(!instruction->IsArrayGet() || !use_load_acquire); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to update the reference field within `obj`. Then, in the - // slow path, check the gray bit in the lock word of the reference's - // holder (`obj`) to decide whether to mark `ref` and update the - // field or not. + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto array_thunk<base_reg>(lr) // } - // } - - // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will be loaded by the slow path code. 
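The removed pseudocode above (old_ref = ref; ref = Mark(ref); compareAndSwapObject(...)) boils down to marking the loaded reference and swinging the field to the marked value while the GC is marking. A standalone model of that update (illustrative, not ART code; the gray test and CAS are reduced to plain C++):

#include <atomic>
#include <cstdint>

using CompressedRef = uint32_t;

// When the GC is marking and the holder is gray, mark the loaded reference and
// CAS the field from the old value to the marked one, so the field the GC
// later scans already points at the marked copy.
CompressedRef UpdateFieldIfMarking(bool is_gc_marking,
                                   bool holder_is_gray,
                                   std::atomic<CompressedRef>* field,
                                   CompressedRef (*mark)(CompressedRef)) {
  CompressedRef ref = field->load(std::memory_order_relaxed);  // Reference load.
  if (is_gc_marking && holder_is_gray) {
    CompressedRef old_ref = ref;
    ref = mark(ref);                               // ref = ReadBarrier::Mark(ref)
    field->compare_exchange_strong(old_ref, ref);  // compareAndSwapObject(obj, offset, ...)
  }
  return ref;
}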
- SlowPathCodeARM64* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - /* offset */ 0u, - /* index */ field_offset, - /* scale_factor */ 0u /* "times 1" */, - needs_null_check, - use_load_acquire, - temp); - AddSlowPath(slow_path); - - __ Cbnz(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: nothing to do (the field is - // up-to-date, and we don't need to load the reference). - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); -} - -void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire) { - DCHECK(obj.IsW()); - DataType::Type type = DataType::Type::kReference; - Register ref_reg = RegisterFrom(ref, type); + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. + DCHECK(index.IsValid()); + Register index_reg = RegisterFrom(index, DataType::Type::kInt32); + Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - if (use_load_acquire) { - // UnsafeGetObjectVolatile intrinsic case. - // Register `index` is not an index in an object array, but an - // offset to an object reference field within object `obj`. - DCHECK(instruction->IsInvoke()) << instruction->DebugName(); - DCHECK(instruction->GetLocations()->Intrinsified()); - DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) - << instruction->AsInvoke()->GetIntrinsic(); - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); - DCHECK_EQ(needs_null_check, false); - // /* HeapReference<mirror::Object> */ ref = *(obj + index) - MemOperand field = HeapOperand(obj, XRegisterFrom(index)); - LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); - } else { - // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases. 
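For these indexed cases the loaded address is obj + offset + (index << scale_factor), where the scale is the size shift of a heap reference (2 for the 4-byte compressed references asserted elsewhere in this file). A small standalone sketch of that computation, with the heap modeled as a byte array (illustrative, not ART code):

#include <cstdint>
#include <cstring>

constexpr size_t kReferenceSizeShift = 2;  // 4-byte (compressed) heap references.

uint32_t LoadReferenceAt(const uint8_t* obj, uint32_t data_offset, uint32_t index) {
  // ref = *(obj + offset + (index << scale_factor))
  const uint8_t* address = obj + data_offset + (size_t{index} << kReferenceSizeShift);
  uint32_t ref;
  std::memcpy(&ref, address, sizeof(ref));  // The LDR with a register-shifted index.
  return ref;
}

The new array path keeps the same formula but folds obj + data_offset into a temporary (or reuses an HIntermediateAddress) so the LDR can use the register-shifted addressing mode directly.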
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(obj, computed_offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } else { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp = temps.AcquireW(); - __ Add(temp, obj, offset); - { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } - } + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + + Register temp; + if (instruction->GetArray()->IsIntermediateAddress()) { + // We do not need to compute the intermediate address from the array: the + // input instruction has done it already. See the comment in + // `TryExtractArrayAccessAddress()`. + if (kIsDebugBuild) { + HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); + DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); } + temp = obj; } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) - MemOperand field = HeapOperand(obj, offset); - if (use_load_acquire) { - // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire. - LoadAcquire(instruction, ref_reg, field, needs_null_check); - } else { - EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - Load(type, ref_reg, field); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } + temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); + __ Add(temp.X(), obj.X(), Operand(data_offset)); } - // Object* ref = ref_addr->AsMirrorPtr() - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode()); + + { + ExactAssemblyScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + EmitBakerReadBarrierCbnz(custom_data); + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses + // macro instructions disallowed in ExactAssemblyScope. 
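A raw NEG is sufficient here because, with kPoisonHeapReferences enabled, references are stored negated (two's complement), and negation is its own inverse, so the same single instruction both poisons and unpoisons. A standalone illustration (not ART code):

#include <cstdint>

constexpr uint32_t Poison(uint32_t ref) { return 0u - ref; }
constexpr uint32_t Unpoison(uint32_t poisoned) { return 0u - poisoned; }

static_assert(Unpoison(Poison(0x12345678u)) == 0x12345678u, "NEG is an involution");
static_assert(Poison(0u) == 0u, "null stays null when poisoned");

Using the raw instruction instead of MaybeUnpoisonHeapReference() keeps the sequence inside the ExactAssemblyScope exactly the advertised number of instructions, which the BAKER_MARK_INTROSPECTION_*_LDR_OFFSET static_asserts depend on.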
+ if (kPoisonHeapReferences) { + __ neg(ref_reg, Operand(ref_reg)); + } + __ bind(&return_address); + } + MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1)); } void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { @@ -6510,5 +6169,193 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_ #undef __ #undef QUICK_ENTRY_POINT +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register base_reg, + vixl::aarch64::MemOperand& lock_word, + vixl::aarch64::Label* slow_path, + vixl::aarch64::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip0.W(), lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); + static_assert( + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, + "Field and array LDR offsets must be the same to reuse the same code."); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); + __ Br(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint`. +static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, + vixl::aarch64::Register entrypoint) { + // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); +} + +void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: + case BakerReadBarrierKind::kAcquire: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + auto holder_reg = + Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. 
Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch64::Label throw_npe_label; + vixl::aarch64::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ Cbz(holder_reg.W(), throw_npe); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. + vixl::aarch64::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe); + __ Bind(&slow_path); + if (kind == BakerReadBarrierKind::kField) { + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. + __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. + } else { + DCHECK(kind == BakerReadBarrierKind::kAcquire); + DCHECK(!base_reg.Is(holder_reg)); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ldar(ip0.W(), MemOperand(base_reg)); + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Br(ip1); // Jump to the entrypoint. + break; + } + case BakerReadBarrierKind::kArray: { + auto base_reg = + Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffset(), 0); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). + __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip0, base_reg); // Move the base register to ip0. + __ Br(ip1); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. 
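The thunks compiled here are keyed by the encoded_data produced by the EncodeBakerReadBarrier*Data() helpers declared later in code_generator_arm64.h: two bits of kind (kField, kAcquire, kArray, kGcRoot) followed by two 5-bit register fields, with 31 (sp/zr) meaning "no register". A standalone model of that packing using explicit shifts instead of the BitField helpers (layout derived from those declarations; treat it as a sketch, not the authoritative encoding):

#include <cstdint>

constexpr uint32_t kKindBits = 2;      // MinimumBitsToStore(kGcRoot = 3)
constexpr uint32_t kRegBits = 5;       // MinimumBitsToStore(31)
constexpr uint32_t kInvalidReg = 31u;  // sp/zr marks an unused register field.

constexpr uint32_t Encode(uint32_t kind, uint32_t first_reg, uint32_t second_reg = kInvalidReg) {
  return kind | (first_reg << kKindBits) | (second_reg << (kKindBits + kRegBits));
}
constexpr uint32_t DecodeKind(uint32_t data) { return data & ((1u << kKindBits) - 1u); }
constexpr uint32_t DecodeFirstReg(uint32_t data) {
  return (data >> kKindBits) & ((1u << kRegBits) - 1u);
}

// E.g. a GC-root thunk key for w0: kind = 3, first = 0, second = 31.
static_assert(DecodeKind(Encode(3u, 0u)) == 3u, "kind round-trips");
static_assert(DecodeFirstReg(Encode(3u, 0u)) == 0u, "register round-trips");

Only the first register field is meaningful for the kArray and kGcRoot kinds, which is why their encoders pass kBakerReadBarrierInvalidEncodedReg for the second field.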
+ auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip0, ip1); + vixl::aarch64::Label return_label, not_marked, forwarding_address; + __ Cbz(root_reg, &return_label); + MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip0.W(), lock_word); + __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, ¬_marked); + __ Bind(&return_label); + __ Br(lr); + __ Bind(¬_marked); + __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); + __ B(&forwarding_address, mi); + LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); + // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to + // art_quick_read_barrier_mark_introspection_gc_roots. + __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); + __ Mov(ip0.W(), root_reg); + __ Br(ip1); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); + __ Br(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + // For JIT, the slow path is considered part of the compiled method, + // so JIT should pass null as `debug_name`. Tests may not have a runtime. + DCHECK(Runtime::Current() == nullptr || + !Runtime::Current()->UseJitCompilation() || + debug_name == nullptr); + if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kAcquire: + oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0654046de5..ada5742fc0 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_ -#include "arch/arm64/quick_method_frame_info_arm64.h" +#include "base/bit_field.h" #include "code_generator.h" #include "common_arm64.h" #include "dex/dex_file_types.h" @@ -36,6 +36,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Arm64RelativePatcherTest; +} // namespace linker + namespace arm64 { class CodeGeneratorARM64; @@ -87,6 +92,16 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers = ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? 
mr : vixl::aarch64::NoCPUReg), vixl::aarch64::lr); +// Some instructions have special requirements for a temporary, for example +// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require +// temp that's not an R0 (to avoid an extra move) and Baker read barrier field +// loads with large offsets need a fixed register to limit the number of link-time +// thunks we generate. For these and similar cases, we want to reserve a specific +// register that's neither callee-save nor an argument register. We choose x15. +inline Location FixedTempLocation() { + return Location::RegisterLocation(vixl::aarch64::x15.GetCode()); +} + // Callee-save registers AAPCS64, without x19 (Thread Register) (nor // x20 (Marking Register) when emitting Baker read barriers). const vixl::aarch64::CPURegList callee_saved_core_registers( @@ -110,8 +125,8 @@ class SlowPathCodeARM64 : public SlowPathCode { vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; } vixl::aarch64::Label* GetExitLabel() { return &exit_label_; } - void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; - void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; private: vixl::aarch64::Label entry_label_; @@ -201,11 +216,11 @@ class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConvention InvokeDexCallingConventionVisitorARM64() {} virtual ~InvokeDexCallingConventionVisitorARM64() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type return_type) const OVERRIDE { + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type return_type) const override { return calling_convention.GetReturnLocation(return_type); } - Location GetMethodLocation() const OVERRIDE; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -217,22 +232,22 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionARM64() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return helpers::LocationFrom(vixl::aarch64::x0); } Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, - bool is_instance) const OVERRIDE { + bool is_instance) const override { return is_instance ? 
helpers::LocationFrom(vixl::aarch64::x2) : helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return helpers::LocationFrom(vixl::aarch64::d0); } @@ -245,7 +260,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) @@ -253,7 +268,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -264,6 +279,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, vixl::aarch64::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch64::Register temp); void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* instr); @@ -303,17 +320,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. - void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch64::Register obj, - uint32_t offset, - vixl::aarch64::Label* fixup_label, - ReadBarrierOption read_barrier_option); // Generate a floating-point comparison. 
void GenerateFcmp(HInstruction* instruction); @@ -326,7 +332,12 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); - void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntDiv(HDiv* instruction); + void GenerateIntDivForConstDenom(HDiv *instruction); + void GenerateIntDivForPower2Denom(HDiv *instruction); + void GenerateIntRem(HRem* instruction); + void GenerateIntRemForConstDenom(HRem *instruction); + void GenerateIntRemForPower2Denom(HRem *instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); vixl::aarch64::MemOperand VecAddress( @@ -349,7 +360,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) @@ -357,7 +368,7 @@ class LocationsBuilderARM64 : public HGraphVisitor { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -382,11 +393,11 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { : ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {} protected: - void PrepareForEmitNativeCode() OVERRIDE; - void FinishEmitNativeCode() OVERRIDE; - Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE; - void FreeScratchLocation(Location loc) OVERRIDE; - void EmitMove(size_t index) OVERRIDE; + void PrepareForEmitNativeCode() override; + void FinishEmitNativeCode() override; + Location AllocateScratchLocationFor(Location::Kind kind) override; + void FreeScratchLocation(Location loc) override; + void EmitMove(size_t index) override; private: Arm64Assembler* GetAssembler() const; @@ -403,44 +414,43 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap { class CodeGeneratorARM64 : public CodeGenerator { public: CodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorARM64() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const; vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const; - void Bind(HBasicBlock* block) OVERRIDE; + void Bind(HBasicBlock* block) override; vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) { block = FirstNonEmptyBlock(block); return &(block_labels_[block->GetBlockId()]); } - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return kArm64WordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 
2 * kArm64WordSize // 16 bytes == 2 arm64 words for each spill : 1 * kArm64WordSize; // 8 bytes == 1 arm64 words for each spill } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { vixl::aarch64::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); return block_entry_label->GetLocation(); } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; } - const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + Arm64Assembler* GetAssembler() override { return &assembler_; } + const Arm64Assembler& GetAssembler() const override { return assembler_; } vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } // Emit a write barrier. @@ -452,12 +462,12 @@ class CodeGeneratorARM64 : public CodeGenerator { // Register allocation. - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // The number of registers that can be allocated. The register allocator may // decide to reserve and not use a few of them. @@ -469,37 +479,35 @@ class CodeGeneratorARM64 : public CodeGenerator { static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters; static constexpr int kNumberOfAllocatableRegisterPairs = 0; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - InstructionSet GetInstructionSet() const OVERRIDE { + InstructionSet GetInstructionSet() const override { return InstructionSet::kArm64; } - const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const; - void Initialize() OVERRIDE { + void Initialize() override { block_labels_.resize(GetGraph()->GetBlocks().size()); } // We want to use the STP and LDP instructions to spill and restore registers for slow paths. // These instructions can only encode offsets that are multiples of the register size accessed. 
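The 8-byte preference follows from the standard AArch64 STP/LDP signed-offset encoding, which scales a 7-bit signed immediate by the access size (8 bytes for X registers). A quick standalone check of what that admits (ISA background, not something introduced by this change):

#include <cstdint>

// An encodable STP/LDP (X registers, signed offset) spill offset must be a
// multiple of 8 and, once scaled, fit the signed 7-bit immediate: [-512, 504].
constexpr bool IsEncodableStpOffset(int32_t byte_offset) {
  return (byte_offset % 8 == 0) && (byte_offset / 8 >= -64) && (byte_offset / 8 <= 63);
}

constexpr int32_t AlignSlot(int32_t byte_offset) {
  return (byte_offset + 7) & ~7;  // Round up to the preferred 8-byte alignment.
}

static_assert(IsEncodableStpOffset(AlignSlot(20)), "aligned slot offsets stay encodable");
static_assert(!IsEncodableStpOffset(20), "unaligned offsets cannot be encoded");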
- uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; } + uint32_t GetPreferredSlotsAlignment() const override { return vixl::aarch64::kXRegSizeInBytes; } JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) { jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr)); return jump_tables_.back().get(); } - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Code generation helpers. void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant); - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; void Load(DataType::Type type, vixl::aarch64::CPURegister dst, @@ -521,7 +529,7 @@ class CodeGeneratorARM64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -529,39 +537,53 @@ class CodeGeneratorARM64 : public CodeGenerator { HInstruction* instruction, SlowPathCode* slow_path); - ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; } + ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL); } - // Add a new PC-relative method patch for an instruction and return the label + // Add a new boot image intrinsic patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image relocation patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new boot image method patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -575,7 +597,7 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative type patch for an instruction and return the label + // Add a new boot image type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -591,7 +613,7 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new PC-relative string patch for an instruction and return the label + // Add a new boot image string patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). @@ -607,9 +629,9 @@ class CodeGeneratorARM64 : public CodeGenerator { dex::StringIndex string_index, vixl::aarch64::Label* adrp_label = nullptr); - // Add a new baker read barrier patch and return the label to be bound - // before the CBNZ instruction. - vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + // Emit the CBNZ instruction for baker read barrier and record + // the associated patch for AOT or slow path for JIT. 
+ void EmitBakerReadBarrierCbnz(uint32_t custom_data); vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -627,10 +649,40 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Register out, vixl::aarch64::Register base); - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const override; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) override; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch64::Register obj, + uint32_t offset, + vixl::aarch64::Label* fixup_label, + ReadBarrierOption read_barrier_option); + // Generate MOV for the `old_value` in UnsafeCASObject and mark it with Baker read barrier. + void GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked, + vixl::aarch64::Register old_value); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch64::Register obj, + const vixl::aarch64::MemOperand& src, + bool needs_null_check, + bool use_load_acquire); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -642,58 +694,12 @@ class CodeGeneratorARM64 : public CodeGenerator { bool use_load_acquire); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. - void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + void GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction, Location ref, vixl::aarch64::Register obj, uint32_t data_offset, Location index, - vixl::aarch64::Register temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. 
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - vixl::aarch64::Register temp, - bool needs_null_check, - bool use_load_acquire); - - // Generate code checking whether the the reference field at the - // address `obj + field_offset`, held by object `obj`, needs to be - // marked, and if so, marking it and updating the field within `obj` - // with the marked value. - // - // This routine is used for the implementation of the - // UnsafeCASObject intrinsic with Baker read barriers. - // - // This method has a structure similar to - // GenerateReferenceLoadWithBakerReadBarrier, but note that argument - // `ref` is only as a temporary here, and thus its value should not - // be used afterwards. - void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - Location field_offset, - vixl::aarch64::Register temp, - bool needs_null_check, - bool use_load_acquire); - - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch64::Register obj, - uint32_t offset, - Location index, - size_t scale_factor, - bool needs_null_check, - bool use_load_acquire); // Emit code checking the status of the Marking Register, and // aborting the program if MR does not match the value stored in the @@ -759,12 +765,78 @@ class CodeGeneratorARM64 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - void GenerateNop() OVERRIDE; + void GenerateNop() override; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kAcquire, // Volatile field get. + kArray, // Array get with index in register. + kGcRoot, // GC root load. 
+ kLast = kGcRoot + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch64::lr.GetCode() && + reg != vixl::aarch64::ip0.GetCode() && + reg != vixl::aarch64::ip1.GetCode()) << reg; + } + + static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static inline uint32_t EncodeBakerReadBarrierAcquireData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK_NE(base_reg, holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kAcquire) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg); + } + + void CompileBakerReadBarrierThunk(Arm64Assembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; using StringToLiteralMap = ArenaSafeMap<StringReference, @@ -814,13 +886,13 @@ class CodeGeneratorARM64 : public CodeGenerator { InstructionCodeGeneratorARM64 instruction_visitor_; ParallelMoveResolverARM64 move_resolver_; Arm64Assembler assembler_; - const Arm64InstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -828,10 +900,12 @@ class CodeGeneratorARM64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; @@ -840,6 +914,20 @@ class CodeGeneratorARM64 : public CodeGenerator { // Patches for class literals in JIT compiled code. TypeToLiteralMap jit_class_patches_; + // Baker read barrier slow paths, mapping custom data (uint32_t) to label. + // Wrap the label to work around vixl::aarch64::Label being non-copyable + // and non-moveable and as such unusable in ArenaSafeMap<>. + struct LabelWrapper { + LabelWrapper(const LabelWrapper& src) + : label() { + DCHECK(!src.label.IsLinked() && !src.label.IsBound()); + } + LabelWrapper() = default; + vixl::aarch64::Label label; + }; + ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_; + + friend class linker::Arm64RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64); }; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 2452139d42..6469c6964a 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -27,9 +27,10 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" +#include "intrinsics.h" #include "intrinsics_arm_vixl.h" -#include "linker/arm/relative_patcher_thumb2.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" @@ -47,7 +48,6 @@ using namespace vixl32; // NOLINT(build/namespaces) using helpers::DRegisterFrom; using helpers::DWARFReg; -using helpers::HighDRegisterFrom; using helpers::HighRegisterFrom; using helpers::InputDRegisterAt; using helpers::InputOperandAt; @@ -85,18 +85,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle // offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions. -// For the Baker read barrier implementation using link-generated thunks we need to split +// For the Baker read barrier implementation using link-time generated thunks we need to split // the offset explicitly. constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; -// Flags controlling the use of link-time generated thunks for Baker read barriers. -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; -constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; - -// The reserved entrypoint register for link-time generated thunks. -const vixl32::Register kBakerCcEntrypointRegister = r4; - // Using a base helps identify when we hit Marking Register check breakpoints. 
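MaybeGenerateMarkingRegisterCheck(), called with __LINE__ at the call sites seen earlier in this diff, verifies that the value cached in the Marking Register agrees with the thread's actual GetIsGcMarking() flag and traps on mismatch; the base below plus a per-site code makes the resulting breakpoint identifiable in a debugger. A rough standalone model (not ART code; how the base and the site code are folded into the break immediate is an assumption of this sketch):

#include <cstdint>
#include <cstdlib>

constexpr int kBreakCodeBase = 0x10;  // kMarkingRegisterCheckBreakCodeBaseCode

void CheckMarkingRegister(int32_t marking_register, bool thread_is_gc_marking, int site_code) {
  bool mr_says_marking = (marking_register != 0);
  if (mr_says_marking != thread_is_gc_marking) {
    std::exit(kBreakCodeBase + (site_code & 0xF));  // Stands in for the BRK instruction.
  }
}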
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; @@ -111,26 +103,6 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; // Marker that code is yet to be, and must, be implemented. #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " -static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps, - HInstruction* instruction) { - DCHECK(temps->IsAvailable(ip)); - temps->Exclude(ip); - DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); - DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), - linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); - DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); - DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( - instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); -} - -static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { - ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); - __ bind(patch_label); - vixl32::Label placeholder_label; - __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. - __ bind(&placeholder_label); -} - static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) { return rt.IsLow() && rn.IsLow() && offset < 32u; } @@ -139,7 +111,7 @@ class EmitAdrCode { public: EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) : assembler_(assembler), rd_(rd), label_(label) { - ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes); + DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope. adr_location_ = assembler->GetCursorOffset(); assembler->adr(EncodingSize(Wide), rd, label); } @@ -165,6 +137,15 @@ class EmitAdrCode { int32_t adr_location_; }; +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + return caller_saves; +} + // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, // for each live D registers they treat two corresponding S registers as live ones. // @@ -338,7 +319,7 @@ void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSumm size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); size_t orig_offset = stack_offset; - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { @@ -353,7 +334,7 @@ void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSumm CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset); - uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); orig_offset = stack_offset; for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -376,7 +357,7 @@ void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationS size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); size_t orig_offset = stack_offset; - const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true); + const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); @@ -387,7 +368,7 @@ void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationS CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset); - uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false); + uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); while (fp_spills != 0u) { uint32_t begin = CTZ(fp_spills); uint32_t tmp = fp_spills + (1u << begin); @@ -402,7 +383,7 @@ class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -416,9 +397,9 @@ class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL); @@ -429,16 +410,16 @@ class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL); @@ -449,7 +430,7 
@@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeARMVIXL(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); @@ -470,7 +451,7 @@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; } private: // If not null, the block to branch to after the suspend check. @@ -487,7 +468,7 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); LocationSummary* locations = instruction_->GetLocations(); @@ -514,9 +495,9 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL); @@ -524,29 +505,39 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: - LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit) - : SlowPathCodeARMVIXL(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at) + : SlowPathCodeARMVIXL(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConventionARMVIXL calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); + arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source); + } + if (must_do_clinit) { + arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -558,18 +549,12 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; } + const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); }; @@ -578,7 +563,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit LoadStringSlowPathARMVIXL(HLoadString* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); @@ -600,7 +585,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; } + const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL); @@ -611,7 +596,7 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal) : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -655,9 +640,9 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; } + const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -670,7 +655,7 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* 
instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -683,7 +668,7 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL); @@ -693,7 +678,7 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -724,495 +709,12 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; } + const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL); }; -// Abstract base class for read barrier slow paths marking a reference -// `ref`. -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL { - protected: - ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint) - : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; } - - // Generate assembly code calling the read barrier marking runtime - // entry point (ReadBarrierMarkRegX). - void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) { - vixl32::Register ref_reg = RegisterFrom(ref_); - - // No need to save live registers; it's taken care of by the - // entrypoint. Also, there is no need to update the stack mask, - // as this runtime call will not trigger a garbage collection. - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - DCHECK(!ref_reg.Is(sp)); - DCHECK(!ref_reg.Is(lr)); - DCHECK(!ref_reg.Is(pc)); - // IP is used internally by the ReadBarrierMarkRegX entry point - // as a temporary, it cannot be the entry point's input/output. - DCHECK(!ref_reg.Is(ip)); - DCHECK(ref_reg.IsRegister()) << ref_reg; - // "Compact" slow path, saving two moves. 
- // - // Instead of using the standard runtime calling convention (input - // and output in R0): - // - // R0 <- ref - // R0 <- ReadBarrierMark(R0) - // ref <- R0 - // - // we just use rX (the register containing `ref`) as input and output - // of a dedicated entrypoint: - // - // rX <- ReadBarrierMarkRegX(rX) - // - if (entrypoint_.IsValid()) { - arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); - __ Blx(RegisterFrom(entrypoint_)); - } else { - // Entrypoint is not already loaded, load from the thread. - int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode()); - // This runtime call does not require a stack map. - arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); - } - } - - // The location (register) of the marked object reference. - const Location ref_; - - // The location of the entrypoint if already loaded. - const Location entrypoint_; - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL); -}; - -// Slow path marking an object reference `ref` during a read -// barrier. The field `obj.field` in the object `obj` holding this -// reference does not get updated by this slow path after marking. -// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction, - Location ref, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) { - DCHECK(kEmitCompilerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - DCHECK(locations->CanCall()); - DCHECK(ref_.IsRegister()) << ref_; - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg(); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - - __ Bind(GetEntryLabel()); - GenerateReadBarrierMarkRuntimeCall(codegen); - __ B(GetExitLabel()); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). The field `obj.field` in the object `obj` holding -// this reference does not get updated by this slow path after marking -// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL -// below for that). 
-// -// This means that after the execution of this slow path, `ref` will -// always be up-to-date, but `obj.field` may not; i.e., after the -// flip, `ref` will be a to-space reference, but `obj.field` will -// probably still be a from-space reference (unless it gets updated by -// another thread, or if another thread installed another object -// reference (different from `ref`) in `obj.field`). -// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp_(temp) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - vixl32::Register ref_reg = RegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK(instruction_->IsInstanceFieldGet() || - instruction_->IsStaticFieldGet() || - instruction_->IsArrayGet() || - instruction_->IsArraySet() || - instruction_->IsInstanceOf() || - instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || - (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking slow path: " - << instruction_->DebugName(); - // The read barrier instrumentation of object ArrayGet - // instructions does not support the HIntermediateAddress - // instruction. - DCHECK(!(instruction_->IsArrayGet() && - instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); - - // Temporary register `temp_`, used to store the lock word, must - // not be IP, as we may use it to emit the reference load (in the - // call to GenerateRawReferenceLoad below), and we need the lock - // word to still be in `temp_` after the reference load. - DCHECK(!temp_.Is(ip)); - - __ Bind(GetEntryLabel()); - - // When using MaybeGenerateReadBarrierSlow, the read barrier call is - // inserted after the original load. However, in fast path based - // Baker's read barriers, we need to perform the load of - // mirror::Object::monitor_ *before* the original reference load. - // This load-load ordering is required by the read barrier. - // The slow path (for Baker's algorithm) should look like: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. 
- // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // - // Note: the original implementation in ReadBarrier::Barrier is - // slightly more complex as it performs additional checks that we do - // not do here for performance reasons. - - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - GenerateReadBarrierMarkRuntimeCall(codegen); - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`. - vixl32::Register temp_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL); -}; - -// Slow path loading `obj`'s lock word, loading a reference from -// object `*(obj + offset + (index << scale_factor))` into `ref`, and -// marking `ref` if `obj` is gray according to the lock word (Baker -// read barrier). If needed, this slow path also atomically updates -// the field `obj.field` in the object `obj` holding this reference -// after marking (contrary to -// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never -// tries to update `obj.field`). -// -// This means that after the execution of this slow path, both `ref` -// and `obj.field` will be up-to-date; i.e., after the flip, both will -// hold the same to-space reference (unless another thread installed -// another object reference (different from `ref`) in `obj.field`). 
-// -// Argument `entrypoint` must be a register location holding the read -// barrier marking runtime entry point to be invoked or an empty -// location; in the latter case, the read barrier marking runtime -// entry point will be loaded by the slow path code itself. -class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - : public ReadBarrierMarkSlowPathBaseARMVIXL { - public: - LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check, - vixl32::Register temp1, - vixl32::Register temp2, - Location entrypoint = Location::NoLocation()) - : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint), - obj_(obj), - offset_(offset), - index_(index), - scale_factor_(scale_factor), - needs_null_check_(needs_null_check), - temp1_(temp1), - temp2_(temp2) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - } - - const char* GetDescription() const OVERRIDE { - return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL"; - } - - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - LocationSummary* locations = instruction_->GetLocations(); - vixl32::Register ref_reg = RegisterFrom(ref_); - DCHECK(locations->CanCall()); - DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg; - DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg()); - - // This slow path is only used by the UnsafeCASObject intrinsic at the moment. - DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) - << "Unexpected instruction in read barrier marking and field updating slow path: " - << instruction_->DebugName(); - DCHECK(instruction_->GetLocations()->Intrinsified()); - DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); - DCHECK_EQ(offset_, 0u); - DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1); - Location field_offset = index_; - DCHECK(field_offset.IsRegisterPair()) << field_offset; - - // Temporary register `temp1_`, used to store the lock word, must - // not be IP, as we may use it to emit the reference load (in the - // call to GenerateRawReferenceLoad below), and we need the lock - // word to still be in `temp1_` after the reference load. - DCHECK(!temp1_.Is(ip)); - - __ Bind(GetEntryLabel()); - - // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: - // - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. 
- // compareAndSwapObject(obj, field_offset, old_ref, ref); - // } - - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); - - // /* int32_t */ monitor = obj->monitor_ - uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); - arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); - if (needs_null_check_) { - codegen->MaybeRecordImplicitNullCheck(instruction_); - } - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `obj` is unchanged by this operation, but its value now depends - // on `temp`. - __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32)); - - // The actual reference load. - // A possible implicit null check has already been handled above. - arm_codegen->GenerateRawReferenceLoad( - instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false); - - // Mark the object `ref` when `obj` is gray. - // - // if (rb_state == ReadBarrier::GrayState()) - // ref = ReadBarrier::Mark(ref); - // - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1); - __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS. - - // Save the old value of the reference before marking it. - // Note that we cannot use IP to save the old reference, as IP is - // used internally by the ReadBarrierMarkRegX entry point, and we - // need the old reference after the call to that entry point. - DCHECK(!temp1_.Is(ip)); - __ Mov(temp1_, ref_reg); - - GenerateReadBarrierMarkRuntimeCall(codegen); - - // If the new reference is different from the old reference, - // update the field in the holder (`*(obj_ + field_offset)`). - // - // Note that this field could also hold a different object, if - // another thread had concurrently changed it. In that case, the - // LDREX/CMP/BNE sequence of instructions in the compare-and-set - // (CAS) operation below would abort the CAS, leaving the field - // as-is. - __ Cmp(temp1_, ref_reg); - __ B(eq, GetExitLabel()); - - // Update the the holder's field atomically. This may fail if - // mutator updates before us, but it's OK. This is achieved - // using a strong compare-and-set (CAS) operation with relaxed - // memory synchronization ordering, where the expected value is - // the old reference and the desired value is the new reference. - - UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler()); - // Convenience aliases. - vixl32::Register base = obj_; - // The UnsafeCASObject intrinsic uses a register pair as field - // offset ("long offset"), of which only the low part contains - // data. - vixl32::Register offset = LowRegisterFrom(field_offset); - vixl32::Register expected = temp1_; - vixl32::Register value = ref_reg; - vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory. - vixl32::Register tmp = temp2_; // Value in memory. 
- - __ Add(tmp_ptr, base, offset); - - if (kPoisonHeapReferences) { - arm_codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - arm_codegen->GetAssembler()->PoisonHeapReference(value); - } - } - - // do { - // tmp = [r_ptr] - expected; - // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - - vixl32::Label loop_head, comparison_failed, exit_loop; - __ Bind(&loop_head); - __ Ldrex(tmp, MemOperand(tmp_ptr)); - __ Cmp(tmp, expected); - __ B(ne, &comparison_failed, /* far_target */ false); - __ Strex(tmp, value, MemOperand(tmp_ptr)); - __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false); - __ B(&loop_head); - __ Bind(&comparison_failed); - __ Clrex(); - __ Bind(&exit_loop); - - if (kPoisonHeapReferences) { - arm_codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - arm_codegen->GetAssembler()->UnpoisonHeapReference(value); - } - } - - __ B(GetExitLabel()); - } - - private: - // The register containing the object holding the marked object reference field. - const vixl32::Register obj_; - // The offset, index and scale factor to access the reference in `obj_`. - uint32_t offset_; - Location index_; - ScaleFactor scale_factor_; - // Is a null check required? - bool needs_null_check_; - // A temporary register used to hold the lock word of `obj_`; and - // also to hold the original reference value, when the reference is - // marked. - const vixl32::Register temp1_; - // A temporary register used in the implementation of the CAS, to - // update the object's reference field. - const vixl32::Register temp2_; - - DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL); -}; - // Slow path generating a read barrier for a heap reference. 
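The removed field-updating slow path above ends with a hand-rolled LDREX/CMP/STREX/CLREX loop implementing the commented pseudocode "do { tmp = [r_ptr] - expected; } while (tmp == 0 && failure([r_ptr] <- r_new_value));". In portable terms that is a strong compare-and-set with relaxed ordering that simply gives up if another thread got there first. A rough sketch of that behavior (illustrative names, not ART's types):

#include <atomic>
#include <cassert>
#include <cstdint>

using HeapRef = uint32_t;  // stand-in for a compressed heap reference

// What the exclusive-monitor loop amounts to: a strong compare-and-set with relaxed
// ordering whose failure means another thread already replaced the old reference,
// in which case the field is left as-is (matching the comment in the removed code).
void UpdateFieldIfUnchanged(std::atomic<HeapRef>* field, HeapRef old_ref, HeapRef new_ref) {
  if (old_ref == new_ref) {
    return;  // marking did not move the reference, nothing to write back
  }
  HeapRef expected = old_ref;
  field->compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
}

int main() {
  std::atomic<HeapRef> field{0x1000u};
  UpdateFieldIfUnchanged(&field, /* old_ref= */ 0x1000u, /* new_ref= */ 0x2000u);
  assert(field.load() == 0x2000u);
  // A concurrent writer won the race: the CAS fails and the newer value survives.
  field.store(0x3000u);
  UpdateFieldIfUnchanged(&field, /* old_ref= */ 0x1000u, /* new_ref= */ 0x2000u);
  assert(field.load() == 0x3000u);
  return 0;
}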
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: @@ -1242,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); LocationSummary* locations = instruction_->GetLocations(); vixl32::Register reg_out = RegisterFrom(out_); @@ -1366,7 +868,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARMVIXL"; } @@ -1408,7 +910,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); vixl32::Register reg_out = RegisterFrom(out_); DCHECK(locations->CanCall()); @@ -1434,7 +936,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARMVIXL"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; } private: const Location out_; @@ -1517,6 +1019,10 @@ void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int r stream << vixl32::SRegister(reg); } +const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures(); +} + static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { uint32_t mask = 0; for (uint32_t i = regs.GetFirstSRegister().GetCode(); @@ -1531,26 +1037,26 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } // Restores the register from the stack. Returns the size taken on stack. 
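Replacing "return 0;" with UNREACHABLE() after TODO_VIXL32(FATAL) in the spill helpers drops a dummy return value that no caller could ever observe and documents that control does not continue past the fatal log. A tiny sketch of the pattern outside ART, with UNREACHABLE() modeled by the GCC/Clang builtin:

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

// The logging helper aborts, so the function ends with an unreachable marker instead
// of fabricating a return value.
static void FatalTodo(const char* what) {
  std::fprintf(stderr, "unimplemented: %s\n", what);
  std::abort();
}

static size_t SaveCoreRegisterSketch(size_t /* stack_index */, uint32_t /* reg_id */) {
  FatalTodo(__func__);
  __builtin_unreachable();  // stands in for ART's UNREACHABLE()
}

int main() {
  (void)&SaveCoreRegisterSketch;  // not called: it aborts by design
  return 0;
}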
size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); - return 0; + UNREACHABLE(); } static void GenerateDataProcInstruction(HInstruction::InstructionKind kind, @@ -2033,7 +1539,7 @@ static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* cod vixl32::Label done_label; vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - __ B(condition.second, final_label, /* far_target */ false); + __ B(condition.second, final_label, /* is_far_target= */ false); __ Mov(out, 1); if (done_label.IsReferenced()) { @@ -2334,7 +1840,6 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, } CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -2351,7 +1856,6 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -2360,11 +1864,14 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); // Give D30 and D31 as scratch register to VIXL. The register allocator only works on @@ -2420,8 +1927,100 @@ void CodeGeneratorARMVIXL::FixJumpTables() { void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { FixJumpTables(); + + // Emit JIT baker read barrier slow paths. + DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); + for (auto& entry : jit_baker_read_barrier_slow_paths_) { + uint32_t encoded_data = entry.first; + vixl::aarch32::Label* slow_path_entry = &entry.second.label; + __ Bind(slow_path_entry); + CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr); + } + GetAssembler()->FinalizeCode(); CodeGenerator::Finalize(allocator); + + // Verify Baker read barrier linker patches. 
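The debug-build verification that follows reads the generated code bytes back and checks the Thumb-2 instruction next to each Baker read barrier patch. Thumb-2 stores a 32-bit instruction as two little-endian 16-bit halfwords with the leading halfword first, which is why GetInsn32() is built as (GetInsn16(offset) << 16) + GetInsn16(offset + 2u). A self-contained sketch of that decoding and of the LDR (immediate, encoding T3) base-register check, reusing the masks from the code below; the sample encoding is illustrative:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Thumb-2: rebuild the 32-bit word as (insn16(ofs) << 16) | insn16(ofs + 2).
static uint32_t Insn16(const std::vector<uint8_t>& code, size_t ofs) {
  return static_cast<uint32_t>(code[ofs]) | (static_cast<uint32_t>(code[ofs + 1]) << 8);
}
static uint32_t Insn32(const std::vector<uint8_t>& code, size_t ofs) {
  return (Insn16(code, ofs) << 16) | Insn16(code, ofs + 2);
}

int main() {
  // ldr.w r0, [r5, #8] encodes as halfwords f8d5 0008, i.e. bytes d5 f8 08 00.
  std::vector<uint8_t> code = {0xd5, 0xf8, 0x08, 0x00};
  uint32_t insn = Insn32(code, 0);
  uint32_t base_reg = 5u;
  // Same shape as the wide kField check below: opcode and base register live in the
  // top halfword, the destination register in bits 15:12.
  assert((insn & 0xffff0000u) == (0xf8d00000u | (base_reg << 16)));
  assert(((insn >> 12) & 0xfu) == 0u);  // destination register r0
  return 0;
}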
+ if (kIsDebugBuild) { + ArrayRef<const uint8_t> code = allocator->GetMemory(); + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + DCHECK(info.label.IsBound()); + uint32_t literal_offset = info.label.GetLocation(); + DCHECK_ALIGNED(literal_offset, 2u); + + auto GetInsn16 = [&code](uint32_t offset) { + DCHECK_ALIGNED(offset, 2u); + return (static_cast<uint32_t>(code[offset + 0]) << 0) + + (static_cast<uint32_t>(code[offset + 1]) << 8); + }; + auto GetInsn32 = [=](uint32_t offset) { + return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0); + }; + + uint32_t encoded_data = info.custom_data; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + // Check that the next instruction matches the expected LDR. + switch (kind) { + case BakerReadBarrierKind::kField: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (immediate), encoding T3, with correct base_reg. + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16)); + } else { + DCHECK_GE(code.size() - literal_offset, 6u); + uint32_t next_insn = GetInsn16(literal_offset + 4u); + // LDR (immediate), encoding T1, with correct base_reg. + CheckValidReg(next_insn & 0x7u); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3)); + } + break; + } + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code.size() - literal_offset, 8u); + uint32_t next_insn = GetInsn32(literal_offset + 4u); + // LDR (register) with correct base_reg, S=1 and option=011 (LDR Wt, [Xn, Xm, LSL #2]). + CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16)); + CheckValidReg(next_insn & 0xf); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + if (width == BakerReadBarrierWidth::kWide) { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(literal_offset - 4u); + // LDR (immediate), encoding T3, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12)); + } else { + DCHECK_GE(literal_offset, 2u); + uint32_t prev_insn = GetInsn16(literal_offset - 2u); + // LDR (immediate), encoding T1, with correct root_reg. + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg); + } + break; + } + case BakerReadBarrierKind::kUnsafeCas: { + DCHECK_GE(literal_offset, 4u); + uint32_t prev_insn = GetInsn32(literal_offset - 4u); + // ADD (register), encoding T3, with correct root_reg. 
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8)); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + } + } } void CodeGeneratorARMVIXL::SetupBlockedRegisters() const { @@ -2494,6 +2093,8 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { } if (HasEmptyFrame()) { + // Ensure that the CFI opcode list is not empty. + GetAssembler()->cfi().Nop(); return; } @@ -2560,7 +2161,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); } - MaybeGenerateMarkingRegisterCheck(/* code */ 1); + MaybeGenerateMarkingRegisterCheck(/* code= */ 1); } void CodeGeneratorARMVIXL::GenerateFrameExit() { @@ -2669,7 +2270,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Typ case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + UNREACHABLE(); } return Location::NoLocation(); } @@ -2828,7 +2429,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -3007,7 +2608,7 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -3026,9 +2627,9 @@ void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeARMVIXL* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -3194,7 +2795,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } } - GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false); + GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false); codegen_->MoveLocation(out, src, type); if (output_overlaps_with_condition_inputs) { __ B(target); @@ -3536,7 +3137,7 @@ void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3); } void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -3567,7 +3168,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4); return; } @@ -3575,7 +3176,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { @@ -3594,14 +3195,14 @@ void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6); return; } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7); } void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -3679,7 +3280,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv DCHECK(!codegen_->IsLeafMethod()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8); } void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { @@ -3688,7 +3289,16 @@ void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9); +} + +void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10); } void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { @@ -4405,7 +4015,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOpera int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift); // TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed. 
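GenerateDivRemWithAnyConstant() asks CalculateMagicAndShiftForDivRem() for a multiplier/shift pair so that division by a compile-time constant can be emitted as a multiply-high plus a couple of fix-up instructions instead of a hardware divide. A self-checking sketch of the idea for the divisor 7, using the standard Hacker's Delight magic constant 0x92492493 with shift 2; this models the computation, not ART's helper, and assumes arithmetic right shifts of negative values:

#include <cassert>
#include <cstdint>

// Divide a 32-bit signed value by the constant 7 without a division instruction:
// take the high 32 bits of the product with the magic multiplier, then apply the
// fix-ups (add the dividend back because this magic is negative while the divisor is
// positive, arithmetic shift, and add 1 for negative dividends to round toward zero).
static int32_t DivBy7(int32_t n) {
  const int32_t magic = static_cast<int32_t>(0x92492493u);  // Hacker's Delight magic for 7
  const int shift = 2;
  int32_t q = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);  // multiply-high
  q += n;
  q >>= shift;                          // arithmetic shift assumed for negative q
  q += static_cast<uint32_t>(n) >> 31;  // +1 when n < 0
  return q;
}

int main() {
  for (int32_t n = -1000; n <= 1000; ++n) {
    assert(DivBy7(n) == n / 7);
  }
  return 0;
}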
__ Mov(temp1, static_cast<int32_t>(magic)); @@ -4697,6 +4307,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + vixl32::Register op1 = RegisterFrom(op1_loc); + vixl32::Register op2 = RegisterFrom(op2_loc); + vixl32::Register out = RegisterFrom(out_loc); + + __ Cmp(op1, op2); + + { + ExactAssemblyScope aas(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + + __ ite(is_min ? lt : gt); + __ mov(is_min ? lt : gt, out, op1); + __ mov(is_min ? ge : le, out, op2); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::Register op1_lo = LowRegisterFrom(op1_loc); + vixl32::Register op1_hi = HighRegisterFrom(op1_loc); + vixl32::Register op2_lo = LowRegisterFrom(op2_loc); + vixl32::Register op2_hi = HighRegisterFrom(op2_loc); + vixl32::Register out_lo = LowRegisterFrom(out_loc); + vixl32::Register out_hi = HighRegisterFrom(out_loc); + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + DCHECK(op1_lo.Is(out_lo)); + DCHECK(op1_hi.Is(out_hi)); + + // Compare op1 >= op2, or op1 < op2. + __ Cmp(out_lo, op2_lo); + __ Sbcs(temp, out_hi, op2_hi); + + // Now GE/LT condition code is correct for the long comparison. + { + vixl32::ConditionType cond = is_min ? 
ge : lt; + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 3 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ itt(cond); + __ mov(cond, out_lo, op2_lo); + __ mov(cond, out_hi, op2_hi); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. + return; + } + + vixl32::SRegister op1 = SRegisterFrom(op1_loc); + vixl32::SRegister op2 = SRegisterFrom(op2_loc); + vixl32::SRegister out = SRegisterFrom(out_loc); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + const vixl32::Register temp1 = temps.Acquire(); + vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0)); + vixl32::Label nan, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F32, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* is_far_target= */ false); + + // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). + __ Vmov(temp1, op1); + __ Vmov(temp2, op2); + if (is_min) { + __ Orr(temp1, temp1, temp2); + } else { + __ And(temp1, temp1, temp2); + } + __ Vmov(out, temp1); + __ B(final_label); + + // handle NaN input. + __ Bind(&nan); + __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. + __ Vmov(out, temp1); + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) { + LocationSummary* locations = minmax->GetLocations(); + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + + // Optimization: don't generate any code if inputs are the same. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. + return; + } + + vixl32::DRegister op1 = DRegisterFrom(op1_loc); + vixl32::DRegister op2 = DRegisterFrom(op2_loc); + vixl32::DRegister out = DRegisterFrom(out_loc); + vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done); + + DCHECK(op1.Is(out)); + + __ Vcmp(op1, op2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling. + + // op1 <> op2 + vixl32::ConditionType cond = is_min ? gt : lt; + { + ExactAssemblyScope it_scope(GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cond); + __ vmov(cond, F64, out, op2); + } + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* is_far_target= */ false); + + // handle op1 == op2, max(+0.0,-0.0). + if (!is_min) { + __ Vand(F64, out, op1, op2); + __ B(final_label); + } + + // handle op1 == op2, min(+0.0,-0.0), NaN input. 
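In GenerateMinMaxFloat()/GenerateMinMaxDouble() above, the equal-operands path ORs the raw bit patterns for min and ANDs them for max: since -0.0 differs from +0.0 only in the sign bit, OR yields -0.0 (the required min of the two zeros) and AND yields +0.0 (the required max). A quick standalone check of that identity, for illustration only:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t Bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
static float FromBits(uint32_t u) { float f; std::memcpy(&f, &u, sizeof(f)); return f; }

int main() {
  const float pz = 0.0f;   // +0.0f: sign bit clear
  const float nz = -0.0f;  // -0.0f: sign bit set
  // OR keeps the sign bit, giving -0.0f, the correct min(+0.0f, -0.0f).
  assert(std::signbit(FromBits(Bits(pz) | Bits(nz))));
  // AND clears the sign bit, giving +0.0f, the correct max(+0.0f, -0.0f).
  assert(!std::signbit(FromBits(Bits(pz) & Bits(nz))));
  return 0;
}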
+ __ Bind(&handle_nan_eq); + __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + +void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kInt64: + GenerateMinMaxLong(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + GenerateMinMaxFloat(minmax, is_min); + break; + case DataType::Type::kFloat64: + GenerateMinMaxDouble(minmax, is_min); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderARMVIXL::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderARMVIXL::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + vixl32::Register in_reg = RegisterFrom(locations->InAt(0)); + vixl32::Register out_reg = RegisterFrom(locations->Out()); + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg, 31); + __ Add(out_reg, in_reg, mask); + __ Eor(out_reg, out_reg, mask); + break; + } + case DataType::Type::kInt64: { + Location in = locations->InAt(0); + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + Location output = locations->Out(); + vixl32::Register out_reg_lo = LowRegisterFrom(output); + vixl32::Register out_reg_hi = HighRegisterFrom(output); + DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; + vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); + __ Asr(mask, in_reg_hi, 31); + __ Adds(out_reg_lo, in_reg_lo, mask); + __ Adc(out_reg_hi, in_reg_hi, mask); + __ Eor(out_reg_lo, out_reg_lo, mask); + __ Eor(out_reg_hi, out_reg_hi, mask); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0)); + break; + default: + LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); + } +} void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); @@ -4813,7 +4716,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ 
And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); __ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord)); - __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false); + __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false); // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right). // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right). @@ -5069,8 +4972,11 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { __ Lsrs(o_h, high, 1); __ Rrx(o_l, low); } + } else if (shift_value == 0) { + __ Mov(o_l, low); + __ Mov(o_h, high); } else { - DCHECK(2 <= shift_value && shift_value < 32) << shift_value; + DCHECK(0 < shift_value && shift_value < 32) << shift_value; if (op->IsShl()) { __ Lsl(o_h, high, shift_value); __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value)); @@ -5121,35 +5027,15 @@ void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) { void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); - if (instruction->IsStringAlloc()) { - locations->AddTemp(LocationFrom(kMethodRegister)); - } else { - InvokeRuntimeCallingConventionARMVIXL calling_convention; - locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - } + InvokeRuntimeCallingConventionARMVIXL calling_convention; + locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetOut(LocationFrom(r0)); } void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0)); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); - GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); - GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); - // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - __ blx(lr); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10); + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11); } void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { @@ -5162,14 +5048,12 @@ void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. 
- QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12); } void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { @@ -5291,8 +5175,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { } case DataType::Type::kInt64: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. - __ B(lt, &less, /* far_target */ false); - __ B(gt, &greater, /* far_target */ false); + __ B(lt, &less, /* is_far_target= */ false); + __ B(gt, &greater, /* is_far_target= */ false); // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags. __ Mov(out, 0); __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare. @@ -5313,8 +5197,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, final_label, /* far_target */ false); - __ B(less_cond, &less, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); + __ B(less_cond, &less, /* is_far_target= */ false); __ Bind(&greater); __ Mov(out, 1); @@ -5610,18 +5494,10 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation()) { - // If link-time thunks for the Baker read barrier are enabled, for AOT - // loads we need a temporary only if the offset is too big. - if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { - locations->AddTemp(Location::RequiresRegister()); - } - // And we always need the reserved entrypoint register. - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } else { + // We need a temporary register for the read barrier load in + // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier() + // only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { locations->AddTemp(Location::RequiresRegister()); } } @@ -5733,11 +5609,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call. 
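      // `maybe_temp` is an invalid Location when HandleFieldGet allocated no
      // temp above, i.e. when the field offset is below
      // kReferenceLoadMinFarOffset and the load can use the holder register
      // directly.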
codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -6036,30 +5912,20 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation() && - instruction->GetIndex()->IsConstant()) { + if (instruction->GetIndex()->IsConstant()) { // Array loads with constant index are treated as field loads. - // If link-time thunks for the Baker read barrier are enabled, for AOT - // constant index loads we need a temporary only if the offset is too big. + // We need a temporary register for the read barrier load in + // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier() + // only if the offset is too big. uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); offset += index << DataType::SizeShift(DataType::Type::kReference); if (offset >= kReferenceLoadMinFarOffset) { locations->AddTemp(Location::RequiresRegister()); } - // And we always need the reserved entrypoint register. - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && - !Runtime::Current()->UseJitCompilation() && - !instruction->GetIndex()->IsConstant()) { - // We need a non-scratch temporary for the array data pointer. - locations->AddTemp(Location::RequiresRegister()); - // And we always need the reserved entrypoint register. - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); } else { + // We need a non-scratch temporary for the array data pointer in + // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(). locations->AddTemp(Location::RequiresRegister()); } } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { @@ -6103,7 +5969,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load, /* far_target */ false); + __ B(cs, &uncompressed_load, /* is_far_target= */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out_loc), obj, @@ -6145,7 +6011,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
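  // Illustration: the LSRS above shifts the compression flag (bit 0 of the
  // count field) into the carry flag while halving `length` to the character
  // count, so the CS branch below selects the uncompressed (16-bit char) path.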
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ B(cs, &uncompressed_load, /* far_target */ false); + __ B(cs, &uncompressed_load, /* is_far_target= */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); __ B(final_label); __ Bind(&uncompressed_load); @@ -6172,22 +6038,24 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. + Location maybe_temp = + (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location(); data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, out_loc, obj, data_offset, - locations->GetTemp(0), - /* needs_null_check */ false); + maybe_temp, + /* needs_null_check= */ false); } else { + Location temp = locations->GetTemp(0); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false); } } else { vixl32::Register out = OutputRegister(instruction); @@ -6462,7 +6330,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { if (instruction->StaticTypeOfArrayIsObjectArray()) { vixl32::Label do_put; - __ B(eq, &do_put, /* far_target */ false); + __ B(eq, &do_put, /* is_far_target= */ false); // If heap poisoning is enabled, the `temp1` reference has // not been unpoisoned yet; unpoison it now. GetAssembler()->MaybeUnpoisonHeapReference(temp1); @@ -6706,9 +6574,25 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, if (can_be_null) { __ CompareAndBranchIfZero(value, &is_null); } + // Load the address of the card table into `card`. GetAssembler()->LoadFromOffset( kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value()); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the STRB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). 
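  // Sketch of the resulting store (same facts as the comment above, written as
  // pseudo-code):
  //   card_address = card + (object >> kCardShift)
  //   *card_address = (uint8_t) card   // low byte of `card` equals kCardDirty
  // so the single STRB below both indexes the card table and writes kCardDirty.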
__ Strb(card, MemOperand(card, temp)); if (can_be_null) { __ Bind(&is_null); @@ -6748,7 +6632,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi return; } GenerateSuspendCheck(instruction, nullptr); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13); } void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, @@ -7040,14 +6924,14 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -7083,23 +6967,11 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); - // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() - // that the the kPrimNot result register is the same as the first argument register. - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. 
} } - if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { - if (load_kind == HLoadClass::LoadKind::kBssEntry || - (load_kind == HLoadClass::LoadKind::kReferrersClass && - !Runtime::Current()->UseJitCompilation())) { - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } - } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -7108,7 +6980,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -7127,11 +6999,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(!cls->MustGenerateClinitCheck()); // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ vixl32::Register current_method = InputRegisterAt(cls, 0); - GenerateGcRootFieldLoad(cls, - out_loc, - current_method, - ArtMethod::DeclaringClassOffset().Int32Value(), - read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { @@ -7142,42 +7014,35 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ codegen_->EmitMovwMovtPlaceholder(labels, out); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); codegen_->EmitMovwMovtPlaceholder(labels, out); - __ Ldr(out, MemOperand(out, /* offset */ 0)); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Sub(out, out, Operand(masked_hash)); - } + __ Ldr(out, MemOperand(out, /* offset= */ 0)); break; } case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: { __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); // /* GcRoot<mirror::Class> */ out = *out - GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); + codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7189,8 +7054,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); LoadClassSlowPathARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -7200,10 +7064,30 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ } else { __ Bind(slow_path->GetExitLabel()); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15); } } +void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + Location location = LocationFrom(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); @@ -7211,15 +7095,14 @@ void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to 
save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. LoadClassSlowPathARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), - check, - check->GetDexPc(), - /* do_clinit */ true); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } @@ -7243,18 +7126,79 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* check, + vixl32::Register temp, + vixl32::FlagsUpdate flags_update) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs + // the Z flag for BNE. This is indicated by the `flags_update` parameter. + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } else { + // /* uint32_t */ temp = temp->status_ + __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value())); + if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) { + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root); + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else if (IsUint<16>(path_to_root)) { + if (temp.IsLow()) { + // Note: Optimized for size but contains one more dependent instruction than necessary. + // MOVW+SUB(register) would be 8 bytes unless we find a low-reg temporary but the + // macro assembler would use the high reg IP for the constant by default. + // Compare the bitstring bits using SUB. + __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2 + __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3 + // Shift out bits that do not contribute to the comparison. + __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + } else { + // Extract the bitstring bits. + __ Ubfx(temp, temp, 0, mask_bits); + // Check if the bitstring bits are equal to `path_to_root`. + if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root); + } else { + __ Sub(temp, temp, path_to_root); + } + } + } else { + // Shift out bits that do not contribute to the comparison. + __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits)); + // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`. 
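      // Worked example with made-up numbers: for mask_bits == 24 and
      // path_to_root == 0x123456, the LSL above discards the 8 status bits
      // above the bitstring, and the comparison/subtraction against 0x12345600
      // yields zero (EQ) exactly when the low 24 bits of the status word equal
      // path_to_root.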
+ if (flags_update == SetFlags) { + __ Cmp(temp, path_to_root << (32u - mask_bits)); + } else { + __ Sub(temp, temp, path_to_root << (32u - mask_bits)); + } + } + } +} + HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -7272,15 +7216,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need, including temps. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); - // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() - // that the the kPrimNot result register is the same as the first argument register. - locations->SetCustomSlowPathCallerSaves(caller_saves); - if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. 
} @@ -7304,33 +7240,32 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); codegen_->EmitMovwMovtPlaceholder(labels, out); - __ Ldr(out, MemOperand(out, /* offset */ 0)); + __ Ldr(out, MemOperand(out, /* offset= */ 0)); return; } case HLoadString::LoadKind::kBssEntry: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; } case HLoadString::LoadKind::kJitTableAddress: { @@ -7338,7 +7273,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetStringIndex(), load->GetString())); // /* GcRoot<mirror::String> */ out = *out - GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->GenerateGcRootFieldLoad( + load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption); return; } default: @@ -7351,7 +7287,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17); } static int32_t GetExceptionTlsOffset() { @@ -7434,6 +7370,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7442,14 +7380,17 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The "out" register is used as a temporary, so it overlaps with the inputs. // Note that TypeCheckSlowPathARM uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); - } } void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -7457,7 +7398,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? vixl32::Register() + : InputRegisterAt(instruction, 1); Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(instruction); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7476,7 +7419,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) if (instruction->MustDoNullCheck()) { DCHECK(!out.Is(obj)); __ Mov(out, 0); - __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false); } switch (type_check_kind) { @@ -7508,7 +7451,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ it(eq); __ mov(eq, out, 1); } else { - __ B(ne, final_label, /* far_target */ false); + __ B(ne, final_label, /* is_far_target= */ false); __ Mov(out, 1); } @@ -7536,9 +7479,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, read_barrier_option); // If `out` is null, we use it for the result, and jump to the final label. - __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false); __ Cmp(out, cls); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); __ Mov(out, 1); break; } @@ -7557,7 +7500,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) vixl32::Label loop, success; __ Bind(&loop); __ Cmp(out, cls); - __ B(eq, &success, /* far_target */ false); + __ B(eq, &success, /* is_far_target= */ false); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7567,7 +7510,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // This is essentially a null check, but it sets the condition flags to the // proper value for the code that follows the loop, i.e. not `eq`. 
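  // Concretely, CMP out, #1 leaves HS (no borrow) while `out` is still a
  // non-null reference, so the loop below keeps walking the superclass chain,
  // and leaves LO/NE once `out` is null, so the code after the loop does not
  // treat the null terminator as a match.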
__ Cmp(out, 1); - __ B(hs, &loop, /* far_target */ false); + __ B(hs, &loop, /* is_far_target= */ false); // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, // we check that the output is in a low register, so that a 16-bit MOV @@ -7612,7 +7555,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Do an exact check. vixl32::Label exact_check; __ Cmp(out, cls); - __ B(eq, &exact_check, /* far_target */ false); + __ B(eq, &exact_check, /* is_far_target= */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7621,7 +7564,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, read_barrier_option); // If `out` is null, we use it for the result, and jump to the final label. - __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ Cmp(out, 0); @@ -7643,7 +7586,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ it(eq); __ mov(eq, out, 1); } else { - __ B(ne, final_label, /* far_target */ false); + __ B(ne, final_label, /* is_far_target= */ false); __ Bind(&exact_check); __ Mov(out, 1); } @@ -7663,7 +7606,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -7692,11 +7635,31 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out, DontCare); + // If `out` is a low reg and we would have another low reg temp, we could + // optimize this as RSBS+ADC, see GenerateConditionWithZero(). + // + // Also, in some cases when `out` is a low reg and we're loading a constant to IP + // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size + // would be the same and we would have fewer direct data dependencies. 
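      // Sketch of the CLZ+LSR materialization chosen below:
      //   CLZ out, out       // 32 iff `out` is zero, otherwise 0..31
      //   LSR out, out, #5   // 32 >> 5 == 1; every smaller value becomes 0
      // turning "bitstring bits == path_to_root" into a 0/1 result without an
      // IT block.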
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR + break; + } } if (done.IsReferenced()) { @@ -7714,7 +7677,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7723,7 +7692,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); vixl32::Register obj = InputRegisterAt(instruction, 0); - vixl32::Register cls = InputRegisterAt(instruction, 1); + vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? vixl32::Register() + : InputRegisterAt(instruction, 1); Location temp_loc = locations->GetTemp(0); vixl32::Register temp = RegisterFrom(temp_loc); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -7749,7 +7720,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false); } switch (type_check_kind) { @@ -7796,7 +7767,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Otherwise, compare the classes. __ Cmp(temp, cls); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); break; } @@ -7813,7 +7784,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7841,7 +7812,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ @@ -7905,7 +7876,21 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { __ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2); // Compare the classes and continue the loop if they do not match. 
__ Cmp(cls, RegisterFrom(maybe_temp3_loc)); - __ B(ne, &start_loop, /* far_target */ false); + __ B(ne, &start_loop, /* is_far_target= */ false); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags); + __ B(ne, type_check_slow_path->GetEntryLabel()); break; } } @@ -7932,7 +7917,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18); } void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { @@ -8287,7 +8272,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -8322,7 +8307,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false); + instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -8337,7 +8322,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( +void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad( HInstruction* instruction, Location root, vixl32::Register obj, @@ -8349,81 +8334,52 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // We use link-time generated thunks for the slow path. That thunk - // checks the reference and jumps to the entrypoint if needed. - // - // lr = &return_address; - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
- // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto gc_root_thunk<root_reg>(lr) - // } - // return_address: - UseScratchRegisterScope temps(GetVIXLAssembler()); - ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData( - root_reg.GetCode(), narrow); - vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); - - vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - // Currently the offset is always within range. If that changes, - // we shall have to split the load the same way as for fields. - DCHECK_LT(offset, kReferenceLoadMinFarOffset); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); - EmitPlaceholderBne(codegen_, bne_label); - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET - : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); - } else { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in - // the Marking Register) to decide whether we need to enter - // the slow path to mark the GC root. - // - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will - // be loaded by the slow path code. - SlowPathCodeARMVIXL* slow_path = - new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); - codegen_->AddSlowPath(slow_path); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in + // the Marking Register) to decide whether we need to enter + // the slow path to mark the GC root. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
+ // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); - } + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + bool narrow = CanEmitNarrowLdr(root_reg, obj, offset); + uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow); + + size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u); + size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset)); + EmitBakerReadBarrierBne(custom_data); + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET); } else { // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset __ Add(root_reg, obj, offset); // /* mirror::Object* */ root = root->Read() - codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + GenerateReadBarrierForRootSlow(instruction, root, root); } } else { // Plain GC root load with no read barrier. @@ -8432,112 +8388,129 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } - codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18); + MaybeGenerateMarkingRegisterCheck(/* code= */ 19); } -void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { +void CodeGeneratorARMVIXL::GenerateUnsafeCasOldValueAddWithBakerReadBarrier( + vixl::aarch32::Register old_value, + vixl::aarch32::Register adjusted_old_value, + vixl::aarch32::Register expected) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields) { - if (!Runtime::Current()->UseJitCompilation()) { - locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); - } - } + + // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with an ADD instead of LDR. 
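  // The flag-preserving wide ADD below recomputes the reference (old_value =
  // adjusted_old_value + expected) in the slot where the GC-root variant has
  // its LDR, so the same return-address-based thunk protocol can mark
  // `old_value` when the GC is marking.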
+ uint32_t custom_data = EncodeBakerReadBarrierUnsafeCasData(old_value.GetCode()); + + size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u); + size_t wide_instructions = /* ADR+CMP+ADD+BNE */ 4u - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ add(EncodingSize(Wide), old_value, adjusted_old_value, Operand(expected)); // Preserves flags. + EmitBakerReadBarrierBne(custom_data); + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET); } void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, - uint32_t offset, - Location temp, + const vixl32::MemOperand& src, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - if (kBakerReadBarrierLinkTimeThunksEnableForFields && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto field_thunk<holder_reg, base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. - // HeapReference<mirror::Object> reference = *(obj+offset); - // gray_return_address: - - DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); - vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); - vixl32::Register base = obj; - if (offset >= kReferenceLoadMinFarOffset) { - base = RegisterFrom(temp); - DCHECK(!base.Is(kBakerCcEntrypointRegister)); - static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); - __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); - offset &= (kReferenceLoadMinFarOffset - 1u); - // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large - // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely - // increase the overall code size when taking the generated thunks into account. 
- DCHECK(!narrow); - } - UseScratchRegisterScope temps(GetVIXLAssembler()); - ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( - base.GetCode(), obj.GetCode(), narrow); - vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the + // Marking Register) to decide whether we need to enter the slow + // path to mark the reference. Then, in the slow path, check the + // gray bit in the lock word of the reference's holder (`obj`) to + // decide whether to mark `ref` or not. + // + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. + // + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = *(obj+offset); + // gray_return_address: - { - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // Note: We need a specific width for the unpoisoning NEG. - if (kPoisonHeapReferences) { - if (narrow) { - // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). - __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); - } else { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); - } + DCHECK(src.GetAddrMode() == vixl32::Offset); + DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate()); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + uint32_t custom_data = + EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow); + + { + size_t narrow_instructions = + /* CMP */ (mr.IsLow() ? 1u : 0u) + + /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u); + size_t wide_instructions = + /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitBakerReadBarrierBne(custom_data); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? 
Narrow : Wide), ref_reg, src); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Note: We need a specific width for the unpoisoning NEG. + if (kPoisonHeapReferences) { + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); } - MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip)); - return; + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); } - - // /* HeapReference<Object> */ ref = *(obj + offset) - Location no_index = Location::NoLocation(); - ScaleFactor no_scale_factor = TIMES_1; - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check); + MaybeGenerateMarkingRegisterCheck(/* code= */ 20, /* temp_loc= */ LocationFrom(ip)); } -void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, +void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = RegisterFrom(temp); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + GenerateFieldLoadWithBakerReadBarrier( + instruction, ref, obj, MemOperand(base, offset), needs_null_check); +} + +void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref, + vixl32::Register obj, uint32_t data_offset, Location index, Location temp, @@ -8550,229 +8523,60 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); ScaleFactor scale_factor = TIMES_4; - if (kBakerReadBarrierLinkTimeThunksEnableForArrays && - !Runtime::Current()->UseJitCompilation()) { - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to mark the reference. Then, in the slow path, check the - // gray bit in the lock word of the reference's holder (`obj`) to - // decide whether to mark `ref` or not. - // - // We use link-time generated thunks for the slow path. That thunk checks - // the holder and jumps to the entrypoint if needed. If the holder is not - // gray, it creates a fake dependency and returns to the LDR instruction. - // - // lr = &gray_return_address; - // if (mr) { // Thread::Current()->GetIsGcMarking() - // goto array_thunk<base_reg>(lr) - // } - // not_gray_return_address: - // // Original reference load. If the offset is too large to fit - // // into LDR, we use an adjusted base register here. 
- // HeapReference<mirror::Object> reference = data[index]; - // gray_return_address: - - DCHECK(index.IsValid()); - vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32); - vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); - vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer. - DCHECK(!data_reg.Is(kBakerCcEntrypointRegister)); - - UseScratchRegisterScope temps(GetVIXLAssembler()); - ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); - uint32_t custom_data = - linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); - vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); - - __ Add(data_reg, obj, Operand(data_offset)); - { - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - // Note: We need a Wide NEG for the unpoisoning. - if (kPoisonHeapReferences) { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); - } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); - } - MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip)); - return; - } - - // /* HeapReference<Object> */ ref = - // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); -} - -void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the // Marking Register) to decide whether we need to enter the slow // path to mark the reference. Then, in the slow path, check the // gray bit in the lock word of the reference's holder (`obj`) to // decide whether to mark `ref` or not. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. - // } - - vixl32::Register temp_reg = RegisterFrom(temp); - - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will be loaded by the slow path code. 
- SlowPathCodeARMVIXL* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg); - AddSlowPath(slow_path); - - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ 21); -} - -void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl32::Register obj, - Location field_offset, - Location temp, - bool needs_null_check, - vixl32::Register temp2) { - DCHECK(kEmitCompilerReadBarrier); - DCHECK(kUseBakerReadBarrier); - - // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the - // Marking Register) to decide whether we need to enter the slow - // path to update the reference field within `obj`. Then, in the - // slow path, check the gray bit in the lock word of the reference's - // holder (`obj`) to decide whether to mark `ref` and update the - // field or not. + // We use shared thunks for the slow path; shared within the method + // for JIT, across methods for AOT. That thunk checks the holder + // and jumps to the entrypoint if needed. If the holder is not gray, + // it creates a fake dependency and returns to the LDR instruction. // - // if (mr) { // Thread::Current()->GetIsGcMarking() - // // Slow path. - // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // old_ref = ref; - // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. - // compareAndSwapObject(obj, field_offset, old_ref, ref); + // lr = &gray_return_address; + // if (mr) { // Thread::Current()->GetIsGcMarking() + // goto array_thunk<base_reg>(lr) // } - // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // HeapReference<mirror::Object> reference = data[index]; + // gray_return_address: - vixl32::Register temp_reg = RegisterFrom(temp); + DCHECK(index.IsValid()); + vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32); + vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer. - // Slow path updating the object reference at address `obj + field_offset` - // when the GC is marking. The entrypoint will be loaded by the slow path code. 
- SlowPathCodeARMVIXL* slow_path = - new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - /* offset */ 0u, - /* index */ field_offset, - /* scale_factor */ ScaleFactor::TIMES_1, - needs_null_check, - temp_reg, - temp2); - AddSlowPath(slow_path); + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode()); - __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel()); - // Fast path: the GC is not marking: nothing to do (the field is - // up-to-date, and we don't need to load the reference). - __ Bind(slow_path->GetExitLabel()); - MaybeGenerateMarkingRegisterCheck(/* code */ 22); -} - -void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check) { - DataType::Type type = DataType::Type::kReference; - vixl32::Register ref_reg = RegisterFrom(ref, type); - - // If needed, vixl::EmissionCheckScope guards are used to ensure - // that no pools are emitted between the load (macro) instruction - // and MaybeRecordImplicitNullCheck. - - if (index.IsValid()) { - // Load types involving an "index": ArrayGet, - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics. - // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor)) - if (index.IsConstant()) { - size_t computed_offset = - (Int32ConstantFrom(index) << scale_factor) + offset; - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } else { - // Handle the special case of the - // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject - // intrinsics, which use a register pair as index ("long - // offset"), of which only the low part contains data. - vixl32::Register index_reg = index.IsRegisterPair() - ? LowRegisterFrom(index) - : RegisterFrom(index); - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor)); - { - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - } - } - } else { - // /* HeapReference<mirror::Object> */ ref = *(obj + offset) - vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); + __ Add(data_reg, obj, Operand(data_offset)); + { + size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u); + size_t wide_instructions = + /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 
5u : 4u) - narrow_instructions; + size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes + + narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes; + ExactAssemblyScope guard(GetVIXLAssembler(), exact_size); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitBakerReadBarrierBne(custom_data); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. + if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); } + __ bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); } - - // Object* ref = ref_addr->AsMirrorPtr() - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip)); } void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { @@ -8855,7 +8659,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruct // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -8905,9 +8709,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( EmitMovwMovtPlaceholder(labels, temp_reg); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset); + vixl32::Register temp_reg = RegisterFrom(temp); + EmitMovwMovtPlaceholder(labels, temp_reg); + GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* labels = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -8916,6 +8725,9 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. 
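(Aside between the two MethodLoadKind hunks: the new kBootImageRelRo case above emits the usual MOVW/MOVT/ADD-pc placeholder to materialize the address of a 32-bit entry in the oat file's .data.bimg.rel.ro section, then a single LDR to read the value stored there. A rough C++ model of the runtime effect, not part of the patch and using illustrative names only:)

    // Editor's sketch, not part of the patch. `relro_entry_addr` stands for the address
    // produced by the MOVW/MOVT/ADD pc placeholder; the 32-bit value it points at is
    // expected to be relocated to a boot-image address when the oat file is loaded.
    #include <cstdint>
    inline const void* LoadBootImageRelRoEntry(uintptr_t relro_entry_addr) {
      // Corresponds to `LDR temp_reg, [temp_reg, #0]` in the generated code.
      const uint32_t boot_image_address = *reinterpret_cast<const uint32_t*>(relro_entry_addr);
      return reinterpret_cast<const void*>(boot_image_address);
    }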
@@ -9005,6 +8817,18 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data) { + return NewPcRelativePatch(/* dex_file= */ nullptr, intrinsic_data, &boot_image_intrinsic_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch( + uint32_t boot_image_offset) { + return NewPcRelativePatch(/* dex_file= */ nullptr, + boot_image_offset, + &boot_image_method_patches_); +} + CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { return NewPcRelativePatch( @@ -9043,13 +8867,24 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } -vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { - baker_read_barrier_patches_.emplace_back(custom_data); - return &baker_read_barrier_patches_.back().label; +void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) { + DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. + if (Runtime::Current()->UseJitCompilation()) { + auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); + vixl::aarch32::Label* slow_path_entry = &it->second.label; + __ b(ne, EncodingSize(Wide), slow_path_entry); + } else { + baker_read_barrier_patches_.emplace_back(custom_data); + vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label; + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); + } } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); + return DeduplicateUint32Literal(address, &uint32_literals_); } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( @@ -9060,7 +8895,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } @@ -9071,10 +8906,50 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFil return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), [this]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); } +void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg, + uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + NewBootImageIntrinsicPatch(boot_image_reference); + EmitMovwMovtPlaceholder(labels, reg); + } else if (GetCompilerOptions().GetCompilePic()) { + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + NewBootImageRelRoPatch(boot_image_reference); + EmitMovwMovtPlaceholder(labels, reg); + __ Ldr(reg, MemOperand(reg, /* offset= */ 0)); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + uintptr_t address = + 
reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference); + __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address))); + } +} + +void CodeGeneratorARMVIXL::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + vixl32::Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_method.dex_file, type_idx); + EmitMovwMovtPlaceholder(labels, argument); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, @@ -9095,6 +8970,15 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -9104,6 +8988,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_intrinsic_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { @@ -9113,12 +8998,14 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -9133,13 +9020,52 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l DCHECK_EQ(size, linker_patches->size()); } +bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const { + return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || + patch.GetType() == linker::LinkerPatch::Type::kCallRelative; +} + +void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) { + arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator()); + switch (patch.GetType()) { + case linker::LinkerPatch::Type::kCallRelative: + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + assembler.LoadFromOffset( + arm::kLoadWord, + vixl32::pc, + vixl32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + assembler.GetVIXLAssembler()->Bkpt(0); + if (GetCompilerOptions().GenerateAnyDebugInfo()) { + *debug_name = "MethodCallThunk"; + } + break; + case linker::LinkerPatch::Type::kBakerReadBarrierBranch: + DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); + CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); + break; + default: + LOG(FATAL) << "Unexpected patch type " << patch.GetType(); + UNREACHABLE(); + } + + // Ensure we emit the literal pool if any. 
+ assembler.FinalizeCode(); + code->resize(assembler.CodeSize()); + MemoryRegion code_region(code->data(), code->size()); + assembler.FinalizeInstructions(code_region); +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( uint32_t value, Uint32ToLiteralMap* map) { return map->GetOrCreate( value, [this, value]() { - return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ value); + return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value); }); } @@ -9366,9 +9292,9 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( CodeBufferCheckScope::kMaximumSize); // TODO(VIXL): Think about using mov instead of movw. __ bind(&labels->movw_label); - __ movw(out, /* placeholder */ 0u); + __ movw(out, /* operand= */ 0u); __ bind(&labels->movt_label); - __ movt(out, /* placeholder */ 0u); + __ movt(out, /* operand= */ 0u); __ bind(&labels->add_pc_label); __ add(out, out, pc); } @@ -9377,5 +9303,224 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( #undef QUICK_ENTRY_POINT #undef TODO_VIXL32 +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler, + vixl32::Register base_reg, + vixl32::MemOperand& lock_word, + vixl32::Label* slow_path, + int32_t raw_ldr_offset, + vixl32::Label* throw_npe = nullptr) { + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target= */ false); + // To throw NPE, we return to the fast path; the artificial dependence below does not matter. + if (throw_npe != nullptr) { + __ Bind(throw_npe); + } + __ Add(lr, lr, raw_ldr_offset); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. +} + +// Load the read barrier introspection entrypoint in register `entrypoint` +static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) { + // The register where the read barrier introspection entrypoint is loaded + // is the marking register. We clobber it here and the entrypoint restores it to 1. + vixl32::Register entrypoint = mr; + // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. 
+ DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); + return entrypoint; +} + +void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name) { + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have emitted + // an explicit null check before the load. Otherwise, for implicit null checks, we need to + // null-check the holder as we do not necessarily do that check before going to the thunk. + vixl32::Label throw_npe_label; + vixl32::Label* throw_npe = nullptr; + if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) { + throw_npe = &throw_npe_label; + __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false); + } + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint that performs further checks on the + // reference and marks it if needed. + vixl32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide) + ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET; + EmitGrayCheckAndFastPath( + assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler); + if (width == BakerReadBarrierWidth::kWide) { + MemOperand ldr_half_address(lr, ldr_offset + 2); + __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12". + __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12. + __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference. + } else { + MemOperand ldr_address(lr, ldr_offset); + __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1. + __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint + ep_reg, // for narrow LDR. + Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET)); + __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4. + __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference. + } + // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. + __ Bx(ep_reg); // Jump to the entrypoint. 
+ break; + } + case BakerReadBarrierKind::kArray: { + vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label slow_path; + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); + DCHECK_LT(lock_word.GetOffsetImmediate(), 0); + const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET; + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 + + raw_ldr_offset; + MemOperand ldr_address(lr, ldr_offset + 2); + __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm", + // i.e. Rm+32 because the scale in imm2 is 2. + vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler); + __ Bfi(ep_reg, ip, 3, 6); // Insert ip to the entrypoint address to create + // a switch case target based on the index register. + __ Mov(ip, base_reg); // Move the base register to ip0. + __ Bx(ep_reg); // Jump to the entrypoint's array switch case. + break; + } + case BakerReadBarrierKind::kGcRoot: + case BakerReadBarrierKind::kUnsafeCas: { + // Check if the reference needs to be marked and if so (i.e. not null, not marked yet + // and it does not have a forwarding address), call the correct introspection entrypoint; + // otherwise return the reference (or the extracted forwarding address). + // There is no gray bit check for GC roots. + vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + vixl32::Label return_label, not_marked, forwarding_address; + __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false); + MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value()); + __ Ldr(ip, lock_word); + __ Tst(ip, LockWord::kMarkBitStateMaskShifted); + __ B(eq, ¬_marked); + __ Bind(&return_label); + __ Bx(lr); + __ Bind(¬_marked); + static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3, + "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in " + " the highest bits and the 'forwarding address' state to have all bits set"); + __ Cmp(ip, Operand(0xc0000000)); + __ B(hs, &forwarding_address); + vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler); + // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister + // to one of art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},unsafe_cas}. + DCHECK(kind != BakerReadBarrierKind::kUnsafeCas || width == BakerReadBarrierWidth::kWide); + int32_t entrypoint_offset = + (kind == BakerReadBarrierKind::kGcRoot) + ? (width == BakerReadBarrierWidth::kWide) + ? 
BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET + : BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET; + __ Add(ep_reg, ep_reg, Operand(entrypoint_offset)); + __ Mov(ip, root_reg); + __ Bx(ep_reg); + __ Bind(&forwarding_address); + __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift); + __ Bx(lr); + break; + } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } + + // For JIT, the slow path is considered part of the compiled method, + // so JIT should pass null as `debug_name`. Tests may not have a runtime. + DCHECK(Runtime::Current() == nullptr || + !Runtime::Current()->UseJitCompilation() || + debug_name == nullptr); + if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) { + std::ostringstream oss; + oss << "BakerReadBarrierThunk"; + switch (kind) { + case BakerReadBarrierKind::kField: + oss << "Field"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data) + << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data); + break; + case BakerReadBarrierKind::kArray: + oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + break; + case BakerReadBarrierKind::kGcRoot: + oss << "GcRoot"; + if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) { + oss << "Wide"; + } + oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + break; + case BakerReadBarrierKind::kUnsafeCas: + oss << "UnsafeCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data); + DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, + BakerReadBarrierSecondRegField::Decode(encoded_data)); + DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide); + break; + } + *debug_name = oss.str(); + } +} + +#undef __ + } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 536da41d07..5edca87147 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -36,6 +36,11 @@ #pragma GCC diagnostic pop namespace art { + +namespace linker { +class Thumb2RelativePatcherTest; +} // namespace linker + namespace arm { // This constant is used as an approximate margin when emission of veneer and literal pools @@ -173,9 +178,9 @@ class InvokeDexCallingConventionVisitorARMVIXL : public InvokeDexCallingConventi InvokeDexCallingConventionVisitorARMVIXL() {} virtual ~InvokeDexCallingConventionVisitorARMVIXL() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConventionARMVIXL calling_convention; @@ -188,25 +193,25 @@ class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention public: 
FieldAccessCallingConventionARMVIXL() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return helpers::LocationFrom(vixl::aarch32::r1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch32::r0); } - Location GetReturnLocation(DataType::Type type) const OVERRIDE { + Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::r0, vixl::aarch32::r1) : helpers::LocationFrom(vixl::aarch32::r0); } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::r2, vixl::aarch32::r3) : (is_instance ? helpers::LocationFrom(vixl::aarch32::r2) : helpers::LocationFrom(vixl::aarch32::r1)); } - Location GetFpuLocation(DataType::Type type) const OVERRIDE { + Location GetFpuLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::s0, vixl::aarch32::s1) : helpers::LocationFrom(vixl::aarch32::s0); @@ -224,8 +229,8 @@ class SlowPathCodeARMVIXL : public SlowPathCode { vixl::aarch32::Label* GetEntryLabel() { return &entry_label_; } vixl::aarch32::Label* GetExitLabel() { return &exit_label_; } - void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; - void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE; + void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; + void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override; private: vixl::aarch32::Label entry_label_; @@ -239,10 +244,10 @@ class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap { ParallelMoveResolverARMVIXL(ArenaAllocator* allocator, CodeGeneratorARMVIXL* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; ArmVIXLAssembler* GetAssembler() const; @@ -261,7 +266,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) @@ -269,7 +274,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -299,7 +304,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; 
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) @@ -307,7 +312,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -322,6 +327,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + vixl::aarch32::Register temp, + vixl::aarch32::FlagsUpdate flags_update); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); @@ -349,6 +357,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxLong(LocationSummary* locations, bool is_min); + void GenerateMinMaxFloat(HInstruction* minmax, bool is_min); + void GenerateMinMaxDouble(HInstruction* minmax, bool is_min); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -379,16 +393,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: - // - // root <- *(obj + offset) - // - // while honoring read barriers based on read_barrier_option. 
- void GenerateGcRootFieldLoad(HInstruction* instruction, - Location root, - vixl::aarch32::Register obj, - uint32_t offset, - ReadBarrierOption read_barrier_option); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -424,53 +428,55 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { class CodeGeneratorARMVIXL : public CodeGenerator { public: CodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorARMVIXL() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + void Bind(HBasicBlock* block) override; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return static_cast<size_t>(kArmPointerSize); } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; } + size_t GetFloatingPointSpillSlotSize() const override { return vixl::aarch32::kRegSizeInBytes; } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; } + ArmVIXLAssembler* GetAssembler() override { return &assembler_; } - const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; } + const ArmVIXLAssembler& GetAssembler() const override { return assembler_; } ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { vixl::aarch32::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); return block_entry_label->GetLocation(); } void FixJumpTables(); - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; + + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - void 
DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } + InstructionSet GetInstructionSet() const override { return InstructionSet::kThumb2; } + + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const; - ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; } // Helper method to move a 32-bit value between two locations. void Move32(Location destination, Location source); @@ -489,7 +495,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -513,44 +519,42 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl32::Label* GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label); - void Initialize() OVERRIDE { + void Initialize() override { block_labels_.resize(GetGraph()->GetBlocks().size()); } - void Finalize(CodeAllocator* allocator) OVERRIDE; - - const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } + void Finalize(CodeAllocator* allocator) override; - bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64; } - void ComputeSpillMask() OVERRIDE; + void ComputeSpillMask() override; vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. 
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. @@ -574,6 +578,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Label add_pc_label; }; + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); @@ -583,9 +589,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, dex::StringIndex string_index); - // Add a new baker read barrier patch and return the label to be bound - // before the BNE instruction. - vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + // Emit the BNE instruction for baker read barrier and record + // the associated patch for AOT or slow path for JIT. + void EmitBakerReadBarrierBne(uint32_t custom_data); VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address); VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, @@ -595,14 +601,40 @@ class CodeGeneratorARMVIXL : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void LoadBootImageAddress(vixl::aarch32::Register reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + bool NeedsThunkCode(const linker::LinkerPatch& patch) const override; + void EmitThunkCode(const linker::LinkerPatch& patch, + /*out*/ ArenaVector<uint8_t>* code, + /*out*/ std::string* debug_name) override; - // Maybe add the reserved entrypoint register as a temporary for field load. This temp - // is added only for AOT compilation if link-time generated thunks for fields are enabled. - void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers based on read_barrier_option. 
+ void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + vixl::aarch32::Register obj, + uint32_t offset, + ReadBarrierOption read_barrier_option); + // Generate ADD for UnsafeCASObject to reconstruct the old value from + // `old_value - expected` and mark it with Baker read barrier. + void GenerateUnsafeCasOldValueAddWithBakerReadBarrier(vixl::aarch32::Register old_value, + vixl::aarch32::Register adjusted_old_value, + vixl::aarch32::Register expected); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + // Overload suitable for Unsafe.getObject/-Volatile() intrinsic. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + const vixl::aarch32::MemOperand& src, + bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -613,56 +645,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator { bool needs_null_check); // Fast path implementation of ReadBarrier::Barrier for a heap // reference array load when Baker's read barriers are used. - void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, + void GenerateArrayLoadWithBakerReadBarrier(Location ref, vixl::aarch32::Register obj, uint32_t data_offset, Location index, Location temp, bool needs_null_check); - // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, - // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. - // - // Load the object reference located at the address - // `obj + offset + (index << scale_factor)`, held by object `obj`, into - // `ref`, and mark it if needed. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - Location temp, - bool needs_null_check); - - // Generate code checking whether the the reference field at the - // address `obj + field_offset`, held by object `obj`, needs to be - // marked, and if so, marking it and updating the field within `obj` - // with the marked value. - // - // This routine is used for the implementation of the - // UnsafeCASObject intrinsic with Baker read barriers. - // - // This method has a structure similar to - // GenerateReferenceLoadWithBakerReadBarrier, but note that argument - // `ref` is only as a temporary here, and thus its value should not - // be used afterwards. - void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - Location field_offset, - Location temp, - bool needs_null_check, - vixl::aarch32::Register temp2); - - // Generate a heap reference load (with no read barrier). - void GenerateRawReferenceLoad(HInstruction* instruction, - Location ref, - vixl::aarch32::Register obj, - uint32_t offset, - Location index, - ScaleFactor scale_factor, - bool needs_null_check); // Emit code checking the status of the Marking Register, and // aborting the program if MR does not match the value stored in the @@ -734,10 +722,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // artReadBarrierForRootSlow. 
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); - void GenerateNop() OVERRIDE; + void GenerateNop() override; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) { jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARMVIXL(switch_instr)); @@ -757,6 +745,92 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register temp = vixl32::Register()); private: + // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kUnsafeCas, // UnsafeCASObject intrinsic. + kLast = kUnsafeCas + }; + + enum class BakerReadBarrierWidth : uint8_t { + kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled). + kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled). + kLast = kNarrow + }; + + static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBakerReadBarrierBitsForRegister = + MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg); + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, + kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister, + kBakerReadBarrierBitsForRegister>; + static constexpr size_t kBitsForBakerReadBarrierWidth = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast)); + using BakerReadBarrierWidthField = + BitField<BakerReadBarrierWidth, + kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister, + kBitsForBakerReadBarrierWidth>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != mr.GetCode()) << reg; + } + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, + uint32_t holder_reg, + bool narrow) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + DCHECK(!narrow || base_reg < 8u) << base_reg; + BakerReadBarrierWidth width = + narrow ? 
BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) { + CheckValidReg(root_reg); + DCHECK(!narrow || root_reg < 8u) << root_reg; + BakerReadBarrierWidth width = + narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide; + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(width); + } + + static uint32_t EncodeBakerReadBarrierUnsafeCasData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kUnsafeCas) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) | + BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide); + } + + void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler, + uint32_t encoded_data, + /*out*/ std::string* debug_name); + vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); @@ -794,11 +868,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ParallelMoveResolverARMVIXL move_resolver_; ArmVIXLAssembler assembler_; - const ArmInstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -806,10 +880,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; @@ -818,6 +894,20 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Patches for class literals in JIT compiled code. 
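For context, a minimal standalone sketch of the scheme used by the EncodeBakerReadBarrier* helpers above: the thunk kind, up to two register numbers, and the load width are packed into one 32-bit custom-data word. The fixed field widths and the plain shift/mask arithmetic here are simplifying assumptions; the real code sizes the fields with MinimumBitsToStore and packs them with ART's BitField<> template.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Assumed layout: 2 bits kind | 4 bits first reg | 4 bits second reg | 1 bit width.
enum class Kind : uint32_t { kField = 0, kArray = 1, kGcRoot = 2, kUnsafeCas = 3 };
enum class Width : uint32_t { kWide = 0, kNarrow = 1 };

constexpr uint32_t kKindBits = 2;
constexpr uint32_t kRegBits = 4;

constexpr uint32_t Encode(Kind kind, uint32_t first_reg, uint32_t second_reg, Width width) {
  return static_cast<uint32_t>(kind) |
         (first_reg << kKindBits) |
         (second_reg << (kKindBits + kRegBits)) |
         (static_cast<uint32_t>(width) << (kKindBits + 2 * kRegBits));
}

int main() {
  // Field load: base reference in r3, holder object in r5, narrow (16-bit) LDR.
  const uint32_t data = Encode(Kind::kField, /* first_reg= */ 3u, /* second_reg= */ 5u, Width::kNarrow);
  assert((data & ((1u << kKindBits) - 1u)) == static_cast<uint32_t>(Kind::kField));
  assert(((data >> kKindBits) & ((1u << kRegBits) - 1u)) == 3u);               // base register recovered
  assert(((data >> (kKindBits + kRegBits)) & ((1u << kRegBits) - 1u)) == 5u);  // holder register recovered
  std::printf("encoded custom data: %#x\n", data);
  return 0;
}

In the declarations above the register fields are sized with MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg), so an unused register slot can be filled with the invalid "pc" encoding (15) and still round-trip through the same shifts.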
TypeToLiteralMap jit_class_patches_; + // Baker read barrier slow paths, mapping custom data (uint32_t) to label. + // Wrap the label to work around vixl::aarch32::Label being non-copyable + // and non-moveable and as such unusable in ArenaSafeMap<>. + struct LabelWrapper { + LabelWrapper(const LabelWrapper& src) + : label() { + DCHECK(!src.label.IsReferenced() && !src.label.IsBound()); + } + LabelWrapper() = default; + vixl::aarch32::Label label; + }; + ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_; + + friend class linker::Thumb2RelativePatcherTest; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 87e6d6834b..72334afa40 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -26,6 +26,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_mips.h" @@ -146,7 +147,7 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type t case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + UNREACHABLE(); } // Space on the stack is reserved for all arguments. @@ -159,6 +160,14 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) return MipsReturnLocation(type); } +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // The reference is returned in the same register. This differs from the standard return location. + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value() @@ -167,7 +176,7 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { public: explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); @@ -192,9 +201,9 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS); @@ -204,16 +213,16 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { public: explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); mips_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS); @@ -221,35 +230,41 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS { class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { public: - LoadClassSlowPathMIPS(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeMIPS(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathMIPS(HLoadClass* cls, HInstruction* at) + : SlowPathCodeMIPS(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), mips_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + mips_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + mips_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -265,18 +280,12 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS"; } + const char* GetDescription() const override { return "LoadClassSlowPathMIPS"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS); }; @@ -285,7 +294,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); @@ -309,7 +318,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; } + const char* GetDescription() const override { return "LoadStringSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS); @@ -319,7 +328,7 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { public: explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -333,9 +342,9 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "NullCheckSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS); @@ -346,7 +355,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { 
SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeMIPS(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); @@ -366,7 +375,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { return &return_label_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS"; } HBasicBlock* GetSuccessor() const { return successor_; @@ -387,7 +396,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal) : SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() @@ -426,9 +435,9 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; } + const char* GetDescription() const override { return "TypeCheckSlowPathMIPS"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -441,7 +450,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -453,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); @@ -463,7 +472,7 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { public: explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -494,7 +503,7 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; } + const char* GetDescription() const override { return "ArraySetSlowPathMIPS"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS); @@ -524,9 +533,9 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { 
LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -578,7 +587,7 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this, - /* direct */ false); + /* direct= */ false); } __ B(GetExitLabel()); } @@ -618,11 +627,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -672,7 +681,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this, - /* direct */ false); + /* direct= */ false); // If the new reference is different from the old reference, // update the field in the holder (`*(obj_ + field_offset_)`). @@ -789,7 +798,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); @@ -913,7 +922,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { @@ -956,7 +965,7 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -986,7 +995,7 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS"; } private: const Location out_; @@ -996,7 +1005,6 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { }; CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, - const MipsInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1013,8 +1021,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator(), &isa_features), - isa_features_(isa_features), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()), uint32_literals_(std::less<uint32_t>(), 
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1023,6 +1031,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { @@ -1042,8 +1051,7 @@ void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) { // Adjust native pc offsets in stack maps. StackMapStream* stack_map_stream = GetStackMapStream(); for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { - uint32_t old_position = - stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips); + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); uint32_t new_position = __ GetAdjustedPosition(old_position); DCHECK_GE(new_position, old_position); stack_map_stream->SetStackMapNativePcOffset(i, new_position); @@ -1159,9 +1167,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { __ Move(r2_l, TMP); __ Move(r2_h, AT); } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) { - Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false); + Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ false); } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) { - Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true); + Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ true); } else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) { ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex()); } else if ((loc1.IsRegister() && loc2.IsStackSlot()) || @@ -1597,6 +1605,15 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1605,7 +1622,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1614,12 +1632,14 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1630,6 +1650,20 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1703,7 +1737,7 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo __ Bind(&info_high->label); __ Bind(&info_high->pc_rel_label); // Add the high half of a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); + __ Auipc(out, /* imm16= */ 0x1234); __ SetReorder(reordering); } else { // If base is ZERO, emit NAL to obtain the actual base. @@ -1712,7 +1746,7 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo __ Nal(); } __ Bind(&info_high->label); - __ Lui(out, /* placeholder */ 0x1234); + __ Lui(out, /* imm16= */ 0x1234); // If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding // the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler. 
if (base == ZERO) { @@ -1726,6 +1760,48 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo // offset to `out` (e.g. lw, jialc, addiu). } +void CodeGeneratorMIPS::LoadBootImageAddress(Register reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, /* base= */ ZERO); + __ Addiu(reg, TMP, /* imm16= */ 0x5678, &info_low->label); + } else if (GetCompilerOptions().GetCompilePic()) { + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, reg, /* base= */ ZERO); + __ Lw(reg, reg, /* imm16= */ 0x5678, &info_low->label); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ LoadConst32(reg, dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))); + } +} + +void CodeGeneratorMIPS::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx); + PcRelativePatchInfo* info_low = + NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, argument, /* base= */ ZERO); + __ Addiu(argument, argument, /* imm16= */ 0x5678, &info_low->label); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch( const DexFile& dex_file, dex::StringIndex string_index, @@ -1792,12 +1868,27 @@ void CodeGeneratorMIPS::MarkGCCard(Register object, if (value_can_be_null) { __ Beqz(value, &done); } + // Load the address of the card table into `card`. __ LoadFromOffset(kLoadWord, card, TR, Thread::CardTableOffset<kMipsPointerSize>().Int32Value()); + // Calculate the address of the card corresponding to `object`. __ Srl(temp, object, gc::accounting::CardTable::kCardShift); __ Addu(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). 
Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Sb(card, temp, 0); if (value_can_be_null) { __ Bind(&done); @@ -1882,6 +1973,10 @@ void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << FRegister(reg); } +const MipsInstructionSetFeatures& CodeGeneratorMIPS::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsMipsInstructionSetFeatures(); +} + constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16; void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -1936,6 +2031,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. + __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // Only stype 0 is supported. 
} @@ -2456,7 +2579,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_high, dst_high, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(dst_high, dst_low); __ Move(dst_low, ZERO); } else { @@ -2472,7 +2595,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_low, dst_low, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(dst_low, dst_high); __ Sra(dst_high, dst_high, 31); } else { @@ -2489,7 +2612,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_low, dst_low, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(dst_low, dst_high); __ Move(dst_high, ZERO); } else { @@ -2508,7 +2631,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Or(dst_high, dst_high, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); if (isR6) { - __ Beqzc(TMP, &done, /* is_bare */ true); + __ Beqzc(TMP, &done, /* is_bare= */ true); __ Move(TMP, dst_high); __ Move(dst_high, dst_low); __ Move(dst_low, TMP); @@ -2739,7 +2862,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { obj, offset, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, out_loc, @@ -2747,7 +2870,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { data_offset, index, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } } else { Register out = out_loc.AsRegister<Register>(); @@ -3287,7 +3410,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -3296,7 +3425,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -3335,7 +3464,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. 
- __ Bne(temp, cls, slow_path->GetEntryLabel()); + __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); break; } @@ -3361,7 +3490,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bne(temp, cls, &loop); + __ Bne(temp, cls.AsRegister<Register>(), &loop); break; } @@ -3376,7 +3505,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop; __ Bind(&loop); - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -3399,7 +3528,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beq(temp, cls, &done); + __ Beq(temp, cls.AsRegister<Register>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3458,7 +3587,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Go to next interface. __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bne(AT, cls, &loop); + __ Bne(AT, cls.AsRegister<Register>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); break; } } @@ -3474,15 +3617,14 @@ void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( - check->GetLoadClass(), - check, - check->GetDexPc(), - true); + SlowPathCodeMIPS* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<Register>()); @@ -3962,7 +4104,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperatio int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); @@ -5806,7 +5948,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -5825,9 +5967,9 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeMIPS* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } // This function returns true if a conditional move can be generated for HSelect. @@ -5841,7 +5983,7 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { // of common logic. static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) { bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); HCondition* condition = cond->AsCondition(); DataType::Type cond_type = @@ -6074,7 +6216,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { Location src = locations->InAt(1); Register src_reg = ZERO; Register src_reg_high = ZERO; - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); Register cond_reg = TMP; int cond_cc = 0; DataType::Type cond_type = DataType::Type::kInt32; @@ -6082,7 +6224,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); + cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>(); } else { HCondition* condition = cond->AsCondition(); LocationSummary* cond_locations = cond->GetLocations(); @@ -6195,7 +6337,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { Location dst = locations->Out(); Location false_src = locations->InAt(0); Location true_src = locations->InAt(1); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); Register cond_reg = TMP; FRegister fcond_reg = FTMP; DataType::Type cond_type = DataType::Type::kInt32; @@ -6203,7 +6345,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); + cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>(); } else { HCondition* condition = cond->AsCondition(); LocationSummary* cond_locations = cond->GetLocations(); @@ -6384,7 +6526,7 @@ void LocationsBuilderMIPS::VisitSelect(HSelect* select) { void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (CanMoveConditionally(select, is_r6, /* locations_to_set */ nullptr)) { + if (CanMoveConditionally(select, is_r6, /* locations_to_set= */ nullptr)) { if (is_r6) { GenConditionalMoveR6(select); } else { @@ -6394,8 +6536,8 @@ void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { LocationSummary* 
locations = select->GetLocations(); MipsLabel false_target; GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, + /* condition_input_index= */ 2, + /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); @@ -6554,7 +6696,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, obj, offset, temp_loc, - /* needs_null_check */ true); + /* needs_null_check= */ true); if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -6787,7 +6929,7 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( out_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -6828,7 +6970,7 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( obj_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6919,7 +7061,7 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc __ AddUpper(base, obj, offset_high); } MipsLabel skip_call; - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); if (label_low != nullptr) { DCHECK(short_offset); __ Bind(label_low); @@ -7074,11 +7216,11 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst MipsLabel skip_call; if (short_offset) { if (isR6) { - __ Beqzc(T9, &skip_call, /* is_bare */ true); + __ Beqzc(T9, &skip_call, /* is_bare= */ true); __ Nop(); // In forbidden slot. __ Jialc(T9, thunk_disp); } else { - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Nop(); // In delay slot. @@ -7086,13 +7228,13 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst __ Bind(&skip_call); } else { if (isR6) { - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Aui(base, obj, offset_high); // In delay slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); } else { __ Lui(base, offset_high); - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Bind(&skip_call); @@ -7169,7 +7311,7 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst // We will not do the explicit null check in the thunk as some form of a null check // must've been done earlier. DCHECK(!needs_null_check); - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. 
__ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); @@ -7179,13 +7321,13 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst : index.AsRegister<Register>(); MipsLabel skip_call; if (GetInstructionSetFeatures().IsR6()) { - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); } else { __ Sll(TMP, index_reg, scale_factor); - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Addiu(T9, T9, thunk_disp); // In delay slot. __ Jalr(T9); __ Bind(&skip_call); @@ -7300,7 +7442,7 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction, ref, obj, - /* field_offset */ index, + /* field_offset= */ index, temp_reg); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS(instruction, ref); @@ -7312,7 +7454,7 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit into the sign bit (31) and // performing a branch on less than zero. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); @@ -7401,6 +7543,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -7409,7 +7553,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -7421,7 +7571,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); - Register cls = locations->InAt(1).AsRegister<Register>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -7453,7 +7603,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. 
- __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<Register>()); __ Sltiu(out, out, 1); break; } @@ -7480,7 +7630,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); - __ Bne(out, cls, &loop); + __ Bne(out, cls.AsRegister<Register>(), &loop); __ LoadConst32(out, 1); break; } @@ -7498,7 +7648,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -7525,7 +7675,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. MipsLabel success; - __ Beq(out, cls, &success); + __ Beq(out, cls.AsRegister<Register>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -7555,9 +7705,9 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); - __ Bne(out, cls, slow_path->GetEntryLabel()); + __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -7584,11 +7734,25 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. 
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -7712,6 +7876,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokePolymorphic(HInvokePolymorphic* in codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderMIPS::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorMIPS::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) { if (invoke->GetLocations()->Intrinsified()) { IntrinsicCodeGeneratorMIPS intrinsic(codegen); @@ -7725,14 +7897,14 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -7748,14 +7920,14 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -7792,7 +7964,7 @@ Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticO HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -7829,12 +8001,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); Register temp_reg = temp.AsRegister<Register>(); EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + __ Addiu(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + 
PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + Register temp_reg = temp.AsRegister<Register>(); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); @@ -7842,9 +8020,12 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); Register temp_reg = temp.AsRegister<Register>(); EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); - __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); + __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -7955,14 +8136,14 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kJitBootImageAddress: if (isR6) { break; } if (has_irreducible_loops) { - if (load_kind != HLoadClass::LoadKind::kBootImageAddress) { + if (load_kind != HLoadClass::LoadKind::kJitBootImageAddress) { codegen_->ClobberRA(); } break; @@ -7978,10 +8159,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. } @@ -8007,9 +8185,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kJitBootImageAddress: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); break; @@ -8048,39 +8226,20 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); + __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - if (isR6 || !has_irreducible_loops) { - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - } else { - __ LoadConst32(out, address); - } - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Addiu(out, out, -masked_hash); - } + __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -8094,24 +8253,37 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF GenerateGcRootFieldLoad(cls, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, read_barrier_option, &info_low->label); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + if (isR6 || !has_irreducible_loops) { + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + __ LoadConst32(out, address); + } + break; + } case HLoadClass::LoadKind::kJitTableAddress: { CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); bool reordering = __ SetReorder(false); __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); + __ Lui(out, /* imm16= */ 0x1234); __ SetReorder(reordering); GenerateGcRootFieldLoad(cls, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, read_barrier_option, &info->low_label); break; @@ -8124,8 +8296,8 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCodeMIPS* slow_path 
= + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqz(out, slow_path->GetEntryLabel()); @@ -8138,6 +8310,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF } } +void LocationsBuilderMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc); +} + +void InstructionCodeGeneratorMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderMIPS::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc); +} + +void InstructionCodeGeneratorMIPS::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + static int32_t GetExceptionTlsOffset() { return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value(); } @@ -8169,15 +8361,15 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: + case HLoadString::LoadKind::kJitBootImageAddress: if (isR6) { break; } if (has_irreducible_loops) { - if (load_kind != HLoadString::LoadKind::kBootImageAddress) { + if (load_kind != HLoadString::LoadKind::kJitBootImageAddress) { codegen_->ClobberRA(); } break; @@ -8198,10 +8390,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. } @@ -8221,10 +8410,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: + case HLoadString::LoadKind::kJitBootImageAddress: base_or_current_method_reg = (isR6 || has_irreducible_loops) ? 
ZERO : locations->InAt(0).AsRegister<Register>(); break; @@ -8243,36 +8432,23 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); + __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - if (isR6 || !has_irreducible_loops) { - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - } else { - __ LoadConst32(out, address); - } - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, base_or_current_method_reg); - __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); + __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label); return; } case HLoadString::LoadKind::kBssEntry: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = @@ -8283,7 +8459,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ GenerateGcRootFieldLoad(load, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, kCompilerReadBarrierOption, &info_low->label); SlowPathCodeMIPS* slow_path = @@ -8293,6 +8469,18 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + if (isR6 || !has_irreducible_loops) { + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + __ LoadConst32(out, address); + } + return; + } case HLoadString::LoadKind::kJitTableAddress: { CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootStringPatch(load->GetDexFile(), @@ -8300,12 +8488,12 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ load->GetString()); bool reordering = __ SetReorder(false); __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); + __ Lui(out, /* imm16= */ 0x1234); __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, kCompilerReadBarrierOption, &info->low_label); return; @@ -8513,10 +8701,8 @@ void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the 
reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -8526,30 +8712,13 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>(); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString)); - __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value()); - __ Jalr(T9); - __ NopIfNoReordering(); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } void LocationsBuilderMIPS::VisitNot(HNot* instruction) { @@ -8779,6 +8948,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + if (isR6) { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) 
register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions + // always change the target (output) register. If the condition is + // true the output register gets the contents of the "rs" register; + // otherwise, the output register is set to zero. One consequence + // of this is that to implement something like "rd = c==0 ? rs : rt" + // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. + // After executing this pair of instructions one of the output + // registers from the pair will necessarily contain zero. Then the + // code ORs the output registers from the SELEQZ/SELNEZ instructions + // to get the final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, b_hi, a_hi); + __ Bne(b_hi, a_hi, &compare_done); + + __ Sltu(TMP, b_lo, a_lo); + + __ Bind(&compare_done); + + if (is_min) { + __ Seleqz(AT, a_lo, TMP); + __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo + // because at this point we're + // done using a_lo/b_lo. 
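The SELEQZ/SELNEZ comment above can be checked with a small standalone model. Below is a minimal C++ sketch of the 32-bit R6 sequence (Slt + Seleqz/Selnez + Or) that GenerateMinMaxInt emits further below; the helper name and layout are illustrative only, not part of the patch:

#include <cstdint>

// Branchless min/max in the style of the R6 path: select one operand into each
// of two registers (the unselected one becomes zero), then OR them together.
int32_t MinMaxInt32R6Sketch(int32_t a, int32_t b, bool is_min) {
  int32_t at = (b < a) ? 1 : 0;                     // Slt AT, b, a
  int32_t keep_a = is_min ? ((at == 0) ? a : 0)     // Seleqz TMP, a, AT
                          : ((at != 0) ? a : 0);    // Selnez TMP, a, AT
  int32_t keep_b = is_min ? ((at != 0) ? b : 0)     // Selnez AT, b, AT
                          : ((at == 0) ? b : 0);    // Seleqz AT, b, AT
  return keep_a | keep_b;                           // Or out, TMP, AT
}

Exactly one of keep_a/keep_b is zero after the selects, which is why the final OR recovers the chosen operand without a branch.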
+ } else { + __ Selnez(AT, a_lo, TMP); + __ Seleqz(out_lo, b_lo, TMP); // ditto + } + __ Or(out_lo, out_lo, AT); + if (is_min) { + __ Seleqz(AT, a_hi, TMP); + __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } else { + __ Selnez(AT, a_hi, TMP); + __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi + } + __ Or(out_hi, out_hi, AT); + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, b, a); + if (is_min) { + __ Seleqz(TMP, a, AT); + __ Selnez(AT, b, AT); + } else { + __ Selnez(TMP, a, AT); + __ Seleqz(AT, b, AT); + } + __ Or(out, TMP, AT); + } + } + } else { // !isR6 + if (type == DataType::Type::kInt64) { + Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); + Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + + MipsLabel compare_done; + + if (a_lo == b_lo) { + if (out_lo != a_lo) { + __ Move(out_lo, a_lo); + __ Move(out_hi, a_hi); + } + } else { + __ Slt(TMP, a_hi, b_hi); + __ Bne(a_hi, b_hi, &compare_done); + + __ Sltu(TMP, a_lo, b_lo); + + __ Bind(&compare_done); + + if (is_min) { + if (out_lo != a_lo) { + __ Movn(out_hi, a_hi, TMP); + __ Movn(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movz(out_hi, b_hi, TMP); + __ Movz(out_lo, b_lo, TMP); + } + } else { + if (out_lo != a_lo) { + __ Movz(out_hi, a_hi, TMP); + __ Movz(out_lo, a_lo, TMP); + } + if (out_lo != b_lo) { + __ Movn(out_hi, b_hi, TMP); + __ Movn(out_lo, b_lo, TMP); + } + } + } + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register a = locations->InAt(0).AsRegister<Register>(); + Register b = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + if (a == b) { + if (out != a) { + __ Move(out, a); + } + } else { + __ Slt(AT, a, b); + if (is_min) { + if (out != a) { + __ Movn(out, a, AT); + } + if (out != b) { + __ Movz(out, b, AT); + } + } else { + if (out != a) { + __ Movz(out, a, AT); + } + if (out != b) { + __ Movn(out, b, AT); + } + } + } + } + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + bool isR6, + DataType::Type type) { + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); + + if (isR6) { + MipsLabel noNaNs; + MipsLabel done; + FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. 
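The NaN comment above is the heart of the R6 floating-point path: Java's Math.min/max must return a NaN if either input is one, while MIN.fmt/MAX.fmt prefer the numeric operand. A hedged C++ model of that guard, assuming std::fmin/std::fmax stand in for MIN.S/MAX.S on ordered inputs (the ±0.0 cases are left to the underlying min/max here, as in the R6 code path):

#include <cmath>

// If the pair is unordered, return whichever operand is the NaN; otherwise
// fall through to the hardware-style min/max.
float MinMaxFloatR6Sketch(float a, float b, bool is_min) {
  if (std::isnan(a) || std::isnan(b)) {   // CmpUnS FTMP, a, b; Bc1eqz FTMP, &noNaNs
    return (a == a) ? b : a;              // CmpEqS ftmp, a, a; SelS ftmp, a, b
  }
  return is_min ? std::fmin(a, b) : std::fmax(a, b);  // MinS / MaxS (MinD / MaxD for doubles)
}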
+ if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ B(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); + + } else { // !isR6 + MipsLabel ordered; + MipsLabel compare; + MipsLabel select; + MipsLabel done; + + if (type == DataType::Type::kFloat64) { + __ CunD(a, b); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CunS(a, b); + } + __ Bc1f(&ordered); + + // a or b (or both) is a NaN. Return one, which is a NaN. + if (type == DataType::Type::kFloat64) { + __ CeqD(b, b); + } else { + __ CeqS(b, b); + } + __ B(&select); + + __ Bind(&ordered); + + // Neither is a NaN. + // a == b? (-0.0 compares equal with +0.0) + // If equal, handle zeroes, else compare further. + if (type == DataType::Type::kFloat64) { + __ CeqD(a, b); + } else { + __ CeqS(a, b); + } + __ Bc1f(&compare); + + // a == b either bit for bit or one is -0.0 and the other is +0.0. + if (type == DataType::Type::kFloat64) { + __ MoveFromFpuHigh(TMP, a); + __ MoveFromFpuHigh(AT, b); + } else { + __ Mfc1(TMP, a); + __ Mfc1(AT, b); + } + + if (is_min) { + // -0.0 prevails over +0.0. + __ Or(TMP, TMP, AT); + } else { + // +0.0 prevails over -0.0. + __ And(TMP, TMP, AT); + } + + if (type == DataType::Type::kFloat64) { + __ Mfc1(AT, a); + __ Mtc1(AT, out); + __ MoveToFpuHigh(TMP, out); + } else { + __ Mtc1(TMP, out); + } + __ B(&done); + + __ Bind(&compare); + + if (type == DataType::Type::kFloat64) { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeD(a, b); + } else { + // return (a >= b) ? a : b; + __ ColeD(b, a); // b <= a + } + } else { + if (is_min) { + // return (a <= b) ? a : b; + __ ColeS(a, b); + } else { + // return (a >= b) ? 
a : b; + __ ColeS(b, a); // b <= a + } + } + + __ Bind(&select); + + if (type == DataType::Type::kFloat64) { + __ MovtD(out, a); + __ MovfD(out, b); + } else { + __ MovtS(out, a); + __ MovfS(out, b); + } + + __ Bind(&done); + } +} + +void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations, + DataType::Type type, + bool isR2OrNewer, + bool isR6) { + FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister out = locations->Out().AsFpuRegister<FRegister>(); + + // Note, as a "quality of implementation", rather than pure "spec compliance", we require that + // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN + // (signaling NaN may become quiet though). + // + // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, + // both regular floating point numbers and NAN values are treated alike, only the sign bit is + // affected by this instruction. + // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any + // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be + // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. + if (isR6) { + if (type == DataType::Type::kFloat64) { + __ AbsD(out, in); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ AbsS(out, in); + } + } else { + if (type == DataType::Type::kFloat64) { + if (in != out) { + __ MovD(out, in); + } + __ MoveFromFpuHigh(TMP, in); + // ins instruction is not available for R1. + if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ MoveToFpuHigh(TMP, out); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ Mfc1(TMP, in); + // ins instruction is not available for R1. 
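The quality-of-implementation note in GenerateAbsFP reduces to clearing a single bit of the raw encoding. A minimal C++ sketch of the pre-R6 float case, mirroring the Mfc1 / Ins(TMP, ZERO, 31, 1) / Mtc1 steps (Sll plus Srl by one on R1); the helper is hypothetical, not ART code:

#include <cstdint>
#include <cstring>

// Move the float's bits to an integer, clear only the sign bit, move them back.
// All other bits, including a NaN payload, are left untouched.
float AbsFloatBitsSketch(float in) {
  uint32_t bits;
  std::memcpy(&bits, &in, sizeof(bits));   // Mfc1 TMP, in
  bits &= 0x7fffffffu;                     // Ins TMP, ZERO, 31, 1 (R1: Sll then Srl by 1)
  float out;
  std::memcpy(&out, &bits, sizeof(out));   // Mtc1 TMP, out
  return out;
}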
+ if (isR2OrNewer) { + __ Ins(TMP, ZERO, 31, 1); + } else { + __ Sll(TMP, TMP, 1); + __ Srl(TMP, TMP, 1); + } + __ Mtc1(TMP, out); + } + } +} + +void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register in = locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register out_lo = locations->Out().AsRegisterPairLow<Register>(); + Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); + // The comments in this section show the analogous operations which would + // be performed if we had 64-bit registers "in", and "out". + // __ Dsra32(AT, in, 31); + __ Sra(AT, in_hi, 31); + // __ Xor(out, in, AT); + __ Xor(TMP, in_lo, AT); + __ Xor(out_hi, in_hi, AT); + // __ Dsubu(out, out, AT); + __ Subu(out_lo, TMP, AT); + __ Sltu(TMP, out_lo, TMP); + __ Addu(out_hi, out_hi, TMP); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index c91cb62eda..50807310b6 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -81,9 +81,9 @@ class InvokeDexCallingConventionVisitorMIPS : public InvokeDexCallingConventionV InvokeDexCallingConventionVisitorMIPS() {} virtual ~InvokeDexCallingConventionVisitorMIPS() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -110,23 +110,23 @@ class FieldAccessCallingConventionMIPS : public FieldAccessCallingConvention { public: FieldAccessCallingConventionMIPS() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(A1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(A0); } - Location GetReturnLocation(DataType::Type type) const OVERRIDE { + Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(V0, V1) : Location::RegisterLocation(V0); } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(A2, A3) : (is_instance ? 
Location::RegisterLocation(A2) : Location::RegisterLocation(A1)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(F0); } @@ -139,10 +139,10 @@ class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap { ParallelMoveResolverMIPS(ArenaAllocator* allocator, CodeGeneratorMIPS* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; void Exchange(int index1, int index2, bool double_slot); void ExchangeQuadSlots(int index1, int index2); @@ -176,14 +176,14 @@ class LocationsBuilderMIPS : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -210,14 +210,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); @@ -246,6 +247,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type); + void GenerateMinMax(HBinaryOperation*, bool is_min); + void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -364,40 +370,39 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { class 
CodeGeneratorMIPS : public CodeGenerator { public: CodeGeneratorMIPS(HGraph* graph, - const MipsInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorMIPS() {} - void ComputeSpillMask() OVERRIDE; - bool HasAllocatedCalleeSaveRegisters() const OVERRIDE; - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; + void ComputeSpillMask() override; + bool HasAllocatedCalleeSaveRegisters() const override; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; - void Bind(HBasicBlock* block) OVERRIDE; + void Bind(HBasicBlock* block) override; void MoveConstant(Location location, HConstant* c); - size_t GetWordSize() const OVERRIDE { return kMipsWordSize; } + size_t GetWordSize() const override { return kMipsWordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 2 * kMipsDoublewordSize // 16 bytes for each spill. : 1 * kMipsDoublewordSize; // 8 bytes for each spill. } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return assembler_.GetLabelLocation(GetLabelOf(block)); } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; } - const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + MipsAssembler* GetAssembler() override { return &assembler_; } + const MipsAssembler& GetAssembler() const override { return assembler_; } // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -488,48 +493,46 @@ class CodeGeneratorMIPS : public CodeGenerator { // Register allocation. 
- void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; void ClobberRA() { clobbered_ra_ = true; } - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; } + InstructionSet GetInstructionSet() const override { return InstructionSet::kMips; } - const MipsInstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const MipsInstructionSetFeatures& GetInstructionSetFeatures() const; MipsLabel* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<MipsLabel>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<MipsLabel>(); } - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Code generation helpers. - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; - void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveConstant(Location destination, int32_t value) override; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -540,41 +543,41 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct); - ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kInt64; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. 
HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. @@ -615,6 +618,10 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, @@ -637,6 +644,9 @@ class CodeGeneratorMIPS : public CodeGenerator { Register out, Register base); + void LoadBootImageAddress(Register reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + // The JitPatchInfo is used for JIT string and class loads. struct JitPatchInfo { JitPatchInfo(const DexFile& dex_file, uint64_t idx) @@ -685,11 +695,11 @@ class CodeGeneratorMIPS : public CodeGenerator { InstructionCodeGeneratorMIPS instruction_visitor_; ParallelMoveResolverMIPS move_resolver_; MipsAssembler assembler_; - const MipsInstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. 
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -697,10 +707,12 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative String patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<JitPatchInfo> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 985ac2ca55..0d3cb3b8ca 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -24,6 +24,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_mips64.h" @@ -111,6 +112,14 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) return Mips64ReturnLocation(type); } +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // The reference is returned in the same register. This differs from the standard return location. + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value() @@ -119,7 +128,7 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); @@ -144,9 +153,9 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64); @@ -157,16 +166,16 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); mips64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64); @@ -174,35 +183,41 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - LoadClassSlowPathMIPS64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCodeMIPS64(at), - cls_(cls), - dex_pc_(dex_pc), - do_clinit_(do_clinit) { + LoadClassSlowPathMIPS64(HLoadClass* cls, HInstruction* at) + : SlowPathCodeMIPS64(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage - : kQuickInitializeType; - mips64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), mips64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + mips64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + source, + cls_->GetType()); + } + if (must_do_clinit) { + mips64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. @@ -218,18 +233,12 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; } + const char* GetDescription() const override { return "LoadClassSlowPathMIPS64"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64); }; @@ -238,7 +247,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit LoadStringSlowPathMIPS64(HLoadString* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); @@ -265,7 +274,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } + const char* GetDescription() const override { return "LoadStringSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); @@ -275,7 +284,7 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -289,9 +298,9 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "NullCheckSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64); @@ -302,7 +311,7 @@ class 
SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeMIPS64(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); @@ -322,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { return &return_label_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS64"; } HBasicBlock* GetSuccessor() const { return successor_; @@ -343,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal) : SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); uint32_t dex_pc = instruction_->GetDexPc(); @@ -383,9 +392,9 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; } + const char* GetDescription() const override { return "TypeCheckSlowPathMIPS64"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -398,7 +407,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -410,7 +419,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); @@ -420,7 +429,7 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -451,7 +460,7 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; } + const char* GetDescription() const override { return "ArraySetSlowPathMIPS64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64); @@ -481,9 +490,9 @@ class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; } - void 
EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); DCHECK(locations->CanCall()); @@ -574,11 +583,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); DCHECK(locations->CanCall()); @@ -735,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; @@ -855,7 +864,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathMIPS64"; } @@ -900,7 +909,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; GpuRegister reg_out = out_.AsRegister<GpuRegister>(); @@ -929,7 +938,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS64"; } private: const Location out_; @@ -939,13 +948,12 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { }; CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, - const Mips64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfGpuRegisters, kNumberOfFpuRegisters, - /* number_of_register_pairs */ 0, + /* number_of_register_pairs= */ 0, ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), arraysize(kCoreCalleeSaves)), ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), @@ -956,8 +964,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), - assembler_(graph->GetAllocator(), &isa_features), - isa_features_(isa_features), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsMips64InstructionSetFeatures()), uint32_literals_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), @@ -968,6 +976,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), 
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -988,8 +997,7 @@ void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { // Adjust native pc offsets in stack maps. StackMapStream* stack_map_stream = GetStackMapStream(); for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { - uint32_t old_position = - stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips64); + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); uint32_t new_position = __ GetAdjustedPosition(old_position); DCHECK_GE(new_position, old_position); stack_map_stream->SetStackMapNativePcOffset(i, new_position); @@ -1482,12 +1490,27 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, if (value_can_be_null) { __ Beqzc(value, &done); } + // Load the address of the card table into `card`. __ LoadFromOffset(kLoadDoubleword, card, TR, Thread::CardTableOffset<kMips64PointerSize>().Int32Value()); + // Calculate the address of the card corresponding to `object`. __ Dsrl(temp, object, gc::accounting::CardTable::kCardShift); __ Daddu(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ Sb(card, temp, 0); if (value_can_be_null) { __ Bind(&done); @@ -1509,6 +1532,15 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
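The card-marking comment added to MarkGCCard above relies on one trick: the card table base is biased so that its least-significant byte already equals kCardDirty, letting the same register supply both the table base and the byte to store. A hypothetical C++ sketch of that sequence; the shift value and names are placeholders, not ART's definitions:

#include <cstdint>

constexpr uintptr_t kCardShiftSketch = 7;  // placeholder card granularity (128-byte cards assumed)

// Dsrl/Daddu compute the card address; Sb stores the low byte of the biased
// base, which is kCardDirty by construction.
void MarkCardSketch(uint8_t* biased_card_base, uintptr_t object) {
  uint8_t* card = biased_card_base + (object >> kCardShiftSketch);               // Dsrl + Daddu
  *card = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_base));   // Sb card, temp, 0
}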
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1517,7 +1549,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1526,12 +1559,14 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1542,6 +1577,20 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li DCHECK_EQ(size, linker_patches->size()); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_); +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { @@ -1616,7 +1665,7 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn DCHECK(!info_high->patch_info_high); __ Bind(&info_high->label); // Add the high half of a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); + __ Auipc(out, /* imm16= */ 0x1234); // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. ld, jialc, daddiu). 
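// Illustrative sketch of how a 32-bit PC-relative offset splits into the AUIPC high half
// plus a sign-extending low-half add, as emitted by EmitPcRelativeAddressPlaceholderHigh
// above. The helper name is made up; extreme offsets near INT32_MAX are ignored here.
static void SplitPcRelativeOffsetSketch(int32_t offset, int32_t* high, int32_t* low) {
  *low = static_cast<int16_t>(offset);   // low 16 bits; the following add sign-extends them
  *high = (offset - *low) >> 16;         // high half compensates for that sign extension
  // Reassembly: (*high << 16) + *low == offset.
}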
if (info_low != nullptr) { @@ -1625,13 +1674,57 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn } } +void CodeGeneratorMIPS64::LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Daddiu(reg, AT, /* imm16= */ 0x5678); + } else if (GetCompilerOptions().GetCompilePic()) { + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ Lwu(reg, AT, /* imm16= */ 0x5678); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + uintptr_t address = + reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference); + __ LoadLiteral(reg, kLoadDoubleword, DeduplicateBootImageAddressLiteral(address)); + } +} + +void CodeGeneratorMIPS64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + GpuRegister argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
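// Condensed, illustrative view of the three paths LoadBootImageAddress above chooses
// between (hypothetical helper and enum, not ART API).
enum class BootImageRefKindSketch { kLinkTimePcRelative, kDataBimgRelRo, kJitDirectAddress };
static BootImageRefKindSketch ChooseBootImageRefKind(bool compiling_boot_image, bool compile_pic) {
  if (compiling_boot_image) {
    return BootImageRefKindSketch::kLinkTimePcRelative;  // PC-relative pair patched at link time
  }
  if (compile_pic) {
    return BootImageRefKindSketch::kDataBimgRelRo;       // 32-bit load from a .data.bimg.rel.ro entry
  }
  return BootImageRefKindSketch::kJitDirectAddress;      // JIT: boot image already mapped, use its address
}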
+ MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx); + PcRelativePatchInfo* info_low = + NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Daddiu(argument, AT, /* imm16= */ 0x5678); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); } Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file, @@ -1640,7 +1733,7 @@ Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), - [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); } void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code, @@ -1740,6 +1833,10 @@ void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int re stream << FpuRegister(reg); } +const Mips64InstructionSetFeatures& CodeGeneratorMIPS64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsMips64InstructionSetFeatures(); +} + void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, @@ -1780,6 +1877,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo __ Bind(slow_path->GetExitLabel()); } +void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + GpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Load only the bitstring part of the status word. + __ LoadFromOffset( + kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value()); + // Compare the bitstring bits using XOR. + if (IsUint<16>(path_to_root)) { + __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root)); + } else { + __ LoadConst32(TMP, path_to_root); + __ Xor(temp, temp, TMP); + } + // Shift out bits that do not contribute to the comparison. 
+ __ Sll(temp, temp, 32 - mask_bits); + } +} + void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) { __ Sync(0); // only stype 0 is supported } @@ -2333,7 +2458,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { obj, offset, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, out_loc, @@ -2341,7 +2466,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { data_offset, index, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } } else { GpuRegister out = out_loc.AsRegister<GpuRegister>(); @@ -2840,7 +2965,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -2849,7 +2980,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); @@ -2888,7 +3019,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. - __ Bnec(temp, cls, slow_path->GetEntryLabel()); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); break; } @@ -2914,7 +3045,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, compare the classes. - __ Bnec(temp, cls, &loop); + __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop); break; } @@ -2929,7 +3060,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop; __ Bind(&loop); - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, temp_loc, @@ -2952,7 +3083,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { maybe_temp2_loc, kWithoutReadBarrier); // Do an exact check. - __ Beqc(temp, cls, &done); + __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done); // Otherwise, we need to check that the object's class is a non-primitive array. 
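// C-level model of GenerateBitstringTypeCheckCompare above (sketch only): the check
// passes iff the low `mask_bits` of the class status word equal the superclass
// `path_to_root`. XOR zeroes the matching bits and the shift (an equivalent of masking)
// discards the bits that do not participate in the comparison.
static bool BitstringTypeCheckSketch(uint32_t status_word, uint32_t path_to_root, uint32_t mask) {
  uint32_t diff = status_word ^ path_to_root;  // zero wherever the bitstring matches
  return (diff & mask) == 0;                   // "temp == 0" is then branched on with Bnezc,
                                               // or turned into a 0/1 flag with Sltiu(out, out, 1)
}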
// /* HeapReference<Class> */ temp = temp->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -3011,7 +3142,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Daddiu(temp, temp, 2 * kHeapReferenceSize); __ Addiu(TMP, TMP, -2); // Compare the classes and continue the loop if they do not match. - __ Bnec(AT, cls, &loop); + __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnezc(temp, slow_path->GetEntryLabel()); break; } } @@ -3027,15 +3172,14 @@ void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( - check->GetLoadClass(), - check, - check->GetDexPc(), - true); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<GpuRegister>()); @@ -3193,10 +3337,10 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { switch (type) { default: // Integer case. - GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ false, locations); return; case DataType::Type::kInt64: - GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ true, locations); return; case DataType::Type::kFloat32: case DataType::Type::kFloat64: @@ -3498,7 +3642,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio if (!DataType::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; - return; + UNREACHABLE(); } if (value.IsConstant()) { @@ -4305,10 +4449,10 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc switch (type) { default: - GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target); + GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ false, locations, branch_target); break; case DataType::Type::kInt64: - GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target); + GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ true, locations, branch_target); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: @@ -4338,7 +4482,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -4357,9 +4501,9 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCodeMIPS64* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } // This function returns true if a conditional move can be generated for HSelect. @@ -4373,7 +4517,7 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { // of common logic. static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) { bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); HCondition* condition = cond->AsCondition(); DataType::Type cond_type = @@ -4516,7 +4660,7 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { Location dst = locations->Out(); Location false_src = locations->InAt(0); Location true_src = locations->InAt(1); - HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HInstruction* cond = select->InputAt(/* i= */ 2); GpuRegister cond_reg = TMP; FpuRegister fcond_reg = FTMP; DataType::Type cond_type = DataType::Type::kInt32; @@ -4524,7 +4668,7 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { - cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<GpuRegister>(); + cond_reg = locations->InAt(/* at= */ 2).AsRegister<GpuRegister>(); } else { HCondition* condition = cond->AsCondition(); LocationSummary* cond_locations = cond->GetLocations(); @@ -4533,13 +4677,13 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { switch (cond_type) { default: cond_inverted = MaterializeIntLongCompare(if_cond, - /* is64bit */ false, + /* is64bit= */ false, cond_locations, cond_reg); break; case DataType::Type::kInt64: cond_inverted = MaterializeIntLongCompare(if_cond, - /* is64bit */ true, + /* is64bit= */ true, cond_locations, cond_reg); break; @@ -4682,14 +4826,14 @@ void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { } void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) { - if (CanMoveConditionally(select, /* locations_to_set */ nullptr)) { + if (CanMoveConditionally(select, /* locations_to_set= */ nullptr)) { GenConditionalMove(select); } else { LocationSummary* locations = select->GetLocations(); Mips64Label false_target; GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, + /* condition_input_index= */ 2, + /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); @@ -4801,7 +4945,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, obj, offset, temp_loc, - /* needs_null_check */ true); + /* needs_null_check= */ true); if (is_volatile) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ 
-4957,7 +5101,7 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( out_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -4998,7 +5142,7 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( obj_reg, offset, maybe_temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -5086,7 +5230,7 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr __ Daui(base, obj, offset_high); } Mips64Label skip_call; - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); if (label_low != nullptr) { DCHECK(short_offset); __ Bind(label_low); @@ -5216,7 +5360,7 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); Mips64Label skip_call; if (short_offset) { - __ Beqzc(T9, &skip_call, /* is_bare */ true); + __ Beqzc(T9, &skip_call, /* is_bare= */ true); __ Nop(); // In forbidden slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); @@ -5225,7 +5369,7 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in } else { int16_t offset_low = Low16Bits(offset); int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); __ Daui(TMP, obj, offset_high); // In delay slot. __ Jialc(T9, thunk_disp); __ Bind(&skip_call); @@ -5298,12 +5442,12 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in // We will not do the explicit null check in the thunk as some form of a null check // must've been done earlier. DCHECK(!needs_null_check); - const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false); // Loading the entrypoint does not require a load acquire since it is only changed when // threads are suspended or running a checkpoint. __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); Mips64Label skip_call; - __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Beqz(T9, &skip_call, /* is_bare= */ true); GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); GpuRegister index_reg = index.AsRegister<GpuRegister>(); __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. @@ -5414,7 +5558,7 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction, ref, obj, - /* field_offset */ index, + /* field_offset= */ index, temp_reg); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64(instruction, ref); @@ -5426,7 +5570,7 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit into the sign bit (31) and // performing a branch on less than zero. 
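// Two illustrative sketches of the Baker read-barrier logic used above; conceptual only,
// since the generated code uses a custom thunk calling convention and delay/forbidden slots.
// 1) The fast path is gated on the per-thread mark entrypoint, which is non-null only
//    while the GC is marking.
static uintptr_t BakerLoadSketch(uintptr_t (*mark_entrypoint)(uintptr_t), const uintptr_t* field) {
  uintptr_t ref = *field;            // ordinary heap reference load
  if (mark_entrypoint != nullptr) {  // Beqz(T9, &skip_call) in the emitted code
    ref = mark_entrypoint(ref);      // slow path: mark/forward the reference
  }
  return ref;
}
// 2) The gray check: with non-gray == 0 and gray == 1 in a 1-bit lock-word field,
//    shifting that bit into the sign position turns the test into "branch if negative".
static bool IsGraySketch(uint32_t lock_word, uint32_t rb_state_shift) {
  return static_cast<int32_t>(lock_word << (31 - rb_state_shift)) < 0;  // Sll + Bltz
}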
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); @@ -5515,6 +5659,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5523,7 +5669,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); @@ -5535,7 +5687,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); - GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); + Location cls = locations->InAt(1); Location out_loc = locations->Out(); GpuRegister out = out_loc.AsRegister<GpuRegister>(); const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); @@ -5567,7 +5719,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, read_barrier_option); // Classes must be equal for the instanceof to succeed. - __ Xor(out, out, cls); + __ Xor(out, out, cls.AsRegister<GpuRegister>()); __ Sltiu(out, out, 1); break; } @@ -5594,7 +5746,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); - __ Bnec(out, cls, &loop); + __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop); __ LoadConst32(out, 1); break; } @@ -5612,7 +5764,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // /* HeapReference<Class> */ out = out->super_class_ GenerateReferenceLoadOneRegister(instruction, out_loc, @@ -5639,7 +5791,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { read_barrier_option); // Do an exact check. Mips64Label success; - __ Beqc(out, cls, &success); + __ Beqc(out, cls.AsRegister<GpuRegister>(), &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ GenerateReferenceLoadOneRegister(instruction, @@ -5669,9 +5821,9 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); - __ Bnec(out, cls, slow_path->GetEntryLabel()); + __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); __ LoadConst32(out, 1); break; } @@ -5698,11 +5850,25 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ Bc(slow_path->GetEntryLabel()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ Sltiu(out, out, 1); + break; + } } __ Bind(&done); @@ -5825,6 +5991,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokePolymorphic(HInvokePolymorphic* codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { if (invoke->GetLocations()->Intrinsified()) { IntrinsicCodeGeneratorMIPS64 intrinsic(codegen); @@ -5839,14 +6013,14 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -5866,14 +6040,14 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -5885,7 +6059,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke 
ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { // On MIPS64 we support all dispatch types. return desired_dispatch_info; } @@ -5918,23 +6092,32 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ LoadLiteral(temp.AsRegister<GpuRegister>(), - kLoadDoubleword, - DeduplicateUint64Literal(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { PcRelativePatchInfo* info_high = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); PcRelativePatchInfo* info_low = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); + __ Ld(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ LoadLiteral(temp.AsRegister<GpuRegister>(), + kLoadDoubleword, + DeduplicateUint64Literal(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -6048,10 +6231,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. 
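// Illustrative model of the kBootImageRelRo accesses above: the AUIPC/low-half pair
// resolves to a .data.bimg.rel.ro entry, and the entry itself is a 32-bit boot-image
// address (hence the Lwu), which fits because the boot image is mapped in the low 4GiB.
// The helper below is a sketch with made-up names.
static void* LoadBootImageRelRoEntrySketch(const uint32_t* rel_ro_entry) {
  // Widening the already-relocated 32-bit entry to a pointer mirrors the 32-bit load.
  return reinterpret_cast<void*>(static_cast<uintptr_t>(*rel_ro_entry));
}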
} @@ -6100,33 +6280,18 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(out, AT, /* placeholder */ 0x5678); - break; - } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageAddressLiteral(address)); + __ Daddiu(out, AT, /* imm16= */ 0x5678); break; } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Lwu(out, AT, /* placeholder */ 0x5678); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ Daddiu(out, out, -masked_hash); - } + __ Lwu(out, AT, /* imm16= */ 0x5678); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6138,12 +6303,21 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GenerateGcRootFieldLoad(cls, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, read_barrier_option, &info_low->label); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: __ LoadLiteral(out, kLoadUnsignedWord, @@ -6160,8 +6334,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqzc(out, slow_path->GetEntryLabel()); @@ -6174,6 +6348,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S } } +void LocationsBuilderMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc); +} + +void 
InstructionCodeGeneratorMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderMIPS64::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc); +} + +void InstructionCodeGeneratorMIPS64::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + static int32_t GetExceptionTlsOffset() { return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value(); } @@ -6209,10 +6403,7 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barriers we have a temp-clobbering call. } @@ -6236,30 +6427,21 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Daddiu(out, AT, /* placeholder */ 0x5678); - return; - } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ LoadLiteral(out, - kLoadUnsignedWord, - codegen_->DeduplicateBootImageAddressLiteral(address)); + __ Daddiu(out, AT, /* imm16= */ 0x5678); return; } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageRelRoPatch(boot_image_offset); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); - __ Lwu(out, AT, /* placeholder */ 0x5678); + __ Lwu(out, AT, /* imm16= */ 0x5678); return; } case HLoadString::LoadKind::kBssEntry: { - DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = @@ -6268,7 +6450,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA GenerateGcRootFieldLoad(load, out_loc, out, - /* placeholder */ 0x5678, + /* offset= */ 0x5678, kCompilerReadBarrierOption, &info_low->label); SlowPathCodeMIPS64* slow_path = @@ -6278,6 +6460,14 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA __ 
Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ LoadLiteral(out, + kLoadUnsignedWord, + codegen_->DeduplicateBootImageAddressLiteral(address)); + return; + } case HLoadString::LoadKind::kJitTableAddress: __ LoadLiteral(out, kLoadUnsignedWord, @@ -6442,10 +6632,8 @@ void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -6455,31 +6643,13 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes care - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. 
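// Conceptual sketch of the kBssEntry pattern used by VisitLoadClass and VisitLoadString
// above (illustrative helper, not ART API): the .bss slot is null until the entity is
// resolved, so a null check on the loaded GC root decides whether the slow path runs.
static void* LoadViaBssEntrySketch(void** bss_slot, void* (*resolve_and_store)()) {
  void* entry = *bss_slot;        // GenerateGcRootFieldLoad on the PC-relative slot
  if (entry == nullptr) {         // Beqzc(out, slow_path->GetEntryLabel())
    entry = resolve_and_store();  // slow path resolves the class/string and fills the slot
  }
  return entry;
}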
- GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); - MemberOffset code_offset = - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize); - __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString)); - __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value()); - __ Jalr(T9); - __ Nop(); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } void LocationsBuilderMIPS64::VisitNot(HNot* instruction) { @@ -6665,6 +6835,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (lhs == rhs) { + if (out != lhs) { + __ Move(out, lhs); + } + } else { + // Some architectures, such as ARM and MIPS (prior to r6), have a + // conditional move instruction which only changes the target + // (output) register if the condition is true (MIPS prior to r6 had + // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always + // change the target (output) register. If the condition is true the + // output register gets the contents of the "rs" register; otherwise, + // the output register is set to zero. One consequence of this is + // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 + // needs to use a pair of SELEQZ/SELNEZ instructions. After + // executing this pair of instructions one of the output registers + // from the pair will necessarily contain zero. Then the code ORs the + // output registers from the SELEQZ/SELNEZ instructions to get the + // final result. + // + // The initial test to see if the output register is same as the + // first input register is needed to make sure that value in the + // first input register isn't clobbered before we've finished + // computing the output value. 
The logic in the corresponding else + // clause performs the same task but makes sure the second input + // register isn't clobbered in the event that it's the same register + // as the output register; the else clause also handles the case + // where the output register is distinct from both the first, and the + // second input registers. + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Mips64Label noNaNs; + Mips64Label done; + FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; + + // When Java computes min/max it prefers a NaN to a number; the + // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of + // the inputs is a NaN and the other is a valid number, the MIPS + // instruction will return the number; Java wants the NaN value + // returned. This is why there is extra logic preceding the use of + // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a + // NaN, return the NaN, otherwise return the min/max. + if (type == DataType::Type::kFloat64) { + __ CmpUnD(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqD(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. + __ SelD(ftmp, a, b); + + if (ftmp != out) { + __ MovD(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinD(out, a, b); + } else { + __ MaxD(out, a, b); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ CmpUnS(FTMP, a, b); + __ Bc1eqz(FTMP, &noNaNs); + + // One of the inputs is a NaN + __ CmpEqS(ftmp, a, a); + // If a == a then b is the NaN, otherwise a is the NaN. 
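// Branchless min in the style of the SELEQZ/SELNEZ sequence above (sketch only); max
// just swaps which operand goes through each select. Exactly one of the two partial
// results is zero, so OR-ing them reassembles the chosen value.
static int64_t MinViaSelectsSketch(int64_t lhs, int64_t rhs) {
  int64_t lt = (rhs < lhs) ? 1 : 0;        // Slt AT, rhs, lhs
  int64_t keep_lhs = (lt == 0) ? lhs : 0;  // Seleqz out, lhs, AT
  int64_t keep_rhs = (lt != 0) ? rhs : 0;  // Selnez AT, rhs, AT
  return keep_lhs | keep_rhs;              // Or out, out, AT
}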
+ __ SelS(ftmp, a, b); + + if (ftmp != out) { + __ MovS(out, ftmp); + } + + __ Bc(&done); + + __ Bind(&noNaNs); + + if (is_min) { + __ MinS(out, a, b); + } else { + __ MaxS(out, a, b); + } + } + + __ Bind(&done); +} + +void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderMIPS64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderMIPS64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + break; + } + case DataType::Type::kInt64: { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + break; + } + case DataType::Type::kFloat32: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsS(out, in); + break; + } + case DataType::Type::kFloat64: { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + __ AbsD(out, in); + break; + } + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { constructor_fence->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index e6b69c469f..52f3a62f33 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -79,9 +79,9 @@ class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventio InvokeDexCallingConventionVisitorMIPS64() {} virtual ~InvokeDexCallingConventionVisitorMIPS64() {} - Location 
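// C-level model of the NaN handling in GenerateMinMaxFP above (sketch only, ignoring
// signed-zero subtleties): Java's min/max must return a NaN if either input is NaN,
// while the bare min.fmt/max.fmt instructions prefer the number, hence the explicit
// unordered-compare guard before them.
static double JavaStyleMinSketch(double a, double b) {
  if (a != a || b != b) {     // CmpUnD/CmpUnS: unordered, i.e. at least one input is NaN
    return (a == a) ? b : a;  // CmpEq + Sel: return whichever input is the NaN
  }
  return (a < b) ? a : b;     // MinD/MinS on ordered inputs (MaxD/MaxS for max)
}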
GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -108,22 +108,22 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionMIPS64() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(A1); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(A0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::RegisterLocation(V0); } Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, - bool is_instance) const OVERRIDE { + bool is_instance) const override { return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(F0); } @@ -136,10 +136,10 @@ class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; void Exchange(int index1, int index2, bool double_slot); void ExchangeQuadSlots(int index1, int index2); @@ -173,14 +173,14 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -207,14 +207,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -233,6 +233,7 @@ class 
InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -242,6 +243,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -347,36 +352,35 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { class CodeGeneratorMIPS64 : public CodeGenerator { public: CodeGeneratorMIPS64(HGraph* graph, - const Mips64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorMIPS64() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; - void Bind(HBasicBlock* block) OVERRIDE; + void Bind(HBasicBlock* block) override; - size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; } + size_t GetWordSize() const override { return kMips64DoublewordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 2 * kMips64DoublewordSize // 16 bytes for each spill. : 1 * kMips64DoublewordSize; // 8 bytes for each spill. } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return assembler_.GetLabelLocation(GetLabelOf(block)); } - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; } - const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; } + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + Mips64Assembler* GetAssembler() override { return &assembler_; } + const Mips64Assembler& GetAssembler() const override { return assembler_; } // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -467,42 +471,40 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Register allocation. 
- void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips64; } + InstructionSet GetInstructionSet() const override { return InstructionSet::kMips64; } - const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const; Mips64Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Mips64Label>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<Mips64Label>(); } // We prefer aligned loads and stores (less code), so spill and restore registers in slow paths // at aligned locations. - uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return kMips64DoublewordSize; } + uint32_t GetPreferredSlotsAlignment() const override { return kMips64DoublewordSize; } - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Code generation helpers. - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; - void MoveConstant(Location destination, int32_t value) OVERRIDE; + void MoveConstant(Location destination, int32_t value) override; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; void SwapLocations(Location loc1, Location loc2, DataType::Type type); @@ -511,7 +513,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -521,39 +523,39 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); - ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } // Check if the desired_string_load_kind is supported. 
If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. @@ -586,6 +588,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, @@ -608,6 +614,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { GpuRegister out, PcRelativePatchInfo* info_low = nullptr); + void LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const Literal* literal, @@ -648,14 +657,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator { InstructionCodeGeneratorMIPS64 instruction_visitor_; ParallelMoveResolverMIPS64 move_resolver_; Mips64Assembler assembler_; - const Mips64InstructionSetFeatures& isa_features_; // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. 
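// Aside -- illustrative sketch only, not part of this change and not ART's actual
// types: what the "deduplication map for literals" mentioned in the comments above
// buys the code generator. Each distinct 64-bit constant gets one pooled Literal
// that every use refers to, instead of a fresh copy per use. `Literal` and
// `LiteralPool` below are hypothetical stand-ins.
#include <cstdint>
#include <deque>
#include <unordered_map>

struct Literal {
  uint64_t value;
};

class LiteralPool {
 public:
  // Returns the pooled literal for `value`, creating it on first request.
  Literal* DeduplicateUint64Literal(uint64_t value) {
    auto it = uint64_literals_.find(value);
    if (it != uint64_literals_.end()) {
      return it->second;
    }
    literals_.push_back(Literal{value});  // std::deque keeps pointers stable on growth.
    Literal* literal = &literals_.back();
    uint64_literals_.emplace(value, literal);
    return literal;
  }

 private:
  std::deque<Literal> literals_;  // Owns the pooled literals.
  std::unordered_map<uint64_t, Literal*> uint64_literals_;  // value -> pooled literal.
};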
Uint64ToLiteralMap uint64_literals_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; @@ -663,10 +672,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 1cfdf54816..df95c88c07 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -16,6 +16,7 @@ #include "code_generator_arm64.h" +#include "arch/arm64/instruction_set_features_arm64.h" #include "mirror/array-inl.h" #include "mirror/string.h" @@ -29,7 +30,7 @@ using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; -using helpers::Int64ConstantFrom; +using helpers::Int64FromLocation; using helpers::OutputRegister; using helpers::VRegisterFrom; using helpers::WRegisterFrom; @@ -37,6 +38,15 @@ using helpers::XRegisterFrom; #define __ GetVIXLAssembler()-> +// Build-time switch for Armv8.4-a dot product instructions. +// TODO: Enable dot product when there is a device to test it on. +static constexpr bool kArm64EmitDotProdInstructions = false; + +// Returns whether dot product instructions should be emitted. 
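// Aside -- illustrative sketch only, not part of this change: a plain C++ scalar
// model of the Armv8.4-a SDOT semantics that the dot-product code added later in
// this file gates on kArm64EmitDotProdInstructions. Each 32-bit accumulator lane
// receives the sum of four adjacent signed byte products; the Smull/Saddw fallback
// reaches the same overall total with widening multiplies and adds, though it
// groups the byte products into lanes differently. Names here are hypothetical.
#include <cstddef>
#include <cstdint>

static void ScalarSdotModel(int32_t acc[4], const int8_t a[16], const int8_t b[16]) {
  for (size_t lane = 0; lane < 4; ++lane) {
    int32_t sum = 0;
    for (size_t j = 0; j < 4; ++j) {
      sum += static_cast<int32_t>(a[4 * lane + j]) * static_cast<int32_t>(b[4 * lane + j]);
    }
    acc[lane] += sum;  // Like SDOT, accumulate into the existing lane value.
  }
}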
+static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) { + return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd(); +} + void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); HInstruction* input = instruction->InputAt(0); @@ -63,7 +73,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -78,7 +88,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V16B(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V16B(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); } @@ -87,7 +97,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V8H(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V8H(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); } @@ -95,7 +105,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V4S(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V4S(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); } @@ -103,7 +113,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { - __ Movi(dst.V2D(), Int64ConstantFrom(src_loc)); + __ Movi(dst.V2D(), Int64FromLocation(src_loc)); } else { __ Dup(dst.V2D(), XRegisterFrom(src_loc)); } @@ -125,7 +135,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -149,7 +159,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -173,7 +183,7 @@ void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* ins DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -200,7 +210,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -216,7 +226,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - 
switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Addv(dst.S(), src.V4S()); break; @@ -230,7 +240,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Addp(dst.D(), src.V2D()); break; @@ -240,7 +250,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -259,7 +269,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Scvtf(dst.V4S(), src.V4S()); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -299,7 +309,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { __ Fneg(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -338,7 +348,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { __ Fabs(dst.V2D(), src.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -366,7 +376,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { __ Not(dst.V16B(), src.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -389,7 +399,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -431,7 +441,39 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + 
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -471,7 +513,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -513,7 +555,39 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister lhs = VRegisterFrom(locations->InAt(0)); + VRegister rhs = VRegisterFrom(locations->InAt(1)); + VRegister dst = VRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -551,7 +625,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -575,7 +649,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -623,7 +697,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -671,7 +745,7 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -699,7 +773,7 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -735,7 +809,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << 
instruction->GetPackedType(); UNREACHABLE(); } } @@ -762,7 +836,7 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -782,7 +856,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -816,7 +890,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { __ Shl(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -850,7 +924,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { __ Sshr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -884,7 +958,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { __ Ushr(dst.V2D(), lhs.V2D(), value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -916,7 +990,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -957,7 +1031,7 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -978,7 +1052,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1026,7 +1100,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1139,7 +1213,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1167,7 +1241,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1188,7 +1262,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1204,12 +1278,88 @@ 
void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + } +} + +void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->GetPackedType() == DataType::Type::kInt32); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + + // For Int8 and Uint8 general case we need a temp register. + if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) && + !ShouldEmitDotProductInstructions(codegen_)) { + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32); + DCHECK_EQ(4u, instruction->GetVectorLength()); + + size_t inputs_data_size = DataType::Size(a->GetPackedType()); + switch (inputs_data_size) { + case 1u: { + DCHECK_EQ(16u, a->GetVectorLength()); + if (instruction->IsZeroExtending()) { + if (ShouldEmitDotProductInstructions(codegen_)) { + __ Udot(acc.V4S(), left.V16B(), right.V16B()); + } else { + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Umull(tmp.V8H(), left.V8B(), right.V8B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Umull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } + } else { + if (ShouldEmitDotProductInstructions(codegen_)) { + __ Sdot(acc.V4S(), left.V16B(), right.V16B()); + } else { + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Smull(tmp.V8H(), left.V8B(), right.V8B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Smull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } + } + break; + } + case 2u: + DCHECK_EQ(8u, a->GetVectorLength()); + if (instruction->IsZeroExtending()) { + __ Umlal(acc.V4S(), left.V4H(), right.V4H()); + __ Umlal2(acc.V4S(), left.V8H(), right.V8H()); + } else { + __ Smlal(acc.V4S(), left.V4H(), right.V4H()); + __ Smlal2(acc.V4S(), left.V8H(), right.V8H()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size; } } @@ -1237,7 +1387,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); 
UNREACHABLE(); } } @@ -1269,7 +1419,7 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress( DCHECK(!instruction->InputAt(0)->IsIntermediateAddress()); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << shift; + offset += Int64FromLocation(index) << shift; return HeapOperand(base, offset); } else { *scratch = temps_scope->AcquireSameSizeAs(base); @@ -1331,7 +1481,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1362,7 +1512,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7c3155ab73..b092961a56 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -46,7 +46,7 @@ void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instr locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -71,7 +71,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -84,7 +84,7 @@ void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instructi locations->SetOut(Location::RequiresRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -98,7 +98,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* i __ Vmov(OutputRegister(instruction), DRegisterLane(src, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -122,7 +122,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Vpadd(DataTypeValue::I32, dst, src, src); break; @@ -151,7 +151,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -188,7 +188,7 @@ void 
InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { __ Vneg(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -215,7 +215,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { __ Vabs(DataTypeValue::S32, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { __ Vmvn(I8, dst, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -262,7 +262,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -292,7 +292,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { __ Vadd(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqadd(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -332,7 +364,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -362,7 +394,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { __ Vsub(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister lhs = 
DRegisterFrom(locations->InAt(0)); + vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vqsub(DataTypeValue::S16, dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +456,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { __ Vmul(I32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -440,7 +504,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -480,7 +544,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -505,7 +569,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { __ Vand(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -537,7 +601,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { __ Vorr(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -561,7 +625,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { __ Veor(I8, dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -580,7 +644,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -610,7 +674,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { __ Vshl(I32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +704,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { __ Vshr(DataTypeValue::S32, dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -670,7 +734,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { __ Vshr(DataTypeValue::U32, dst, lhs, value); 
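// Aside -- illustrative sketch only, not part of this change: scalar models of the
// saturating adds that the VisitVecSaturationAdd/VisitVecSaturationSub handlers
// above map to UQADD/SQADD/UQSUB/SQSUB (arm64) and VQADD/VQSUB (arm). Results are
// clamped to the element type's range instead of wrapping. Plain C++, hypothetical
// helper names.
#include <algorithm>
#include <cstdint>

static uint8_t SatAddU8(uint8_t a, uint8_t b) {
  int sum = static_cast<int>(a) + static_cast<int>(b);
  return static_cast<uint8_t>(std::min(sum, 255));  // Clamp to [0, 255].
}

static int16_t SatAddS16(int16_t a, int16_t b) {
  int sum = static_cast<int>(a) + static_cast<int>(b);
  return static_cast<int16_t>(std::min(std::max(sum, -32768), 32767));  // Clamp to [-32768, 32767].
}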
break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +754,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -716,7 +780,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc __ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -737,7 +801,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -780,16 +844,24 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word // size equals to 4). 
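// Aside -- illustrative sketch only, not ART's IsWordAligned: the property the
// word-alignment comment above describes. An access at byte offset
// data_offset + index * element_size is guaranteed 4-byte aligned for every index
// when both the base data offset and the element size are multiples of 4.
// Names are hypothetical.
#include <cstdint>

static bool IsGuaranteedWordAligned(uint32_t data_offset, uint32_t element_size) {
  return (data_offset % 4u == 0u) && (element_size % 4u == 0u);
}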
static bool IsWordAligned(HVecMemoryOperation* instruction) { @@ -817,7 +889,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -923,7 +995,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -971,7 +1043,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index ed9de96496..4e9ba0d3d2 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -42,7 +42,7 @@ void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruct locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -74,22 +74,22 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* __ InsertW(static_cast<VectorRegister>(FTMP), locations->InAt(0).AsRegisterPairHigh<Register>(), 1); - __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double */ true); + __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double= */ true); break; case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FRegister>(), - /* is_double */ false); + /* is_double= */ false); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FRegister>(), - /* is_double */ true); + /* is_double= */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -113,7 +113,7 @@ void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* inst DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -170,7 +170,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -187,7 +187,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ 
Hadd_sD(tmp, src, src); __ IlvlD(dst, tmp, tmp); @@ -209,7 +209,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ IlvlD(dst, src, src); __ AddvD(dst, dst, src); @@ -225,7 +225,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -244,7 +244,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -290,7 +290,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -337,7 +337,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +369,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -392,7 +392,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -434,11 +434,19 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -474,7 +482,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio : __ Ave_sH(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -516,11 +524,19 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void 
InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -558,7 +574,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -582,7 +598,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -640,7 +656,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -698,7 +714,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -727,7 +743,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -764,7 +780,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -793,7 +809,7 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -813,7 +829,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +863,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -881,7 +897,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -915,7 +931,7 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -947,7 +963,7 @@ void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD 
type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1005,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio __ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1010,7 +1026,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1060,7 +1076,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1162,7 +1178,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1201,7 +1217,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1231,7 +1247,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1247,17 +1263,25 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1282,7 +1306,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1320,7 +1344,7 @@ int32_t InstructionCodeGeneratorMIPS::VecAddress(LocationSummary* locations, } void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true); } void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { @@ -1357,13 +1381,13 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false); } void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { @@ -1395,7 +1419,7 @@ void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 9ea55ec8d7..6467d3e27f 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -47,7 +47,7 @@ void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instru locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -79,16 +79,16 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar DCHECK_EQ(4u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FpuRegister>(), - /* is_double */ false); + /* is_double= */ false); break; case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FpuRegister>(), - /* is_double */ true); + /* is_double= */ true); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -112,7 +112,7 @@ void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -136,7 +136,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -168,7 +168,7 @@ static void 
CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation : Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -185,7 +185,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ Hadd_sD(tmp, src, src); __ IlvlD(dst, tmp, tmp); @@ -207,7 +207,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { break; case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ IlvlD(dst, src, src); __ AddvD(dst, dst, src); @@ -223,7 +223,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -242,7 +242,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -289,7 +289,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { __ FsubD(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -336,7 +336,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { __ AndV(dst, dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -368,7 +368,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { __ NorV(dst, src, src); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -391,7 +391,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -433,11 +433,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { __ FaddD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -473,7 +481,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct : __ Ave_sH(dst, lhs, rhs); break; 
default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -515,11 +523,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { __ FsubD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LOG(FATAL) << "Unsupported SIMD " << instruction->GetId(); +} + void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } @@ -557,7 +573,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { __ FmulD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -581,7 +597,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { __ FdivD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -639,7 +655,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { __ FminD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -697,7 +713,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { __ FmaxD(dst, lhs, rhs); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -726,7 +742,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { __ AndV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -763,7 +779,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { __ OrV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -792,7 +808,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { __ XorV(dst, lhs, rhs); // lanes do not matter break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +828,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -846,7 +862,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { __ SlliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -880,7 +896,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { __ SraiD(dst, lhs, value); break; default: - 
LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -914,7 +930,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { __ SrliD(dst, lhs, value); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -946,7 +962,7 @@ void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -987,7 +1003,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1008,7 +1024,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1058,7 +1074,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1160,7 +1176,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1199,7 +1215,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1229,7 +1245,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; @@ -1245,17 +1261,25 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } +void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1280,7 +1304,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1318,7 +1342,7 @@ int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, } void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true); } void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { @@ -1355,13 +1379,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { __ LdD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false); } void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { @@ -1393,7 +1417,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { __ StD(reg, base, offset); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 4945328e2b..0ee00356b9 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -54,7 +54,7 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -111,7 +111,7 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -138,7 +138,7 @@ void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -152,7 +152,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? 
- LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_LE(4u, instruction->GetVectorLength()); @@ -174,7 +174,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -196,7 +196,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -205,8 +205,8 @@ void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) { CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Long reduction or min/max require a temporary. if (instruction->GetPackedType() == DataType::Type::kInt64 || - instruction->GetKind() == HVecReduce::kMin || - instruction->GetKind() == HVecReduce::kMax) { + instruction->GetReductionKind() == HVecReduce::kMin || + instruction->GetReductionKind() == HVecReduce::kMax) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -218,38 +218,23 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(dst, src); __ phaddd(dst, dst); __ phaddd(dst, dst); break; - case HVecReduce::kMin: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pminsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pminsd(dst, tmp); - break; - } - case HVecReduce::kMax: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pmaxsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pmaxsd(dst, tmp); - break; - } + case HVecReduce::kMin: + case HVecReduce::kMax: + // Historical note: We've had a broken implementation here. b/117863065 + // Do not draw on the old code if we ever want to bring MIN/MAX reduction back. 
+ LOG(FATAL) << "Unsupported reduction type."; } break; case DataType::Type::kInt64: { DCHECK_EQ(2u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(tmp, src); __ movaps(dst, src); @@ -258,12 +243,12 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -282,7 +267,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -328,7 +313,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -369,7 +354,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -418,7 +403,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -441,7 +426,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -483,7 +468,39 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -503,14 +520,14 @@ void 
InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -552,7 +569,39 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -585,7 +634,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -609,7 +658,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -658,7 +707,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -707,7 +756,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -742,7 +791,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -777,7 +826,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -812,7 +861,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { __ 
orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -847,7 +896,7 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -865,7 +914,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -894,7 +943,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -919,7 +968,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -948,7 +997,7 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -985,7 +1034,7 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1011,7 +1060,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1035,7 +1084,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1056,7 +1105,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1079,6 +1128,14 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
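The VisitVecSaturationAdd and VisitVecSaturationSub handlers added above lower HVecSaturationAdd/HVecSaturationSub to the SSE2 packed saturating instructions (paddusb, paddsb, paddusw, paddsw and the matching psub* forms). As a rough scalar sketch of the per-lane semantics those instructions provide, assuming illustrative helper names rather than ART APIs:

// Scalar sketch of one lane of paddus*/padds* and psubus*/psubs*: the result
// is clamped to the lane type's range instead of wrapping. Illustrative only.
#include <algorithm>
#include <cstdint>
#include <limits>

template <typename T>  // T is one of uint8_t, int8_t, uint16_t, int16_t
T SaturatingAdd(T a, T b) {
  int32_t wide = int32_t{a} + int32_t{b};  // widen so the sum cannot overflow
  wide = std::clamp<int32_t>(wide,
                             std::numeric_limits<T>::min(),
                             std::numeric_limits<T>::max());
  return static_cast<T>(wide);
}

template <typename T>
T SaturatingSub(T a, T b) {
  int32_t wide = int32_t{a} - int32_t{b};
  wide = std::clamp<int32_t>(wide,
                             std::numeric_limits<T>::min(),
                             std::numeric_limits<T>::max());
  return static_cast<T>(wide);
}

For example, SaturatingAdd<uint8_t>(200, 100) yields 255 and SaturatingSub<uint8_t>(10, 20) yields 0, which is what one lane of paddusb and psubusb produces.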
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1103,7 +1160,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1184,7 +1241,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1220,7 +1277,7 @@ void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index a77c7d6838..9c2882766c 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -49,7 +49,7 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru : Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -102,7 +102,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar __ shufpd(dst, dst, Immediate(0)); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -126,7 +126,7 @@ void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instructio locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -140,7 +140,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in case DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -157,7 +157,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -179,7 +179,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -188,8 +188,8 @@ void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) { CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Long reduction or min/max require a temporary. 
if (instruction->GetPackedType() == DataType::Type::kInt64 || - instruction->GetKind() == HVecReduce::kMin || - instruction->GetKind() == HVecReduce::kMax) { + instruction->GetReductionKind() == HVecReduce::kMin || + instruction->GetReductionKind() == HVecReduce::kMax) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -201,38 +201,23 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { switch (instruction->GetPackedType()) { case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(dst, src); __ phaddd(dst, dst); __ phaddd(dst, dst); break; - case HVecReduce::kMin: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pminsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pminsd(dst, tmp); - break; - } - case HVecReduce::kMax: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - __ movaps(tmp, src); - __ movaps(dst, src); - __ psrldq(tmp, Immediate(8)); - __ pmaxsd(dst, tmp); - __ psrldq(tmp, Immediate(4)); - __ pmaxsd(dst, tmp); - break; - } + case HVecReduce::kMin: + case HVecReduce::kMax: + // Historical note: We've had a broken implementation here. b/117863065 + // Do not draw on the old code if we ever want to bring MIN/MAX reduction back. + LOG(FATAL) << "Unsupported reduction type."; } break; case DataType::Type::kInt64: { DCHECK_EQ(2u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - switch (instruction->GetKind()) { + switch (instruction->GetReductionKind()) { case HVecReduce::kSum: __ movaps(tmp, src); __ movaps(dst, src); @@ -241,12 +226,12 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { break; case HVecReduce::kMin: case HVecReduce::kMax: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } break; } default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -265,7 +250,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); } } @@ -311,7 +296,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -352,7 +337,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -401,7 +386,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -424,7 +409,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + 
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -466,7 +451,39 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { __ addpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -486,14 +503,14 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct switch (instruction->GetPackedType()) { case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ pavgb(dst, src); - return; + __ pavgb(dst, src); + break; case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); - return; + break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -535,7 +552,39 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { __ subpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubusb(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubsb(dst, src); + break; + case DataType::Type::kUint16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubusw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubsw(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -568,7 +617,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { __ mulpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; 
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -592,7 +641,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { __ divpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -641,7 +690,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { __ minpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -690,7 +739,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { __ maxpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -725,7 +774,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { __ andpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -760,7 +809,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { __ andnpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -795,7 +844,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { __ orpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -830,7 +879,7 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { __ xorpd(dst, src); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -848,7 +897,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -877,7 +926,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { __ psllq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -902,7 +951,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { __ psrad(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -931,7 +980,7 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { __ psrlq(dst, Immediate(static_cast<int8_t>(value))); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -963,7 +1012,7 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { locations->SetOut(Location::RequiresFpuRegister()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -989,7 +1038,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct case 
DataType::Type::kInt8: case DataType::Type::kUint16: case DataType::Type::kInt16: // TODO: up to here, and? - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); @@ -1008,7 +1057,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1029,7 +1078,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in locations->SetOut(Location::SameAsFirstInput()); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1052,6 +1101,14 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, @@ -1076,7 +1133,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator, } break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1157,7 +1214,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } @@ -1193,7 +1250,7 @@ void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { is_aligned16 ? 
__ movapd(address, reg) : __ movupd(address, reg); break; default: - LOG(FATAL) << "Unsupported SIMD type"; + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); UNREACHABLE(); } } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 6bf045885d..95118b0b6d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -23,6 +23,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86.h" @@ -51,6 +52,18 @@ static constexpr int kC2ConditionMask = 0x400; static constexpr int kFakeReturnRegister = Register(8); +static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); +static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); + +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value() @@ -59,7 +72,7 @@ class NullCheckSlowPathX86 : public SlowPathCode { public: explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -73,9 +86,9 @@ class NullCheckSlowPathX86 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; } + const char* GetDescription() const override { return "NullCheckSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86); @@ -85,16 +98,16 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode { public: explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86); @@ -105,7 +118,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode { DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool 
is_div) : SlowPathCode(instruction), reg_(reg), is_div_(is_div) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { __ Bind(GetEntryLabel()); if (is_div_) { __ negl(reg_); @@ -115,7 +128,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; } + const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; } private: Register reg_; @@ -127,7 +140,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { public: explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); @@ -174,9 +187,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86); @@ -187,7 +200,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCode(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); @@ -211,7 +224,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; } private: HBasicBlock* const successor_; @@ -224,7 +237,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { public: explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -243,7 +256,7 @@ class LoadStringSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } + const char* GetDescription() const override { return "LoadStringSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); @@ -251,36 +264,42 @@ class LoadStringSlowPathX86 : public SlowPathCode { class LoadClassSlowPathX86 : public SlowPathCode { public: - LoadClassSlowPathX86(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at) + : SlowPathCode(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void 
EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - dex::TypeIndex type_index = cls_->GetTypeIndex(); - __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); - x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage - : kQuickInitializeType, - instruction_, - dex_pc_, - this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_)); + x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source); + } + if (must_do_clinit) { + x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } // Move the class to the desired location. - Location out = locations->Out(); if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); x86_codegen->Move32(out, Location::RegisterLocation(EAX)); @@ -289,18 +308,12 @@ class LoadClassSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; } + const char* GetDescription() const override { return "LoadClassSlowPathX86"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. 
- const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86); }; @@ -309,7 +322,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode { TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal) : SlowPathCode(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -362,8 +375,8 @@ class TypeCheckSlowPathX86 : public SlowPathCode { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + const char* GetDescription() const override { return "TypeCheckSlowPathX86"; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -376,7 +389,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { explicit DeoptimizationSlowPathX86(HDeoptimize* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -389,7 +402,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); @@ -399,7 +412,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { public: explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -430,7 +443,7 @@ class ArraySetSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; } + const char* GetDescription() const override { return "ArraySetSlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86); @@ -458,9 +471,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -545,9 +558,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register ref_reg = ref_.AsRegister<Register>(); 
DCHECK(locations->CanCall()); @@ -711,7 +724,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); @@ -830,7 +843,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { @@ -870,7 +883,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); @@ -896,7 +909,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; } private: const Location out_; @@ -954,6 +967,10 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg) stream << XmmRegister(reg); } +const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures(); +} + size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ movl(Address(ESP, stack_index), static_cast<Register>(reg_id)); return kX86WordSize; @@ -1005,7 +1022,6 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { } CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1023,13 +1039,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), @@ -1212,7 +1228,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type ty case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + 
UNREACHABLE(); } return Location::NoLocation(); } @@ -1704,7 +1720,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -1722,9 +1738,9 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); GenerateTestAndBranch<Label>(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -1847,7 +1863,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { } else { NearLabel false_target; GenerateTestAndBranch<NearLabel>( - select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target); + select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); } @@ -2185,7 +2201,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok IntrinsicLocationsBuilderX86 intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { - if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) { + if (invoke->GetLocations()->CanCall() && + invoke->HasPcRelativeMethodLoadKind() && + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) { invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any()); } return; @@ -2308,6 +2326,14 @@ void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* inv codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + void LocationsBuilderX86::VisitNeg(HNeg* neg) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); @@ -2963,7 +2989,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { default: LOG(FATAL) << "Unexpected add type " << add->GetResultType(); - break; + UNREACHABLE(); } } @@ -3408,8 +3434,8 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { // Load the values to the FP stack in reverse order, using temporaries if needed. const bool is_wide = !is_float; - PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide); - PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide); + PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide); + PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide); // Loop doing FPREM until we stabilize. 
NearLabel retry; @@ -3471,6 +3497,27 @@ void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruct } } +void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + Register out = locations->Out().AsRegister<Register>(); + Register numerator = locations->InAt(0).AsRegister<Register>(); + + int32_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); + + Register tmp = locations->GetTemp(0).AsRegister<Register>(); + NearLabel done; + __ movl(out, numerator); + __ andl(out, Immediate(abs_imm-1)); + __ j(Condition::kZero, &done); + __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); + __ testl(numerator, numerator); + __ cmovl(Condition::kLess, out, tmp); + __ Bind(&done); +} void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); @@ -3525,7 +3572,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift); // Save the numerator. __ movl(num, eax); @@ -3584,8 +3631,12 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { - DivByPowerOfTwo(instruction->AsDiv()); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + if (is_div) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + RemByPowerOfTwo(instruction->AsRem()); + } } else { DCHECK(imm <= -2 || imm >= 2); GenerateDivRemWithAnyConstant(instruction); @@ -3802,6 +3853,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + // Register to use to perform a long subtract to set cc. 
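The RemByPowerOfTwo helper introduced above computes a signed remainder by a power-of-two constant without a division: it masks the numerator and, for a negative numerator with non-zero masked bits, subtracts the divisor magnitude back out via the leal/cmovl pair. A minimal scalar sketch of that computation, using illustrative names rather than the codegen API:

// Scalar equivalent of the RemByPowerOfTwo lowering: truncated (Java-style)
// remainder by a power-of-two magnitude, so the result takes the numerator's
// sign. Illustrative sketch, not ART code.
#include <cstdint>

int32_t RemPowerOfTwo(int32_t numerator, uint32_t abs_imm) {
  uint32_t mask = abs_imm - 1u;  // abs_imm must be a power of two
  int32_t masked = static_cast<int32_t>(static_cast<uint32_t>(numerator) & mask);
  if (masked == 0) {
    return 0;  // exact multiple, remainder is zero for either sign
  }
  // For a negative numerator the masked bits equal remainder + abs_imm;
  // this subtraction is what leal(tmp, out + ~(abs_imm-1)) plus cmovl select.
  return numerator < 0 ? masked - static_cast<int32_t>(abs_imm) : masked;
}

For instance, RemPowerOfTwo(-7, 4) is -3 and RemPowerOfTwo(7, 4) is 3, matching Java's % on int.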
+ locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + if (type == DataType::Type::kInt64) { + // Need to perform a subtract to get the sign right. + // op1 is already in the same location as the output. + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + + Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); + Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); + + // The comparison is performed by subtracting the second operand from + // the first operand and then setting the status flags in the same + // manner as the SUB instruction." + __ cmpl(output_lo, op2_lo); + + // Now use a temp and the borrow to finish the subtraction of op2_hi. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ movl(temp, output_hi); + __ sbbl(temp, op2_hi); + + // Now the condition code is correct. + Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; + __ cmovl(cond, output_lo, op2_lo); + __ cmovl(cond, output_hi, op2_hi); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + Register out = locations->Out().AsRegister<Register>(); + Register op2 = op2_loc.AsRegister<Register>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + __ cmpl(out, op2); + Condition cond = is_min ? Condition::kGreater : Condition::kLess; + __ cmovl(cond, out, op2); + } +} + +void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? 
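An illustrative standalone sketch (not part of the patch; the helper name is made up) of the value semantics the FP min/max sequence above implements, as the comment describes: a NaN input produces the canonical NaN constant, and equal inputs fall through to the orpd/orps (min) or andpd/andps (max) step so that min(+0.0, -0.0) is -0.0 and max(+0.0, -0.0) is +0.0.

#include <cmath>
#include <limits>

static double MinMaxFPReference(double a, double b, bool is_min) {
  if (std::isnan(a) || std::isnan(b)) {
    // Mirrors the kDoubleNaN / kFloatNaN load on the NaN path.
    return std::numeric_limits<double>::quiet_NaN();
  }
  if (a == b) {
    // Equal values, including the +0.0 / -0.0 pair: emulate the sign-bit
    // OR (min) or AND (max) performed by orpd/andpd above.
    bool negative = is_min ? (std::signbit(a) || std::signbit(b))
                           : (std::signbit(a) && std::signbit(b));
    return negative ? -std::fabs(a) : std::fabs(a);
  }
  return is_min ? (a < b ? a : b) : (a > b ? a : b);
}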
+ + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(out, kDoubleNaN); + } else { + Register constant = locations->GetTemp(0).AsRegister<Register>(); + __ movl(constant, Immediate(kFloatNaN)); + __ movd(out, constant); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. + __ Bind(&done); +} + +void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RegisterLocation(EAX)); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RegisterLocation(EDX)); + break; + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + Register out = locations->Out().AsRegister<Register>(); + 
DCHECK_EQ(out, EAX); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + DCHECK_EQ(temp, EDX); + // Sign extend EAX into EDX. + __ cdq(); + // XOR EAX with sign. + __ xorl(EAX, EDX); + // Subtract out sign to correct. + __ subl(EAX, EDX); + // The result is in EAX. + break; + } + case DataType::Type::kInt64: { + Location input = locations->InAt(0); + Register input_lo = input.AsRegisterPairLow<Register>(); + Register input_hi = input.AsRegisterPairHigh<Register>(); + Location output = locations->Out(); + Register output_lo = output.AsRegisterPairLow<Register>(); + Register output_hi = output.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + // Compute the sign into the temporary. + __ movl(temp, input_hi); + __ sarl(temp, Immediate(31)); + // Store the sign into the output. + __ movl(output_lo, temp); + __ movl(output_hi, temp); + // XOR the input to the output. + __ xorl(output_lo, input_lo); + __ xorl(output_hi, input_hi); + // Subtract the sign. + __ subl(output_lo, temp); + __ sbbl(output_hi, temp); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + Register constant = locations->GetTemp(1).AsRegister<Register>(); + __ movl(constant, Immediate(INT32_C(0x7FFFFFFF))); + __ movd(temp, constant); + __ andps(out, temp); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // TODO: Use a constant from the constant table (requires extra input). + __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF)); + __ andpd(out, temp); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { @@ -4184,29 +4530,14 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); locations->SetOut(Location::RegisterLocation(EAX)); - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. 
- Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>(); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize); - __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString))); - __ call(Address(temp, code_offset.Int32Value())); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { @@ -4219,10 +4550,8 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -4472,14 +4801,14 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { } case MemBarrierKind::kNTStoreStore: // Non-Temporal Store/Store needs an explicit fence. - MemoryFence(/* non-temporal */ true); + MemoryFence(/* non-temporal= */ true); break; } } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -4531,9 +4860,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( RecordBootImageMethodPatch(invoke); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, + temp.AsRegister<Register>()); + __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); + RecordBootImageRelRoPatch( + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), + GetBootImageOffset(invoke)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); @@ -4541,6 +4876,9 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( RecordMethodBssEntryPatch(invoke); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. 
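A rough standalone illustration (helper names are hypothetical, not ART APIs) of the two method-load kinds handled above: kBootImageRelRo reads the method's address through a 32-bit slot, with the real displacement substituted for kDummy32BitOffset once the patch recorded by RecordBootImageRelRoPatch is applied, while kJitDirectAddress embeds the already-known address directly as an immediate.

#include <cstdint>
#include <cstring>

// kBootImageRelRo: movl temp, [base_reg + patched_disp] -- one extra
// indirection through a relocated 32-bit entry.
static uint32_t LoadMethodViaRelRoSlot(const uint8_t* method_address_base,
                                       int32_t patched_disp) {
  uint32_t entry;
  std::memcpy(&entry, method_address_base + patched_disp, sizeof(entry));
  return entry;
}

// kJitDirectAddress: movl temp, Immediate(address) -- no indirection, the
// JIT already knows the final address.
static uint32_t LoadMethodDirect(uint32_t known_method_address) {
  return known_method_address;
}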
@@ -4595,6 +4933,20 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t intrinsic_data) { + boot_image_intrinsic_patches_.emplace_back( + method_address, /* target_dex_file= */ nullptr, intrinsic_data); + __ Bind(&boot_image_intrinsic_patches_.back().label); +} + +void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back( + method_address, /* target_dex_file= */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); HX86ComputeBaseMethodAddress* method_address = @@ -4639,7 +4991,6 @@ void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) { } Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { - DCHECK(!GetCompilerOptions().IsBootImage()); HX86ComputeBaseMethodAddress* method_address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); string_bss_entry_patches_.emplace_back( @@ -4647,6 +4998,62 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { return &string_bss_entry_patches_.back().label; } +void CodeGeneratorX86::LoadBootImageAddress(Register reg, + uint32_t boot_image_reference, + HInvokeStaticOrDirect* invoke) { + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); + __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootImageIntrinsicPatch(method_address, boot_image_reference); + } else if (GetCompilerOptions().GetCompilePic()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); + __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); + RecordBootImageRelRoPatch(method_address, boot_image_reference); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; + __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)))); + } +} + +void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>(); + __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset)); + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_); + __ Bind(&boot_image_type_patches_.back().label); + } else { + LoadBootImageAddress(argument, boot_image_offset, invoke); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -4664,6 +5071,15 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4672,7 +5088,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -4681,12 +5098,14 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -4707,9 +5126,25 @@ void CodeGeneratorX86::MarkGCCard(Register temp, __ testl(value, value); __ j(kEqual, &is_null); } + // Load the address of 
the card table into `card`. __ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value())); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ movl(temp, object); __ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ movb(Address(temp, card, TIMES_1, 0), X86ManagedRegister::FromCpuRegister(card).AsByteRegister()); if (value_can_be_null) { @@ -4801,7 +5236,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check= */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -5284,7 +5719,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call. 
codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); } else { Register out = out_loc.AsRegister<Register>(); __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); @@ -6055,14 +6490,14 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -6093,7 +6528,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || + load_kind == HLoadClass::LoadKind::kBootImageRelRo || load_kind == HLoadClass::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6101,10 +6536,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -6149,7 +6581,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /* fixup_label */ nullptr, + /* fixup_label= */ nullptr, read_barrier_option); break; } @@ -6161,25 +6593,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE codegen_->RecordBootImageTypePatch(cls); break; } - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(address)); - break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. 
- int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { @@ -6190,6 +6609,13 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(address)); + break; + } case HLoadClass::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewJitRootClassPatch( @@ -6206,8 +6632,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { @@ -6223,6 +6648,26 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE } } +void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) { + InvokeRuntimeCallingConvention calling_convention; + Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); +} + void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); @@ -6230,12 +6675,14 @@ void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. 
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<Register>()); @@ -6255,18 +6702,38 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( // No need for memory fence, thanks to the X86 memory model. } +void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + Register temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -6278,7 +6745,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || + load_kind == HLoadString::LoadKind::kBootImageRelRo || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6289,10 +6756,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. - RegisterSet caller_saves = RegisterSet::Empty(); - InvokeRuntimeCallingConvention calling_convention; - caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. 
} @@ -6325,18 +6789,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S codegen_->RecordBootImageStringPatch(load); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(address)); - return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootImageStringPatch(load); + codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(), + codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { @@ -6352,6 +6810,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(address)); + return; + } case HLoadString::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset); Label* fixup_label = codegen_->NewJitRootStringPatch( @@ -6418,8 +6882,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { return 0; } -// Interface case has 3 temps, one for holding the number of interfaces, one for the current -// interface pointer, one for loading the current interface. +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { if (type_check_kind == TypeCheckKind::kInterfaceCheck) { @@ -6447,6 +6911,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -6455,7 +6921,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); // When read barriers are enabled, we need a temporary register for some cases. 
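An illustrative standalone sketch (names made up) of the comparison GenerateBitstringTypeCheckCompare emits above. With mask = (1u << mask_bits) - 1 and path_to_root lying entirely within the mask, the subl + shll pair sets the zero flag exactly when the masked class status equals the path-to-root, without materializing the mask as an immediate; the 16-bit case uses a single cmpw against memory instead, and the callers then branch on kNotEqual.

#include <cstdint>

// Plain form of the check.
static bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask) {
  return (status & mask) == path_to_root;
}

// Form mirroring the emitted subl + shll pair (assumes 0 < mask_bits < 32):
// shifting left by (32 - mask_bits) discards all bits outside the mask, so the
// result is zero iff the low mask_bits of (status - path_to_root) are zero.
static bool BitstringMatchesShifted(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
  return ((status - path_to_root) << (32u - mask_bits)) == 0u;
}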
@@ -6636,7 +7108,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -6668,7 +7140,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -6676,6 +7148,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + break; + } } if (zero.IsLinked()) { @@ -6702,12 +7189,14 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - // Note that TypeCheckSlowPathX86 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary register for some cases. + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. 
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -6921,6 +7410,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } __ Bind(&done); @@ -6946,6 +7448,61 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr } } +void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location dest = locations->Out(); + if (instruction->GetResultType() == DataType::Type::kInt32) { + __ andn(dest.AsRegister<Register>(), + first.AsRegister<Register>(), + second.AsRegister<Register>()); + } else { + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + __ andn(dest.AsRegisterPairLow<Register>(), + first.AsRegisterPairLow<Register>(), + second.AsRegisterPairLow<Register>()); + __ andn(dest.AsRegisterPairHigh<Register>(), + first.AsRegisterPairHigh<Register>(), + second.AsRegisterPairHigh<Register>()); + } +} + +void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit( + HX86MaskOrResetLeastSetBit* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location src = locations->InAt(0); + Location dest = locations->Out(); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32); + switch (instruction->GetOpKind()) { + case HInstruction::kAnd: + __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>()); + break; + case HInstruction::kXor: + __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>()); + break; + default: + LOG(FATAL) << "Unreachable"; + } +} + void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } @@ -7092,7 +7649,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister( // Load with fast path based Baker's read barrier. 
// /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -7126,7 +7683,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -7175,7 +7732,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86( - instruction, root, /* unpoison_ref_before_marking */ false); + instruction, root, /* unpoison_ref_before_marking= */ false); codegen_->AddSlowPath(slow_path); // Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`). @@ -7277,7 +7834,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -7305,10 +7862,10 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i if (always_update_field) { DCHECK(temp != nullptr); slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86( - instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); + instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86( - instruction, ref, /* unpoison_ref_before_marking */ true); + instruction, ref, /* unpoison_ref_before_marking= */ true); } AddSlowPath(slow_path); @@ -7620,7 +8177,7 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera HX86ComputeBaseMethodAddress* base_method_address_; private: - void Process(const MemoryRegion& region, int pos) OVERRIDE { + void Process(const MemoryRegion& region, int pos) override { // Patch the correct offset for the instruction. The place to patch is the // last 4 bytes of the instruction. 
// The value to patch is the distance from the offset in the constant area @@ -7821,7 +8378,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = dchecked_integral_cast<uint32_t>(address); } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 51e5bca00b..deeef888e2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -83,9 +83,9 @@ class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVi InvokeDexCallingConventionVisitorX86() {} virtual ~InvokeDexCallingConventionVisitorX86() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -97,18 +97,18 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionX86() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(ECX); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(EAX); } - Location GetReturnLocation(DataType::Type type) const OVERRIDE { + Location GetReturnLocation(DataType::Type type) const override { return DataType::Is64BitType(type) ? Location::RegisterPairLocation(EAX, EDX) : Location::RegisterLocation(EAX); } - Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + Location GetSetValueLocation(DataType::Type type, bool is_instance) const override { return DataType::Is64BitType(type) ? (is_instance ? Location::RegisterPairLocation(EDX, EBX) @@ -117,7 +117,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { ? 
Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(XMM0); } @@ -130,10 +130,10 @@ class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; X86Assembler* GetAssembler() const; @@ -155,14 +155,15 @@ class LocationsBuilderX86 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -186,14 +187,15 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -211,10 +213,12 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { // the suspend call. 
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp); void HandleBitwiseOperation(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); + void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem* rem); void HandleCondition(HCondition* condition); @@ -225,6 +229,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateShlLong(const Location& loc, int shift); void GenerateShrLong(const Location& loc, int shift); void GenerateUShrLong(const Location& loc, int shift); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, @@ -312,28 +319,27 @@ class JumpTableRIPFixup; class CodeGeneratorX86 : public CodeGenerator { public: CodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + void Bind(HBasicBlock* block) override; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. 
@@ -343,49 +349,51 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return kX86WordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 4 * kX86WordSize // 16 bytes == 4 words for each spill : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } - HGraphVisitor* GetLocationBuilder() OVERRIDE { + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - X86Assembler* GetAssembler() OVERRIDE { + X86Assembler* GetAssembler() override { return &assembler_; } - const X86Assembler& GetAssembler() const OVERRIDE { + const X86Assembler& GetAssembler() const override { return assembler_; } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters() const OVERRIDE; + void SetupBlockedRegisters() const override; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - ParallelMoveResolverX86* GetMoveResolver() OVERRIDE { + ParallelMoveResolverX86* GetMoveResolver() override { return &move_resolver_; } - InstructionSet GetInstructionSet() const OVERRIDE { + InstructionSet GetInstructionSet() const override { return InstructionSet::kX86; } + const X86InstructionSetFeatures& GetInstructionSetFeatures() const; + // Helper method to move a 32bits value between two locations. void Move32(Location destination, Location source); // Helper method to move a 64bits value between two locations. @@ -394,32 +402,42 @@ class CodeGeneratorX86 : public CodeGenerator { // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; // Generate a call to a static or direct method. void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; // Generate a call to a virtual method. 
void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; + void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t intrinsic_data); + void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address, + uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); + + void LoadBootImageAddress(Register reg, + uint32_t boot_image_reference, + HInvokeStaticOrDirect* invoke); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); + Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle); @@ -427,16 +445,16 @@ class CodeGeneratorX86 : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; // Emit linker patches. - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const PatchInfo<Label>& info, uint64_t index_in_table) const; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Emit a write barrier. void MarkGCCard(Register temp, @@ -451,22 +469,18 @@ class CodeGeneratorX86 : public CodeGenerator { return CommonGetLabelOf<Label>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kInt64; } - bool ShouldSplitLongMoves() const OVERRIDE { return true; } + bool ShouldSplitLongMoves() const override { return true; } Label* GetFrameEntryLabel() { return &frame_entry_label_; } - const X86InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } - void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) { method_address_offset_.Put(method_base->GetId(), offset); } @@ -502,7 +516,7 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); - void Finalize(CodeAllocator* allocator) OVERRIDE; + void Finalize(CodeAllocator* allocator) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
@@ -598,9 +612,9 @@ class CodeGeneratorX86 : public CodeGenerator { } } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // When we don't know the proper offset for the value, we use kDummy32BitOffset. // The correct value will be inserted when processing Assembler fixups. @@ -629,20 +643,22 @@ class CodeGeneratorX86 : public CodeGenerator { InstructionCodeGeneratorX86 instruction_visitor_; ParallelMoveResolverX86 move_resolver_; X86Assembler assembler_; - const X86InstructionSetFeatures& isa_features_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<X86PcRelativePatchInfo> boot_image_intrinsic_patches_; // Patches for string root accesses in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 7be360536b..7c293b8605 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -22,6 +22,7 @@ #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "gc/space/image_space.h" #include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" @@ -55,6 +56,13 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 static constexpr int kC2ConditionMask = 0x400; +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + // Custom calling convention: RAX serves as both input and output. + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(RAX)); + return caller_saves; +} + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
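The per-kind ArenaDeque<X86PcRelativePatchInfo> members above (including the new boot_image_intrinsic_patches_) all implement the same idea: emit the instruction with a dummy 32-bit displacement, remember where that literal sits, and let the linker or JIT write the real value later. A simplified, self-contained sketch of that bookkeeping, with invented names and a plain std::vector standing in for the code buffer:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct PendingPatch {
      size_t literal_offset;   // where the placeholder displacement lives
      uint32_t target_value;   // what should eventually be written there
    };

    int main() {
      std::vector<uint8_t> code;
      std::vector<PendingPatch> patches;

      // "Emit" mov eax, [rip+disp32] with a zero placeholder displacement.
      const uint8_t opcode[] = {0x8B, 0x05};
      code.insert(code.end(), opcode, opcode + sizeof(opcode));
      PendingPatch patch{code.size(), /*target_value=*/0x1234};
      code.resize(code.size() + sizeof(uint32_t), 0);   // kDummy32BitOffset stand-in
      patches.push_back(patch);

      // Later, the "linker" overwrites every placeholder with the real value.
      for (const PendingPatch& p : patches) {
        std::memcpy(code.data() + p.literal_offset, &p.target_value, sizeof(p.target_value));
      }
      return 0;
    }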
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value() @@ -63,7 +71,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { public: explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { @@ -77,9 +85,9 @@ class NullCheckSlowPathX86_64 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64); @@ -89,16 +97,16 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { public: explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64); @@ -109,7 +117,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div) : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { __ Bind(GetEntryLabel()); if (type_ == DataType::Type::kInt32) { if (is_div_) { @@ -129,7 +137,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; } + const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; } private: const CpuRegister cpu_reg_; @@ -143,7 +151,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCode(instruction), successor_(successor) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); @@ -167,7 +175,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { return successor_; } - const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; } private: HBasicBlock* const successor_; @@ -181,7 +189,7 @@ class 
BoundsCheckSlowPathX86_64 : public SlowPathCode { explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); @@ -228,9 +236,9 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); } - bool IsFatal() const OVERRIDE { return true; } + bool IsFatal() const override { return true; } - const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64); @@ -238,34 +246,41 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { class LoadClassSlowPathX86_64 : public SlowPathCode { public: - LoadClassSlowPathX86_64(HLoadClass* cls, - HInstruction* at, - uint32_t dex_pc, - bool do_clinit) - : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) { + LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at) + : SlowPathCode(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - SaveLiveRegisters(codegen, locations); // Custom calling convention: RAX serves as both input and output. - __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_)); - x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType, - instruction_, - dex_pc_, - this); - if (do_clinit_) { - CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>(); + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ movl(CpuRegister(RAX), Immediate(type_index.index_)); + x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + // If we also must_do_clinit, the resolved type is now in the correct register. } else { - CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + x86_64_codegen->Move(Location::RegisterLocation(RAX), source); + } + if (must_do_clinit) { + x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); } - Location out = locations->Out(); // Move the class to the desired location. 
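The rewritten LoadClassSlowPathX86_64 drops the stored dex_pc_/do_clinit_ fields and instead derives two predicates at emission time: whether the type still has to be resolved on the slow path, and whether static initialization must run (possibly both, in that order). A small sketch of just that decision, with simplified stand-in types:

    #include <cassert>

    enum class SlowPathPlan { kResolveTypeOnly, kResolveTypeThenClinit, kClinitOnly };

    // Stand-ins for the queries made on the HLoadClass / HClinitCheck nodes.
    struct Request {
      bool is_load_class;              // instruction_->IsLoadClass()
      bool must_resolve_on_slow_path;  // cls_->MustResolveTypeOnSlowPath()
      bool must_generate_clinit;       // cls_->MustGenerateClinitCheck()
    };

    SlowPathPlan Plan(const Request& r) {
      bool must_resolve_type = r.is_load_class && r.must_resolve_on_slow_path;
      bool must_do_clinit = !r.is_load_class || r.must_generate_clinit;  // HClinitCheck always inits
      if (must_resolve_type) {
        return must_do_clinit ? SlowPathPlan::kResolveTypeThenClinit
                              : SlowPathPlan::kResolveTypeOnly;
      }
      return SlowPathPlan::kClinitOnly;  // only kQuickInitializeStaticStorage is called
    }

    int main() {
      assert(Plan({false, false, false}) == SlowPathPlan::kClinitOnly);
      assert(Plan({true, true, true}) == SlowPathPlan::kResolveTypeThenClinit);
      assert(Plan({true, true, false}) == SlowPathPlan::kResolveTypeOnly);
      return 0;
    }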
if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); @@ -276,18 +291,12 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; } + const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; } private: // The class this slow path will load. HLoadClass* const cls_; - // The dex PC of `at_`. - const uint32_t dex_pc_; - - // Whether to initialize the class. - const bool do_clinit_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64); }; @@ -295,7 +304,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { public: explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); @@ -317,7 +326,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } + const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); @@ -328,7 +337,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) : SlowPathCode(instruction), is_fatal_(is_fatal) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); uint32_t dex_pc = instruction_->GetDexPc(); DCHECK(instruction_->IsCheckCast() @@ -376,9 +385,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { } } - const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; } + const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; } - bool IsFatal() const OVERRIDE { return is_fatal_; } + bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; @@ -391,7 +400,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); @@ -404,7 +413,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } - const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } + const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); @@ -414,7 +423,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { public: explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {} - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -445,7 +454,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { __ 
jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; } + const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64); @@ -473,9 +482,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; } + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); Register ref_reg = ref_cpu_reg.AsRegister(); @@ -564,11 +573,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64"; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>(); Register ref_reg = ref_cpu_reg.AsRegister(); @@ -736,7 +745,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); CpuRegister reg_out = out_.AsRegister<CpuRegister>(); @@ -855,7 +864,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86_64"; } @@ -897,7 +906,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { DCHECK(kEmitCompilerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); @@ -922,7 +931,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; } + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; } private: const Location out_; @@ -969,7 +978,7 @@ inline Condition X86_64FPCondition(IfCondition cond) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + ArtMethod* method ATTRIBUTE_UNUSED) { return desired_dispatch_info; } @@ -983,7 +992,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( // temp = thread->string_init_entrypoint uint32_t offset = GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); - __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true)); + __ 
gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: @@ -992,18 +1001,25 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().IsBootImage()); __ leal(temp.AsRegister<CpuRegister>(), - Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); RecordBootImageMethodPatch(invoke); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + __ movl(temp.AsRegister<CpuRegister>(), + Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); + RecordBootImageRelRoPatch(GetBootImageOffset(invoke)); break; + } case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), - Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); + Address::Absolute(kDummy32BitOffset, /* no_rip= */ false)); RecordMethodBssEntryPatch(invoke); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); + break; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. @@ -1059,6 +1075,16 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } +void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) { + boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data); + __ Bind(&boot_image_intrinsic_patches_.back().label); +} + +void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) { + boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset); + __ Bind(&boot_image_method_patches_.back().label); +} + void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { boot_image_method_patches_.emplace_back( invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); @@ -1089,12 +1115,48 @@ void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) { } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { - DCHECK(!GetCompilerOptions().IsBootImage()); string_bss_entry_patches_.emplace_back( &load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_bss_entry_patches_.back().label; } +void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) { + if (GetCompilerOptions().IsBootImage()) { + __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + RecordBootImageIntrinsicPatch(boot_image_reference); + } else if (GetCompilerOptions().GetCompilePic()) { + __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + RecordBootImageRelRoPatch(boot_image_reference); + } else { + DCHECK(Runtime::Current()->UseJitCompilation()); + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(!heap->GetBootImageSpaces().empty()); + const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + 
boot_image_reference; + __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)))); + } +} + +void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, + uint32_t boot_image_offset) { + DCHECK(invoke->IsStatic()); + InvokeRuntimeCallingConvention calling_convention; + CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); + if (GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); + // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. + __ leal(argument, + Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + MethodReference target_method = invoke->GetTargetMethod(); + dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; + boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_); + __ Bind(&boot_image_type_patches_.back().label); + } else { + LoadBootImageAddress(argument, boot_image_offset); + } + InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -1110,6 +1172,15 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( } } +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
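NoDexFileAdapter above is a small template shim: the generic patch-emission helper hands every factory a (literal offset, dex file, pc instruction offset, data) tuple, but the RelRo and intrinsic patches carry no dex file, so the adapter checks that it is null and drops it. The same pattern in miniature, with invented Patch/MakeDataPatch names:

    #include <cassert>
    #include <cstdint>

    struct DexFile;  // only ever passed as a null pointer here
    struct Patch { size_t literal_offset; uint32_t pc_insn_offset; uint32_t data; };

    // A factory that has no use for a dex file (think DataBimgRelRoPatch).
    Patch MakeDataPatch(size_t literal_offset, uint32_t pc_insn_offset, uint32_t data) {
      return Patch{literal_offset, pc_insn_offset, data};
    }

    // Adapts the 3-argument factory to the common 4-argument shape.
    template <Patch (*Factory)(size_t, uint32_t, uint32_t)>
    Patch NoDexFileAdapter(size_t literal_offset,
                           const DexFile* target_dex_file,
                           uint32_t pc_insn_offset,
                           uint32_t data) {
      assert(target_dex_file == nullptr);  // unused for these patch kinds
      return Factory(literal_offset, pc_insn_offset, data);
    }

    int main() {
      Patch p = NoDexFileAdapter<MakeDataPatch>(16u, nullptr, 12u, 0xCAFEu);
      return p.data == 0xCAFEu ? 0 : 1;
    }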
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -1118,7 +1189,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_.size() + type_bss_entry_patches_.size() + boot_image_string_patches_.size() + - string_bss_entry_patches_.size(); + string_bss_entry_patches_.size() + + boot_image_intrinsic_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( @@ -1127,12 +1199,14 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li boot_image_type_patches_, linker_patches); EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_intrinsic_patches_, linker_patches); } else { - DCHECK(boot_image_method_patches_.empty()); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( - boot_image_type_patches_, linker_patches); - EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( - boot_image_string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_method_patches_, linker_patches); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + DCHECK(boot_image_intrinsic_patches_.empty()); } EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( method_bss_entry_patches_, linker_patches); @@ -1151,6 +1225,10 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re stream << FloatRegister(reg); } +const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const { + return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures(); +} + size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id)); return kX86_64WordSize; @@ -1198,14 +1276,13 @@ void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poin } void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { - __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true)); + __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true)); } static constexpr int kNumberOfCpuRegisterPairs = 0; // Use a fake return address register to mimic Quick. 
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1); CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, @@ -1224,7 +1301,6 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator()), - isa_features_(isa_features), constant_area_start_(0), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), @@ -1232,6 +1308,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { @@ -1721,7 +1798,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { nullptr : codegen_->GetLabelOf(true_successor); Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? nullptr : codegen_->GetLabelOf(false_successor); - GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target); + GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); } void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { @@ -1739,9 +1816,9 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); GenerateTestAndBranch<Label>(deoptimize, - /* condition_input_index */ 0, + /* condition_input_index= */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target= */ nullptr); } void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { @@ -1844,8 +1921,8 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { } else { NearLabel false_target; GenerateTestAndBranch<NearLabel>(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, + /* condition_input_index= */ 2, + /* true_target= */ nullptr, &false_target); codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); __ Bind(&false_target); @@ -2347,7 +2424,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; - break; + UNREACHABLE(); } return Location::NoLocation(); } @@ -2482,6 +2559,14 @@ void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* codegen_->GenerateInvokePolymorphicCall(invoke); } +void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { + HandleInvoke(invoke); +} + +void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) { + codegen_->GenerateInvokeCustomCall(invoke); +} + void 
LocationsBuilderX86_64::VisitNeg(HNeg* neg) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); @@ -3474,7 +3559,40 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); } } +void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + if (instruction->GetResultType() == DataType::Type::kInt32) { + NearLabel done; + __ movl(out, numerator); + __ andl(out, Immediate(abs_imm-1)); + __ j(Condition::kZero, &done); + __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); + __ testl(numerator, numerator); + __ cmov(Condition::kLess, out, tmp, false); + __ Bind(&done); + + } else { + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + codegen_->Load64BitValue(tmp, abs_imm - 1); + NearLabel done; + __ movq(out, numerator); + __ andq(out, tmp); + __ j(Condition::kZero, &done); + __ movq(tmp, numerator); + __ sarq(tmp, Immediate(63)); + __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm))); + __ orq(out, tmp); + __ Bind(&done); + } +} void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -3489,9 +3607,17 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (instruction->GetResultType() == DataType::Type::kInt32) { - __ leal(tmp, Address(numerator, abs_imm - 1)); - __ testl(numerator, numerator); - __ cmov(kGreaterEqual, tmp, numerator); + // When denominator is equal to 2, we can add signed bit and numerator to tmp. + // Below we are using addl instruction instead of cmov which give us 1 cycle benefit. 
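The comment in the DivByPowerOfTwo hunk above is pointing at a small identity: signed division by 2^k truncates toward zero only if negative numerators are biased by 2^k - 1 before the arithmetic shift, and for k == 1 that bias is exactly the sign bit, so a shrl/addl pair can replace the lea/test/cmov sequence. A quick host-side check of the identity in plain C++ (arithmetic right shift of signed values is assumed, as on the compilers ART targets):

    #include <cassert>
    #include <cstdint>

    int32_t DivByPow2(int32_t n, int k) {
      // 2^k - 1 if n < 0, else 0; for k == 1 this is just the sign bit.
      uint32_t bias = static_cast<uint32_t>(n >> 31) >> (32 - k);
      return (n + static_cast<int32_t>(bias)) >> k;
    }

    int main() {
      for (int32_t n : {7, -7, 1, -1, 0, 123456, -123456}) {
        assert(DivByPow2(n, 1) == n / 2);
        assert(DivByPow2(n, 3) == n / 8);
      }
      return 0;
    }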
+ if (abs_imm == 2) { + __ leal(tmp, Address(numerator, 0)); + __ shrl(tmp, Immediate(31)); + __ addl(tmp, numerator); + } else { + __ leal(tmp, Address(numerator, abs_imm - 1)); + __ testl(numerator, numerator); + __ cmov(kGreaterEqual, tmp, numerator); + } int shift = CTZ(imm); __ sarl(tmp, Immediate(shift)); @@ -3503,11 +3629,16 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { } else { DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - - codegen_->Load64BitValue(rdx, abs_imm - 1); - __ addq(rdx, numerator); - __ testq(numerator, numerator); - __ cmov(kGreaterEqual, rdx, numerator); + if (abs_imm == 2) { + __ movq(rdx, numerator); + __ shrq(rdx, Immediate(63)); + __ addq(rdx, numerator); + } else { + codegen_->Load64BitValue(rdx, abs_imm - 1); + __ addq(rdx, numerator); + __ testq(numerator, numerator); + __ cmov(kGreaterEqual, rdx, numerator); + } int shift = CTZ(imm); __ sarq(rdx, Immediate(shift)); @@ -3547,7 +3678,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat if (instruction->GetResultType() == DataType::Type::kInt32) { int imm = second.GetConstant()->AsIntConstant()->GetValue(); - CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); __ movl(numerator, eax); @@ -3584,7 +3715,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat CpuRegister rax = eax; CpuRegister rdx = edx; - CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift); // Save the numerator. __ movq(numerator, rax); @@ -3651,8 +3782,12 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { - DivByPowerOfTwo(instruction->AsDiv()); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + if (is_div) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + RemByPowerOfTwo(instruction->AsRem()); + } } else { DCHECK(imm <= -2 || imm >= 2); GenerateDivRemWithAnyConstant(instruction); @@ -3821,6 +3956,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { } } +static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { + LocationSummary* locations = new (allocator) LocationSummary(minmax); + switch (minmax->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + // The following is sub-optimal, but all we can do for now. It would be fine to also accept + // the second input to be the output (we can simply swap inputs). 
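For constant divisors that are not powers of two, the GenerateDivRemWithAnyConstant path above falls back to the multiply-by-magic-number technique, with CalculateMagicAndShiftForDivRem choosing the constants. A host-side sketch of the 32-bit case for divisor 7, using what I believe is the standard published magic/shift pair (0x92492493, 2) rather than anything computed by ART, and again assuming arithmetic right shifts for signed values:

    #include <cassert>
    #include <cstdint>

    int32_t DivBy7(int32_t n) {
      const int32_t magic = static_cast<int32_t>(0x92492493);
      const int shift = 2;
      // High 32 bits of the 64-bit product, i.e. what imul leaves in edx.
      int32_t hi = static_cast<int32_t>((static_cast<int64_t>(n) * magic) >> 32);
      hi += n;                              // correction needed because magic < 0
      int32_t q = hi >> shift;
      q += static_cast<uint32_t>(n) >> 31;  // round toward zero for negative n
      return q;
    }

    int main() {
      for (int32_t n = -1000; n <= 1000; ++n) {
        assert(DivBy7(n) == n / 7);
      }
      return 0;
    }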
+ locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + // Can return immediately, as op1_loc == out_loc. + // Note: if we ever support separate registers, e.g., output into memory, we need to check for + // a copy here. + DCHECK(locations->Out().Equals(op1_loc)); + return; + } + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); + + // (out := op1) + // out <=? op2 + // if out is min jmp done + // out := op2 + // done: + + if (type == DataType::Type::kInt64) { + __ cmpq(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); + } else { + DCHECK_EQ(type, DataType::Type::kInt32); + __ cmpl(out, op2); + __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); + } +} + +void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, + bool is_min, + DataType::Type type) { + Location op1_loc = locations->InAt(0); + Location op2_loc = locations->InAt(1); + Location out_loc = locations->Out(); + XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); + + // Shortcut for same input locations. + if (op1_loc.Equals(op2_loc)) { + DCHECK(out_loc.Equals(op1_loc)); + return; + } + + // (out := op1) + // out <=? op2 + // if Nan jmp Nan_label + // if out is min jmp done + // if op2 is min jmp op2_label + // handle -0/+0 + // jmp done + // Nan_label: + // out := NaN + // op2_label: + // out := op2 + // done: + // + // This removes one jmp, but needs to copy one input (op1) to out. + // + // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? + + XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); + + NearLabel nan, done, op2_label; + if (type == DataType::Type::kFloat64) { + __ ucomisd(out, op2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat32); + __ ucomiss(out, op2); + } + + __ j(Condition::kParityEven, &nan); + + __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); + __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); + + // Handle 0.0/-0.0. + if (is_min) { + if (type == DataType::Type::kFloat64) { + __ orpd(out, op2); + } else { + __ orps(out, op2); + } + } else { + if (type == DataType::Type::kFloat64) { + __ andpd(out, op2); + } else { + __ andps(out, op2); + } + } + __ jmp(&done); + + // NaN handling. + __ Bind(&nan); + if (type == DataType::Type::kFloat64) { + __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); + } else { + __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); + } + __ jmp(&done); + + // out := op2; + __ Bind(&op2_label); + if (type == DataType::Type::kFloat64) { + __ movsd(out, op2); + } else { + __ movss(out, op2); + } + + // Done. 
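The long comment in GenerateMinMaxFP above exists because a plain compare-and-branch gets two IEEE-754 corner cases wrong: any ordered comparison involving NaN is false (ucomiss/ucomisd report this through the parity flag), and -0.0 compares equal to +0.0, so min/max must merge the sign bits (the orps/andps step) to return the right zero. A small illustration in ordinary C++, not ART code:

    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    float OrBits(float a, float b) {  // what one lane of orps computes
      uint32_t x, y;
      std::memcpy(&x, &a, sizeof(x));
      std::memcpy(&y, &b, sizeof(y));
      uint32_t r = x | y;
      float out;
      std::memcpy(&out, &r, sizeof(out));
      return out;
    }

    int main() {
      // Naive "(a < b) ? a : b" keeps +0.0f for min(-0.0f, +0.0f) because the
      // operands compare equal; OR-ing the bit patterns keeps the -0.0f.
      float naive = (-0.0f < +0.0f) ? -0.0f : +0.0f;
      float merged = OrBits(-0.0f, +0.0f);
      std::cout << std::signbit(naive) << ' ' << std::signbit(merged) << '\n';  // 0 1

      // Every ordered comparison against NaN is false, so the generated code
      // branches on the parity flag and materializes the canonical quiet NaN.
      float nan = std::nanf("");
      std::cout << (nan < 1.0f) << (nan > 1.0f) << (nan == nan) << '\n';  // 000
      return 0;
    }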
+ __ Bind(&done); +} + +void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { + DataType::Type type = minmax->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateMinMaxInt(minmax->GetLocations(), is_min, type); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateMinMaxFP(minmax->GetLocations(), is_min, type); + break; + default: + LOG(FATAL) << "Unexpected type for HMinMax " << type; + } +} + +void LocationsBuilderX86_64::VisitMin(HMin* min) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), min); +} + +void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { + GenerateMinMax(min, /*is_min*/ true); +} + +void LocationsBuilderX86_64::VisitMax(HMax* max) { + CreateMinMaxLocations(GetGraph()->GetAllocator(), max); +} + +void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { + GenerateMinMax(max, /*is_min*/ false); +} + +void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movl(mask, out); + __ sarl(mask, Immediate(31)); + // Add mask. + __ addl(out, mask); + __ xorl(out, mask); + break; + } + case DataType::Type::kInt64: { + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); + // Create mask. + __ movq(mask, out); + __ sarq(mask, Immediate(63)); + // Add mask. 
+ __ addq(out, mask); + __ xorq(out, mask); + break; + } + case DataType::Type::kFloat32: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); + __ andps(out, mask); + break; + } + case DataType::Type::kFloat64: { + XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); + __ andpd(out, mask); + break; + } + default: + LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); + } +} + void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); locations->SetInAt(0, Location::Any()); @@ -4030,29 +4400,14 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - if (instruction->IsStringAlloc()) { - locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(Location::RegisterLocation(RAX)); } void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - if (instruction->IsStringAlloc()) { - // String is allocated through StringFactory. Call NewEmptyString entry point. - CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>(); - MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize); - __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true)); - __ call(Address(temp, code_offset.SizeValue())); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - } else { - codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { @@ -4065,10 +4420,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { - // Note: if heap poisoning is enabled, the entry point takes cares - // of poisoning the reference. - QuickEntrypointEnum entrypoint = - CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); + // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
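The integer paths of VisitAbs above use the branchless sign-mask identity (the float/double paths instead clear the sign bit with an andps/andpd constant). A stand-alone check of the identity; the addition is done in unsigned arithmetic to mirror the wraparound the generated addl/addq performs:

    #include <cassert>
    #include <cstdint>

    // mask = x >> 31 is 0 for x >= 0 and all-ones for x < 0, so
    // (x + mask) ^ mask yields x unchanged or ~(x - 1) == -x respectively.
    int32_t AbsViaMask(int32_t x) {
      uint32_t mask = static_cast<uint32_t>(x >> 31);  // arithmetic shift assumed
      return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
    }

    int main() {
      assert(AbsViaMask(5) == 5);
      assert(AbsViaMask(-5) == 5);
      assert(AbsViaMask(0) == 0);
      assert(AbsViaMask(INT32_MIN) == INT32_MIN);  // same wraparound as Math.abs
      return 0;
    }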
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); @@ -4200,7 +4553,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { } case MemBarrierKind::kNTStoreStore: // Non-Temporal Store/Store needs an explicit fence. - MemoryFence(/* non-temporal */ true); + MemoryFence(/* non-temporal= */ true); break; } } @@ -4277,7 +4630,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, base, offset, /* needs_null_check */ true); + instruction, out, base, offset, /* needs_null_check= */ true); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } @@ -4732,7 +5085,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // Note that a potential implicit null check is handled in this // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true); + instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); } else { CpuRegister out = out_loc.AsRegister<CpuRegister>(); __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); @@ -5130,10 +5483,26 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, __ testl(value, value); __ j(kEqual, &is_null); } + // Load the address of the card table into `card`. __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(), - /* no_rip */ true)); + /* no_rip= */ true)); + // Calculate the offset (in the card table) of the card corresponding to + // `object`. __ movq(temp, object); __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). __ movb(Address(temp, card, TIMES_1, 0), card); if (value_can_be_null) { __ Bind(&is_null); @@ -5196,7 +5565,7 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc } __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(), - /* no_rip */ true), + /* no_rip= */ true), Immediate(0)); if (successor == nullptr) { __ j(kNotEqual, slow_path->GetEntryLabel()); @@ -5462,6 +5831,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( // No need for memory fence, thanks to the x86-64 memory model. 
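The new comment block in MarkGCCard above documents the dual use of the `card` register: the card table base is biased at creation so that its low byte equals kCardDirty, letting one register supply both the store address and the byte stored. Stripped of that encoding trick, the operation is simply "dirty the card covering the written object". A simplified host-side sketch with assumed constants (the real table indexes with the biased base rather than subtracting the heap start):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    constexpr int kCardShift = 10;        // assume 1 KiB of heap per card for the sketch
    constexpr uint8_t kCardDirty = 0x70;  // illustrative dirty-marker value

    void MarkCard(std::vector<uint8_t>& card_table, uintptr_t heap_begin, uintptr_t object) {
      // movq temp, object; shrq temp, kCardShift; movb [card + temp], <dirty>
      card_table[(object - heap_begin) >> kCardShift] = kCardDirty;
    }

    int main() {
      const uintptr_t heap_begin = 0x10000000u;
      std::vector<uint8_t> card_table((64u << 20) >> kCardShift, 0);  // covers a 64 MiB heap
      MarkCard(card_table, heap_begin, heap_begin + 0x1234u);
      std::printf("card %d dirty: %d\n", 0x1234 >> kCardShift,
                  card_table[0x1234 >> kCardShift] == kCardDirty);
      return 0;
    }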
} +void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, + CpuRegister temp) { + uint32_t path_to_root = check->GetBitstringPathToRoot(); + uint32_t mask = check->GetBitstringMask(); + DCHECK(IsPowerOfTwo(mask + 1)); + size_t mask_bits = WhichPowerOf2(mask + 1); + + if (mask_bits == 16u) { + // Compare the bitstring in memory. + __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); + } else { + // /* uint32_t */ temp = temp->status_ + __ movl(temp, Address(temp, mirror::Class::StatusOffset())); + // Compare the bitstring bits using SUB. + __ subl(temp, Immediate(path_to_root)); + // Shift out bits that do not contribute to the comparison. + __ shll(temp, Immediate(32u - mask_bits)); + } +} + HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { switch (desired_class_load_kind) { @@ -5471,14 +5860,14 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadClass::LoadKind::kJitBootImageAddress: case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: break; } @@ -5513,10 +5902,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. - // Custom calling convention: RAX serves as both input and output. - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(RAX)); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -5561,48 +5947,41 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S cls, out_loc, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), - /* fixup_label */ nullptr, + /* fixup_label= */ nullptr, read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); codegen_->RecordBootImageTypePatch(cls); break; - case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(cls->GetClass().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
- break; - } - case HLoadClass::LoadKind::kBootImageClassTable: { + case HLoadClass::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageTypePatch(cls); - // Extract the reference from the slot data, i.e. clear the hash bits. - int32_t masked_hash = ClassTable::TableSlot::MaskHash( - ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); - if (masked_hash != 0) { - __ subl(out, Immediate(masked_hash)); - } + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); break; } case HLoadClass::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ false); + /* no_rip= */ false); Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); generate_null_check = true; break; } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. + break; + } case HLoadClass::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ true); + /* no_rip= */ true); Label* fixup_label = codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); // /* GcRoot<mirror::Class> */ out = *address @@ -5616,8 +5995,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ testl(out, out); @@ -5638,12 +6017,34 @@ void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); } + // Rely on the type initialization to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); +} + +void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + // Custom calling convention: RAX serves as both input and output. + Location location = Location::RegisterLocation(RAX); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { + codegen_->GenerateLoadMethodHandleRuntimeCall(load); +} + +void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) { + // Custom calling convention: RAX serves as both input and output. 
+ Location location = Location::RegisterLocation(RAX); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); +} + +void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) { + codegen_->GenerateLoadMethodTypeRuntimeCall(load); } void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. - SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( - check->GetLoadClass(), check, check->GetDexPc(), true); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); @@ -5653,14 +6054,14 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; + case HLoadString::LoadKind::kJitBootImageAddress: case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: break; } @@ -5677,10 +6078,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString to save everything. - // Custom calling convention: RAX serves as both input and output. - RegisterSet caller_saves = RegisterSet::Empty(); - caller_saves.Add(Location::RegisterLocation(RAX)); - locations->SetCustomSlowPathCallerSaves(caller_saves); + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -5708,26 +6106,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); codegen_->RecordBootImageStringPatch(load); return; } - case HLoadString::LoadKind::kBootImageAddress: { - uint32_t address = dchecked_integral_cast<uint32_t>( - reinterpret_cast<uintptr_t>(load->GetString().Get())); - DCHECK_NE(address, 0u); - __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 
- return; - } - case HLoadString::LoadKind::kBootImageInternTable: { + case HLoadString::LoadKind::kBootImageRelRo: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootImageStringPatch(load); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); + codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); return; } case HLoadString::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ false); + /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); @@ -5738,9 +6129,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA __ Bind(slow_path->GetExitLabel()); return; } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); + DCHECK_NE(address, 0u); + __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. + return; + } case HLoadString::LoadKind::kJitTableAddress: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, - /* no_rip */ true); + /* no_rip= */ true); Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address @@ -5762,7 +6159,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA static Address GetExceptionTlsAddress() { return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(), - /* no_rip */ true); + /* no_rip= */ true); } void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { @@ -5795,24 +6192,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } -static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck) { - // We need a temporary for holding the iftable length. - return true; - } - return kEmitCompilerReadBarrier && +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier && (type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; } -static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - return kEmitCompilerReadBarrier && - !kUseBakerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck); +// Interface case has 2 temps, one for holding the number of interfaces, one for the current +// interface pointer, the current interface is compared in memory. +// The other checks have one temp for loading the object's class. 
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { + return 2; + } + return 1 + NumberOfInstanceOfTemps(type_check_kind); } void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5834,6 +6233,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kInterfaceCheck: call_kind = LocationSummary::kCallOnSlowPath; break; + case TypeCheckKind::kBitstringCheck: + break; } LocationSummary* locations = @@ -5842,14 +6243,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::Any()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); + } else { + locations->SetInAt(1, Location::Any()); + } // Note that TypeCheckSlowPathX86_64 uses this "out" register too. locations->SetOut(Location::RequiresRegister()); - // When read barriers are enabled, we need a temporary register for - // some cases. - if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { @@ -5860,9 +6263,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { Location cls = locations->InAt(1); Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(0) : - Location::NoLocation(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6031,7 +6434,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -6063,7 +6466,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // This should also be beneficial for the other cases above. 
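Aside on the hunk above: NumberOfInstanceOfTemps and NumberOfCheckCastTemps replace the old boolean "needs a temporary" helpers with explicit counts, which the locations builder then reserves via AddRegisterTemps(). A standalone restatement of that logic, using placeholder names (Kind and slow_read_barrier are stand-ins for TypeCheckKind and the non-Baker read-barrier configuration, not ART types):

#include <cstddef>

// Illustrative mirror of the temp-count helpers in the patch above.
enum class Kind { kExact, kAbstract, kHierarchy, kArrayObject, kInterface, kBitstring };

constexpr size_t InstanceOfTemps(Kind k, bool slow_read_barrier) {
  // One temp only for the non-Baker read-barrier cases listed in the patch.
  return (slow_read_barrier &&
          (k == Kind::kAbstract || k == Kind::kHierarchy || k == Kind::kArrayObject)) ? 1u : 0u;
}

constexpr size_t CheckCastTemps(Kind k, bool slow_read_barrier) {
  // Interface checks: two temps (iftable length and the current interface).
  // Everything else: one temp for the object's class, plus any read-barrier temp.
  return (k == Kind::kInterface) ? 2u : 1u + InstanceOfTemps(k, slow_read_barrier);
}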
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( - instruction, /* is_fatal */ false); + instruction, /* is_fatal= */ false); codegen_->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -6071,6 +6474,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } break; } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, out); + if (zero.IsLinked()) { + __ j(kNotEqual, &zero); + __ movl(out, Immediate(1)); + __ jmp(&done); + } else { + __ setcc(kEqual, out); + // setcc only sets the low byte. + __ andl(out, Immediate(1)); + } + break; + } } if (zero.IsLinked()) { @@ -6097,17 +6521,15 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { // Require a register for the interface check since there is a loop that compares the class to // a memory address. locations->SetInAt(1, Location::RequiresRegister()); + } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); } else { locations->SetInAt(1, Location::Any()); } - - // Note that TypeCheckSlowPathX86_64 uses this "temp" register too. - locations->AddTemp(Location::RequiresRegister()); - // When read barriers are enabled, we need an additional temporary - // register for some cases. - if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) { - locations->AddTemp(Location::RequiresRegister()); - } + // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { @@ -6118,9 +6540,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { Location cls = locations->InAt(1); Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ? - locations->GetTemp(1) : - Location::NoLocation(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_GE(num_temps, 1u); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -6283,7 +6706,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { break; } - case TypeCheckKind::kInterfaceCheck: + case TypeCheckKind::kInterfaceCheck: { // Fast path for the interface check. Try to avoid read barriers to improve the fast path. // We can not get false positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ @@ -6319,6 +6742,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { // If `cls` was poisoned above, unpoison it. 
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); + break; + } } if (done.IsLinked()) { @@ -6346,6 +6783,48 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in } } +void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + // There is no immediate variant of negated bitwise and in X86. + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { + DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); + DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location first = locations->InAt(0); + Location second = locations->InAt(1); + Location dest = locations->Out(); + __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); +} + +void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location src = locations->InAt(0); + Location dest = locations->Out(); + switch (instruction->GetOpKind()) { + case HInstruction::kAnd: + __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); + break; + case HInstruction::kXor: + __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); + break; + default: + LOG(FATAL) << "Unreachable"; + } +} + void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } @@ -6474,7 +6953,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, out_reg, offset, /* needs_null_check */ false); + instruction, out, out_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // Save the value of `out` into `maybe_temp` before overwriting it @@ -6508,7 +6987,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( // Load with fast path based Baker's read barrier. 
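For reference, the new HX86AndNot and HX86MaskOrResetLeastSetBit visitors earlier in this hunk map directly onto BMI1 instructions. A scalar sketch of what those instructions compute (operand order for ANDN follows the ISA definition of ~src1 & src2; the helper names below are made up for illustration):

#include <cstdint>

// Scalar equivalents of the BMI1 instructions emitted by the visitors above.
uint64_t AndNot(uint64_t first, uint64_t second) { return ~first & second; }  // ANDN
uint64_t ResetLeastSetBit(uint64_t x) { return x & (x - 1); }                 // BLSR
uint64_t MaskUpToLeastSetBit(uint64_t x) { return x ^ (x - 1); }              // BLSMSK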
// /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier( - instruction, out, obj_reg, offset, /* needs_null_check */ false); + instruction, out, obj_reg, offset, /* needs_null_check= */ false); } else { // Load with slow path based read barrier. // /* HeapReference<Object> */ out = *(obj + offset) @@ -6557,13 +7036,13 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( - instruction, root, /* unpoison_ref_before_marking */ false); + instruction, root, /* unpoison_ref_before_marking= */ false); codegen_->AddSlowPath(slow_path); // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint. const int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); - __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0)); + __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0)); // The entrypoint is null when the GC is not marking. __ j(kNotEqual, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -6660,7 +7139,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -6689,10 +7168,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction DCHECK(temp1 != nullptr); DCHECK(temp2 != nullptr); slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( - instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); + instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2); } else { slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( - instruction, ref, /* unpoison_ref_before_marking */ true); + instruction, ref, /* unpoison_ref_before_marking= */ true); } AddSlowPath(slow_path); @@ -7005,7 +7484,7 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera CodeGeneratorX86_64* codegen_; private: - void Process(const MemoryRegion& region, int pos) OVERRIDE { + void Process(const MemoryRegion& region, int pos) override { // Patch the correct offset for the instruction. We use the address of the // 'next' instruction, which is 'pos' (patch the 4 bytes before). 
int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_; @@ -7152,7 +7631,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t; + using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = dchecked_integral_cast<uint32_t>(address); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1079e94dfc..f74e130702 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -83,22 +83,22 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { public: FieldAccessCallingConventionX86_64() {} - Location GetObjectLocation() const OVERRIDE { + Location GetObjectLocation() const override { return Location::RegisterLocation(RSI); } - Location GetFieldIndexLocation() const OVERRIDE { + Location GetFieldIndexLocation() const override { return Location::RegisterLocation(RDI); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::RegisterLocation(RAX); } Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) - const OVERRIDE { + const override { return is_instance ? Location::RegisterLocation(RDX) : Location::RegisterLocation(RSI); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { return Location::FpuRegisterLocation(XMM0); } @@ -112,9 +112,9 @@ class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventio InvokeDexCallingConventionVisitorX86_64() {} virtual ~InvokeDexCallingConventionVisitorX86_64() {} - Location GetNextLocation(DataType::Type type) OVERRIDE; - Location GetReturnLocation(DataType::Type type) const OVERRIDE; - Location GetMethodLocation() const OVERRIDE; + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; private: InvokeDexCallingConvention calling_convention; @@ -129,10 +129,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen) : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} - void EmitMove(size_t index) OVERRIDE; - void EmitSwap(size_t index) OVERRIDE; - void SpillScratch(int reg) OVERRIDE; - void RestoreScratch(int reg) OVERRIDE; + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; X86_64Assembler* GetAssembler() const; @@ -157,14 +157,15 @@ class LocationsBuilderX86_64 : public HGraphVisitor { : HGraphVisitor(graph), codegen_(codegen) {} #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) + 
FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -188,14 +189,15 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE; + void Visit##name(H##name* instr) override; FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " << instruction->GetId() << ")"; } @@ -208,10 +210,12 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp); void HandleBitwiseOperation(HBinaryOperation* operation); void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); + void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleCondition(HCondition* condition); @@ -222,6 +226,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type); + void GenerateMinMax(HBinaryOperation* minmax, bool is_min); + // Generate a heap reference load using one register `out`: // // out <- *(out + offset) @@ -291,28 +299,27 @@ class JumpTableRIPFixup; class CodeGeneratorX86_64 : public CodeGenerator { public: CodeGeneratorX86_64(HGraph* graph, - const X86_64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); virtual ~CodeGeneratorX86_64() {} - void GenerateFrameEntry() OVERRIDE; - void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; - void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + void Bind(HBasicBlock* block) override; + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; - size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreCoreRegister(size_t stack_index, 
uint32_t reg_id) OVERRIDE; - size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + SlowPathCode* slow_path = nullptr) override; // Generate code to invoke a runtime entry point, but do not record // PC-related information in a stack map. @@ -322,49 +329,51 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateInvokeRuntime(int32_t entry_point_offset); - size_t GetWordSize() const OVERRIDE { + size_t GetWordSize() const override { return kX86_64WordSize; } - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { + size_t GetFloatingPointSpillSlotSize() const override { return GetGraph()->HasSIMD() ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 words for each spill } - HGraphVisitor* GetLocationBuilder() OVERRIDE { + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } - HGraphVisitor* GetInstructionVisitor() OVERRIDE { + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } - X86_64Assembler* GetAssembler() OVERRIDE { + X86_64Assembler* GetAssembler() override { return &assembler_; } - const X86_64Assembler& GetAssembler() const OVERRIDE { + const X86_64Assembler& GetAssembler() const override { return assembler_; } - ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE { + ParallelMoveResolverX86_64* GetMoveResolver() override { return &move_resolver_; } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { + uintptr_t GetAddressOf(HBasicBlock* block) override { return GetLabelOf(block)->Position(); } - void SetupBlockedRegisters() const OVERRIDE; - void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; - void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; - void Finalize(CodeAllocator* allocator) OVERRIDE; + void SetupBlockedRegisters() const override; + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; + void Finalize(CodeAllocator* allocator) override; - InstructionSet GetInstructionSet() const OVERRIDE { + InstructionSet GetInstructionSet() const override { return InstructionSet::kX86_64; } + const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const; + // Emit a write barrier. void MarkGCCard(CpuRegister temp, CpuRegister card, @@ -381,35 +390,37 @@ class CodeGeneratorX86_64 : public CodeGenerator { return CommonGetLabelOf<Label>(block_labels_, block); } - void Initialize() OVERRIDE { + void Initialize() override { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. 
HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; + HLoadString::LoadKind desired_string_load_kind) override; // Check if the desired_class_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; + HLoadClass::LoadKind desired_class_load_kind) override; // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + ArtMethod* method) override; void GenerateStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; void GenerateVirtualCall( - HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override; + void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data); + void RecordBootImageRelRoPatch(uint32_t boot_image_offset); void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); void RecordBootImageTypePatch(HLoadClass* load_class); @@ -423,20 +434,17 @@ class CodeGeneratorX86_64 : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; + void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference); + void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset); - void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const PatchInfo<Label>& info, uint64_t index_in_table) const; - void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; - - const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const { - return isa_features_; - } + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -560,6 +568,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); + void MoveFromReturnRegister(Location trg, DataType::Type type) override; + // Assign a 64 bit constant to an address. void MoveInt64ToAddress(const Address& addr_low, const Address& addr_high, @@ -578,9 +588,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { } } - void GenerateNop() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateNop() override; + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; // When we don't know the proper offset for the value, we use kDummy32BitOffset. 
// We will fix this up in the linker later to have the right value. @@ -598,24 +608,26 @@ class CodeGeneratorX86_64 : public CodeGenerator { InstructionCodeGeneratorX86_64 instruction_visitor_; ParallelMoveResolverX86_64 move_resolver_; X86_64Assembler assembler_; - const X86_64InstructionSetFeatures& isa_features_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. int constant_area_start_; - // PC-relative method patch info for kBootImageLinkTimePcRelative. + // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo. + // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods). ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; - // Type patch locations for kBssEntry. + // PC-relative type patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (intern table or boot image PIC). + // PC-relative String patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; - // String patch locations for kBssEntry. + // PC-relative String patch info for kBssEntry. ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; + // PC-relative patch info for IntrinsicObjects. + ArenaDeque<PatchInfo<Label>> boot_image_intrinsic_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index 2e31d35584..f406983fc2 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -25,11 +25,11 @@ namespace art { -void CodeSinking::Run() { +bool CodeSinking::Run() { HBasicBlock* exit = graph_->GetExitBlock(); if (exit == nullptr) { // Infinite loop, just bail. - return; + return false; } // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. @@ -40,6 +40,7 @@ void CodeSinking::Run() { SinkCodeToUncommonBranch(exit_predecessor); } } + return true; } static bool IsInterestingInstruction(HInstruction* instruction) { @@ -179,7 +180,7 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, DCHECK(!instruction->IsPhi()); // Makes no sense for Phi. // Find the target block. 
- CommonDominator finder(/* start_block */ nullptr); + CommonDominator finder(/* block= */ nullptr); for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { HInstruction* user = use.GetUser(); if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) { @@ -258,12 +259,12 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { size_t number_of_instructions = graph_->GetCurrentInstructionId(); ScopedArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc)); - ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false); + ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable= */ false); processed_instructions.ClearAllBits(); - ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false); + ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false); post_dominated.ClearAllBits(); ArenaBitVector instructions_that_can_move( - &allocator, number_of_instructions, /* expandable */ false); + &allocator, number_of_instructions, /* expandable= */ false); instructions_that_can_move.ClearAllBits(); ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); @@ -413,7 +414,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } // Find the position of the instruction we're storing into, filtering out this // store and all other stores to that instruction. - position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true); + position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter= */ true); // The position needs to be dominated by the store, in order for the store to move there. if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) { @@ -433,7 +434,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { continue; } MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk); - instruction->MoveBefore(position, /* ensure_safety */ false); + instruction->MoveBefore(position, /* do_checks= */ false); } } diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h index 836d9d4f67..8eb3a520c3 100644 --- a/compiler/optimizing/code_sinking.h +++ b/compiler/optimizing/code_sinking.h @@ -33,7 +33,7 @@ class CodeSinking : public HOptimization { const char* name = kCodeSinkingPassName) : HOptimization(graph, name, stats) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kCodeSinkingPassName = "code_sinking"; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index a0fd5ffcb1..b5a7c137f6 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -89,7 +89,8 @@ void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, i HGraph* graph = CreateCFG(data); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); - RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected); } } @@ -100,7 +101,8 @@ void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data, HGraph* graph = CreateCFG(data, DataType::Type::kInt64); // Remove suspend checks, they cannot be executed in this context. 
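A note on the pervasive /* name */ to /* name= */ rewrites in these hunks: the trailing '=' form names the callee's parameter and can be verified by tooling such as clang-tidy's bugprone-argument-comment check (stated here as the likely motivation; the patch itself does not say). A hypothetical example, with DoSomething standing in for any callee:

// DoSomething is not an ART function; it only illustrates the comment style.
void DoSomething(bool needs_null_check) {
  (void)needs_null_check;
}

void Caller() {
  DoSomething(/* needs_null_check= */ false);  // new style used throughout this series
}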
RemoveSuspendChecks(graph); - RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected); } } @@ -451,7 +453,7 @@ TEST_F(CodegenTest, NonMaterializedCondition) { ASSERT_FALSE(equal->IsEmittedAtUseSite()); graph->BuildDominatorTree(); - PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, *compiler_options_).Run(); ASSERT_TRUE(equal->IsEmittedAtUseSite()); auto hook_before_codegen = [](HGraph* graph_in) { @@ -460,7 +462,8 @@ TEST_F(CodegenTest, NonMaterializedCondition) { block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, 0); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, 0); } } @@ -506,7 +509,8 @@ TEST_F(CodegenTest, MaterializedCondition1) { new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -573,7 +577,8 @@ TEST_F(CodegenTest, MaterializedCondition2) { new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; - RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]); } } } @@ -682,7 +687,8 @@ void CodegenTest::TestComparison(IfCondition condition, block->AddInstruction(new (GetAllocator()) HReturn(comparison)); graph->BuildDominatorTree(); - RunCode(target_config, graph, [](HGraph*) {}, true, expected_result); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); + RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, true, expected_result); } TEST_F(CodegenTest, ComparisonsInt) { @@ -713,10 +719,9 @@ TEST_F(CodegenTest, ComparisonsLong) { #ifdef ART_ENABLE_CODEGEN_arm TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { - std::unique_ptr<const ArmInstructionSetFeatures> features( - ArmInstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kThumb2, "default"); HGraph* graph = CreateGraph(); - arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions()); + arm::CodeGeneratorARMVIXL codegen(graph, *compiler_options_); codegen.Initialize(); @@ -737,10 +742,9 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { #ifdef ART_ENABLE_CODEGEN_arm64 // Regression test for b/34760542. 
TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { - std::unique_ptr<const Arm64InstructionSetFeatures> features( - Arm64InstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); - arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); codegen.Initialize(); @@ -787,10 +791,9 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { // Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off. TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { - std::unique_ptr<const Arm64InstructionSetFeatures> features( - Arm64InstructionSetFeatures::FromCppDefines()); + OverrideInstructionSetFeatures(InstructionSet::kArm64, "default"); HGraph* graph = CreateGraph(); - arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); codegen.Initialize(); @@ -820,13 +823,40 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { InternalCodeAllocator code_allocator; codegen.Finalize(&code_allocator); } + +// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example). +TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA75) { + OverrideInstructionSetFeatures(InstructionSet::kArm64, "cortex-a75"); + HGraph* graph = CreateGraph(); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); + vixl::CPUFeatures* features = codegen.GetVIXLAssembler()->GetCPUFeatures(); + + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kDotProduct)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kFPHalf)); + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kAtomics)); +} + +// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a53 as example). +TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA53) { + OverrideInstructionSetFeatures(InstructionSet::kArm64, "cortex-a53"); + HGraph* graph = CreateGraph(); + arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_); + vixl::CPUFeatures* features = codegen.GetVIXLAssembler()->GetCPUFeatures(); + + EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kDotProduct)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kFPHalf)); + EXPECT_FALSE(features->Has(vixl::CPUFeatures::kAtomics)); +} + #endif #ifdef ART_ENABLE_CODEGEN_mips TEST_F(CodegenTest, MipsClobberRA) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - if (!CanExecute(InstructionSet::kMips) || features_mips->IsR6()) { + OverrideInstructionSetFeatures(InstructionSet::kMips, "mips32r"); + CHECK(!instruction_set_features_->AsMipsInstructionSetFeatures()->IsR6()); + if (!CanExecute(InstructionSet::kMips)) { // HMipsComputeBaseMethodAddress and the NAL instruction behind it // should only be generated on non-R6. return; @@ -860,7 +890,7 @@ TEST_F(CodegenTest, MipsClobberRA) { graph->BuildDominatorTree(); - mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), CompilerOptions()); + mips::CodeGeneratorMIPS codegenMIPS(graph, *compiler_options_); // Since there isn't HLoadClass or HLoadString, we need to manually indicate // that RA is clobbered and the method entry code should generate a stack frame // and preserve RA in it. And this is what we're testing here. 
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index c41c290c8b..dde39d46f3 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -17,17 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ #define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_ -#include "arch/arm/instruction_set_features_arm.h" #include "arch/arm/registers_arm.h" -#include "arch/arm64/instruction_set_features_arm64.h" #include "arch/instruction_set.h" -#include "arch/mips/instruction_set_features_mips.h" #include "arch/mips/registers_mips.h" -#include "arch/mips64/instruction_set_features_mips64.h" #include "arch/mips64/registers_mips64.h" -#include "arch/x86/instruction_set_features_x86.h" #include "arch/x86/registers_x86.h" -#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_simulator.h" #include "code_simulator_container.h" #include "common_compiler_test.h" @@ -101,15 +95,13 @@ class CodegenTargetConfig { // to just overwrite the code generator. class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { public: - TestCodeGeneratorARMVIXL(HGraph* graph, - const ArmInstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) { + TestCodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options) + : arm::CodeGeneratorARMVIXL(graph, compiler_options) { AddAllocatedRegister(Location::RegisterLocation(arm::R6)); AddAllocatedRegister(Location::RegisterLocation(arm::R7)); } - void SetupBlockedRegisters() const OVERRIDE { + void SetupBlockedRegisters() const override { arm::CodeGeneratorARMVIXL::SetupBlockedRegisters(); blocked_core_registers_[arm::R4] = true; blocked_core_registers_[arm::R6] = false; @@ -117,7 +109,7 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { } void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { + Location temp_loc ATTRIBUTE_UNUSED) override { // When turned on, the marking register checks in // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the // Thread Register and the Marking Register to be set to @@ -145,13 +137,11 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { // function. 
class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { public: - TestCodeGeneratorARM64(HGraph* graph, - const Arm64InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {} + TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) + : arm64::CodeGeneratorARM64(graph, compiler_options) {} void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE { + Location temp_loc ATTRIBUTE_UNUSED) override { // When turned on, the marking register checks in // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the // Thread Register and the Marking Register to be set to @@ -165,15 +155,13 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { #ifdef ART_ENABLE_CODEGEN_x86 class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { public: - TestCodeGeneratorX86(HGraph* graph, - const X86InstructionSetFeatures& isa_features, - const CompilerOptions& compiler_options) - : x86::CodeGeneratorX86(graph, isa_features, compiler_options) { + TestCodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options) + : x86::CodeGeneratorX86(graph, compiler_options) { // Save edi, we need it for getting enough registers for long multiplication. AddAllocatedRegister(Location::RegisterLocation(x86::EDI)); } - void SetupBlockedRegisters() const OVERRIDE { + void SetupBlockedRegisters() const override { x86::CodeGeneratorX86::SetupBlockedRegisters(); // ebx is a callee-save register in C, but caller-save for ART. blocked_core_registers_[x86::EBX] = true; @@ -188,14 +176,16 @@ class InternalCodeAllocator : public CodeAllocator { public: InternalCodeAllocator() : size_(0) { } - virtual uint8_t* Allocate(size_t size) { + uint8_t* Allocate(size_t size) override { size_ = size; memory_.reset(new uint8_t[size]); return memory_.get(); } size_t GetSize() const { return size_; } - uint8_t* GetMemory() const { return memory_.get(); } + ArrayRef<const uint8_t> GetMemory() const override { + return ArrayRef<const uint8_t>(memory_.get(), size_); + } private: size_t size_; @@ -269,8 +259,8 @@ static void Run(const InternalCodeAllocator& allocator, InstructionSet target_isa = codegen.GetInstructionSet(); typedef Expected (*fptr)(); - CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize()); - fptr f = reinterpret_cast<fptr>(allocator.GetMemory()); + CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size()); + fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data())); if (target_isa == InstructionSet::kThumb2) { // For thumb we need the bottom bit set. 
f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1); @@ -298,7 +288,7 @@ static void RunCodeNoCheck(CodeGenerator* codegen, { ScopedArenaAllocator local_allocator(graph->GetArenaStack()); SsaLivenessAnalysis liveness(graph, codegen, &local_allocator); - PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, codegen->GetCompilerOptions()).Run(); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = RegisterAllocator::Create(&local_allocator, codegen, liveness); @@ -322,11 +312,11 @@ static void RunCode(CodeGenerator* codegen, template <typename Expected> static void RunCode(CodegenTargetConfig target_config, + const CompilerOptions& compiler_options, HGraph* graph, std::function<void(HGraph*)> hook_before_codegen, bool has_result, Expected expected) { - CompilerOptions compiler_options; std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options)); RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected); @@ -334,55 +324,37 @@ static void RunCode(CodegenTargetConfig target_config, #ifdef ART_ENABLE_CODEGEN_arm CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const ArmInstructionSetFeatures> features_arm( - ArmInstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorARMVIXL(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64( - Arm64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorARM64(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_x86 CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) TestCodeGeneratorX86( - graph, *features_x86.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( - X86_64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); + return new (graph->GetAllocator()) x86_64::CodeGeneratorX86_64(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_mips CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const MipsInstructionSetFeatures> features_mips( - MipsInstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); + return new (graph->GetAllocator()) mips::CodeGeneratorMIPS(graph, compiler_options); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { - std::unique_ptr<const Mips64InstructionSetFeatures> 
features_mips64( - Mips64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetAllocator()) - mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); + return new (graph->GetAllocator()) mips64::CodeGeneratorMIPS64(graph, compiler_options); } #endif diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 356ff9f41f..7d3af9521a 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,7 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ -#include "debug/dwarf/register.h" +#include "dwarf/register.h" #include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index ed2f8e995d..5556f16740 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -151,23 +151,15 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst return InputCPURegisterAt(instr, index); } -inline int64_t Int64ConstantFrom(Location location) { - HConstant* instr = location.GetConstant(); - if (instr->IsIntConstant()) { - return instr->AsIntConstant()->GetValue(); - } else if (instr->IsNullConstant()) { - return 0; - } else { - DCHECK(instr->IsLongConstant()) << instr->DebugName(); - return instr->AsLongConstant()->GetValue(); - } +inline int64_t Int64FromLocation(Location location) { + return Int64FromConstant(location.GetConstant()); } inline vixl::aarch64::Operand OperandFrom(Location location, DataType::Type type) { if (location.IsRegister()) { return vixl::aarch64::Operand(RegisterFrom(location, type)); } else { - return vixl::aarch64::Operand(Int64ConstantFrom(location)); + return vixl::aarch64::Operand(Int64FromLocation(location)); } } @@ -234,6 +226,13 @@ inline vixl::aarch64::Operand OperandFromMemOperand( } } +inline bool AddSubCanEncodeAsImmediate(int64_t value) { + // If `value` does not fit but `-value` does, VIXL will automatically use + // the 'opposite' instruction. + return vixl::aarch64::Assembler::IsImmAddSub(value) + || vixl::aarch64::Assembler::IsImmAddSub(-value); +} + inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); @@ -249,6 +248,20 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* return IsUint<8>(value); } + // Code generation for Min/Max: + // Cmp left_op, right_op + // Csel dst, left_op, right_op, cond + if (instr->IsMin() || instr->IsMax()) { + if (constant->GetUses().HasExactlyOneElement()) { + // If value can be encoded as immediate for the Cmp, then let VIXL handle + // the constant generation for the Csel. + return AddSubCanEncodeAsImmediate(value); + } + // These values are encodable as immediates for Cmp and VIXL will use csinc and csinv + // with the zr register as right_op, hence no constant generation is required. + return constant->IsZeroBitPattern() || constant->IsOne() || constant->IsMinusOne(); + } + // For single uses we let VIXL handle the constant generation since it will // use registers that are not managed by the register allocator (wip0, wip1). if (constant->GetUses().HasExactlyOneElement()) { @@ -275,10 +288,7 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr->IsSub()) << instr->DebugName(); // Uses aliases of ADD/SUB instructions. 
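The AddSubCanEncodeAsImmediate helper factored out above is reused by the new Min/Max handling in Arm64CanEncodeConstantAsImmediate. A rough standalone restatement of the rule it relies on, assuming the usual A64 encoding of ADD/SUB immediates (an unsigned 12-bit value, optionally shifted left by 12); this is a sketch, not VIXL's actual IsImmAddSub:

#include <cstdint>

// Sketch of the add/sub immediate encodability test referenced above.
bool IsImmAddSubSketch(int64_t v) {
  uint64_t u = static_cast<uint64_t>(v);
  return v >= 0 && ((u >> 12) == 0 || (((u & 0xfffu) == 0) && (u >> 24) == 0));
}

bool AddSubCanEncodeAsImmediateSketch(int64_t v) {
  // If v itself does not fit but -v does, VIXL flips ADD <-> SUB.
  return IsImmAddSubSketch(v) || IsImmAddSubSketch(-v);
}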
- // If `value` does not fit but `-value` does, VIXL will automatically use - // the 'opposite' instruction. - return vixl::aarch64::Assembler::IsImmAddSub(value) - || vixl::aarch64::Assembler::IsImmAddSub(-value); + return AddSubCanEncodeAsImmediate(value); } } diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 6f11e628ee..09e7cabfa4 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -26,13 +26,13 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor { : HGraphDelegateVisitor(graph) {} private: - void VisitBasicBlock(HBasicBlock* block) OVERRIDE; + void VisitBasicBlock(HBasicBlock* block) override; - void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE; - void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE; + void VisitUnaryOperation(HUnaryOperation* inst) override; + void VisitBinaryOperation(HBinaryOperation* inst) override; - void VisitTypeConversion(HTypeConversion* inst) OVERRIDE; - void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE; + void VisitTypeConversion(HTypeConversion* inst) override; + void VisitDivZeroCheck(HDivZeroCheck* inst) override; DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor); }; @@ -47,34 +47,35 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor { private: void VisitShift(HBinaryOperation* shift); - void VisitEqual(HEqual* instruction) OVERRIDE; - void VisitNotEqual(HNotEqual* instruction) OVERRIDE; - - void VisitAbove(HAbove* instruction) OVERRIDE; - void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE; - void VisitBelow(HBelow* instruction) OVERRIDE; - void VisitBelowOrEqual(HBelowOrEqual* instruction) OVERRIDE; - - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitCompare(HCompare* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitRem(HRem* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitSub(HSub* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; + void VisitEqual(HEqual* instruction) override; + void VisitNotEqual(HNotEqual* instruction) override; + + void VisitAbove(HAbove* instruction) override; + void VisitAboveOrEqual(HAboveOrEqual* instruction) override; + void VisitBelow(HBelow* instruction) override; + void VisitBelowOrEqual(HBelowOrEqual* instruction) override; + + void VisitAnd(HAnd* instruction) override; + void VisitCompare(HCompare* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitRem(HRem* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitSub(HSub* instruction) override; + void VisitUShr(HUShr* instruction) override; + void VisitXor(HXor* instruction) override; }; -void HConstantFolding::Run() { +bool HConstantFolding::Run() { HConstantFoldingVisitor visitor(graph_); // Process basic blocks in reverse post-order in the dominator tree, // so that an instruction turned into a constant, used as input of // another instruction, may possibly be used to turn that second // instruction into a constant as well. 
visitor.VisitReversePostOrder(); + return true; } diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h index 05c6df4a93..72bd95b3cb 100644 --- a/compiler/optimizing/constant_folding.h +++ b/compiler/optimizing/constant_folding.h @@ -41,7 +41,7 @@ class HConstantFolding : public HOptimization { public: HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kConstantFoldingPassName = "constant_folding"; diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index d27104752b..74d9d3a993 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -16,8 +16,6 @@ #include <functional> -#include "arch/x86/instruction_set_features_x86.h" -#include "code_generator_x86.h" #include "constant_folding.h" #include "dead_code_elimination.h" #include "driver/compiler_options.h" @@ -60,9 +58,6 @@ class ConstantFoldingTest : public OptimizingUnitTest { std::string actual_before = printer_before.str(); EXPECT_EQ(expected_before, actual_before); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions()); HConstantFolding(graph_, "constant_folding").Run(); GraphChecker graph_checker_cf(graph_); graph_checker_cf.Run(); @@ -75,7 +70,7 @@ class ConstantFoldingTest : public OptimizingUnitTest { check_after_cf(graph_); - HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run(); + HDeadCodeElimination(graph_, /* stats= */ nullptr, "dead_code_elimination").Run(); GraphChecker graph_checker_dce(graph_); graph_checker_dce.Run(); ASSERT_TRUE(graph_checker_dce.IsValid()); diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index 4a66cd2265..3a1a9e023d 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -34,7 +34,7 @@ class CFREVisitor : public HGraphVisitor { candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)), stats_(stats) {} - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // Visit all instructions in block. 
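The Run() signature change visible in this and the surrounding passes (code sinking, constant folding, constructor-fence redundancy elimination) makes each optimization report whether it did any work. The exact contract is not spelled out in these hunks; the pattern shown is "return false on an early bail-out, true otherwise". An assumed sketch of the updated base interface:

// Assumed shape only; HOptimization's real declaration is not part of these hunks.
class HOptimizationSketch {
 public:
  virtual ~HOptimizationSketch() {}
  // Returns false when the pass bails out early (e.g. no exit block) and has
  // left the graph untouched; returns true when it ran and may have changed it.
  virtual bool Run() = 0;
};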
HGraphVisitor::VisitBasicBlock(block); @@ -43,86 +43,86 @@ class CFREVisitor : public HGraphVisitor { MergeCandidateFences(); } - void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE { + void VisitConstructorFence(HConstructorFence* constructor_fence) override { candidate_fences_.push_back(constructor_fence); for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) { - candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx)); + candidate_fence_targets_.insert(constructor_fence->InputAt(input_idx)); } } - void VisitBoundType(HBoundType* bound_type) OVERRIDE { + void VisitBoundType(HBoundType* bound_type) override { VisitAlias(bound_type); } - void VisitNullCheck(HNullCheck* null_check) OVERRIDE { + void VisitNullCheck(HNullCheck* null_check) override { VisitAlias(null_check); } - void VisitSelect(HSelect* select) OVERRIDE { + void VisitSelect(HSelect* select) override { VisitAlias(select); } - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { HInstruction* value = instruction->InputAt(1); VisitSetLocation(instruction, value); } - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { HInstruction* value = instruction->InputAt(1); VisitSetLocation(instruction, value); } - void VisitArraySet(HArraySet* instruction) OVERRIDE { + void VisitArraySet(HArraySet* instruction) override { HInstruction* value = instruction->InputAt(2); VisitSetLocation(instruction, value); } - void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) { + void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override { // Pessimize: Merge all fences. MergeCandidateFences(); } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { HandleInvoke(invoke); } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + void VisitInvokeVirtual(HInvokeVirtual* invoke) override { HandleInvoke(invoke); } - void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + void VisitInvokeInterface(HInvokeInterface* invoke) override { HandleInvoke(invoke); } - void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) override { HandleInvoke(invoke); } - void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) override { HandleInvoke(invoke); } - void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + void VisitClinitCheck(HClinitCheck* clinit) override { HandleInvoke(clinit); } - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override { // Conservatively treat it as an invocation. 
HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } @@ -208,13 +208,13 @@ class CFREVisitor : public HGraphVisitor { // there is no benefit to this extra complexity unless we also reordered // the stores to come later. candidate_fences_.clear(); - candidate_fence_targets_.Clear(); + candidate_fence_targets_.clear(); } // A publishing 'store' is only interesting if the value being stored // is one of the fence `targets` in `candidate_fences`. bool IsInterestingPublishTarget(HInstruction* store_input) const { - return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end(); + return candidate_fence_targets_.find(store_input) != candidate_fence_targets_.end(); } void MaybeMerge(HConstructorFence* target, HConstructorFence* src) { @@ -250,13 +250,14 @@ class CFREVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(CFREVisitor); }; -void ConstructorFenceRedundancyElimination::Run() { +bool ConstructorFenceRedundancyElimination::Run() { CFREVisitor cfre_visitor(graph_, stats_); // Arbitrarily visit in reverse-post order. // The exact block visit order does not matter, as the algorithm // only operates on a single block at a time. cfre_visitor.VisitReversePostOrder(); + return true; } } // namespace art diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h index f4b06d5544..014b342258 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.h +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h @@ -52,7 +52,7 @@ class ConstructorFenceRedundancyElimination : public HOptimization { const char* name = kCFREPassName) : HOptimization(graph, name, stats) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kCFREPassName = "constructor_fence_redundancy_elimination"; diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 4a6c91459f..3cbcc9e0c3 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -210,6 +210,42 @@ class DataType { static bool IsTypeConversionImplicit(Type input_type, Type result_type); static bool IsTypeConversionImplicit(int64_t value, Type result_type); + static bool IsZeroExtension(Type input_type, Type result_type) { + return IsIntOrLongType(result_type) && + IsUnsignedType(input_type) && + Size(result_type) > Size(input_type); + } + + static Type ToSigned(Type type) { + switch (type) { + case Type::kUint8: + return Type::kInt8; + case Type::kUint16: + return Type::kInt16; + case Type::kUint32: + return Type::kInt32; + case Type::kUint64: + return Type::kInt64; + default: + return type; + } + } + + static Type ToUnsigned(Type type) { + switch (type) { + case Type::kInt8: + return Type::kUint8; + case Type::kInt16: + return Type::kUint16; + case Type::kInt32: + return Type::kUint32; + case Type::kInt64: + return Type::kUint64; + default: + return type; + } + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 9fa0f72e80..1dc10948cc 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -508,7 +508,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { } } 
-void HDeadCodeElimination::Run() { +bool HDeadCodeElimination::Run() { // Do not eliminate dead blocks if the graph has irreducible loops. We could // support it, but that would require changes in our loop representation to handle // multiple entry points. We decided it was not worth the complexity. @@ -526,6 +526,7 @@ void HDeadCodeElimination::Run() { } SsaRedundantPhiElimination(graph_).Run(); RemoveDeadInstructions(); + return true; } } // namespace art diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 92a7f562e1..799721acf2 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -32,7 +32,8 @@ class HDeadCodeElimination : public HOptimization { HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name) : HOptimization(graph, name, stats) {} - void Run() OVERRIDE; + bool Run() override; + static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination"; private: diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index adb6ce1187..f5cd4dc27a 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -16,8 +16,6 @@ #include "dead_code_elimination.h" -#include "arch/x86/instruction_set_features_x86.h" -#include "code_generator_x86.h" #include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" @@ -45,10 +43,7 @@ void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data, std::string actual_before = printer_before.str(); ASSERT_EQ(actual_before, expected_before); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions()); - HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run(); + HDeadCodeElimination(graph, /* stats= */ nullptr, "dead_code_elimination").Run(); GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index b63914faf7..63a370a47b 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -27,12 +27,13 @@ namespace art { class EmitSwapMipsTest : public OptimizingUnitTest { public: - void SetUp() OVERRIDE { + void SetUp() override { + instruction_set_ = InstructionSet::kMips; + instruction_set_features_ = MipsInstructionSetFeatures::FromCppDefines(); + OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); - isa_features_ = MipsInstructionSetFeatures::FromCppDefines(); - codegen_ = new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, - *isa_features_.get(), - CompilerOptions()); + codegen_.reset( + new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, *compiler_options_)); moves_ = new (GetAllocator()) HParallelMove(GetAllocator()); test_helper_.reset( new AssemblerTestInfrastructure(GetArchitectureString(), @@ -45,10 +46,12 @@ class EmitSwapMipsTest : public OptimizingUnitTest { GetAssemblyHeader())); } - void TearDown() OVERRIDE { + void TearDown() override { test_helper_.reset(); - isa_features_.reset(); + codegen_.reset(); + graph_ = nullptr; ResetPoolAndAllocator(); + OptimizingUnitTest::TearDown(); } // Get the typically used name for this architecture. 
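The EmitSwapMipsTest changes above move the code generator into a std::unique_ptr that is reset in TearDown before delegating to the base fixture. A generic GoogleTest sketch of that ownership ordering (FakeCodegen and BaseTest are invented stand-ins; only the reset-before-base-TearDown idea is taken from the change):

#include <gtest/gtest.h>
#include <memory>

struct FakeCodegen { int id = 0; };            // stand-in for mips::CodeGeneratorMIPS

class BaseTest : public ::testing::Test {};    // stand-in for OptimizingUnitTest

class OwningFixtureTest : public BaseTest {
 protected:
  void SetUp() override {
    BaseTest::SetUp();                         // base environment comes up first
    codegen_ = std::make_unique<FakeCodegen>();
  }
  void TearDown() override {
    codegen_.reset();                          // drop members that depend on base state
    BaseTest::TearDown();                      // only then tear the base environment down
  }
  std::unique_ptr<FakeCodegen> codegen_;
};

TEST_F(OwningFixtureTest, CodegenIsAvailable) {
  EXPECT_NE(codegen_, nullptr);
}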
@@ -106,10 +109,9 @@ class EmitSwapMipsTest : public OptimizingUnitTest { protected: HGraph* graph_; HParallelMove* moves_; - mips::CodeGeneratorMIPS* codegen_; + std::unique_ptr<mips::CodeGeneratorMIPS> codegen_; mips::MipsAssembler* assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; - std::unique_ptr<const MipsInstructionSetFeatures> isa_features_; }; TEST_F(EmitSwapMipsTest, TwoRegisters) { diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c88baa8610..01d9603802 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -25,6 +25,11 @@ #include "base/bit_vector-inl.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "handle.h" +#include "mirror/class.h" +#include "obj_ptr-inl.h" +#include "scoped_thread_state_change-inl.h" +#include "subtype_check.h" namespace art { @@ -53,6 +58,30 @@ static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) { !boundary->IsEntry(); } + +size_t GraphChecker::Run(bool pass_change, size_t last_size) { + size_t current_size = GetGraph()->GetReversePostOrder().size(); + if (!pass_change) { + // Nothing changed for certain. Do a quick sanity check on that assertion + // for anything other than the first call (when last size was still 0). + if (last_size != 0) { + if (current_size != last_size) { + AddError(StringPrintf("Incorrect no-change assertion, " + "last graph size %zu vs current graph size %zu", + last_size, current_size)); + } + } + // TODO: if we would trust the "false" value of the flag completely, we + // could skip checking the graph at this point. + } + + // VisitReversePostOrder is used instead of VisitInsertionOrder, + // as the latter might visit dead blocks removed by the dominator + // computation. 
+ VisitReversePostOrder(); + return current_size; +} + void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; @@ -548,30 +577,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) { } } -void GraphChecker::VisitCheckCast(HCheckCast* check) { - VisitInstruction(check); - HInstruction* input = check->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", +void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name) { + if (!check->InputAt(input_pos)->IsIntConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.", check->DebugName(), check->GetId(), - input->DebugName(), - input->GetId())); + input_pos, + name, + check->InputAt(2)->DebugName(), + check->InputAt(2)->GetId())); + } else if (check_value) { + uint32_t actual_value = + static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue()); + if (actual_value != expected_value) { + AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.", + check->DebugName(), + check->GetId(), + name, + actual_value, + expected_value)); + } } } -void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { - VisitInstruction(instruction); - HInstruction* input = instruction->InputAt(1); - if (!input->IsLoadClass()) { - AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.", - instruction->DebugName(), - instruction->GetId(), - input->DebugName(), - input->GetId())); +void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + VisitInstruction(check); + HInstruction* input = check->InputAt(1); + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + if (!input->IsNullConstant()) { + AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } + bool check_values = false; + BitString::StorageType expected_path_to_root = 0u; + BitString::StorageType expected_mask = 0u; + { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = check->GetClass().Get(); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + if (state == SubtypeCheckInfo::kAssigned) { + expected_path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass); + expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass); + check_values = true; + } else { + AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.", + check->DebugName(), + check->GetId())); + } + } + CheckTypeCheckBitstringInput( + check, /* input_pos= */ 2, check_values, expected_path_to_root, "path_to_root"); + CheckTypeCheckBitstringInput(check, /* input_pos= */ 3, check_values, expected_mask, "mask"); + } else { + if (!input->IsLoadClass()) { + AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.", + check->DebugName(), + check->GetId(), + input->DebugName(), + input->GetId())); + } } } +void GraphChecker::VisitCheckCast(HCheckCast* check) { + HandleTypeCheckInstruction(check); +} + +void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) { + HandleTypeCheckInstruction(instruction); +} + void 
GraphChecker::HandleLoop(HBasicBlock* loop_header) { int id = loop_header->GetBlockId(); HLoopInformation* loop_information = loop_header->GetLoopInformation(); @@ -847,7 +931,7 @@ void GraphChecker::VisitPhi(HPhi* phi) { // because the BitVector reallocation strategy has very bad worst-case behavior. ArenaBitVector visited(&allocator, GetGraph()->GetCurrentInstructionId(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocGraphChecker); visited.ClearAllBits(); if (!IsConstantEquivalent(phi, other_phi, &visited)) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 0f0b49d240..d085609197 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -38,39 +38,43 @@ class GraphChecker : public HGraphDelegateVisitor { seen_ids_.ClearAllBits(); } - // Check the whole graph (in reverse post-order). - void Run() { - // VisitReversePostOrder is used instead of VisitInsertionOrder, - // as the latter might visit dead blocks removed by the dominator - // computation. - VisitReversePostOrder(); - } - - void VisitBasicBlock(HBasicBlock* block) OVERRIDE; - - void VisitInstruction(HInstruction* instruction) OVERRIDE; - void VisitPhi(HPhi* phi) OVERRIDE; - - void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE; - void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE; - void VisitBoundType(HBoundType* instruction) OVERRIDE; - void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; - void VisitCheckCast(HCheckCast* check) OVERRIDE; - void VisitCondition(HCondition* op) OVERRIDE; - void VisitConstant(HConstant* instruction) OVERRIDE; - void VisitDeoptimize(HDeoptimize* instruction) OVERRIDE; - void VisitIf(HIf* instruction) OVERRIDE; - void VisitInstanceOf(HInstanceOf* check) OVERRIDE; - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; - void VisitLoadException(HLoadException* load) OVERRIDE; - void VisitNeg(HNeg* instruction) OVERRIDE; - void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE; - void VisitReturn(HReturn* ret) OVERRIDE; - void VisitReturnVoid(HReturnVoid* ret) OVERRIDE; - void VisitSelect(HSelect* instruction) OVERRIDE; - void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - + // Check the whole graph. The pass_change parameter indicates whether changes + // may have occurred during the just executed pass. The default value is + // conservatively "true" (something may have changed). The last_size parameter + // and return value pass along the observed graph sizes. 
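The comment just added spells out the new contract: every optimization pass reports through its bool Run() return value whether it may have changed the graph, and the checker compares sizes to catch a false "no change" claim. A hedged sketch of that handshake with invented stand-ins (GraphLike, PassLike, CheckGraph and the block-count proxy are illustrative; only the bool-return and size-comparison idea comes from the patch):

#include <cstdio>
#include <vector>

struct GraphLike { std::vector<int> blocks; };   // block count stands in for graph size

struct PassLike {
  // Returns true if the pass may have changed the graph (mirrors bool HOptimization::Run()).
  virtual bool Run(GraphLike* g) = 0;
  virtual ~PassLike() = default;
};

struct NoOpPass : PassLike {
  bool Run(GraphLike*) override { return false; }  // claims "nothing changed"
};

// Mirrors the spirit of GraphChecker::Run(pass_change, last_size): if the previous
// pass claimed no change, the observed size must not have moved.
size_t CheckGraph(const GraphLike& g, bool pass_change, size_t last_size) {
  size_t current_size = g.blocks.size();
  if (!pass_change && last_size != 0 && current_size != last_size) {
    std::fprintf(stderr, "no-change assertion violated: %zu vs %zu\n", last_size, current_size);
  }
  return current_size;
}

int main() {
  GraphLike graph{{1, 2, 3}};
  NoOpPass pass;
  size_t size = CheckGraph(graph, /* pass_change= */ true, /* last_size= */ 0);
  bool changed = pass.Run(&graph);
  size = CheckGraph(graph, changed, size);
  std::printf("blocks: %zu\n", size);
  return 0;
}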
+ size_t Run(bool pass_change = true, size_t last_size = 0); + + void VisitBasicBlock(HBasicBlock* block) override; + + void VisitInstruction(HInstruction* instruction) override; + void VisitPhi(HPhi* phi) override; + + void VisitBinaryOperation(HBinaryOperation* op) override; + void VisitBooleanNot(HBooleanNot* instruction) override; + void VisitBoundType(HBoundType* instruction) override; + void VisitBoundsCheck(HBoundsCheck* check) override; + void VisitCheckCast(HCheckCast* check) override; + void VisitCondition(HCondition* op) override; + void VisitConstant(HConstant* instruction) override; + void VisitDeoptimize(HDeoptimize* instruction) override; + void VisitIf(HIf* instruction) override; + void VisitInstanceOf(HInstanceOf* check) override; + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; + void VisitLoadException(HLoadException* load) override; + void VisitNeg(HNeg* instruction) override; + void VisitPackedSwitch(HPackedSwitch* instruction) override; + void VisitReturn(HReturn* ret) override; + void VisitReturnVoid(HReturnVoid* ret) override; + void VisitSelect(HSelect* instruction) override; + void VisitTryBoundary(HTryBoundary* try_boundary) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + + void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check, + size_t input_pos, + bool check_value, + uint32_t expected_value, + const char* name); + void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction); void HandleLoop(HBasicBlock* loop_header); void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 5ff31cead5..2a7bbcb72f 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -106,8 +106,7 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) { } } -typedef Disassembler* create_disasm_prototype(InstructionSet instruction_set, - DisassemblerOptions* options); +using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*); class HGraphVisualizerDisassembler { public: HGraphVisualizerDisassembler(InstructionSet instruction_set, @@ -131,10 +130,10 @@ class HGraphVisualizerDisassembler { // been generated, so we can read data in literal pools. disassembler_ = std::unique_ptr<Disassembler>((*create_disassembler)( instruction_set, - new DisassemblerOptions(/* absolute_addresses */ false, + new DisassemblerOptions(/* absolute_addresses= */ false, base_address, end_address, - /* can_read_literals */ true, + /* can_read_literals= */ true, Is64BitInstructionSet(instruction_set) ? 
&Thread::DumpThreadOffset<PointerSize::k64> : &Thread::DumpThreadOffset<PointerSize::k32>))); @@ -333,7 +332,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { return output_; } - void VisitParallelMove(HParallelMove* instruction) OVERRIDE { + void VisitParallelMove(HParallelMove* instruction) override { StartAttributeStream("liveness") << instruction->GetLifetimePosition(); StringList moves; for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) { @@ -346,36 +345,36 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("moves") << moves; } - void VisitIntConstant(HIntConstant* instruction) OVERRIDE { + void VisitIntConstant(HIntConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitLongConstant(HLongConstant* instruction) OVERRIDE { + void VisitLongConstant(HLongConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE { + void VisitFloatConstant(HFloatConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE { + void VisitDoubleConstant(HDoubleConstant* instruction) override { StartAttributeStream() << instruction->GetValue(); } - void VisitPhi(HPhi* phi) OVERRIDE { + void VisitPhi(HPhi* phi) override { StartAttributeStream("reg") << phi->GetRegNumber(); StartAttributeStream("is_catch_phi") << std::boolalpha << phi->IsCatchPhi() << std::noboolalpha; } - void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE { + void VisitMemoryBarrier(HMemoryBarrier* barrier) override { StartAttributeStream("kind") << barrier->GetBarrierKind(); } - void VisitMonitorOperation(HMonitorOperation* monitor) OVERRIDE { + void VisitMonitorOperation(HMonitorOperation* monitor) override { StartAttributeStream("kind") << (monitor->IsEnter() ? 
"enter" : "exit"); } - void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + void VisitLoadClass(HLoadClass* load_class) override { StartAttributeStream("load_kind") << load_class->GetLoadKind(); const char* descriptor = load_class->GetDexFile().GetTypeDescriptor( load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex())); @@ -386,23 +385,42 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << load_class->NeedsAccessCheck() << std::noboolalpha; } - void VisitLoadString(HLoadString* load_string) OVERRIDE { + void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override { + StartAttributeStream("load_kind") << "RuntimeCall"; + StartAttributeStream("method_handle_index") << load_method_handle->GetMethodHandleIndex(); + } + + void VisitLoadMethodType(HLoadMethodType* load_method_type) override { + StartAttributeStream("load_kind") << "RuntimeCall"; + const DexFile& dex_file = load_method_type->GetDexFile(); + const dex::ProtoId& proto_id = dex_file.GetProtoId(load_method_type->GetProtoIndex()); + StartAttributeStream("method_type") << dex_file.GetProtoSignature(proto_id); + } + + void VisitLoadString(HLoadString* load_string) override { StartAttributeStream("load_kind") << load_string->GetLoadKind(); } - void VisitCheckCast(HCheckCast* check_cast) OVERRIDE { - StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind(); + void HandleTypeCheckInstruction(HTypeCheckInstruction* check) { + StartAttributeStream("check_kind") << check->GetTypeCheckKind(); StartAttributeStream("must_do_null_check") << std::boolalpha - << check_cast->MustDoNullCheck() << std::noboolalpha; + << check->MustDoNullCheck() << std::noboolalpha; + if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + StartAttributeStream("path_to_root") << std::hex + << "0x" << check->GetBitstringPathToRoot() << std::dec; + StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec; + } } - void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE { - StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind(); - StartAttributeStream("must_do_null_check") << std::boolalpha - << instance_of->MustDoNullCheck() << std::noboolalpha; + void VisitCheckCast(HCheckCast* check_cast) override { + HandleTypeCheckInstruction(check_cast); + } + + void VisitInstanceOf(HInstanceOf* instance_of) override { + HandleTypeCheckInstruction(instance_of); } - void VisitArrayLength(HArrayLength* array_length) OVERRIDE { + void VisitArrayLength(HArrayLength* array_length) override { StartAttributeStream("is_string_length") << std::boolalpha << array_length->IsStringLength() << std::noboolalpha; if (array_length->IsEmittedAtUseSite()) { @@ -410,31 +428,31 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } } - void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE { + void VisitBoundsCheck(HBoundsCheck* bounds_check) override { StartAttributeStream("is_string_char_at") << std::boolalpha << bounds_check->IsStringCharAt() << std::noboolalpha; } - void VisitArrayGet(HArrayGet* array_get) OVERRIDE { + void VisitArrayGet(HArrayGet* array_get) override { StartAttributeStream("is_string_char_at") << std::boolalpha << array_get->IsStringCharAt() << std::noboolalpha; } - void VisitArraySet(HArraySet* array_set) OVERRIDE { + void VisitArraySet(HArraySet* array_set) override { StartAttributeStream("value_can_be_null") << std::boolalpha << array_set->GetValueCanBeNull() << std::noboolalpha; StartAttributeStream("needs_type_check") << std::boolalpha << 
array_set->NeedsTypeCheck() << std::noboolalpha; } - void VisitCompare(HCompare* compare) OVERRIDE { + void VisitCompare(HCompare* compare) override { ComparisonBias bias = compare->GetBias(); StartAttributeStream("bias") << (bias == ComparisonBias::kGtBias ? "gt" : (bias == ComparisonBias::kLtBias ? "lt" : "none")); } - void VisitInvoke(HInvoke* invoke) OVERRIDE { + void VisitInvoke(HInvoke* invoke) override { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); ArtMethod* method = invoke->GetResolvedMethod(); // We don't print signatures, which conflict with c1visualizer format. @@ -451,12 +469,12 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::noboolalpha; } - void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) override { VisitInvoke(invoke); StartAttributeStream("invoke_type") << invoke->GetInvokeType(); } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { VisitInvoke(invoke); StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind(); StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); @@ -465,96 +483,104 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + void VisitInvokeVirtual(HInvokeVirtual* invoke) override { VisitInvoke(invoke); StartAttributeStream("intrinsic") << invoke->GetIntrinsic(); } - void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) override { VisitInvoke(invoke); StartAttributeStream("invoke_type") << "InvokePolymorphic"; } - void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE { + void VisitInstanceFieldGet(HInstanceFieldGet* iget) override { StartAttributeStream("field_name") << iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iget->GetFieldType(); } - void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE { + void VisitInstanceFieldSet(HInstanceFieldSet* iset) override { StartAttributeStream("field_name") << iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << iset->GetFieldType(); } - void VisitStaticFieldGet(HStaticFieldGet* sget) OVERRIDE { + void VisitStaticFieldGet(HStaticFieldGet* sget) override { StartAttributeStream("field_name") << sget->GetFieldInfo().GetDexFile().PrettyField(sget->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sget->GetFieldType(); } - void VisitStaticFieldSet(HStaticFieldSet* sset) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* sset) override { StartAttributeStream("field_name") << sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), /* with type */ false); StartAttributeStream("field_type") << sset->GetFieldType(); } - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override { StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) OVERRIDE { + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) override { 
StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) OVERRIDE { + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) override { StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) OVERRIDE { + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) override { StartAttributeStream("field_type") << field_access->GetFieldType(); } - void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE { + void VisitTryBoundary(HTryBoundary* try_boundary) override { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } - void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + void VisitDeoptimize(HDeoptimize* deoptimize) override { StartAttributeStream("kind") << deoptimize->GetKind(); } - void VisitVecOperation(HVecOperation* vec_operation) OVERRIDE { + void VisitVecOperation(HVecOperation* vec_operation) override { StartAttributeStream("packed_type") << vec_operation->GetPackedType(); } - void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) OVERRIDE { + void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override { StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString(); } - void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { + void VisitVecHalvingAdd(HVecHalvingAdd* hadd) override { VisitVecBinaryOperation(hadd); StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } - void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { + void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) override { VisitVecOperation(instruction); StartAttributeStream("kind") << instruction->GetOpKind(); } + void VisitVecDotProd(HVecDotProd* instruction) override { + VisitVecOperation(instruction); + DataType::Type arg_type = instruction->InputAt(1)->AsVecOperation()->GetPackedType(); + StartAttributeStream("type") << (instruction->IsZeroExtending() ? 
+ DataType::ToUnsigned(arg_type) : + DataType::ToSigned(arg_type)); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) - void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { + void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) override { StartAttributeStream("kind") << instruction->GetOpKind(); } - void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE { + void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) override { StartAttributeStream("kind") << instruction->GetOpKind(); } - void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) OVERRIDE { + void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) override { StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); if (HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { StartAttributeStream("shift") << instruction->GetShiftAmount(); @@ -576,6 +602,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } StartAttributeStream() << input_list; } + if (instruction->GetDexPc() != kNoDexPc) { + StartAttributeStream("dex_pc") << instruction->GetDexPc(); + } else { + StartAttributeStream("dex_pc") << "n/a"; + } instruction->Accept(this); if (instruction->HasEnvironment()) { StringList envs; @@ -641,20 +672,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha; } + // For the builder and the inliner, we want to add extra information on HInstructions + // that have reference types, and also HInstanceOf/HCheckcast. if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == DataType::Type::kReference)) { - ReferenceTypeInfo info = instruction->IsLoadClass() - ? instruction->AsLoadClass()->GetLoadedClassRTI() - : instruction->GetReferenceTypeInfo(); + && (instruction->GetType() == DataType::Type::kReference || + instruction->IsInstanceOf() || + instruction->IsCheckCast())) { + ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference) + ? instruction->IsLoadClass() + ? instruction->AsLoadClass()->GetLoadedClassRTI() + : instruction->GetReferenceTypeInfo() + : instruction->IsInstanceOf() + ? 
instruction->AsInstanceOf()->GetTargetClassRTI() + : instruction->AsCheckCast()->GetTargetClassRTI(); ScopedObjectAccess soa(Thread::Current()); if (info.IsValid()) { StartAttributeStream("klass") << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get()); - StartAttributeStream("can_be_null") - << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + if (instruction->GetType() == DataType::Type::kReference) { + StartAttributeStream("can_be_null") + << std::boolalpha << instruction->CanBeNull() << std::noboolalpha; + } StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; - } else if (instruction->IsLoadClass()) { + } else if (instruction->IsLoadClass() || + instruction->IsInstanceOf() || + instruction->IsCheckCast()) { StartAttributeStream("klass") << "unresolved"; } else { // The NullConstant may be added to the graph during other passes that happen between @@ -778,7 +821,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { Flush(); } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { StartTag("block"); PrintProperty("name", "B", block->GetBlockId()); if (block->GetLifetimeStart() != kNoLifetime) { @@ -881,8 +924,8 @@ void HGraphVisualizer::DumpGraphWithDisassembly() const { HGraphVisualizerPrinter printer(graph_, *output_, "disassembly", - /* is_after_pass */ true, - /* graph_in_bad_state */ false, + /* is_after_pass= */ true, + /* graph_in_bad_state= */ false, codegen_, codegen_.GetDisassemblyInformation()); printer.Run(); diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index f05159b735..e8460a843f 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -43,7 +43,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn), num_entries_(0u) { - // ArenaAllocator returns zeroed memory, so no need to set buckets to null. DCHECK(IsPowerOfTwo(num_buckets_)); std::fill_n(buckets_, num_buckets_, nullptr); buckets_owned_.SetInitialBits(num_buckets_); @@ -57,8 +56,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn), num_entries_(0u) { - // ArenaAllocator returns zeroed memory, so entries of buckets_ and - // buckets_owned_ are initialized to null and false, respectively. DCHECK(IsPowerOfTwo(num_buckets_)); PopulateFromInternal(other); } @@ -348,11 +345,11 @@ class GlobalValueNumberer : public ValueObject { side_effects_(side_effects), sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)), visited_blocks_( - &allocator_, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) { + &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) { visited_blocks_.ClearAllBits(); } - void Run(); + bool Run(); private: // Per-block GVN. 
Will also update the ValueSet of the dominated and @@ -397,7 +394,7 @@ class GlobalValueNumberer : public ValueObject { DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); }; -void GlobalValueNumberer::Run() { +bool GlobalValueNumberer::Run() { DCHECK(side_effects_.HasRun()); sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_); @@ -406,6 +403,7 @@ void GlobalValueNumberer::Run() { for (HBasicBlock* block : graph_->GetReversePostOrder()) { VisitBasicBlock(block); } + return true; } void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { @@ -478,7 +476,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { HInstruction* next = current->GetNext(); // Do not kill the set with the side effects of the instruction just now: if // the instruction is GVN'ed, we don't need to kill. - if (current->CanBeMoved()) { + // + // BoundType is a special case example of an instruction which shouldn't be moved but can be + // GVN'ed. + if (current->CanBeMoved() || current->IsBoundType()) { if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { // For commutative ops, (x op y) will be treated the same as (y op x) // after fixed ordering. @@ -542,12 +543,12 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( // that is larger, we return it if no perfectly-matching set is found. // Note that we defer testing WillBeReferencedAgain until all other criteria // have been satisfied because it might be expensive. - if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) { + if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) { if (!WillBeReferencedAgain(current_block)) { return current_block; } } else if (secondary_match == nullptr && - current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) { + current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) { if (!WillBeReferencedAgain(current_block)) { secondary_match = current_block; } @@ -557,9 +558,9 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( return secondary_match; } -void GVNOptimization::Run() { +bool GVNOptimization::Run() { GlobalValueNumberer gvn(graph_, side_effects_); - gvn.Run(); + return gvn.Run(); } } // namespace art diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h index 4fdba26ebd..bbf2265e98 100644 --- a/compiler/optimizing/gvn.h +++ b/compiler/optimizing/gvn.h @@ -31,7 +31,7 @@ class GVNOptimization : public HOptimization { const char* pass_name = kGlobalValueNumberingPassName) : HOptimization(graph, pass_name), side_effects_(side_effects) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kGlobalValueNumberingPassName = "GVN"; diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index d270c6a28e..3a10d5831d 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -243,7 +243,7 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name) graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { } -void HInductionVarAnalysis::Run() { +bool HInductionVarAnalysis::Run() { // Detects sequence variables (generalized induction variables) during an outer to inner // traversal of all loops using Gerlek's algorithm. The order is important to enable // range analysis on outer loop while visiting inner loops. 
@@ -253,6 +253,7 @@ void HInductionVarAnalysis::Run() { VisitLoop(graph_block->GetLoopInformation()); } } + return !induction_.empty(); } void HInductionVarAnalysis::VisitLoop(HLoopInformation* loop) { @@ -1073,8 +1074,8 @@ bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, && lower_value >= upper_value; default: LOG(FATAL) << "CONDITION UNREACHABLE"; + UNREACHABLE(); } - return false; // not certain, may be untaken } bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, @@ -1098,8 +1099,8 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, return (IsAtLeast(upper_expr, &value) && value >= (min - stride_value)); default: LOG(FATAL) << "CONDITION UNREACHABLE"; + UNREACHABLE(); } - return false; // not certain, may be infinite } bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr, diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index acad77d35f..a48aa90059 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -37,7 +37,7 @@ class HInductionVarAnalysis : public HOptimization { public: explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kInductionPassName = "induction_var_analysis"; diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 99dec11240..4c78fa8f06 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) { DCHECK(instruction != nullptr); if (instruction->IsArrayLength()) { return true; - } else if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // Instruction MIN(>=0, >=0) is >= 0. - return IsGEZero(instruction->InputAt(0)) && - IsGEZero(instruction->InputAt(1)); - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - // Instruction ABS(>=0) is >= 0. - // NOTE: ABS(minint) = minint prevents assuming - // >= 0 without looking at the argument. - return IsGEZero(instruction->InputAt(0)); - default: - break; - } + } else if (instruction->IsMin()) { + // Instruction MIN(>=0, >=0) is >= 0. + return IsGEZero(instruction->InputAt(0)) && + IsGEZero(instruction->InputAt(1)); + } else if (instruction->IsAbs()) { + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. + return IsGEZero(instruction->InputAt(0)); } int64_t value = -1; return IsInt64AndGet(instruction, &value) && value >= 0; @@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) { /** Hunts "under the hood" for a suitable instruction at the hint. */ static bool IsMaxAtHint( HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) { - if (instruction->IsInvokeStaticOrDirect()) { - switch (instruction->AsInvoke()->GetIntrinsic()) { - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - // For MIN(x, y), return most suitable x or y as maximum. - return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || - IsMaxAtHint(instruction->InputAt(1), hint, suitable); - default: - break; - } + if (instruction->IsMin()) { + // For MIN(x, y), return most suitable x or y as maximum. 
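The rewritten IsGEZero leans on two facts stated in its comments: min(a, b) is non-negative when both operands are, and abs(x) can only be trusted to be non-negative when x itself is, because abs on the most negative int stays negative in two's complement. A small self-contained check of that corner case with plain ints, not ART's HMin/HAbs nodes:

#include <climits>
#include <cstdio>
#include <cstdlib>

int main() {
  // min(>=0, >=0) is >= 0.
  int a = 3, b = 0;
  std::printf("min(%d, %d) = %d\n", a, b, a < b ? a : b);

  // abs(INT_MIN) overflows and stays negative on two's-complement targets
  // (formally undefined behavior), so abs(x) >= 0 cannot be concluded
  // without first knowing x >= 0 -- the NOTE in IsGEZero.
  int x = INT_MIN;
  std::printf("abs(INT_MIN) = %d\n", std::abs(x));
  return 0;
}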
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) || + IsMaxAtHint(instruction->InputAt(1), hint, suitable); } else { *suitable = instruction; return HuntForDeclaration(instruction) == hint; } - return false; } /** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */ @@ -230,13 +216,13 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, chase_hint_ = chase_hint; bool in_body = context->GetBlock() != loop->GetHeader(); int64_t stride_value = 0; - *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true)); - *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint); + *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min= */ true)); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min= */ false), chase_hint); *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip); chase_hint_ = nullptr; // Retry chasing constants for wrap-around (merge sensitive). if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) { - *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true)); + *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min= */ true)); } return true; } @@ -365,14 +351,16 @@ void InductionVarRange::Replace(HInstruction* instruction, } } -bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const { - HInductionVarAnalysis::InductionInfo *trip = - induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); - if (trip != nullptr && !IsUnsafeTripCount(trip)) { - IsConstant(trip->op_a, kExact, tc); - return true; - } - return false; +bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const { + bool is_constant_unused = false; + return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count); +} + +bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop, + /*out*/ int64_t* trip_count) const { + bool is_constant = false; + CheckForFiniteAndConstantProps(loop, &is_constant, trip_count); + return is_constant; } bool InductionVarRange::IsUnitStride(HInstruction* context, @@ -431,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop, // Private class methods. // +bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const { + HInductionVarAnalysis::InductionInfo *trip = + induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); + if (trip != nullptr && !IsUnsafeTripCount(trip)) { + *is_constant = IsConstant(trip->op_a, kExact, trip_count); + return true; + } + return false; +} + bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, ConstantRequest request, /*out*/ int64_t* value) const { @@ -445,8 +445,8 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info, } // Try range analysis on the invariant, only accept a proper range // to avoid arithmetic wrap-around anomalies. 
- Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true); - Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false); + Value min_val = GetVal(info, nullptr, /* in_body= */ true, /* is_min= */ true); + Value max_val = GetVal(info, nullptr, /* in_body= */ true, /* is_min= */ false); if (IsConstantValue(min_val) && IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) { if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) { @@ -791,10 +791,10 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct return MulRangeAndConstant(value, info1, trip, in_body, is_min); } // Interval ranges. - Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); - Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); - Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); - Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + Value v1_min = GetVal(info1, trip, in_body, /* is_min= */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min= */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min= */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min= */ false); // Positive range vs. positive or negative range. if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -825,10 +825,10 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct return DivRangeAndConstant(value, info1, trip, in_body, is_min); } // Interval ranges. - Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true); - Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false); - Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true); - Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false); + Value v1_min = GetVal(info1, trip, in_body, /* is_min= */ true); + Value v1_max = GetVal(info1, trip, in_body, /* is_min= */ false); + Value v2_min = GetVal(info2, trip, in_body, /* is_min= */ true); + Value v2_max = GetVal(info2, trip, in_body, /* is_min= */ false); // Positive range vs. positive or negative range. if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) { if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) { @@ -1019,10 +1019,10 @@ bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context, // Code generation for taken test: generate the code when requested or otherwise analyze // if code generation is feasible when taken test is needed. if (taken_test != nullptr) { - return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min= */ false); } else if (*needs_taken_test) { if (!GenerateCode( - trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min= */ false)) { return false; } } @@ -1030,9 +1030,9 @@ bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context, return // Success on lower if invariant (not set), or code can be generated. ((info->induction_class == HInductionVarAnalysis::kInvariant) || - GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && + GenerateCode(info, trip, graph, block, lower, in_body, /* is_min= */ true)) && // And success on upper. 
- GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); + GenerateCode(info, trip, graph, block, upper, in_body, /* is_min= */ false); } bool InductionVarRange::GenerateLastValuePolynomial(HInductionVarAnalysis::InductionInfo* info, diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 0b980f596a..906dc6bb7b 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -161,9 +161,15 @@ class InductionVarRange { } /** - * Checks if header logic of a loop terminates. Sets trip-count tc if known. + * Checks if header logic of a loop terminates. If trip count is known sets 'trip_count' to its + * value. */ - bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; + bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; + + /** + * Checks if a trip count is known for the loop and sets 'trip_count' to its value in this case. + */ + bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const; /** * Checks if the given instruction is a unit stride induction inside the closest enveloping @@ -194,6 +200,14 @@ class InductionVarRange { }; /** + * Checks if header logic of a loop terminates. If trip count is known (constant) sets + * 'is_constant' to true and 'trip_count' to the trip count value. + */ + bool CheckForFiniteAndConstantProps(HLoopInformation* loop, + /*out*/ bool* is_constant, + /*out*/ int64_t* trip_count) const; + + /** * Returns true if exact or upper/lower bound on the given induction * information is known as a 64-bit constant, which is returned in value. */ diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index e5bc6ef22c..f6af384af0 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -252,24 +252,24 @@ class InductionVarRangeTest : public OptimizingUnitTest { Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip) { - return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true); + return range_.GetVal(info, trip, /* in_body= */ true, /* is_min= */ true); } Value GetMax(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip) { - return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false); + return range_.GetVal(info, trip, /* in_body= */ true, /* is_min= */ false); } Value GetMul(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min); + return range_.GetMul(info1, info2, nullptr, /* in_body= */ true, is_min); } Value GetDiv(HInductionVarAnalysis::InductionInfo* info1, HInductionVarAnalysis::InductionInfo* info2, bool is_min) { - return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min); + return range_.GetDiv(info1, info2, nullptr, /* in_body= */ true, is_min); } Value GetRem(HInductionVarAnalysis::InductionInfo* info1, @@ -701,7 +701,11 @@ TEST_F(InductionVarRangeTest, MaxValue) { TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { // We pass a bogus constant for the class to avoid mocking one. 
- HInstruction* new_array = new (GetAllocator()) HNewArray(x_, x_, 0); + HInstruction* new_array = new (GetAllocator()) HNewArray( + /* cls= */ x_, + /* length= */ x_, + /* dex_pc= */ 0, + /* component_size_shift= */ 0); entry_block_->AddInstruction(new_array); HInstruction* array_length = new (GetAllocator()) HArrayLength(new_array, 0); entry_block_->AddInstruction(array_length); diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 4fc7262265..205077fb49 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -18,15 +18,16 @@ #include "art_method-inl.h" #include "base/enums.h" +#include "base/logging.h" #include "builder.h" #include "class_linker.h" +#include "class_root.h" #include "constant_folding.h" #include "data_type-inl.h" #include "dead_code_elimination.h" #include "dex/inline_method_analyser.h" #include "dex/verification_results.h" #include "dex/verified_method.h" -#include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "instruction_simplifier.h" @@ -35,8 +36,9 @@ #include "jit/jit_code_cache.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" +#include "mirror/object_array-alloc-inl.h" +#include "mirror/object_array-inl.h" #include "nodes.h" -#include "optimizing_compiler.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" #include "scoped_thread_state_change-inl.h" @@ -124,13 +126,18 @@ void HInliner::UpdateInliningBudget() { } } -void HInliner::Run() { - if (graph_->IsDebuggable()) { +bool HInliner::Run() { + if (codegen_->GetCompilerOptions().GetInlineMaxCodeUnits() == 0) { + // Inlining effectively disabled. + return false; + } else if (graph_->IsDebuggable()) { // For simplicity, we currently never inline when the graph is debuggable. This avoids // doing some logic in the runtime to discover if a method could have been inlined. - return; + return false; } + bool didInline = false; + // Initialize the number of instructions for the method being compiled. Recursive calls // to HInliner::Run have already updated the instruction count. if (outermost_graph_ == graph_) { @@ -143,14 +150,15 @@ void HInliner::Run() { // If we're compiling with a core image (which is only used for // test purposes), honor inlining directives in method names: - // - if a method's name contains the substring "$inline$", ensure - // that this method is actually inlined; // - if a method's name contains the substring "$noinline$", do not - // inline that method. - // We limit this to AOT compilation, as the JIT may or may not inline + // inline that method; + // - if a method's name contains the substring "$inline$", ensure + // that this method is actually inlined. + // We limit the latter to AOT compilation, as the JIT may or may not inline // depending on the state of classes at runtime. - const bool honor_inlining_directives = - IsCompilingWithCoreImage() && Runtime::Current()->IsAotCompiler(); + const bool honor_noinline_directives = codegen_->GetCompilerOptions().CompilingWithCoreImage(); + const bool honor_inline_directives = + honor_noinline_directives && Runtime::Current()->IsAotCompiler(); // Keep a copy of all blocks when starting the visit. ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); @@ -164,25 +172,32 @@ void HInliner::Run() { HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. 
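The comment block above describes the test-only naming convention: "$noinline$" in a callee name suppresses inlining, while "$inline$" (honored only for AOT core-image builds) asserts that inlining must succeed. A rough sketch of that decision reduced to plain string checks (TryInlineStub and MaybeInline are placeholders, not the inliner's real API):

#include <cassert>
#include <string>

// Placeholder for HInliner::TryInline(); pretend every call site can be inlined.
static bool TryInlineStub(const std::string& /* callee */) { return true; }

// Mirrors the directive handling: skip on $noinline$, otherwise try, and when
// inline directives are honored require success for names carrying $inline$.
bool MaybeInline(const std::string& callee_name, bool honor_inline_directives) {
  if (callee_name.find("$noinline$") != std::string::npos) {
    return false;  // the test asked us not to inline this callee
  }
  bool inlined = TryInlineStub(callee_name);
  if (!inlined && honor_inline_directives) {
    bool should_have_inlined = callee_name.find("$inline$") != std::string::npos;
    assert(!should_have_inlined && "Could not inline a $inline$ method");
  }
  return inlined;
}

int main() {
  MaybeInline("void Test.$noinline$foo()", /* honor_inline_directives= */ true);
  MaybeInline("int Test.$inline$bar(int)", /* honor_inline_directives= */ true);
  return 0;
}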
if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { - if (honor_inlining_directives) { + if (honor_noinline_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod( - call->GetDexMethodIndex(), /* with_signature */ false); + call->GetDexMethodIndex(), /* with_signature= */ false); // Tests prevent inlining by having $noinline$ in their method names. if (callee_name.find("$noinline$") == std::string::npos) { - if (!TryInline(call)) { + if (TryInline(call)) { + didInline = true; + } else if (honor_inline_directives) { bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos); CHECK(!should_have_inlined) << "Could not inline " << callee_name; } } } else { + DCHECK(!honor_inline_directives); // Normal case: try to inline. - TryInline(call); + if (TryInline(call)) { + didInline = true; + } } } instruction = next; } } + + return didInline; } static bool IsMethodOrDeclaringClassFinal(ArtMethod* method) @@ -274,7 +289,7 @@ static uint32_t FindMethodIndexIn(ArtMethod* method, } } -static dex::TypeIndex FindClassIndexIn(mirror::Class* cls, +static dex::TypeIndex FindClassIndexIn(ObjPtr<mirror::Class> cls, const DexCompilationUnit& compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_) { const DexFile& dex_file = *compilation_unit.GetDexFile(); @@ -353,7 +368,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheType( } } -static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes) +static ObjPtr<mirror::Class> GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(classes->Get(0) != nullptr); return classes->Get(0); @@ -367,6 +382,11 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { // No CHA-based devirtulization for AOT compiler (yet). return nullptr; } + if (Runtime::Current()->IsZygote()) { + // No CHA-based devirtulization for Zygote, as it compiles with + // offline information. + return nullptr; + } if (outermost_graph_->IsCompilingOsr()) { // We do not support HDeoptimize in OSR methods. return nullptr; @@ -392,7 +412,7 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return single_impl; } -static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* method) +static bool IsMethodUnverified(const CompilerOptions& compiler_options, ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { if (!method->GetDeclaringClass()->IsVerified()) { if (Runtime::Current()->UseJitCompilation()) { @@ -401,8 +421,9 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* return true; } uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); - if (!compiler_driver->IsMethodVerifiedWithoutFailures( - method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { + if (!compiler_options.IsMethodVerifiedWithoutFailures(method->GetDexMethodIndex(), + class_def_idx, + *method->GetDexFile())) { // Method has soft or hard failures, don't analyze. 
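[Editorial sketch] The directive handling above relies on method-name substrings: "$noinline$" suppresses inlining in tests, while "$inline$" (honored only for AOT core-image compilation) asserts that inlining actually happened. A small self-contained restatement of that convention, with the CHECK replaced by a plain assert:

#include <cassert>
#include <string>

// Returns true when the callee may be inlined under the test convention:
// "$noinline$" in the method name blocks inlining.
bool AllowedByNoinlineDirective(const std::string& callee_name) {
  return callee_name.find("$noinline$") == std::string::npos;
}

// Mirrors the CHECK in the hunk above: under AOT core-image compilation,
// a method whose name contains "$inline$" must have been inlined.
void AssertInlineDirective(const std::string& callee_name, bool inlined) {
  const bool should_have_inlined = callee_name.find("$inline$") != std::string::npos;
  assert(!(should_have_inlined && !inlined) && "Could not inline");
}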
return true; } @@ -410,11 +431,11 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* return false; } -static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* method) +static bool AlwaysThrows(const CompilerOptions& compiler_options, ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK(method != nullptr); // Skip non-compilable and unverified methods. - if (!method->IsCompilable() || IsMethodUnverified(compiler_driver, method)) { + if (!method->IsCompilable() || IsMethodUnverified(compiler_options, method)) { return false; } // Skip native methods, methods with try blocks, and methods that are too large. @@ -446,9 +467,10 @@ static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* metho bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || - invoke_instruction->IsInvokePolymorphic()) { - return false; // Don't bother to move further if we know the method is unresolved or an - // invoke-polymorphic. + invoke_instruction->IsInvokePolymorphic() || + invoke_instruction->IsInvokeCustom()) { + return false; // Don't bother to move further if we know the method is unresolved or the + // invocation is polymorphic (invoke-{polymorphic,custom}). } ScopedObjectAccess soa(Thread::Current()); @@ -487,7 +509,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), - /* do_rtp */ true, + /* do_rtp= */ true, cha_devirtualize); if (result) { // Successfully inlined. @@ -501,7 +523,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); } } - } else if (!cha_devirtualize && AlwaysThrows(compiler_driver_, actual_method)) { + } else if (!cha_devirtualize && AlwaysThrows(codegen_->GetCompilerOptions(), actual_method)) { // Set always throws property for non-inlined method call with single target // (unless it was obtained through CHA, because that would imply we have // to add the CHA dependency, which seems not worth it). @@ -524,7 +546,7 @@ static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder( Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle( mirror::ObjectArray<mirror::Class>::Alloc( self, - class_linker->GetClassRoot(ClassLinker::kClassArrayClass), + GetClassRoot<mirror::ObjectArray<mirror::Class>>(class_linker), InlineCache::kIndividualCacheSize)); if (inline_cache == nullptr) { // We got an OOME. Just clear the exception, and don't inline. @@ -572,9 +594,12 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, StackHandleScope<1> hs(Thread::Current()); Handle<mirror::ObjectArray<mirror::Class>> inline_cache; - InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler() - ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache) - : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); + // The Zygote JIT compiles based on a profile, so we shouldn't use runtime inline caches + // for it. + InlineCacheType inline_cache_type = + (Runtime::Current()->IsAotCompiler() || Runtime::Current()->IsZygote()) + ? 
GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache) + : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); switch (inline_cache_type) { case kInlineCacheNoData: { @@ -662,8 +687,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT( StackHandleScope<1>* hs, /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache) REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(Runtime::Current()->IsAotCompiler()); - const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo(); + const ProfileCompilationInfo* pci = codegen_->GetCompilerOptions().GetProfileCompilationInfo(); if (pci == nullptr) { return kInlineCacheNoData; } @@ -716,7 +740,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( offline_profile.dex_references.size()); for (size_t i = 0; i < offline_profile.dex_references.size(); i++) { bool found = false; - for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) { + for (const DexFile* dex_file : codegen_->GetCompilerOptions().GetDexFilesForOatFile()) { if (offline_profile.dex_references[i].MatchesDex(dex_file)) { dex_profile_index_to_dex_cache[i] = caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file); @@ -764,7 +788,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, HInstruction* receiver, uint32_t dex_pc) const { - ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + ArtField* field = GetClassRoot<mirror::Object>(class_linker)->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); HInstanceFieldGet* result = new (graph_->GetAllocator()) HInstanceFieldGet( receiver, @@ -841,9 +865,9 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); if (!TryInlineAndReplace(invoke_instruction, resolved_method, - ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true), - /* do_rtp */ false, - /* cha_devirtualize */ false)) { + ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true), + /* do_rtp= */ false, + /* cha_devirtualize= */ false)) { return false; } @@ -854,7 +878,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, class_index, monomorphic_type, invoke_instruction, - /* with_deoptimization */ true); + /* with_deoptimization= */ true); // Run type propagation to get the guard typed, and eventually propagate the // type of the receiver. @@ -862,7 +886,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp_fixup.Run(); MaybeRecordStat(stats_, MethodCompilationStat::kInlinedMonomorphicCall); @@ -932,9 +956,9 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, klass, is_referrer, invoke_instruction->GetDexPc(), - /* needs_access_check */ false); + /* needs_access_check= */ false); HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind( - load_class, codegen_, compiler_driver_, caller_compilation_unit_); + load_class, codegen_, caller_compilation_unit_); DCHECK(kind != HLoadClass::LoadKind::kInvalid) << "We should always be able to reference a class for inline caches"; // Load kind must be set before inserting the instruction into the graph. 
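[Editorial sketch] The inline-cache selection above now also routes zygote JIT compilation through the profile-based (AOT-style) path, since the zygote compiles from offline profile data. Reduced to the two booleans that drive it (stand-ins for the Runtime queries), the choice looks like this:

// Profile-based inline caches for AOT and zygote compilation,
// runtime inline caches otherwise.
enum class InlineCacheSource { kProfile, kRuntime };

InlineCacheSource ChooseInlineCacheSource(bool is_aot_compiler, bool is_zygote) {
  return (is_aot_compiler || is_zygote) ? InlineCacheSource::kProfile
                                        : InlineCacheSource::kRuntime;
}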
@@ -1010,7 +1034,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, if (!class_index.IsValid() || !TryBuildAndInline(invoke_instruction, method, - ReferenceTypeInfo::Create(handle, /* is_exact */ true), + ReferenceTypeInfo::Create(handle, /* is_exact= */ true), &return_replacement)) { all_targets_inlined = false; } else { @@ -1062,7 +1086,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp_fixup.Run(); return true; } @@ -1133,14 +1157,14 @@ void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare, graph_->UpdateLoopAndTryInformationOfNewBlock( - then, original_invoke_block, /* replace_if_back_edge */ false); + then, original_invoke_block, /* replace_if_back_edge= */ false); graph_->UpdateLoopAndTryInformationOfNewBlock( - otherwise, original_invoke_block, /* replace_if_back_edge */ false); + otherwise, original_invoke_block, /* replace_if_back_edge= */ false); // In case the original invoke location was a back edge, we need to update // the loop to now have the merge block as a back edge. graph_->UpdateLoopAndTryInformationOfNewBlock( - merge, original_invoke_block, /* replace_if_back_edge */ true); + merge, original_invoke_block, /* replace_if_back_edge= */ true); } bool HInliner::TryInlinePolymorphicCallToSameTarget( @@ -1258,7 +1282,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp_fixup.Run(); MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall); @@ -1281,9 +1305,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, // If invoke_instruction is devirtualized to a different method, give intrinsics // another chance before we try to inline it. - bool wrong_invoke_type = false; - if (invoke_instruction->GetResolvedMethod() != method && - IntrinsicsRecognizer::Recognize(invoke_instruction, method, &wrong_invoke_type)) { + if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) { MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized); if (invoke_instruction->IsInvokeInterface()) { // We don't intrinsify an invoke-interface directly. @@ -1296,6 +1318,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, invoke_instruction->GetDexMethodIndex(), // Use interface method's dex method index. method, method->GetMethodIndex()); + DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); HInputsRef inputs = invoke_instruction->GetInputs(); for (size_t index = 0; index != inputs.size(); ++index) { new_invoke->SetArgumentAt(index, inputs[index]); @@ -1305,14 +1328,11 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, if (invoke_instruction->GetType() == DataType::Type::kReference) { new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); } - // Run intrinsic recognizer again to set new_invoke's intrinsic. - IntrinsicsRecognizer::Recognize(new_invoke, method, &wrong_invoke_type); - DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); return_replacement = new_invoke; // invoke_instruction is replaced with new_invoke. should_remove_invoke_instruction = true; } else { - // invoke_instruction is intrinsified and stays. 
+ invoke_instruction->SetResolvedMethod(method); } } else if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { @@ -1386,7 +1406,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, - /* is_first_run */ false).Run(); + /* is_first_run= */ false).Run(); } return true; } @@ -1403,6 +1423,18 @@ size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const { return count; } +static inline bool MayInline(const CompilerOptions& compiler_options, + const DexFile& inlined_from, + const DexFile& inlined_into) { + // We're not allowed to inline across dex files if we're the no-inline-from dex file. + if (!IsSameDexFile(inlined_from, inlined_into) && + ContainsElement(compiler_options.GetNoInlineFromDexFile(), &inlined_from)) { + return false; + } + + return true; +} + bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, @@ -1424,8 +1456,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, // Check whether we're allowed to inline. The outermost compilation unit is the relevant // dex file here (though the transitivity of an inline chain would allow checking the calller). - if (!compiler_driver_->MayInline(method->GetDexFile(), - outer_compilation_unit_.GetDexFile())) { + if (!MayInline(codegen_->GetCompilerOptions(), + *method->GetDexFile(), + *outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { LOG_SUCCESS() << "Successfully replaced pattern of invoke " << method->PrettyMethod(); @@ -1450,7 +1483,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); + size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem) << "Method " << method->PrettyMethod() @@ -1474,7 +1507,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - if (IsMethodUnverified(compiler_driver_, method)) { + if (IsMethodUnverified(codegen_->GetCompilerOptions(), method)) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) << "Method " << method->PrettyMethod() << " couldn't be verified, so it cannot be inlined"; @@ -1599,7 +1632,8 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, [](uint16_t index) { return index != DexFile::kDexNoIndex16; })); // Create HInstanceFieldSet for each IPUT that stores non-zero data. - HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u); + HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, + /* arg_vreg_index= */ 0u); bool needs_constructor_barrier = false; for (size_t i = 0; i != number_of_iputs; ++i) { HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]); @@ -1617,7 +1651,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, } } if (needs_constructor_barrier) { - // See CompilerDriver::RequiresConstructorBarrier for more details. + // See DexCompilationUnit::RequiresConstructorBarrier for more details. 
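[Editorial sketch] The new MayInline() helper above rejects cross-dex-file inlining only when the callee's dex file appears in the compiler options' no-inline-from list. A standalone restatement with dex files modelled as path strings and IsSameDexFile/ContainsElement replaced by their std equivalents:

#include <algorithm>
#include <string>
#include <vector>

// Cross-"file" inlining is rejected only when the callee's file is on the
// no-inline-from list; inlining within one file is always allowed.
bool MayInlineSketch(const std::vector<std::string>& no_inline_from,
                     const std::string& inlined_from,
                     const std::string& inlined_into) {
  if (inlined_from != inlined_into &&
      std::find(no_inline_from.begin(), no_inline_from.end(), inlined_from) !=
          no_inline_from.end()) {
    return false;
  }
  return true;
}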
DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence"; HConstructorFence* constructor_fence = @@ -1641,7 +1675,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, REQUIRES_SHARED(Locks::mutator_lock_) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ArtField* resolved_field = - class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); + class_linker->LookupResolvedField(field_index, referrer, /* is_static= */ false); DCHECK(resolved_field != nullptr); HInstanceFieldGet* iget = new (graph_->GetAllocator()) HInstanceFieldGet( obj, @@ -1654,7 +1688,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. - /* dex_pc */ 0); + /* dex_pc= */ 0); if (iget->GetType() == DataType::Type::kReference) { // Use the same dex_cache that we used for field lookup as the hint_dex_cache. Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache()); @@ -1662,7 +1696,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, outer_compilation_unit_.GetClassLoader(), dex_cache, handles_, - /* is_first_run */ false); + /* is_first_run= */ false); rtp.Visit(iget); } return iget; @@ -1676,7 +1710,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, REQUIRES_SHARED(Locks::mutator_lock_) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ArtField* resolved_field = - class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); + class_linker->LookupResolvedField(field_index, referrer, /* is_static= */ false); DCHECK(resolved_field != nullptr); if (is_final != nullptr) { // This information is needed only for constructors. @@ -1695,18 +1729,33 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, *referrer->GetDexFile(), // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. - /* dex_pc */ 0); + /* dex_pc= */ 0); return iput; } template <typename T> -static inline Handle<T> NewHandleIfDifferent(T* object, +static inline Handle<T> NewHandleIfDifferent(ObjPtr<T> object, Handle<T> hint, VariableSizedHandleScope* handles) REQUIRES_SHARED(Locks::mutator_lock_) { return (object != hint.Get()) ? handles->NewHandle(object) : hint; } +static bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!Runtime::Current()->IsAotCompiler()) { + // JIT can always encode methods in stack maps. + return true; + } + if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) { + return true; + } + // TODO(ngeoffray): Support more AOT cases for inlining: + // - methods in multidex + // - methods in boot image for on-device non-PIC compilation. 
+ return false; +} + bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, ArtMethod* resolved_method, ReferenceTypeInfo receiver_type, @@ -1714,7 +1763,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, HInstruction** return_replacement) { DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid())); ScopedObjectAccess soa(Thread::Current()); - const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); + const dex::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); @@ -1727,6 +1776,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, caller_compilation_unit_.GetClassLoader(), handles_); + Handle<mirror::Class> compiling_class = handles_->NewHandle(resolved_method->GetDeclaringClass()); DexCompilationUnit dex_compilation_unit( class_loader, class_linker, @@ -1735,8 +1785,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, resolved_method->GetDeclaringClass()->GetDexClassDefIndex(), method_index, resolved_method->GetAccessFlags(), - /* verified_method */ nullptr, - dex_cache); + /* verified_method= */ nullptr, + dex_cache, + compiling_class); InvokeType invoke_type = invoke_instruction->GetInvokeType(); if (invoke_type == kInterface) { @@ -1745,16 +1796,25 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, invoke_type = kVirtual; } + bool caller_dead_reference_safe = graph_->IsDeadReferenceSafe(); + const dex::ClassDef& callee_class = resolved_method->GetClassDef(); + // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation() + // is currently rarely true. 
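[Editorial sketch] CanEncodeInlinedMethodInStackMap() above gates inlining on whether the inlined frame can be described in a stack map: the JIT always can, while AOT is currently limited to callees from the caller's own dex file. A condensed sketch, with dex files represented by their paths:

#include <string>

bool CanEncodeInlinedMethodSketch(bool is_aot_compiler,
                                  const std::string& caller_dex_file,
                                  const std::string& callee_dex_file) {
  if (!is_aot_compiler) {
    return true;  // JIT can always encode methods in stack maps.
  }
  // AOT: currently only same-dex-file callees are supported.
  return caller_dex_file == callee_dex_file;
}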
+ bool callee_dead_reference_safe = + annotations::HasDeadReferenceSafeAnnotation(callee_dex_file, callee_class) + && !annotations::MethodContainsRSensitiveAccess(callee_dex_file, callee_class, method_index); + const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId(); HGraph* callee_graph = new (graph_->GetAllocator()) HGraph( graph_->GetAllocator(), graph_->GetArenaStack(), callee_dex_file, method_index, - compiler_driver_->GetInstructionSet(), + codegen_->GetCompilerOptions().GetInstructionSet(), invoke_type, + callee_dead_reference_safe, graph_->IsDebuggable(), - /* osr */ false, + /* osr= */ false, caller_instruction_counter); callee_graph->SetArtMethod(resolved_method); @@ -1775,7 +1835,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, code_item_accessor, &dex_compilation_unit, &outer_compilation_unit_, - compiler_driver_, codegen_, inline_stats_, resolved_method->GetQuickenedInfo(), @@ -1788,8 +1847,8 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return false; } - if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, - compiler_driver_->GetInstructionSet())) { + if (!RegisterAllocator::CanAllocateRegistersFor( + *callee_graph, codegen_->GetCompilerOptions().GetInstructionSet())) { LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator) << "Method " << callee_dex_file.PrettyMethod(method_index) << " cannot be inlined because of the register allocator"; @@ -1836,7 +1895,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, outer_compilation_unit_.GetClassLoader(), dex_compilation_unit.GetDexCache(), handles_, - /* is_first_run */ false).Run(); + /* is_first_run= */ false).Run(); } RunOptimizations(callee_graph, code_item, dex_compilation_unit); @@ -1980,23 +2039,26 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, inline_stats_->AddTo(stats_); } + if (caller_dead_reference_safe && !callee_dead_reference_safe) { + // Caller was dead reference safe, but is not anymore, since we inlined dead + // reference unsafe code. Prior transformations remain valid, since they did not + // affect the inlined code. + graph_->MarkDeadReferenceUnsafe(); + } + return true; } void HInliner::RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, + const dex::CodeItem* code_item, const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); - HSharpening sharpening(callee_graph, codegen_, compiler_driver_); - InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); - IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); + InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_); HOptimization* optimizations[] = { - &intrinsics, - &sharpening, &simplify, &fold, &dce, @@ -2031,7 +2093,6 @@ void HInliner::RunOptimizations(HGraph* callee_graph, codegen_, outer_compilation_unit_, dex_compilation_unit, - compiler_driver_, handles_, inline_stats_, total_number_of_dex_registers_ + accessor.RegistersSize(), @@ -2065,7 +2126,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* // is more specific than the class which declares the method. 
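[Editorial sketch] The dead-reference-safety bookkeeping above computes callee_dead_reference_safe from the dead-reference-safe annotation and the absence of reachability-sensitive accesses, and downgrades the caller graph when unsafe code is inlined into a safe one. A sketch of that merge, with the annotation queries reduced to a boolean:

struct GraphSketch {
  bool dead_reference_safe = true;
  void MarkDeadReferenceUnsafe() { dead_reference_safe = false; }
};

void MergeDeadReferenceSafety(GraphSketch* caller_graph,
                              bool callee_dead_reference_safe) {
  if (caller_graph->dead_reference_safe && !callee_dead_reference_safe) {
    // Prior transformations remain valid, since they did not affect the
    // inlined code; only later passes must now be conservative.
    caller_graph->MarkDeadReferenceUnsafe();
  }
}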
if (!resolved_method->IsStatic()) { if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()), - /* declared_can_be_null */ false, + /* declared_can_be_null= */ false, invoke_instruction->InputAt(0u))) { return true; } @@ -2074,7 +2135,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* // Iterate over the list of parameter types and test whether any of the // actual inputs has a more specific reference type than the type declared in // the signature. - const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList(); + const dex::TypeList* param_list = resolved_method->GetParameterTypeList(); for (size_t param_idx = 0, input_idx = resolved_method->IsStatic() ? 0 : 1, e = (param_list == nullptr ? 0 : param_list->Size()); @@ -2085,7 +2146,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* ObjPtr<mirror::Class> param_cls = resolved_method->LookupResolvedClassFromTypeIndex( param_list->GetTypeItem(param_idx).type_idx_); if (IsReferenceTypeRefinement(GetClassRTI(param_cls), - /* declared_can_be_null */ true, + /* declared_can_be_null= */ true, input)) { return true; } @@ -2102,14 +2163,13 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, if (return_replacement->GetType() == DataType::Type::kReference) { // Test if the return type is a refinement of the declared return type. if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(), - /* declared_can_be_null */ true, + /* declared_can_be_null= */ true, return_replacement)) { return true; } else if (return_replacement->IsInstanceFieldGet()) { HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet(); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); if (field_get->GetFieldInfo().GetField() == - class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) { + GetClassRoot<mirror::Object>()->GetInstanceField(0)) { return true; } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 02465d37ba..15d7349694 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -19,13 +19,12 @@ #include "dex/dex_file_types.h" #include "dex/invoke_type.h" -#include "jit/profile_compilation_info.h" #include "optimization.h" +#include "profile/profile_compilation_info.h" namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; class HGraph; class HInvoke; @@ -38,7 +37,6 @@ class HInliner : public HOptimization { CodeGenerator* codegen, const DexCompilationUnit& outer_compilation_unit, const DexCompilationUnit& caller_compilation_unit, - CompilerDriver* compiler_driver, VariableSizedHandleScope* handles, OptimizingCompilerStats* stats, size_t total_number_of_dex_registers, @@ -51,7 +49,6 @@ class HInliner : public HOptimization { outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), codegen_(codegen), - compiler_driver_(compiler_driver), total_number_of_dex_registers_(total_number_of_dex_registers), total_number_of_instructions_(total_number_of_instructions), parent_(parent), @@ -60,7 +57,7 @@ class HInliner : public HOptimization { handles_(handles), inline_stats_(nullptr) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kInlinerPassName = "inliner"; @@ -101,7 +98,7 @@ class HInliner : public HOptimization { // Run simple optimizations on `callee_graph`. 
void RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, + const dex::CodeItem* code_item, const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); @@ -280,7 +277,6 @@ class HInliner : public HOptimization { const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; - CompilerDriver* const compiler_driver_; const size_t total_number_of_dex_registers_; size_t total_number_of_instructions_; diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index c7aef3779d..5e7b57523f 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -19,12 +19,13 @@ #include "art_method-inl.h" #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/logging.h" #include "block_builder.h" -#include "class_linker.h" +#include "class_linker-inl.h" +#include "code_generator.h" #include "data_type-inl.h" #include "dex/bytecode_utils.h" #include "dex/dex_instruction-inl.h" -#include "driver/compiler_driver-inl.h" #include "driver/dex_compilation_unit.h" #include "driver/compiler_options.h" #include "imtable-inl.h" @@ -47,7 +48,6 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, DataType::Type return_type, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* compiler_driver, CodeGenerator* code_generator, ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, @@ -61,7 +61,6 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, return_type_(return_type), block_builder_(block_builder), ssa_builder_(ssa_builder), - compiler_driver_(compiler_driver), code_generator_(code_generator), dex_compilation_unit_(dex_compilation_unit), outer_compilation_unit_(outer_compilation_unit), @@ -73,7 +72,8 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph, current_locals_(nullptr), latest_result_(nullptr), current_this_parameter_(nullptr), - loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + class_cache_(std::less<dex::TypeIndex>(), local_allocator->Adapter(kArenaAllocGraphBuilder)) { loop_headers_.reserve(kDefaultNumberOfLoops); } @@ -319,8 +319,8 @@ bool HInstructionBuilder::Build() { // Find locations where we want to generate extra stackmaps for native debugging. // This allows us to generate the info only at interesting points (for example, // at start of java statement) rather than before every dex instruction. - const bool native_debuggable = compiler_driver_ != nullptr && - compiler_driver_->GetCompilerOptions().GetNativeDebuggable(); + const bool native_debuggable = code_generator_ != nullptr && + code_generator_->GetCompilerOptions().GetNativeDebuggable(); ArenaBitVector* native_debug_info_locations = nullptr; if (native_debuggable) { native_debug_info_locations = FindNativeDebugInfoLocations(); @@ -434,7 +434,7 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { HInvokeStaticOrDirect::DispatchInfo dispatch_info = { HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - /* method_load_data */ 0u + /* method_load_data= */ 0u }; InvokeType invoke_type = dex_compilation_unit_->IsStatic() ? 
kStatic : kDirect; HInvokeStaticOrDirect* invoke = new (allocator_) HInvokeStaticOrDirect( @@ -448,14 +448,8 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { invoke_type, target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - HandleInvoke(invoke, - in_vregs, - /* args */ nullptr, - graph_->GetNumberOfVRegs() - in_vregs, - /* is_range */ true, - dex_file_->GetMethodShorty(method_idx), - /* clinit_check */ nullptr, - /* is_unresolved */ false); + RangeInstructionOperands operands(graph_->GetNumberOfVRegs() - in_vregs, in_vregs); + HandleInvoke(invoke, operands, dex_file_->GetMethodShorty(method_idx), /* is_unresolved= */ false); // Add the return instruction. if (return_type_ == DataType::Type::kVoid) { @@ -472,22 +466,17 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { } ArenaBitVector* HInstructionBuilder::FindNativeDebugInfoLocations() { - // The callback gets called when the line number changes. - // In other words, it marks the start of new java statement. - struct Callback { - static bool Position(void* ctx, const DexFile::PositionInfo& entry) { - static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_); - return false; - } - }; ArenaBitVector* locations = ArenaBitVector::Create(local_allocator_, code_item_accessor_.InsnsSizeInCodeUnits(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocGraphBuilder); locations->ClearAllBits(); - dex_file_->DecodeDebugPositionInfo(code_item_accessor_.DebugInfoOffset(), - Callback::Position, - locations); + // The visitor gets called when the line number changes. + // In other words, it marks the start of new java statement. + code_item_accessor_.DecodeDebugPositionInfo([&](const DexFile::PositionInfo& entry) { + locations->SetBit(entry.address_); + return false; + }); // Instruction-specific tweaks. for (const DexInstructionPcPair& inst : code_item_accessor_) { switch (inst->Opcode()) { @@ -570,7 +559,7 @@ void HInstructionBuilder::InitializeParameters() { uint16_t locals_index = graph_->GetNumberOfLocalVRegs(); uint16_t parameter_index = 0; - const DexFile::MethodId& referrer_method_id = + const dex::MethodId& referrer_method_id = dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex()); if (!dex_compilation_unit_->IsStatic()) { // Add the implicit 'this' argument, not expressed in the signature. 
@@ -578,7 +567,7 @@ void HInstructionBuilder::InitializeParameters() { referrer_method_id.class_idx_, parameter_index++, DataType::Type::kReference, - /* is_this */ true); + /* is_this= */ true); AppendInstruction(parameter); UpdateLocal(locals_index++, parameter); number_of_parameters--; @@ -587,15 +576,15 @@ void HInstructionBuilder::InitializeParameters() { DCHECK(current_this_parameter_ == nullptr); } - const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); - const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto); + const dex::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); + const dex::TypeList* arg_types = dex_file_->GetProtoParameters(proto); for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) { HParameterValue* parameter = new (allocator_) HParameterValue( *dex_file_, arg_types->GetTypeItem(shorty_pos - 1).type_idx_, parameter_index++, DataType::FromShorty(shorty[shorty_pos]), - /* is_this */ false); + /* is_this= */ false); ++shorty_pos; AppendInstruction(parameter); // Store the parameter value in the local that the dex code will use @@ -720,20 +709,18 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse // Does the method being compiled need any constructor barriers being inserted? // (Always 'false' for methods that aren't <init>.) -static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) { +static bool RequiresConstructorBarrier(const DexCompilationUnit* cu) { // Can be null in unit tests only. if (UNLIKELY(cu == nullptr)) { return false; } - Thread* self = Thread::Current(); - return cu->IsConstructor() - && !cu->IsStatic() - // RequiresConstructorBarrier must only be queried for <init> methods; - // it's effectively "false" for every other method. - // - // See CompilerDriver::RequiresConstructBarrier for more explanation. - && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); + // Constructor barriers are applicable only for <init> methods. + if (LIKELY(!cu->IsConstructor() || cu->IsStatic())) { + return false; + } + + return cu->RequiresConstructorBarrier(); } // Returns true if `block` has only one successor which starts at the next @@ -779,7 +766,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, // Only <init> (which is a return-void) could possibly have a constructor fence. // This may insert additional redundant constructor fences from the super constructors. // TODO: remove redundant constructor fences (b/36656456). - if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) { + if (RequiresConstructorBarrier(dex_compilation_unit_)) { // Compiling instance constructor. DCHECK_STREQ("<init>", graph_->GetMethodName()); @@ -793,7 +780,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, } AppendInstruction(new (allocator_) HReturnVoid(dex_pc)); } else { - DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)); + DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); AppendInstruction(new (allocator_) HReturn(value, dex_pc)); } @@ -860,7 +847,7 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in // make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of // which require runtime handling. 
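[Editorial sketch] RequiresConstructorBarrier() above no longer consults the CompilerDriver: only instance constructors can need a barrier, and the final decision is delegated to the DexCompilationUnit. Restated with plain booleans standing in for the unit's accessors:

bool RequiresConstructorBarrierSketch(bool has_unit,
                                      bool is_constructor,
                                      bool is_static,
                                      bool unit_requires_barrier) {
  if (!has_unit) {
    return false;  // Can be null in unit tests only.
  }
  if (!is_constructor || is_static) {
    return false;  // Applicable only to instance <init> methods.
  }
  return unit_requires_barrier;
}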
if (invoke_type == kSuper) { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); + ObjPtr<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass().Get(); if (compiling_class == nullptr) { // We could not determine the method's class we need to wait until runtime. DCHECK(Runtime::Current()->IsAotCompiler()); @@ -890,8 +877,8 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in // The back-end code generator relies on this check in order to ensure that it will not // attempt to read the dex_cache with a dex_method_index that is not from the correct // dex_file. If we didn't do this check then the dex_method_index will not be updated in the - // builder, which means that the code-generator (and compiler driver during sharpening and - // inliner, maybe) might invoke an incorrect method. + // builder, which means that the code-generator (and sharpening and inliner, maybe) + // might invoke an incorrect method. // TODO: The actual method could still be referenced in the current dex file, so we // could try locating it. // TODO: Remove the dex_file restriction. @@ -916,16 +903,13 @@ static bool IsStringConstructor(ArtMethod* method) { bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { + const InstructionOperands& operands) { InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); - const char* descriptor = dex_file_->GetMethodShorty(method_idx); - DataType::Type return_type = DataType::FromShorty(descriptor[0]); + const char* shorty = dex_file_->GetMethodShorty(method_idx); + DataType::Type return_type = DataType::FromShorty(shorty[0]); // Remove the return type from the 'proto'. - size_t number_of_arguments = strlen(descriptor) - 1; + size_t number_of_arguments = strlen(shorty) - 1; if (invoke_type != kStatic) { // instance call // One extra argument for 'this'. number_of_arguments++; @@ -942,14 +926,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, dex_pc, method_idx, invoke_type); - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - nullptr, /* clinit_check */ - true /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ true); } // Replace calls to String.<init> with StringFactory. @@ -968,20 +945,15 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, HInvoke* invoke = new (allocator_) HInvokeStaticOrDirect( allocator_, number_of_arguments - 1, - DataType::Type::kReference /*return_type */, + /* return_type= */ DataType::Type::kReference, dex_pc, method_idx, - nullptr /* resolved_method */, + /* resolved_method= */ nullptr, dispatch_info, invoke_type, target_method, HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit); - return HandleStringInit(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor); + return HandleStringInit(invoke, operands, shorty); } // Potential class initialization check, in the case of a static method call. 
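[Editorial sketch] BuildInvoke() above derives everything it needs from the method shorty: the first character is the return type and each remaining character one argument, with an extra slot for 'this' on instance calls. A self-contained helper capturing that counting rule:

#include <cstddef>
#include <cstring>

size_t NumberOfArguments(const char* shorty, bool is_instance_call) {
  size_t number_of_arguments = std::strlen(shorty) - 1;  // Drop the return type.
  if (is_instance_call) {
    ++number_of_arguments;  // One extra argument for 'this'.
  }
  return number_of_arguments;
}

char ReturnTypeShorty(const char* shorty) {
  return shorty[0];  // E.g. 'V' for void, 'I' for int, 'L' for a reference.
}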
@@ -994,8 +966,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, = HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit; ScopedObjectAccess soa(Thread::Current()); if (invoke_type == kStatic) { - clinit_check = ProcessClinitCheckForInvoke( - dex_pc, resolved_method, &clinit_check_requirement); + clinit_check = + ProcessClinitCheckForInvoke(dex_pc, resolved_method, &clinit_check_requirement); } else if (invoke_type == kSuper) { if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) { // Update the method index to the one resolved. Note that this may be a no-op if @@ -1004,11 +976,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, } } - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; + HInvokeStaticOrDirect::DispatchInfo dispatch_info = + HSharpening::SharpenInvokeStaticOrDirect(resolved_method, code_generator_); MethodReference target_method(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex()); invoke = new (allocator_) HInvokeStaticOrDirect(allocator_, @@ -1041,42 +1010,39 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, resolved_method, ImTable::GetImtIndex(resolved_method)); } - - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - clinit_check, - false /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false, clinit_check); } -bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction ATTRIBUTE_UNUSED, - uint32_t dex_pc, +bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, uint32_t method_idx, - uint32_t proto_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { - const char* descriptor = dex_file_->GetShorty(proto_idx); - DCHECK_EQ(1 + ArtMethod::NumArgRegisters(descriptor), number_of_vreg_arguments); - DataType::Type return_type = DataType::FromShorty(descriptor[0]); - size_t number_of_arguments = strlen(descriptor); + dex::ProtoIndex proto_idx, + const InstructionOperands& operands) { + const char* shorty = dex_file_->GetShorty(proto_idx); + DCHECK_EQ(1 + ArtMethod::NumArgRegisters(shorty), operands.GetNumberOfOperands()); + DataType::Type return_type = DataType::FromShorty(shorty[0]); + size_t number_of_arguments = strlen(shorty); HInvoke* invoke = new (allocator_) HInvokePolymorphic(allocator_, number_of_arguments, return_type, dex_pc, method_idx); - return HandleInvoke(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - nullptr /* clinit_check */, - false /* is_unresolved */); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); +} + + +bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc, + uint32_t call_site_idx, + const InstructionOperands& operands) { + dex::ProtoIndex proto_idx = dex_file_->GetProtoIndexForCallSite(call_site_idx); + const char* shorty = dex_file_->GetShorty(proto_idx); + DataType::Type return_type = DataType::FromShorty(shorty[0]); + size_t number_of_arguments = strlen(shorty) - 1; + HInvoke* invoke = new (allocator_) HInvokeCustom(allocator_, + number_of_arguments, + call_site_idx, + return_type, + dex_pc); + return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false); } HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex 
type_index, uint32_t dex_pc) { @@ -1099,6 +1065,10 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u if (load_class->NeedsAccessCheck() || klass->IsFinalizable() || !klass->IsInstantiable()) { entrypoint = kQuickAllocObjectWithChecks; } + // We will always be able to resolve the string class since it is in the BCP. + if (!klass.IsNull() && klass->IsStringClass()) { + entrypoint = kQuickAllocStringObject; + } // Consider classes we haven't resolved as potentially finalizable. bool finalizable = (klass == nullptr) || klass->IsFinalizable(); @@ -1167,30 +1137,219 @@ void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* alloc MethodCompilationStat::kConstructorFenceGeneratedNew); } +static bool IsInBootImage(ObjPtr<mirror::Class> cls, const CompilerOptions& compiler_options) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (compiler_options.IsBootImage()) { + std::string temp; + const char* descriptor = cls->GetDescriptor(&temp); + return compiler_options.IsImageClass(descriptor); + } else { + return Runtime::Current()->GetHeap()->FindSpaceFromObject(cls, false)->IsImageSpace(); + } +} + static bool IsSubClass(ObjPtr<mirror::Class> to_test, ObjPtr<mirror::Class> super_class) REQUIRES_SHARED(Locks::mutator_lock_) { return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); } +static bool HasTrivialClinit(ObjPtr<mirror::Class> klass, PointerSize pointer_size) + REQUIRES_SHARED(Locks::mutator_lock_) { + // Check if the class has encoded fields that trigger bytecode execution. + // (Encoded fields are just a different representation of <clinit>.) + if (klass->NumStaticFields() != 0u) { + DCHECK(klass->GetClassDef() != nullptr); + EncodedStaticFieldValueIterator it(klass->GetDexFile(), *klass->GetClassDef()); + for (; it.HasNext(); it.Next()) { + switch (it.GetValueType()) { + case EncodedArrayValueIterator::ValueType::kBoolean: + case EncodedArrayValueIterator::ValueType::kByte: + case EncodedArrayValueIterator::ValueType::kShort: + case EncodedArrayValueIterator::ValueType::kChar: + case EncodedArrayValueIterator::ValueType::kInt: + case EncodedArrayValueIterator::ValueType::kLong: + case EncodedArrayValueIterator::ValueType::kFloat: + case EncodedArrayValueIterator::ValueType::kDouble: + case EncodedArrayValueIterator::ValueType::kNull: + case EncodedArrayValueIterator::ValueType::kString: + // Primitive, null or j.l.String initialization is permitted. + break; + case EncodedArrayValueIterator::ValueType::kType: + // Type initialization can load classes and execute bytecode through a class loader + // which can execute arbitrary bytecode. We do not optimize for known class loaders; + // kType is rarely used (if ever). + return false; + default: + // Other types in the encoded static field list are rejected by the DexFileVerifier. + LOG(FATAL) << "Unexpected type " << it.GetValueType(); + UNREACHABLE(); + } + } + } + // Check if the class has <clinit> that executes arbitrary code. + // Initialization of static fields of the class itself with constants is allowed. 
+ ArtMethod* clinit = klass->FindClassInitializer(pointer_size); + if (clinit != nullptr) { + const DexFile& dex_file = *clinit->GetDexFile(); + CodeItemInstructionAccessor accessor(dex_file, clinit->GetCodeItem()); + for (DexInstructionPcPair it : accessor) { + switch (it->Opcode()) { + case Instruction::CONST_4: + case Instruction::CONST_16: + case Instruction::CONST: + case Instruction::CONST_HIGH16: + case Instruction::CONST_WIDE_16: + case Instruction::CONST_WIDE_32: + case Instruction::CONST_WIDE: + case Instruction::CONST_WIDE_HIGH16: + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + // Primitive, null or j.l.String initialization is permitted. + break; + case Instruction::RETURN_VOID: + case Instruction::RETURN_VOID_NO_BARRIER: + break; + case Instruction::SPUT: + case Instruction::SPUT_WIDE: + case Instruction::SPUT_OBJECT: + case Instruction::SPUT_BOOLEAN: + case Instruction::SPUT_BYTE: + case Instruction::SPUT_CHAR: + case Instruction::SPUT_SHORT: + // Only initialization of a static field of the same class is permitted. + if (dex_file.GetFieldId(it->VRegB_21c()).class_idx_ != klass->GetDexTypeIndex()) { + return false; + } + break; + case Instruction::NEW_ARRAY: + // Only primitive arrays are permitted. + if (Primitive::GetType(dex_file.GetTypeDescriptor(dex_file.GetTypeId( + dex::TypeIndex(it->VRegC_22c())))[1]) == Primitive::kPrimNot) { + return false; + } + break; + case Instruction::APUT: + case Instruction::APUT_WIDE: + case Instruction::APUT_BOOLEAN: + case Instruction::APUT_BYTE: + case Instruction::APUT_CHAR: + case Instruction::APUT_SHORT: + case Instruction::FILL_ARRAY_DATA: + case Instruction::NOP: + // Allow initialization of primitive arrays (only constants can be stored). + // Note: We expect NOPs used for fill-array-data-payload but accept all NOPs + // (even unreferenced switch payloads if they make it through the verifier). + break; + default: + return false; + } + } + } + return true; +} + +static bool HasTrivialInitialization(ObjPtr<mirror::Class> cls, + const CompilerOptions& compiler_options) + REQUIRES_SHARED(Locks::mutator_lock_) { + Runtime* runtime = Runtime::Current(); + PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize(); + + // Check the superclass chain. + for (ObjPtr<mirror::Class> klass = cls; klass != nullptr; klass = klass->GetSuperClass()) { + if (klass->IsInitialized() && IsInBootImage(klass, compiler_options)) { + break; // `klass` and its superclasses are already initialized in the boot image. + } + if (!HasTrivialClinit(klass, pointer_size)) { + return false; + } + } + + // Also check interfaces with default methods as they need to be initialized as well. + ObjPtr<mirror::IfTable> iftable = cls->GetIfTable(); + DCHECK(iftable != nullptr); + for (int32_t i = 0, count = iftable->Count(); i != count; ++i) { + ObjPtr<mirror::Class> iface = iftable->GetInterface(i); + if (!iface->HasDefaultMethods()) { + continue; // Initializing `cls` does not initialize this interface. + } + if (iface->IsInitialized() && IsInBootImage(iface, compiler_options)) { + continue; // This interface is already initialized in the boot image. + } + if (!HasTrivialClinit(iface, pointer_size)) { + return false; + } + } + return true; +} + bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const { if (cls == nullptr) { return false; } - // `CanAssumeClassIsLoaded` will return true if we're JITting, or will - // check whether the class is in an image for the AOT compilation. 
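[Editorial sketch] HasTrivialClinit() above accepts a class initializer only when every instruction comes from a small allowlist (constants and strings, returns, SPUTs into the declaring class, primitive arrays, fill-array-data, NOPs). A heavily reduced sketch with opcodes modelled as strings; the real code walks dex instructions and also checks operands such as the SPUT's field class:

#include <string>
#include <unordered_set>
#include <vector>

bool IsTrivialClinitSketch(const std::vector<std::string>& opcodes) {
  static const std::unordered_set<std::string> allowed = {
      "const", "const-string", "return-void", "sput-same-class",
      "new-array-primitive", "aput", "fill-array-data", "nop"};
  for (const std::string& opcode : opcodes) {
    if (allowed.count(opcode) == 0) {
      return false;  // Anything outside the allowlist may run arbitrary code.
    }
  }
  return true;
}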
- if (cls->IsInitialized() && - compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) { - return true; - } - - if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) { + // Check if the class will be initialized at runtime. + if (cls->IsInitialized()) { + Runtime* runtime = Runtime::Current(); + if (!runtime->IsAotCompiler()) { + DCHECK(runtime->UseJitCompilation()); + // For JIT, the class cannot revert to an uninitialized state. + return true; + } + // Assume loaded only if klass is in the boot image. App classes cannot be assumed + // loaded because we don't even know what class loader will be used to load them. + if (IsInBootImage(cls.Get(), code_generator_->GetCompilerOptions())) { + return true; + } + } + + // We can avoid the class initialization check for `cls` in static methods and constructors + // in the very same class; invoking a static method involves a class initialization check + // and so does the instance allocation that must be executed before invoking a constructor. + // Other instance methods of the same class can run on an escaped instance + // of an erroneous class. Even a superclass may need to be checked as the subclass + // can be completely initialized while the superclass is initializing and the subclass + // remains initialized when the superclass initializer throws afterwards. b/62478025 + // Note: The HClinitCheck+HInvokeStaticOrDirect merging can still apply. + auto is_static_method_or_constructor_of_cls = [cls](const DexCompilationUnit& compilation_unit) + REQUIRES_SHARED(Locks::mutator_lock_) { + return (compilation_unit.GetAccessFlags() & (kAccStatic | kAccConstructor)) != 0u && + compilation_unit.GetCompilingClass().Get() == cls.Get(); + }; + if (is_static_method_or_constructor_of_cls(*outer_compilation_unit_) || + // Check also the innermost method. Though excessive copies of ClinitCheck can be + // eliminated by GVN, that happens only after the decision whether to inline the + // graph or not and that may depend on the presence of the ClinitCheck. + // TODO: We should walk over the entire inlined method chain, but we don't pass that + // information to the builder. + is_static_method_or_constructor_of_cls(*dex_compilation_unit_)) { return true; } - // TODO: We should walk over the inlined methods, but we don't pass - // that information to the builder. - if (IsSubClass(GetCompilingClass(), cls.Get())) { + // Otherwise, we may be able to avoid the check if `cls` is a superclass of a method being + // compiled here (anywhere in the inlining chain) as the `cls` must have started initializing + // before calling any `cls` or subclass methods. Static methods require a clinit check and + // instance methods require an instance which cannot be created before doing a clinit check. + // When a subclass of `cls` starts initializing, it starts initializing its superclass + // chain up to `cls` without running any bytecode, i.e. without any opportunity for circular + // initialization weirdness. + // + // If the initialization of `cls` is trivial (`cls` and its superclasses and superinterfaces + // with default methods initialize only their own static fields using constant values), it must + // complete, either successfully or by throwing and marking `cls` erroneous, without allocating + // any instances of `cls` or subclasses (or any other class) and without calling any methods. 
+ // If it completes by throwing, no instances of `cls` shall be created and no subclass method + // bytecode shall execute (see above), therefore the instruction we're building shall be + // unreachable. By reaching the instruction, we know that `cls` was initialized successfully. + // + // TODO: We should walk over the entire inlined methods chain, but we don't pass that + // information to the builder. (We could also check if we're guaranteed a non-null instance + // of `cls` at this location but that's outside the scope of the instruction builder.) + bool is_subclass = IsSubClass(outer_compilation_unit_->GetCompilingClass().Get(), cls.Get()); + if (dex_compilation_unit_ != outer_compilation_unit_) { + is_subclass = is_subclass || + IsSubClass(dex_compilation_unit_->GetCompilingClass().Get(), cls.Get()); + } + if (is_subclass && HasTrivialInitialization(cls.Get(), code_generator_->GetCompilerOptions())) { return true; } @@ -1198,9 +1357,9 @@ bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const { } HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( - uint32_t dex_pc, - ArtMethod* resolved_method, - HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { + uint32_t dex_pc, + ArtMethod* resolved_method, + HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) { Handle<mirror::Class> klass = handles_->NewHandle(resolved_method->GetDeclaringClass()); HClinitCheck* clinit_check = nullptr; @@ -1211,7 +1370,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( klass->GetDexFile(), klass, dex_pc, - /* needs_access_check */ false); + /* needs_access_check= */ false); if (cls != nullptr) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; clinit_check = new (allocator_) HClinitCheck(cls, dex_pc); @@ -1222,26 +1381,22 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( } bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, + const InstructionOperands& operands, + const char* shorty, size_t start_index, size_t* argument_index) { - uint32_t descriptor_index = 1; // Skip the return type. - + uint32_t shorty_index = 1; // Skip the return type. + const size_t number_of_operands = operands.GetNumberOfOperands(); for (size_t i = start_index; // Make sure we don't go over the expected arguments or over the number of // dex registers given. If the instruction was seen as dead by the verifier, // it hasn't been properly checked. - (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments()); + (i < number_of_operands) && (*argument_index < invoke->GetNumberOfArguments()); i++, (*argument_index)++) { - DataType::Type type = DataType::FromShorty(descriptor[descriptor_index++]); + DataType::Type type = DataType::FromShorty(shorty[shorty_index++]); bool is_wide = (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64); - if (!is_range - && is_wide - && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) { + if (is_wide && ((i + 1 == number_of_operands) || + (operands.GetOperand(i) + 1 != operands.GetOperand(i + 1)))) { // Longs and doubles should be in pairs, that is, sequential registers. The verifier should // reject any class where this is violated. However, the verifier only does these checks // on non trivially dead instructions, so we just bailout the compilation. 
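[Editorial sketch] IsInitialized() above skips the clinit check when the method being compiled is a static method or constructor of the class in question, tested via the access flags and the compiling class. A sketch of that shortcut; the flag values mirror the dex kAccStatic/kAccConstructor modifiers and are restated here as an assumption rather than pulled from the ART headers:

#include <cstdint>

constexpr uint32_t kAccStaticSketch = 0x0008;        // Assumed dex kAccStatic.
constexpr uint32_t kAccConstructorSketch = 0x00010000;  // Assumed dex kAccConstructor.

bool IsStaticMethodOrConstructorOfClass(uint32_t access_flags,
                                        bool compiling_that_class) {
  return (access_flags & (kAccStaticSketch | kAccConstructorSketch)) != 0u &&
         compiling_that_class;
}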
@@ -1252,7 +1407,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, MethodCompilationStat::kNotCompiledMalformedOpcode); return false; } - HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); + HInstruction* arg = LoadLocal(operands.GetOperand(i), type); invoke->SetArgumentAt(*argument_index, arg); if (is_wide) { i++; @@ -1279,19 +1434,16 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, } bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, - HClinitCheck* clinit_check, - bool is_unresolved) { + const InstructionOperands& operands, + const char* shorty, + bool is_unresolved, + HClinitCheck* clinit_check) { DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit()); size_t start_index = 0; size_t argument_index = 0; if (invoke->GetInvokeType() != InvokeType::kStatic) { // Instance call. - uint32_t obj_reg = is_range ? register_index : args[0]; + uint32_t obj_reg = operands.GetOperand(0); HInstruction* arg = is_unresolved ? LoadLocal(obj_reg, DataType::Type::kReference) : LoadNullCheckedLocal(obj_reg, invoke->GetDexPc()); @@ -1300,14 +1452,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, argument_index = 1; } - if (!SetupInvokeArguments(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - start_index, - &argument_index)) { + if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) { return false; } @@ -1327,24 +1472,14 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, } bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor) { + const InstructionOperands& operands, + const char* shorty) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit()); size_t start_index = 1; size_t argument_index = 0; - if (!SetupInvokeArguments(invoke, - number_of_vreg_arguments, - args, - register_index, - is_range, - descriptor, - start_index, - &argument_index)) { + if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) { return false; } @@ -1352,31 +1487,35 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, // This is a StringFactory call, not an actual String constructor. Its result // replaces the empty String pre-allocated by NewInstance. - uint32_t orig_this_reg = is_range ? register_index : args[0]; + uint32_t orig_this_reg = operands.GetOperand(0); HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference); // Replacing the NewInstance might render it redundant. Keep a list of these - // to be visited once it is clear whether it is has remaining uses. + // to be visited once it is clear whether it has remaining uses. if (arg_this->IsNewInstance()) { ssa_builder_->AddUninitializedString(arg_this->AsNewInstance()); } else { DCHECK(arg_this->IsPhi()); - // NewInstance is not the direct input of the StringFactory call. It might - // be redundant but optimizing this case is not worth the effort. + // We can get a phi as input of a String.<init> if there is a loop between the + // allocation and the String.<init> call. 
As we don't know which other phis might alias + // with `arg_this`, we keep a record of those invocations so we can later replace + // the allocation with the invocation. + // Add the actual 'this' input so the analysis knows what is the allocation instruction. + // The input will be removed during the analysis. + invoke->AddInput(arg_this); + ssa_builder_->AddUninitializedStringPhi(invoke); } - // Walk over all vregs and replace any occurrence of `arg_this` with `invoke`. for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { if ((*current_locals_)[vreg] == arg_this) { (*current_locals_)[vreg] = invoke; } } - return true; } static DataType::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) { - const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index); + const dex::FieldId& field_id = dex_file.GetFieldId(field_index); const char* type = dex_file.GetFieldTypeDescriptor(field_id); return DataType::FromShorty(type[0]); } @@ -1400,7 +1539,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio } ScopedObjectAccess soa(Thread::Current()); - ArtField* resolved_field = ResolveField(field_index, /* is_static */ false, is_put); + ArtField* resolved_field = ResolveField(field_index, /* is_static= */ false, is_put); // Generate an explicit null check on the reference, unless the field access // is unresolved. In that case, we rely on the runtime to perform various @@ -1463,41 +1602,6 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio return true; } -static ObjPtr<mirror::Class> GetClassFrom(CompilerDriver* driver, - const DexCompilationUnit& compilation_unit) { - ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader(); - Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache(); - - return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); -} - -ObjPtr<mirror::Class> HInstructionBuilder::GetOutermostCompilingClass() const { - return GetClassFrom(compiler_driver_, *outer_compilation_unit_); -} - -ObjPtr<mirror::Class> HInstructionBuilder::GetCompilingClass() const { - return GetClassFrom(compiler_driver_, *dex_compilation_unit_); -} - -bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) const { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass( - soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); - Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); - - // GetOutermostCompilingClass returns null when the class is unresolved - // (e.g. if it derives from an unresolved class). This is bogus knowing that - // we are compiling it. - // When this happens we cannot establish a direct relation between the current - // class and the outer class, so we return false. 
- // (Note that this is only used for optimizing invokes and field accesses) - return (cls != nullptr) && (outer_class.Get() == cls.Get()); -} - void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put, @@ -1517,18 +1621,17 @@ void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& in ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, bool is_put) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); ArtField* resolved_field = class_linker->ResolveField(field_idx, dex_compilation_unit_->GetDexCache(), class_loader, is_static); + DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending()); if (UNLIKELY(resolved_field == nullptr)) { - // Clean up any exception left by type resolution. + // Clean up any exception left by field resolution. soa.Self()->ClearException(); return nullptr; } @@ -1540,6 +1643,7 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, } // Check access. + Handle<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass(); if (compiling_class == nullptr) { if (!resolved_field->IsPublic()) { return nullptr; @@ -1569,7 +1673,7 @@ void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint16_t field_index = instruction.VRegB_21c(); ScopedObjectAccess soa(Thread::Current()); - ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put); + ArtField* resolved_field = ResolveField(field_index, /* is_static= */ true, is_put); if (resolved_field == nullptr) { MaybeRecordStat(compilation_stats_, @@ -1586,7 +1690,7 @@ void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, klass->GetDexFile(), klass, dex_pc, - /* needs_access_check */ false); + /* needs_access_check= */ false); if (constant == nullptr) { // The class cannot be referenced from this compiled code. 
Generate @@ -1697,17 +1801,27 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction, graph_->SetHasBoundsChecks(true); } +HNewArray* HInstructionBuilder::BuildNewArray(uint32_t dex_pc, + dex::TypeIndex type_index, + HInstruction* length) { + HLoadClass* cls = BuildLoadClass(type_index, dex_pc); + + const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(type_index)); + DCHECK_EQ(descriptor[0], '['); + size_t component_type_shift = Primitive::ComponentSizeShift(Primitive::GetType(descriptor[1])); + + HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc, component_type_shift); + AppendInstruction(new_array); + return new_array; +} + HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, dex::TypeIndex type_index, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index) { - HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); - HNewArray* const object = new (allocator_) HNewArray(cls, length, dex_pc); - AppendInstruction(object); + const InstructionOperands& operands) { + const size_t number_of_operands = operands.GetNumberOfOperands(); + HInstruction* length = graph_->GetIntConstant(number_of_operands, dex_pc); + HNewArray* new_array = BuildNewArray(dex_pc, type_index, length); const char* descriptor = dex_file_->StringByTypeIdx(type_index); DCHECK_EQ(descriptor[0], '[') << descriptor; char primitive = descriptor[1]; @@ -1717,16 +1831,16 @@ HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, bool is_reference_array = (primitive == 'L') || (primitive == '['); DataType::Type type = is_reference_array ? DataType::Type::kReference : DataType::Type::kInt32; - for (size_t i = 0; i < number_of_vreg_arguments; ++i) { - HInstruction* value = LoadLocal(is_range ? 
register_index + i : args[i], type); + for (size_t i = 0; i < number_of_operands; ++i) { + HInstruction* value = LoadLocal(operands.GetOperand(i), type); HInstruction* index = graph_->GetIntConstant(i, dex_pc); - HArraySet* aset = new (allocator_) HArraySet(object, index, value, type, dex_pc); + HArraySet* aset = new (allocator_) HArraySet(new_array, index, value, type, dex_pc); ssa_builder_->MaybeAddAmbiguousArraySet(aset); AppendInstruction(aset); } - latest_result_ = object; + latest_result_ = new_array; - return object; + return new_array; } template <typename T> @@ -1815,35 +1929,11 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, } } -static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (cls == nullptr) { - return TypeCheckKind::kUnresolvedCheck; - } else if (cls->IsInterface()) { - return TypeCheckKind::kInterfaceCheck; - } else if (cls->IsArrayClass()) { - if (cls->GetComponentType()->IsObjectClass()) { - return TypeCheckKind::kArrayObjectCheck; - } else if (cls->CannotBeAssignedFromOtherTypes()) { - return TypeCheckKind::kExactCheck; - } else { - return TypeCheckKind::kArrayCheck; - } - } else if (cls->IsFinal()) { - return TypeCheckKind::kExactCheck; - } else if (cls->IsAbstract()) { - return TypeCheckKind::kAbstractClassCheck; - } else { - return TypeCheckKind::kClassHierarchyCheck; - } -} - void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { HLoadString* load_string = new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); HSharpening::ProcessLoadString(load_string, code_generator_, - compiler_driver_, *dex_compilation_unit_, handles_); AppendInstruction(load_string); @@ -1852,22 +1942,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); - Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( - soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); - - bool needs_access_check = true; - if (klass != nullptr) { - if (klass->IsPublic()) { - needs_access_check = false; - } else { - ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); - if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { - needs_access_check = false; - } - } - } - + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); } @@ -1888,18 +1964,19 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, } // Note: `klass` must be from `handles_`. 
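Aside, before the BuildLoadClass hunk continues below: the `component_type_shift` that BuildNewArray (earlier in this file's diff) now passes to HNewArray is just log2 of the element size, derived from the second character of the array descriptor. A simplified stand-in for Primitive::ComponentSizeShift(Primitive::GetType(descriptor[1])) could look like the following; the mapping is assumed for illustration (32-bit heap references are taken as given) and is not copied from runtime/primitive.h.

#include <cassert>
#include <cstddef>

// Simplified sketch: map a type descriptor character to log2(component size
// in bytes), as used by HNewArray to scale element indexes.
static size_t ComponentSizeShiftForDescriptorChar(char c) {
  switch (c) {
    case 'Z':  // boolean
    case 'B':  // byte
      return 0;  // 1-byte components
    case 'C':  // char
    case 'S':  // short
      return 1;  // 2-byte components
    case 'I':  // int
    case 'F':  // float
    case 'L':  // reference component (assumed 4 bytes here)
    case '[':  // nested array reference (assumed 4 bytes here)
      return 2;  // 4-byte components
    case 'J':  // long
    case 'D':  // double
      return 3;  // 8-byte components
    default:
      assert(false && "unexpected descriptor character");
      return 0;
  }
}

int main() {
  // "[J" is long[]: elements are 8 bytes, so the shift is 3.
  assert(ComponentSizeShiftForDescriptorChar('J') == 3);
  // "[[I" is int[][]: the component is itself an array reference.
  assert(ComponentSizeShiftForDescriptorChar('[') == 2);
  return 0;
}

Passing the shift along presumably lets the allocation and code-generation paths compute element offsets as `index << component_type_shift` without re-deriving the primitive type from the descriptor.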
+ bool is_referrers_class = + (klass != nullptr) && (outer_compilation_unit_->GetCompilingClass().Get() == klass.Get()); HLoadClass* load_class = new (allocator_) HLoadClass( graph_->GetCurrentMethod(), type_index, *actual_dex_file, klass, - klass != nullptr && (klass.Get() == GetOutermostCompilingClass()), + is_referrers_class, dex_pc, needs_access_check); HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class, code_generator_, - compiler_driver_, *dex_compilation_unit_); if (load_kind == HLoadClass::LoadKind::kInvalid) { @@ -1912,35 +1989,109 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, return load_class; } +Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa, + dex::TypeIndex type_index) { + auto it = class_cache_.find(type_index); + if (it != class_cache_.end()) { + return it->second; + } + + ObjPtr<mirror::Class> klass = dex_compilation_unit_->GetClassLinker()->ResolveType( + type_index, dex_compilation_unit_->GetDexCache(), dex_compilation_unit_->GetClassLoader()); + DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Clean up the exception left by type resolution if any. + + Handle<mirror::Class> h_klass = handles_->NewHandle(klass); + class_cache_.Put(type_index, h_klass); + return h_klass; +} + +bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) { + if (klass == nullptr) { + return true; + } else if (klass->IsPublic()) { + return false; + } else { + ObjPtr<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass().Get(); + return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get()); + } +} + +void HInstructionBuilder::BuildLoadMethodHandle(uint16_t method_handle_index, uint32_t dex_pc) { + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + HLoadMethodHandle* load_method_handle = new (allocator_) HLoadMethodHandle( + graph_->GetCurrentMethod(), method_handle_index, dex_file, dex_pc); + AppendInstruction(load_method_handle); +} + +void HInstructionBuilder::BuildLoadMethodType(dex::ProtoIndex proto_index, uint32_t dex_pc) { + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + HLoadMethodType* load_method_type = + new (allocator_) HLoadMethodType(graph_->GetCurrentMethod(), proto_index, dex_file, dex_pc); + AppendInstruction(load_method_type); +} + void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t destination, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { HInstruction* object = LoadLocal(reference, DataType::Type::kReference); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); - TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); + const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); + Handle<mirror::Class> klass = ResolveClass(soa, type_index); + bool needs_access_check = LoadClassNeedsAccessCheck(klass); + TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind( + klass.Get(), code_generator_, needs_access_check); + + HInstruction* class_or_null = nullptr; + HIntConstant* bitstring_path_to_root = nullptr; + HIntConstant* bitstring_mask = nullptr; + if (check_kind == TypeCheckKind::kBitstringCheck) { + // TODO: Allow using the bitstring check also if we need an access check. 
+ DCHECK(!needs_access_check); + class_or_null = graph_->GetNullConstant(dex_pc); + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + uint32_t path_to_root = + SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get()); + uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get()); + bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc); + bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc); + } else { + class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check); + } + DCHECK(class_or_null != nullptr); + if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HInstanceOf(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. - AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); + AppendInstruction( + new (allocator_) HCheckCast(object, + class_or_null, + check_kind, + klass, + dex_pc, + allocator_, + bitstring_path_to_root, + bitstring_mask)); AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } } -bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, bool* finalizable) const { - return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( - LookupReferrerClass(), LookupResolvedType(type_index, *dex_compilation_unit_), finalizable); -} - bool HInstructionBuilder::CanDecodeQuickenedInfo() const { return !quicken_info_.IsNull(); } @@ -2116,11 +2267,10 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } else { method_idx = instruction.VRegB_35c(); } - uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); uint32_t args[5]; - instruction.GetVarArgs(args); - if (!BuildInvoke(instruction, dex_pc, method_idx, - number_of_vreg_arguments, false, args, -1)) { + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + if (!BuildInvoke(instruction, dex_pc, method_idx, operands)) { return false; } break; @@ -2143,10 +2293,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } else { method_idx = instruction.VRegB_3rc(); } - uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); - uint32_t register_index = instruction.VRegC(); - if (!BuildInvoke(instruction, dex_pc, method_idx, - number_of_vreg_arguments, true, nullptr, register_index)) { + RangeInstructionOperands operands(instruction.VRegC(), instruction.VRegA_3rc()); + if (!BuildInvoke(instruction, dex_pc, method_idx, operands)) { return false; } break; @@ -2154,33 +2302,32 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::INVOKE_POLYMORPHIC: { uint16_t method_idx = instruction.VRegB_45cc(); - uint16_t proto_idx = instruction.VRegH_45cc(); - uint32_t number_of_vreg_arguments = instruction.VRegA_45cc(); + dex::ProtoIndex proto_idx(instruction.VRegH_45cc()); uint32_t 
args[5]; - instruction.GetVarArgs(args); - return BuildInvokePolymorphic(instruction, - dex_pc, - method_idx, - proto_idx, - number_of_vreg_arguments, - false, - args, - -1); + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands); } case Instruction::INVOKE_POLYMORPHIC_RANGE: { uint16_t method_idx = instruction.VRegB_4rcc(); - uint16_t proto_idx = instruction.VRegH_4rcc(); - uint32_t number_of_vreg_arguments = instruction.VRegA_4rcc(); - uint32_t register_index = instruction.VRegC_4rcc(); - return BuildInvokePolymorphic(instruction, - dex_pc, - method_idx, - proto_idx, - number_of_vreg_arguments, - true, - nullptr, - register_index); + dex::ProtoIndex proto_idx(instruction.VRegH_4rcc()); + RangeInstructionOperands operands(instruction.VRegC_4rcc(), instruction.VRegA_4rcc()); + return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands); + } + + case Instruction::INVOKE_CUSTOM: { + uint16_t call_site_idx = instruction.VRegB_35c(); + uint32_t args[5]; + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + return BuildInvokeCustom(dex_pc, call_site_idx, operands); + } + + case Instruction::INVOKE_CUSTOM_RANGE: { + uint16_t call_site_idx = instruction.VRegB_3rc(); + RangeInstructionOperands operands(instruction.VRegC_3rc(), instruction.VRegA_3rc()); + return BuildInvokeCustom(dex_pc, call_site_idx, operands); } case Instruction::NEG_INT: { @@ -2718,40 +2865,27 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::NEW_ARRAY: { dex::TypeIndex type_index(instruction.VRegC_22c()); HInstruction* length = LoadLocal(instruction.VRegB_22c(), DataType::Type::kInt32); - HLoadClass* cls = BuildLoadClass(type_index, dex_pc); + HNewArray* new_array = BuildNewArray(dex_pc, type_index, length); - HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc); - AppendInstruction(new_array); UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction()); BuildConstructorFenceForAllocation(new_array); break; } case Instruction::FILLED_NEW_ARRAY: { - uint32_t number_of_vreg_arguments = instruction.VRegA_35c(); dex::TypeIndex type_index(instruction.VRegB_35c()); uint32_t args[5]; - instruction.GetVarArgs(args); - HNewArray* new_array = BuildFilledNewArray(dex_pc, - type_index, - number_of_vreg_arguments, - /* is_range */ false, - args, - /* register_index */ 0); + uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args); + VarArgsInstructionOperands operands(args, number_of_vreg_arguments); + HNewArray* new_array = BuildFilledNewArray(dex_pc, type_index, operands); BuildConstructorFenceForAllocation(new_array); break; } case Instruction::FILLED_NEW_ARRAY_RANGE: { - uint32_t number_of_vreg_arguments = instruction.VRegA_3rc(); dex::TypeIndex type_index(instruction.VRegB_3rc()); - uint32_t register_index = instruction.VRegC_3rc(); - HNewArray* new_array = BuildFilledNewArray(dex_pc, - type_index, - number_of_vreg_arguments, - /* is_range */ true, - /* args*/ nullptr, - register_index); + RangeInstructionOperands operands(instruction.VRegC_3rc(), instruction.VRegA_3rc()); + HNewArray* new_array = BuildFilledNewArray(dex_pc, type_index, operands); BuildConstructorFenceForAllocation(new_array); break; } @@ -2812,7 +2946,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& 
instruction, case Instruction::IGET_CHAR_QUICK: case Instruction::IGET_SHORT: case Instruction::IGET_SHORT_QUICK: { - if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ false, quicken_index)) { + if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put= */ false, quicken_index)) { return false; } break; @@ -2832,7 +2966,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::IPUT_CHAR_QUICK: case Instruction::IPUT_SHORT: case Instruction::IPUT_SHORT_QUICK: { - if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ true, quicken_index)) { + if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put= */ true, quicken_index)) { return false; } break; @@ -2845,7 +2979,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::SGET_BYTE: case Instruction::SGET_CHAR: case Instruction::SGET_SHORT: { - BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ false); + BuildStaticFieldAccess(instruction, dex_pc, /* is_put= */ false); break; } @@ -2856,7 +2990,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::SPUT_BYTE: case Instruction::SPUT_CHAR: case Instruction::SPUT_SHORT: { - BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ true); + BuildStaticFieldAccess(instruction, dex_pc, /* is_put= */ true); break; } @@ -2906,6 +3040,20 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, break; } + case Instruction::CONST_METHOD_HANDLE: { + uint16_t method_handle_idx = instruction.VRegB_21c(); + BuildLoadMethodHandle(method_handle_idx, dex_pc); + UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); + break; + } + + case Instruction::CONST_METHOD_TYPE: { + dex::ProtoIndex proto_idx(instruction.VRegB_21c()); + BuildLoadMethodType(proto_idx, dex_pc); + UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); + break; + } + case Instruction::MOVE_EXCEPTION: { AppendInstruction(new (allocator_) HLoadException(dex_pc)); UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction()); @@ -2959,7 +3107,21 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, break; } - default: + case Instruction::UNUSED_3E: + case Instruction::UNUSED_3F: + case Instruction::UNUSED_40: + case Instruction::UNUSED_41: + case Instruction::UNUSED_42: + case Instruction::UNUSED_43: + case Instruction::UNUSED_79: + case Instruction::UNUSED_7A: + case Instruction::UNUSED_F3: + case Instruction::UNUSED_F4: + case Instruction::UNUSED_F5: + case Instruction::UNUSED_F6: + case Instruction::UNUSED_F7: + case Instruction::UNUSED_F8: + case Instruction::UNUSED_F9: { VLOG(compiler) << "Did not compile " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of unhandled instruction " @@ -2967,6 +3129,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledUnhandledInstruction); return false; + } } return true; } // NOLINT(readability/fn_size) @@ -2980,7 +3143,7 @@ ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType( ObjPtr<mirror::Class> HInstructionBuilder::LookupReferrerClass() const { // TODO: Cache the result in a Handle<mirror::Class>. 
- const DexFile::MethodId& method_id = + const dex::MethodId& method_id = dex_compilation_unit_->GetDexFile()->GetMethodId(dex_compilation_unit_->GetDexMethodIndex()); return LookupResolvedType(method_id.class_idx_, *dex_compilation_unit_); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 4428c53277..d701445946 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -34,16 +34,18 @@ class ArenaBitVector; class ArtField; class ArtMethod; class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; class HBasicBlockBuilder; class Instruction; +class InstructionOperands; class OptimizingCompilerStats; +class ScopedObjectAccess; class SsaBuilder; class VariableSizedHandleScope; namespace mirror { class Class; +class MethodType; } // namespace mirror class HInstructionBuilder : public ValueObject { @@ -56,7 +58,6 @@ class HInstructionBuilder : public ValueObject { DataType::Type return_type, const DexCompilationUnit* dex_compilation_unit, const DexCompilationUnit* outer_compilation_unit, - CompilerDriver* compiler_driver, CodeGenerator* code_generator, ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, @@ -95,11 +96,6 @@ class HInstructionBuilder : public ValueObject { void InitializeParameters(); - // Returns whether the current method needs access check for the type. - // Output parameter finalizable is set to whether the type is finalizable. - bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const - REQUIRES_SHARED(Locks::mutator_lock_); - template<typename T> void Unop_12x(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); @@ -166,29 +162,28 @@ class HInstructionBuilder : public ValueObject { bool BuildInvoke(const Instruction& instruction, uint32_t dex_pc, uint32_t method_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + const InstructionOperands& operands); // Builds an invocation node for invoke-polymorphic and returns whether the // instruction is supported. - bool BuildInvokePolymorphic(const Instruction& instruction, - uint32_t dex_pc, + bool BuildInvokePolymorphic(uint32_t dex_pc, uint32_t method_idx, - uint32_t proto_idx, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + dex::ProtoIndex proto_idx, + const InstructionOperands& operands); + + // Builds an invocation node for invoke-custom and returns whether the + // instruction is supported. + bool BuildInvokeCustom(uint32_t dex_pc, + uint32_t call_site_idx, + const InstructionOperands& operands); + + // Builds a new array node. + HNewArray* BuildNewArray(uint32_t dex_pc, dex::TypeIndex type_index, HInstruction* length); // Builds a new array node and the instructions that fill it. HNewArray* BuildFilledNewArray(uint32_t dex_pc, dex::TypeIndex type_index, - uint32_t number_of_vreg_arguments, - bool is_range, - uint32_t* args, - uint32_t register_index); + const InstructionOperands& operands); void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc); @@ -232,43 +227,37 @@ class HInstructionBuilder : public ValueObject { bool needs_access_check) REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the outer-most compiling method's class. 
- ObjPtr<mirror::Class> GetOutermostCompilingClass() const; + Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the class whose method is being compiled. - ObjPtr<mirror::Class> GetCompilingClass() const; + // Builds a `HLoadMethodHandle` loading the given `method_handle_index`. + void BuildLoadMethodHandle(uint16_t method_handle_idx, uint32_t dex_pc); - // Returns whether `type_index` points to the outer-most compiling method's class. - bool IsOutermostCompilingClass(dex::TypeIndex type_index) const; + // Builds a `HLoadMethodType` loading the given `proto_index`. + void BuildLoadMethodType(dex::ProtoIndex proto_index, uint32_t dex_pc); void PotentiallySimplifyFakeString(uint16_t original_dex_register, uint32_t dex_pc, HInvoke* invoke); bool SetupInvokeArguments(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, + const InstructionOperands& operands, + const char* shorty, size_t start_index, size_t* argument_index); bool HandleInvoke(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor, - HClinitCheck* clinit_check, - bool is_unresolved); + const InstructionOperands& operands, + const char* shorty, + bool is_unresolved, + HClinitCheck* clinit_check = nullptr); bool HandleStringInit(HInvoke* invoke, - uint32_t number_of_vreg_arguments, - uint32_t* args, - uint32_t register_index, - bool is_range, - const char* descriptor); + const InstructionOperands& operands, + const char* shorty); void HandleStringInitResult(HInvokeStaticOrDirect* invoke); HClinitCheck* ProcessClinitCheckForInvoke( @@ -316,8 +305,6 @@ class HInstructionBuilder : public ValueObject { HBasicBlockBuilder* const block_builder_; SsaBuilder* const ssa_builder_; - CompilerDriver* const compiler_driver_; - CodeGenerator* const code_generator_; // The compilation unit of the current method being compiled. Note that @@ -347,6 +334,10 @@ class HInstructionBuilder : public ValueObject { ScopedArenaVector<HBasicBlock*> loop_headers_; + // Cached resolved types for the current compilation unit's DexFile. + // Handle<>s reference entries in the `handles_`. 
+ ScopedArenaSafeMap<dex::TypeIndex, Handle<mirror::Class>> class_cache_; + static constexpr int kDefaultNumberOfLoops = 2; DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index a42a85dc1d..a433d7ef73 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -18,6 +18,7 @@ #include "art_method-inl.h" #include "class_linker-inl.h" +#include "class_root.h" #include "data_type-inl.h" #include "escape.h" #include "intrinsics.h" @@ -35,14 +36,12 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, OptimizingCompilerStats* stats) : HGraphDelegateVisitor(graph), codegen_(codegen), - compiler_driver_(compiler_driver), stats_(stats) {} - void Run(); + bool Run(); private: void RecordSimplification() { @@ -67,44 +66,44 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); - - void VisitEqual(HEqual* equal) OVERRIDE; - void VisitNotEqual(HNotEqual* equal) OVERRIDE; - void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE; - void VisitInstanceFieldSet(HInstanceFieldSet* equal) OVERRIDE; - void VisitStaticFieldSet(HStaticFieldSet* equal) OVERRIDE; - void VisitArraySet(HArraySet* equal) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitNullCheck(HNullCheck* instruction) OVERRIDE; - void VisitArrayLength(HArrayLength* instruction) OVERRIDE; - void VisitCheckCast(HCheckCast* instruction) OVERRIDE; - void VisitAdd(HAdd* instruction) OVERRIDE; - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitCondition(HCondition* instruction) OVERRIDE; - void VisitGreaterThan(HGreaterThan* condition) OVERRIDE; - void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE; - void VisitLessThan(HLessThan* condition) OVERRIDE; - void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE; - void VisitBelow(HBelow* condition) OVERRIDE; - void VisitBelowOrEqual(HBelowOrEqual* condition) OVERRIDE; - void VisitAbove(HAbove* condition) OVERRIDE; - void VisitAboveOrEqual(HAboveOrEqual* condition) OVERRIDE; - void VisitDiv(HDiv* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitNeg(HNeg* instruction) OVERRIDE; - void VisitNot(HNot* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitSub(HSub* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; - void VisitSelect(HSelect* select) OVERRIDE; - void VisitIf(HIf* instruction) OVERRIDE; - void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; - void VisitInvoke(HInvoke* invoke) OVERRIDE; - void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; - void VisitVecMul(HVecMul* instruction) OVERRIDE; + void VisitEqual(HEqual* equal) override; + void VisitNotEqual(HNotEqual* equal) override; + void VisitBooleanNot(HBooleanNot* bool_not) override; + void VisitInstanceFieldSet(HInstanceFieldSet* equal) override; + void VisitStaticFieldSet(HStaticFieldSet* equal) override; + void VisitArraySet(HArraySet* equal) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + void VisitNullCheck(HNullCheck* instruction) 
override; + void VisitArrayLength(HArrayLength* instruction) override; + void VisitCheckCast(HCheckCast* instruction) override; + void VisitAbs(HAbs* instruction) override; + void VisitAdd(HAdd* instruction) override; + void VisitAnd(HAnd* instruction) override; + void VisitCondition(HCondition* instruction) override; + void VisitGreaterThan(HGreaterThan* condition) override; + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) override; + void VisitLessThan(HLessThan* condition) override; + void VisitLessThanOrEqual(HLessThanOrEqual* condition) override; + void VisitBelow(HBelow* condition) override; + void VisitBelowOrEqual(HBelowOrEqual* condition) override; + void VisitAbove(HAbove* condition) override; + void VisitAboveOrEqual(HAboveOrEqual* condition) override; + void VisitDiv(HDiv* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitNeg(HNeg* instruction) override; + void VisitNot(HNot* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitSub(HSub* instruction) override; + void VisitUShr(HUShr* instruction) override; + void VisitXor(HXor* instruction) override; + void VisitSelect(HSelect* select) override; + void VisitIf(HIf* instruction) override; + void VisitInstanceOf(HInstanceOf* instruction) override; + void VisitInvoke(HInvoke* invoke) override; + void VisitDeoptimize(HDeoptimize* deoptimize) override; + void VisitVecMul(HVecMul* instruction) override; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; @@ -116,13 +115,16 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void SimplifyFP2Int(HInvoke* invoke); void SimplifyStringCharAt(HInvoke* invoke); void SimplifyStringIsEmptyOrLength(HInvoke* invoke); + void SimplifyStringIndexOf(HInvoke* invoke); void SimplifyNPEOnArgN(HInvoke* invoke, size_t); void SimplifyReturnThis(HInvoke* invoke); void SimplifyAllocationIntrinsic(HInvoke* invoke); void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind); + void SimplifyMin(HInvoke* invoke, DataType::Type type); + void SimplifyMax(HInvoke* invoke, DataType::Type type); + void SimplifyAbs(HInvoke* invoke, DataType::Type type); CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; int simplifications_at_current_position_ = 0; @@ -133,17 +135,18 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { static constexpr int kMaxSamePositionSimplifications = 50; }; -void InstructionSimplifier::Run() { +bool InstructionSimplifier::Run() { if (kTestInstructionClonerExhaustively) { CloneAndReplaceInstructionVisitor visitor(graph_); visitor.VisitReversePostOrder(); } - InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); - visitor.Run(); + InstructionSimplifierVisitor visitor(graph_, codegen_, stats_); + return visitor.Run(); } -void InstructionSimplifierVisitor::Run() { +bool InstructionSimplifierVisitor::Run() { + bool didSimplify = false; // Iterate in reverse post order to open up more simplifications to users // of instructions that got simplified. 
for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { @@ -153,10 +156,14 @@ void InstructionSimplifierVisitor::Run() { do { simplification_occurred_ = false; VisitBasicBlock(block); + if (simplification_occurred_) { + didSimplify = true; + } } while (simplification_occurred_ && (simplifications_at_current_position_ < kMaxSamePositionSimplifications)); simplifications_at_current_position_ = 0; } + return didSimplify; } namespace { @@ -365,7 +372,7 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // (as defined by shift semantics). This ensures other // optimizations do not need to special case for such situations. DCHECK_EQ(shift_amount->GetType(), DataType::Type::kInt32); - instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1); + instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index= */ 1); RecordSimplification(); return; } @@ -576,7 +583,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst // Returns whether doing a type test between the class of `object` against `klass` has // a statically known outcome. The result of the test is stored in `outcome`. -static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) { +static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti, + HInstruction* object, + /*out*/bool* outcome) { DCHECK(!object->IsNullConstant()) << "Null constants should be special cased"; ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo(); ScopedObjectAccess soa(Thread::Current()); @@ -586,7 +595,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo return false; } - ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI(); if (!class_rti.IsValid()) { // Happens when the loaded class is unresolved. return false; @@ -611,8 +619,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + check_cast->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -627,18 +635,21 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { return; } - // Note: The `outcome` is initialized to please valgrind - the compiler can reorder - // the return value check with the `outcome` check, b/27651442 . + // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder + // the return value check with the `outcome` check, b/27651442. bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); - if (!load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the checkcast was successfull, hence - // the class was already loaded. 
- load_class->GetBlock()->RemoveInstruction(load_class); + if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = check_cast->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the checkcast was successfull, hence + // the class was already loaded. + load_class->GetBlock()->RemoveInstruction(load_class); + } } } else { // Don't do anything for exceptional cases for now. Ideally we should remove @@ -649,8 +660,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); - HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass(); - if (load_class->NeedsAccessCheck()) { + if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck && + instruction->GetTargetClass()->NeedsAccessCheck()) { // If we need to perform an access check we cannot remove the instruction. return; } @@ -670,10 +681,10 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { return; } - // Note: The `outcome` is initialized to please valgrind - the compiler can reorder - // the return value check with the `outcome` check, b/27651442 . + // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder + // the return value check with the `outcome` check, b/27651442. bool outcome = false; - if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { + if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) { MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. @@ -686,11 +697,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); - if (outcome && !load_class->HasUses()) { - // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. - // However, here we know that it cannot because the instanceof check was successfull, hence - // the class was already loaded. - load_class->GetBlock()->RemoveInstruction(load_class); + if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) { + HLoadClass* load_class = instruction->GetTargetClass(); + if (!load_class->HasUses()) { + // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. + // However, here we know that it cannot because the instanceof check was successfull, hence + // the class was already loaded. 
+ load_class->GetBlock()->RemoveInstruction(load_class); + } } } } @@ -735,8 +749,8 @@ static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* allocator, HInstr return new (allocator) HBelowOrEqual(rhs, lhs); default: LOG(FATAL) << "Unknown ConditionType " << cond->GetKind(); + UNREACHABLE(); } - return nullptr; } static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) { @@ -849,35 +863,29 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, HInstruction* x, HInstruction* cursor) { - DataType::Type type = x->GetType(); - DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - // Construct a fake intrinsic with as much context as is needed to allocate one. - // The intrinsic will always be lowered into code later anyway. - // TODO: b/65164101 : moving towards a real HAbs node makes more sense. - HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u - }; - HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( - allocator, - 1, - type, - x->GetDexPc(), - /*method_idx*/ -1, - /*resolved_method*/ nullptr, - dispatch_info, - kStatic, - MethodReference(nullptr, dex::kDexNoIndex), - HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); - invoke->SetArgumentAt(0, x); - invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt - : Intrinsics::kMathAbsLong, - kNoEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); - return invoke; + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HAbs* abs = new (allocator) HAbs(type, x, cursor->GetDexPc()); + cursor->GetBlock()->InsertInstructionBefore(abs, cursor); + return abs; +} + +// Constructs a new MIN/MAX(x, y) node in the HIR. +static HInstruction* NewIntegralMinMax(ArenaAllocator* allocator, + HInstruction* x, + HInstruction* y, + HInstruction* cursor, + bool is_min) { + DataType::Type type = DataType::Kind(x->GetType()); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + HBinaryOperation* minmax = nullptr; + if (is_min) { + minmax = new (allocator) HMin(type, x, y, cursor->GetDexPc()); + } else { + minmax = new (allocator) HMax(type, x, y, cursor->GetDexPc()); + } + cursor->GetBlock()->InsertInstructionBefore(minmax, cursor); + return minmax; } // Returns true if operands a and b consists of widening type conversions @@ -899,6 +907,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst to_type == DataType::Type::kInt64); } +// Returns an acceptable substitution for "a" on the select +// construct "a <cmp> b ? c : .." during MIN/MAX recognition. +static HInstruction* AllowInMinMax(IfCondition cmp, + HInstruction* a, + HInstruction* b, + HInstruction* c) { + int64_t value = 0; + if (IsInt64AndGet(b, /*out*/ &value) && + (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) || + ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) { + HConstant* other = c->AsBinaryOperation()->GetConstantRight(); + if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) { + int64_t other_value = Int64FromConstant(other); + bool is_max = (cmp == kCondLT || cmp == kCondLE); + // Allow the max for a < 100 ? max(a, -100) : .. + // or the min for a > -100 ? min(a, 100) : .. 
+ if (is_max ? (value >= other_value) : (value <= other_value)) { + return c; + } + } + } + return nullptr; +} + void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -942,23 +974,35 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are same-typed int or long. - if (t_type == f_type && - (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) { - // Try to replace typical integral ABS constructs. - if (true_value->IsNeg()) { - HInstruction* negated = true_value->InputAt(0); - if ((cmp == kCondLT || cmp == kCondLE) && - (a == negated && a == false_value && IsInt64Value(b, 0))) { - // Found a < 0 ? -a : a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select); - } - } else if (false_value->IsNeg()) { - HInstruction* negated = false_value->InputAt(0); - if ((cmp == kCondGT || cmp == kCondGE) && - (a == true_value && a == negated && IsInt64Value(b, 0))) { - // Found a > 0 ? a : -a which can be replaced by ABS(a). - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; + } + // Try to replace typical integral MIN/MAX/ABS constructs. + if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || + (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); } } else if (true_value->IsSub() && false_value->IsSub()) { HInstruction* true_sub1 = true_value->InputAt(0); @@ -970,8 +1014,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { ((cmp == kCondLT || cmp == kCondLE) && (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a or - // a < b ? b - a : a - b + // Found a > b ? a - b : b - a + // or a < b ? 
b - a : a - b // which can be replaced by ABS(a - b) for lower precision operands a, b. replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); } @@ -1137,8 +1181,7 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct HInstruction* input = instruction->GetInput(); DataType::Type input_type = input->GetType(); DataType::Type result_type = instruction->GetResultType(); - if (DataType::IsTypeConversionImplicit(input_type, result_type)) { - // Remove the implicit conversion; this includes conversion to the same type. + if (instruction->IsImplicitConversion()) { instruction->ReplaceWith(input); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); @@ -1230,6 +1273,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } +void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) { + HInstruction* input = instruction->GetInput(); + if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) { + // Zero extension from narrow to wide can never set sign bit in the wider + // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b). + instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + } +} + void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); @@ -1262,7 +1316,7 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { } HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); - if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) { + if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) { // Replace code looking like // NEG tmp, b // ADD dst, a, tmp @@ -1507,8 +1561,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { { ScopedObjectAccess soa(Thread::Current()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); if (field_get->GetFieldInfo().GetField() != field) { return false; @@ -2092,22 +2145,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo(); if (argument_rti.IsValid() && argument_rti.IsStringClass()) { optimizations.SetArgumentIsString(); - } else if (kUseReadBarrier) { - DCHECK(instruction->GetResolvedMethod() != nullptr); - DCHECK(instruction->GetResolvedMethod()->GetDeclaringClass()->IsStringClass() || - // Object.equals() can be devirtualized to String.equals(). - instruction->GetResolvedMethod()->GetDeclaringClass()->IsObjectClass()); - Runtime* runtime = Runtime::Current(); - // For AOT, we always assume that the boot image shall contain the String.class and - // we do not need a read barrier for boot image classes as they are non-moveable. - // For JIT, check if we actually have a boot image; if we do, the String.class - // should also be non-moveable. 
- if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) { - DCHECK(runtime->IsAotCompiler() || - !runtime->GetHeap()->IsMovableObject( - instruction->GetResolvedMethod()->GetDeclaringClass())); - optimizations.SetNoReadBarrierForStringClass(); - } } } } @@ -2214,7 +2251,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); PointerSize image_size = class_linker->GetImagePointerSize(); HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass(); + ObjPtr<mirror::Class> system = invoke->GetResolvedMethod()->GetDeclaringClass(); ArtMethod* method = nullptr; switch (source_component_type) { case DataType::Type::kBool: @@ -2252,7 +2289,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) // the invoke, as we would need to look it up in the current dex file, and it // is unlikely that it exists. The most usual situation for such typed // arraycopy methods is a direct pointer to the boot image. - HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_); + invoke->SetDispatchInfo(HSharpening::SharpenInvokeStaticOrDirect(method, codegen_)); } } } @@ -2324,17 +2361,17 @@ void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) { ArenaAllocator* allocator = GetGraph()->GetAllocator(); // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength, HBoundsCheck and HArrayGet. - HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length */ true); + HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length= */ true); invoke->GetBlock()->InsertInstructionBefore(length, invoke); HBoundsCheck* bounds_check = new (allocator) HBoundsCheck( - index, length, dex_pc, /* is_string_char_at */ true); + index, length, dex_pc, /* is_string_char_at= */ true); invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke); HArrayGet* array_get = new (allocator) HArrayGet(str, bounds_check, DataType::Type::kUint16, SideEffects::None(), // Strings are immutable. dex_pc, - /* is_string_char_at */ true); + /* is_string_char_at= */ true); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get); bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment()); GetGraph()->SetHasBoundsChecks(true); @@ -2346,7 +2383,7 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength. HArrayLength* length = - new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length */ true); + new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length= */ true); HInstruction* replacement; if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) { // For String.isEmpty(), create the `HEqual` representing the `length == 0`. 
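To make the select-recognition hunks above concrete: VisitSelect() now rewrites compare-and-select shapes into HMin/HMax/HAbs nodes, and AllowInMinMax() lets one arm of a clamp already be a min/max with a constant so that nested clamps fold. The C++ below only restates the recognized source-level shapes and checks one clamp by hand; it is an illustration of the pattern, not compiler code.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Shapes recognized by the pass (written as plain expressions):
//   a < b ? a : b   -> Min(a, b)        a > b ? a : b   -> Max(a, b)
//   a < 0 ? -a : a  -> Abs(a)           a > 0 ? a : -a  -> Abs(a)
// AllowInMinMax() additionally accepts a clamp whose taken arm is already a
// min/max with a constant, e.g. a < 100 ? max(a, -100) : 100.
static int32_t ClampSourceShape(int32_t a) {
  return a < 100 ? std::max(a, -100) : 100;  // recognized as Min(Max(a, -100), 100)
}

static int32_t ClampFolded(int32_t a) {
  return std::min(std::max(a, -100), 100);
}

int main() {
  // The folded nested min/max computes the same value as the select shape.
  for (int32_t a : {-500, -100, -1, 0, 42, 100, 500}) {
    assert(ClampSourceShape(a) == ClampFolded(a));
  }
  return 0;
}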
@@ -2361,6 +2398,43 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement); } +void InstructionSimplifierVisitor::SimplifyStringIndexOf(HInvoke* invoke) { + DCHECK(invoke->GetIntrinsic() == Intrinsics::kStringIndexOf || + invoke->GetIntrinsic() == Intrinsics::kStringIndexOfAfter); + if (invoke->InputAt(0)->IsLoadString()) { + HLoadString* load_string = invoke->InputAt(0)->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + uint32_t utf16_length; + const char* data = + dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), &utf16_length); + if (utf16_length == 0) { + invoke->ReplaceWith(GetGraph()->GetIntConstant(-1)); + invoke->GetBlock()->RemoveInstruction(invoke); + RecordSimplification(); + return; + } + if (utf16_length == 1 && invoke->GetIntrinsic() == Intrinsics::kStringIndexOf) { + // Simplify to HSelect(HEquals(., load_string.charAt(0)), 0, -1). + // If the sought character is supplementary, this gives the correct result, i.e. -1. + uint32_t c = GetUtf16FromUtf8(&data); + DCHECK_EQ(GetTrailingUtf16Char(c), 0u); + DCHECK_EQ(GetLeadingUtf16Char(c), c); + uint32_t dex_pc = invoke->GetDexPc(); + ArenaAllocator* allocator = GetGraph()->GetAllocator(); + HEqual* equal = + new (allocator) HEqual(invoke->InputAt(1), GetGraph()->GetIntConstant(c), dex_pc); + invoke->GetBlock()->InsertInstructionBefore(equal, invoke); + HSelect* result = new (allocator) HSelect(equal, + GetGraph()->GetIntConstant(0), + GetGraph()->GetIntConstant(-1), + dex_pc); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, result); + RecordSimplification(); + return; + } + } +} + // This method should only be used on intrinsics whose sole way of throwing an // exception is raising a NPE when the nth argument is null. If that argument // is provably non-null, we can clear the flag. 
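The new SimplifyStringIndexOf above folds indexOf calls whose receiver is a constant string: an empty receiver always yields -1, and a one-character receiver reduces to a compare and select on the sought character. Below is a small standalone sketch of that select form in plain C++, not the compiler IR; the function name and test values are made up. The supplementary-code-point case falls out naturally, since a code point above 0xFFFF can never equal a single UTF-16 unit.

#include <cassert>
#include <cstdint>

// Equivalent of the HSelect(HEqual(ch, c), 0, -1) produced for "x".indexOf(ch).
int32_t IndexOfInOneCharString(uint16_t receiver_char, int32_t sought) {
  return (sought == receiver_char) ? 0 : -1;
}

int main() {
  assert(IndexOfInOneCharString(u'x', u'x') == 0);
  assert(IndexOfInOneCharString(u'x', u'y') == -1);
  // A supplementary code point never matches a one-char string, so -1 is correct.
  assert(IndexOfInOneCharString(u'x', 0x1F600) == -1);
  return 0;
}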
@@ -2430,6 +2504,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } +void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMin* min = new (GetGraph()->GetAllocator()) + HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min); +} + +void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HMax* max = new (GetGraph()->GetAllocator()) + HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max); +} + +void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) { + DCHECK(invoke->IsInvokeStaticOrDirect()); + HAbs* abs = new (GetGraph()->GetAllocator()) + HAbs(type, invoke->InputAt(0), invoke->GetDexPc()); + invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs); +} + void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { switch (instruction->GetIntrinsic()) { case Intrinsics::kStringEquals: @@ -2439,28 +2534,28 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { SimplifySystemArrayCopy(instruction); break; case Intrinsics::kIntegerRotateRight: - SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt32); + SimplifyRotate(instruction, /* is_left= */ false, DataType::Type::kInt32); break; case Intrinsics::kLongRotateRight: - SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt64); + SimplifyRotate(instruction, /* is_left= */ false, DataType::Type::kInt64); break; case Intrinsics::kIntegerRotateLeft: - SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt32); + SimplifyRotate(instruction, /* is_left= */ true, DataType::Type::kInt32); break; case Intrinsics::kLongRotateLeft: - SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt64); + SimplifyRotate(instruction, /* is_left= */ true, DataType::Type::kInt64); break; case Intrinsics::kIntegerCompare: - SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt32); + SimplifyCompare(instruction, /* is_signum= */ false, DataType::Type::kInt32); break; case Intrinsics::kLongCompare: - SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt64); + SimplifyCompare(instruction, /* is_signum= */ false, DataType::Type::kInt64); break; case Intrinsics::kIntegerSignum: - SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt32); + SimplifyCompare(instruction, /* is_signum= */ true, DataType::Type::kInt32); break; case Intrinsics::kLongSignum: - SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt64); + SimplifyCompare(instruction, /* is_signum= */ true, DataType::Type::kInt64); break; case Intrinsics::kFloatIsNaN: case Intrinsics::kDoubleIsNaN: @@ -2477,6 +2572,10 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kStringLength: SimplifyStringIsEmptyOrLength(instruction); break; + case Intrinsics::kStringIndexOf: + case Intrinsics::kStringIndexOfAfter: + SimplifyStringIndexOf(instruction); + break; case Intrinsics::kStringStringIndexOf: case Intrinsics::kStringStringIndexOfAfter: SimplifyNPEOnArgN(instruction, 1); // 0th has own NullCheck @@ -2513,6 +2612,42 @@ void 
InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kVarHandleStoreStoreFence: SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); break; + case Intrinsics::kMathMinIntInt: + SimplifyMin(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMinLongLong: + SimplifyMin(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMinFloatFloat: + SimplifyMin(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMinDoubleDouble: + SimplifyMin(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathMaxIntInt: + SimplifyMax(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathMaxLongLong: + SimplifyMax(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathMaxFloatFloat: + SimplifyMax(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathMaxDoubleDouble: + SimplifyMax(instruction, DataType::Type::kFloat64); + break; + case Intrinsics::kMathAbsInt: + SimplifyAbs(instruction, DataType::Type::kInt32); + break; + case Intrinsics::kMathAbsLong: + SimplifyAbs(instruction, DataType::Type::kInt64); + break; + case Intrinsics::kMathAbsFloat: + SimplifyAbs(instruction, DataType::Type::kFloat32); + break; + case Intrinsics::kMathAbsDouble: + SimplifyAbs(instruction, DataType::Type::kFloat64); + break; default: break; } @@ -2553,10 +2688,10 @@ bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation( HConstant* const2; HBinaryOperation* y; - if (instruction->InstructionTypeEquals(left) && right->IsConstant()) { + if (instruction->GetKind() == left->GetKind() && right->IsConstant()) { const2 = right->AsConstant(); y = left->AsBinaryOperation(); - } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) { + } else if (left->IsConstant() && instruction->GetKind() == right->GetKind()) { const2 = left->AsConstant(); y = right->AsBinaryOperation(); } else { diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h index 5e2045580b..982a24a6f0 100644 --- a/compiler/optimizing/instruction_simplifier.h +++ b/compiler/optimizing/instruction_simplifier.h @@ -24,7 +24,6 @@ namespace art { class CodeGenerator; -class CompilerDriver; /** * Implements optimizations specific to each instruction. 
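TryHandleAssociativeAndCommutativeOperation, shown above with the GetKind() comparison, reassociates (x op c1) op c2 into x op (c1 op c2) so that the constant part can be folded. The snippet below is only a minimal check of why this is safe for wrap-around integer addition; unsigned arithmetic models the two's-complement behaviour of the HAdd node, and the values are made up.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 123456789u;
  uint32_t c1 = 0x7fffffffu;              // deliberately near the signed overflow boundary
  uint32_t c2 = static_cast<uint32_t>(-42);
  uint32_t folded = c1 + c2;              // the constant the pass would precompute
  // Associativity holds modulo 2^32, so reassociation does not change the result.
  assert((x + c1) + c2 == x + folded);
  return 0;
}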
@@ -40,20 +39,17 @@ class InstructionSimplifier : public HOptimization { public: InstructionSimplifier(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* compiler_driver, OptimizingCompilerStats* stats = nullptr, const char* name = kInstructionSimplifierPassName) : HOptimization(graph, name, stats), - codegen_(codegen), - compiler_driver_(compiler_driver) {} + codegen_(codegen) {} static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier"; - void Run() OVERRIDE; + bool Run() override; private: CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier); }; diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 92081e30b1..01e9cff6d8 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -43,11 +43,11 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge); bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ false); } bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ true); } /** @@ -56,7 +56,7 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { * (2) Since statements can be removed in a "forward" fashion, * the visitor should test if each statement is still there. */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // TODO: fragile iteration, provide more robust iterators? for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); @@ -66,15 +66,15 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { } } - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitAnd(HAnd* instruction) override; + void VisitArrayGet(HArrayGet* instruction) override; + void VisitArraySet(HArraySet* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + void VisitUShr(HUShr* instruction) override; OptimizingCompilerStats* stats_; }; @@ -202,6 +202,11 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { return; } + // TODO: Support intermediate address for object arrays on arm. 
+ if (type == DataType::Type::kReference) { + return; + } + if (type == DataType::Type::kInt64 || type == DataType::Type::kFloat32 || type == DataType::Type::kFloat64) { @@ -283,9 +288,10 @@ void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) { } } -void InstructionSimplifierArm::Run() { +bool InstructionSimplifierArm::Run() { InstructionSimplifierArmVisitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); + return true; } } // namespace arm diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 2f6572931f..fca9341d59 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -30,7 +30,7 @@ class InstructionSimplifierArm : public HOptimization { static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; - void Run() OVERRIDE; + bool Run() override; }; } // namespace arm diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 1c44e5ac49..e23decbd71 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -45,11 +45,11 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* bitfield_op, bool do_merge); bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ false); } bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ true); } /** @@ -58,7 +58,7 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { * (2) Since statements can be removed in a "forward" fashion, * the visitor should test if each statement is still there. */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // TODO: fragile iteration, provide more robust iterators? for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); @@ -69,18 +69,18 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { } // HInstruction visitors, sorted alphabetically. 
- void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; - void VisitVecLoad(HVecLoad* instruction) OVERRIDE; - void VisitVecStore(HVecStore* instruction) OVERRIDE; + void VisitAnd(HAnd* instruction) override; + void VisitArrayGet(HArrayGet* instruction) override; + void VisitArraySet(HArraySet* instruction) override; + void VisitMul(HMul* instruction) override; + void VisitOr(HOr* instruction) override; + void VisitShl(HShl* instruction) override; + void VisitShr(HShr* instruction) override; + void VisitTypeConversion(HTypeConversion* instruction) override; + void VisitUShr(HUShr* instruction) override; + void VisitXor(HXor* instruction) override; + void VisitVecLoad(HVecLoad* instruction) override; + void VisitVecStore(HVecStore* instruction) override; OptimizingCompilerStats* stats_; }; @@ -278,9 +278,10 @@ void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) { } } -void InstructionSimplifierArm64::Run() { +bool InstructionSimplifierArm64::Run() { InstructionSimplifierArm64Visitor visitor(graph_, stats_); visitor.VisitReversePostOrder(); + return true; } } // namespace arm64 diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index d180a8dc46..8d93c01ebf 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -30,7 +30,7 @@ class InstructionSimplifierArm64 : public HOptimization { static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64"; - void Run() OVERRIDE; + bool Run() override; }; } // namespace arm64 diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc index fa97401a0c..5d0c63b76b 100644 --- a/compiler/optimizing/instruction_simplifier_mips.cc +++ b/compiler/optimizing/instruction_simplifier_mips.cc @@ -39,8 +39,8 @@ class InstructionSimplifierMipsVisitor : public HGraphVisitor { bool TryExtractArrayAccessIndex(HInstruction* access, HInstruction* index, DataType::Type packed_type); - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitArrayGet(HArrayGet* instruction) override; + void VisitArraySet(HArraySet* instruction) override; OptimizingCompilerStats* stats_; CodeGeneratorMIPS* codegen_; @@ -131,9 +131,10 @@ void InstructionSimplifierMipsVisitor::VisitArraySet(HArraySet* instruction) { } } -void InstructionSimplifierMips::Run() { +bool InstructionSimplifierMips::Run() { InstructionSimplifierMipsVisitor visitor(graph_, codegen_, stats_); visitor.VisitReversePostOrder(); + return true; } } // namespace mips diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h index 6cb8affe85..b431334811 100644 --- a/compiler/optimizing/instruction_simplifier_mips.h +++ b/compiler/optimizing/instruction_simplifier_mips.h @@ -35,7 +35,7 @@ class InstructionSimplifierMips : public HOptimization { static constexpr const char* kInstructionSimplifierMipsPassName = 
"instruction_simplifier_mips"; - void Run() OVERRIDE; + bool Run() override; private: CodeGeneratorMIPS* codegen_; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index ccdcb3532d..0f30f662cd 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -245,11 +245,11 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return false; } if (kEmitCompilerReadBarrier && + !kUseBakerReadBarrier && access->IsArrayGet() && access->GetType() == DataType::Type::kReference) { - // For object arrays, the read barrier instrumentation requires + // For object arrays, the non-Baker read barrier instrumentation requires // the original array pointer. - // TODO: This can be relaxed for Baker CC. return false; } diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc new file mode 100644 index 0000000000..2d8f94a85b --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.cc @@ -0,0 +1,88 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_x86.h" +#include "instruction_simplifier_x86_shared.h" +#include "code_generator_x86.h" + +namespace art { + +namespace x86 { + +class InstructionSimplifierX86Visitor : public HGraphVisitor { + public: + InstructionSimplifierX86Visitor(HGraph* graph, + CodeGenerator* codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorX86*>(codegen)), + stats_(stats) {} + + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool HasAVX2() { + return (codegen_->GetInstructionSetFeatures().HasAVX2()); + } + + void VisitBasicBlock(HBasicBlock* block) override { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + void VisitAnd(HAnd * instruction) override; + void VisitXor(HXor* instruction) override; + + private: + CodeGeneratorX86* codegen_; + OptimizingCompilerStats* stats_; +}; + + +void InstructionSimplifierX86Visitor::VisitAnd(HAnd* instruction) { + if (TryCombineAndNot(instruction)) { + RecordSimplification(); + } else if (instruction->GetResultType() == DataType::Type::kInt32) { + if (TryGenerateResetLeastSetBit(instruction)) { + RecordSimplification(); + } + } +} + +void InstructionSimplifierX86Visitor::VisitXor(HXor* instruction) { + if (instruction->GetResultType() == DataType::Type::kInt32) { + if (TryGenerateMaskUptoLeastSetBit(instruction)) { + RecordSimplification(); + } + } +} + +bool InstructionSimplifierX86::Run() { + InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); + if (visitor.HasAVX2()) { + visitor.VisitReversePostOrder(); + return true; + } + return false; +} + +} // 
namespace x86 +} // namespace art + diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h new file mode 100644 index 0000000000..6f10006db2 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.h @@ -0,0 +1,44 @@ +/*Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +class CodeGenerator; +namespace x86 { + +class InstructionSimplifierX86 : public HOptimization { + public: + InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierX86PassName, stats), + codegen_(codegen) {} + + static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86"; + + bool Run() override; + + private: + CodeGenerator* codegen_; +}; + +} // namespace x86 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_ diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc new file mode 100644 index 0000000000..56c6b414d7 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86_64.cc @@ -0,0 +1,82 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instruction_simplifier_x86_64.h" +#include "instruction_simplifier_x86_shared.h" +#include "code_generator_x86_64.h" + +namespace art { + +namespace x86_64 { + +class InstructionSimplifierX86_64Visitor : public HGraphVisitor { + public: + InstructionSimplifierX86_64Visitor(HGraph* graph, + CodeGenerator* codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorX86_64*>(codegen)), + stats_(stats) {} + + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool HasAVX2() { + return codegen_->GetInstructionSetFeatures().HasAVX2(); + } + + void VisitBasicBlock(HBasicBlock* block) override { + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + void VisitAnd(HAnd* instruction) override; + void VisitXor(HXor* instruction) override; + + private: + CodeGeneratorX86_64* codegen_; + OptimizingCompilerStats* stats_; +}; + +void InstructionSimplifierX86_64Visitor::VisitAnd(HAnd* instruction) { + if (TryCombineAndNot(instruction)) { + RecordSimplification(); + } else if (TryGenerateResetLeastSetBit(instruction)) { + RecordSimplification(); + } +} + + +void InstructionSimplifierX86_64Visitor::VisitXor(HXor* instruction) { + if (TryGenerateMaskUptoLeastSetBit(instruction)) { + RecordSimplification(); + } +} + +bool InstructionSimplifierX86_64::Run() { + InstructionSimplifierX86_64Visitor visitor(graph_, codegen_, stats_); + if (visitor.HasAVX2()) { + visitor.VisitReversePostOrder(); + return true; + } + return false; +} +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h new file mode 100644 index 0000000000..6cae24d11a --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86_64.h @@ -0,0 +1,48 @@ +/* Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace x86_64 {
+
+class InstructionSimplifierX86_64 : public HOptimization {
+ public:
+  InstructionSimplifierX86_64(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kInstructionSimplifierX86_64PassName, stats),
+        codegen_(codegen) {}
+
+  static constexpr const char* kInstructionSimplifierX86_64PassName =
+      "instruction_simplifier_x86_64";
+
+  bool Run() override;
+
+ private:
+  CodeGenerator* codegen_;
+};
+
+} // namespace x86_64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+
+
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc
new file mode 100644
index 0000000000..2805abb2bb
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc
@@ -0,0 +1,137 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86_shared.h"
+#include "nodes_x86.h"
+
+namespace art {
+
+bool TryCombineAndNot(HAnd* instruction) {
+  DataType::Type type = instruction->GetType();
+  if (!DataType::IsIntOrLongType(type)) {
+    return false;
+  }
+  // Replace code looking like
+  // Not tmp, y
+  // And dst, x, tmp
+  // with
+  // AndNot dst, x, y
+  HInstruction* left = instruction->GetLeft();
+  HInstruction* right = instruction->GetRight();
+  // Perform simplification only when either left or right
+  // is Not. When both are Not, instruction should be simplified with
+  // DeMorgan's Laws.
+  if (left->IsNot() ^ right->IsNot()) {
+    bool left_is_not = left->IsNot();
+    HInstruction* other_ins = (left_is_not ? right : left);
+    HNot* not_ins = (left_is_not ? left : right)->AsNot();
+    // Only do the simplification if instruction has only one use
+    // and thus can be safely removed.
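The helpers in this new file match patterns that correspond to standard bit identities, which the x86 BMI1 instructions ANDN, BLSR and BLSMSK implement directly: x & ~y, x & (x - 1) (clear the lowest set bit) and x ^ (x - 1) (mask up to and including the lowest set bit). The snippet below is only a standalone check of those identities with made-up values, independent of the ART node types used by the pass itself.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0b101100u;
  uint32_t y = 0b100110u;
  // AndNot (x86 ANDN): x & ~y in one instruction.
  assert((x & ~y) == 0b001000u);
  // Reset lowest set bit (x86 BLSR): x & (x - 1).
  assert((x & (x - 1)) == 0b101000u);
  // Mask up to and including lowest set bit (x86 BLSMSK): x ^ (x - 1).
  assert((x ^ (x - 1)) == 0b000111u);
  return 0;
}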
+ if (not_ins->HasOnlyOneNonEnvironmentUse()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator(); + HX86AndNot* and_not = new (arena) HX86AndNot(type, + not_ins->GetInput(), + other_ins, + instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, and_not); + DCHECK(!not_ins->HasUses()); + not_ins->GetBlock()->RemoveInstruction(not_ins); + return true; + } + } + return false; +} + +bool TryGenerateResetLeastSetBit(HAnd* instruction) { + DataType::Type type = instruction->GetType(); + if (!DataType::IsIntOrLongType(type)) { + return false; + } + // Replace code looking like + // Add tmp, x, -1 or Sub tmp, x, 1 + // And dest x, tmp + // with + // MaskOrResetLeastSetBit dest, x + HInstruction* candidate = nullptr; + HInstruction* other = nullptr; + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + if (AreLeastSetBitInputs(left, right)) { + candidate = left; + other = right; + } else if (AreLeastSetBitInputs(right, left)) { + candidate = right; + other = left; + } + if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator(); + HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit( + type, HInstruction::kAnd, other, instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb); + DCHECK(!candidate->HasUses()); + candidate->GetBlock()->RemoveInstruction(candidate); + return true; + } + return false; +} + +bool TryGenerateMaskUptoLeastSetBit(HXor* instruction) { + DataType::Type type = instruction->GetType(); + if (!DataType::IsIntOrLongType(type)) { + return false; + } + // Replace code looking like + // Add tmp, x, -1 or Sub tmp, x, 1 + // Xor dest x, tmp + // with + // MaskOrResetLeastSetBit dest, x + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + HInstruction* other = nullptr; + HInstruction* candidate = nullptr; + if (AreLeastSetBitInputs(left, right)) { + candidate = left; + other = right; + } else if (AreLeastSetBitInputs(right, left)) { + candidate = right; + other = left; + } + if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) { + ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator(); + HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit( + type, HInstruction::kXor, other, instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb); + DCHECK(!candidate->HasUses()); + candidate->GetBlock()->RemoveInstruction(candidate); + return true; + } + return false; +} + +bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other) { + if (to_test->IsAdd()) { + HAdd* add = to_test->AsAdd(); + HConstant* cst = add->GetConstantRight(); + return cst != nullptr && cst->IsMinusOne() && other == add->GetLeastConstantLeft(); + } + if (to_test->IsSub()) { + HSub* sub = to_test->AsSub(); + HConstant* cst = sub->GetConstantRight(); + return cst != nullptr && cst->IsOne() && other == sub->GetLeastConstantLeft(); + } + return false; +} + +} // namespace art diff --git a/compiler/linker/output_stream.cc b/compiler/optimizing/instruction_simplifier_x86_shared.h index f5a19138a5..7f94d7ea4c 100644 --- a/compiler/linker/output_stream.cc +++ b/compiler/optimizing/instruction_simplifier_x86_shared.h @@ -1,5 +1,4 @@ -/* - * Copyright (C) 2014 The Android Open Source Project +/* Copyright (C) 2018 
The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,20 +13,17 @@ * limitations under the License. */ -#include "output_stream.h" +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ + +#include "nodes.h" namespace art { -namespace linker { +bool TryCombineAndNot(HAnd* instruction); +bool TryGenerateResetLeastSetBit(HAnd* instruction); +bool TryGenerateMaskUptoLeastSetBit(HXor* instruction); +bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other); +} // namespace art -std::ostream& operator<<(std::ostream& os, const Whence& rhs) { - switch (rhs) { - case kSeekSet: os << "SEEK_SET"; break; - case kSeekCurrent: os << "SEEK_CUR"; break; - case kSeekEnd: os << "SEEK_END"; break; - default: UNREACHABLE(); - } - return os; -} +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_ -} // namespace linker -} // namespace art diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc new file mode 100644 index 0000000000..c345624a7a --- /dev/null +++ b/compiler/optimizing/intrinsic_objects.cc @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsic_objects.h" + +#include "art_field-inl.h" +#include "base/logging.h" +#include "class_root.h" +#include "handle.h" +#include "obj_ptr-inl.h" +#include "mirror/object_array-alloc-inl.h" +#include "mirror/object_array-inl.h" + +namespace art { + +static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(Thread* self, + ClassLinker* class_linker) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass( + self, "Ljava/lang/Integer$IntegerCache;", /* class_loader= */ nullptr); + if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) { + return nullptr; + } + ArtField* cache_field = + integer_cache_class->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); + CHECK(cache_field != nullptr); + ObjPtr<mirror::ObjectArray<mirror::Object>> integer_cache = + ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( + cache_field->GetObject(integer_cache_class)); + CHECK(integer_cache != nullptr); + return integer_cache; +} + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::AllocateBootImageLiveObjects( + Thread* self, + ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_) { + // The objects used for the Integer.valueOf() intrinsic must remain live even if references + // to them are removed using reflection. Image roots are not accessible through reflection, + // so the array we construct here shall keep them alive. 
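AllocateBootImageLiveObjects above places the IntegerCache.cache array at index 0 and the boxed Integer objects immediately after it, so GetIntegerValueOfObject(live_objects, i) simply reads element 1 + i. The toy sketch below models only that index mapping over a plain vector; the -128..127 range is just the usual Java default, assumed here for illustration.

#include <cassert>
#include <string>
#include <vector>

int main() {
  // Mimic the layout: [IntegerCache.cache, Integer(low), Integer(low + 1), ...].
  std::vector<std::string> live_objects;
  live_objects.push_back("IntegerCache.cache");
  for (int v = -128; v <= 127; ++v) {
    live_objects.push_back("Integer(" + std::to_string(v) + ")");
  }
  // GetIntegerValueOfObject(live_objects, index) corresponds to element 1 + index.
  assert(live_objects[1 + 0] == "Integer(-128)");
  assert(live_objects[1 + 255] == "Integer(127)");
  return 0;
}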
+ StackHandleScope<1> hs(self); + Handle<mirror::ObjectArray<mirror::Object>> integer_cache = + hs.NewHandle(LookupIntegerCache(self, class_linker)); + size_t live_objects_size = + (integer_cache != nullptr) ? (/* cache */ 1u + integer_cache->GetLength()) : 0u; + ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects = + mirror::ObjectArray<mirror::Object>::Alloc( + self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(class_linker), live_objects_size); + int32_t index = 0; + if (integer_cache != nullptr) { + live_objects->Set(index++, integer_cache.Get()); + for (int32_t i = 0, length = integer_cache->GetLength(); i != length; ++i) { + live_objects->Set(index++, integer_cache->Get(i)); + } + } + CHECK_EQ(index, live_objects->GetLength()); + + if (kIsDebugBuild && integer_cache != nullptr) { + CHECK_EQ(integer_cache.Get(), GetIntegerValueOfCache(live_objects)); + for (int32_t i = 0, len = integer_cache->GetLength(); i != len; ++i) { + CHECK_EQ(integer_cache->GetWithoutChecks(i), GetIntegerValueOfObject(live_objects, i)); + } + } + return live_objects; +} + +ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { + DCHECK(boot_image_live_objects != nullptr); + if (boot_image_live_objects->GetLength() == 0u) { + return nullptr; // No intrinsic objects. + } + // No need for read barrier for boot image object or for verifying the value that was just stored. + ObjPtr<mirror::Object> result = + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(0); + DCHECK(result != nullptr); + DCHECK(result->IsObjectArray()); + DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;")); + return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(result); +} + +ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + uint32_t index) { + DCHECK(boot_image_live_objects != nullptr); + DCHECK_NE(boot_image_live_objects->GetLength(), 0); + DCHECK_LT(index, + static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength())); + + // No need for read barrier for boot image object or for verifying the value that was just stored. + ObjPtr<mirror::Object> result = + boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>( + /* skip the IntegerCache.cache */ 1u + index); + DCHECK(result != nullptr); + DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;")); + return result; +} + +MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) { + DCHECK_NE(boot_image_live_objects->GetLength(), 0); + MemberOffset result = mirror::ObjectArray<mirror::Object>::OffsetOfElement(1u); + DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u), + (boot_image_live_objects + ->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result))); + return result; +} + +} // namespace art diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h new file mode 100644 index 0000000000..863017be38 --- /dev/null +++ b/compiler/optimizing/intrinsic_objects.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ + +#include "base/bit_field.h" +#include "base/bit_utils.h" +#include "base/mutex.h" + +namespace art { + +class ClassLinker; +template <class MirrorType> class ObjPtr; +class MemberOffset; +class Thread; + +namespace mirror { +class Object; +template <class T> class ObjectArray; +} // namespace mirror + +class IntrinsicObjects { + public: + enum class PatchType { + kIntegerValueOfObject, + kIntegerValueOfArray, + + kLast = kIntegerValueOfArray + }; + + static uint32_t EncodePatch(PatchType patch_type, uint32_t index = 0u) { + DCHECK(patch_type == PatchType::kIntegerValueOfObject || index == 0u); + return PatchTypeField::Encode(static_cast<uint32_t>(patch_type)) | IndexField::Encode(index); + } + + static PatchType DecodePatchType(uint32_t intrinsic_data) { + return static_cast<PatchType>(PatchTypeField::Decode(intrinsic_data)); + } + + static uint32_t DecodePatchIndex(uint32_t intrinsic_data) { + return IndexField::Decode(intrinsic_data); + } + + static ObjPtr<mirror::ObjectArray<mirror::Object>> AllocateBootImageLiveObjects( + Thread* self, + ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_); + + // Functions for retrieving data for Integer.valueOf(). + static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_); + static ObjPtr<mirror::Object> GetIntegerValueOfObject( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_); + static MemberOffset GetIntegerValueOfArrayDataOffset( + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + static constexpr size_t kPatchTypeBits = + MinimumBitsToStore(static_cast<uint32_t>(PatchType::kLast)); + static constexpr size_t kIndexBits = BitSizeOf<uint32_t>() - kPatchTypeBits; + using PatchTypeField = BitField<uint32_t, 0u, kPatchTypeBits>; + using IndexField = BitField<uint32_t, kPatchTypeBits, kIndexBits>; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_ diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index f8dc316e45..d9401050df 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -20,309 +20,355 @@ #include "art_method-inl.h" #include "base/utils.h" #include "class_linker.h" +#include "class_root.h" #include "dex/invoke_type.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "mirror/dex_cache-inl.h" +#include "gc/space/image_space.h" +#include "image-inl.h" +#include "intrinsic_objects.h" #include "nodes.h" +#include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" namespace art { -// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags. 
-#define CHECK_INTRINSICS_ENUM_VALUES(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - static_assert( \ - static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ - "Instrinsics enumeration space overflow."); -#include "intrinsics_list.h" - INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) -#undef INTRINSICS_LIST -#undef CHECK_INTRINSICS_ENUM_VALUES - -// Function that returns whether an intrinsic is static/direct or virtual. -static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { - switch (i) { +std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { + switch (intrinsic) { case Intrinsics::kNone: - return kInterface; // Non-sensical for intrinsic. + os << "None"; + break; #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ - return IsStatic; + os << # Name; \ + break; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST +#undef STATIC_INTRINSICS_LIST +#undef VIRTUAL_INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS } - return kInterface; + return os; } -// Function that returns whether an intrinsic needs an environment or not. -static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsics i) { - switch (i) { - case Intrinsics::kNone: - return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - case Intrinsics::k ## Name: \ - return NeedsEnvironmentOrCache; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return kNeedsEnvironmentOrCache; +static const char kIntegerCacheDescriptor[] = "Ljava/lang/Integer$IntegerCache;"; +static const char kIntegerDescriptor[] = "Ljava/lang/Integer;"; +static const char kIntegerArrayDescriptor[] = "[Ljava/lang/Integer;"; +static const char kLowFieldName[] = "low"; +static const char kHighFieldName[] = "high"; +static const char kValueFieldName[] = "value"; + +static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects() + REQUIRES_SHARED(Locks::mutator_lock_) { + gc::Heap* heap = Runtime::Current()->GetHeap(); + const std::vector<gc::space::ImageSpace*>& boot_image_spaces = heap->GetBootImageSpaces(); + DCHECK(!boot_image_spaces.empty()); + const ImageHeader& main_header = boot_image_spaces[0]->GetImageHeader(); + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = + ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast( + main_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kBootImageLiveObjects)); + DCHECK(boot_image_live_objects != nullptr); + DCHECK(heap->ObjectIsInBootImageSpace(boot_image_live_objects)); + return boot_image_live_objects; } -// Function that returns whether an intrinsic has side effects. -static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) { - switch (i) { - case Intrinsics::kNone: - return kAllSideEffects; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - case Intrinsics::k ## Name: \ - return SideEffects; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return kAllSideEffects; +static ObjPtr<mirror::Class> LookupInitializedClass(Thread* self, + ClassLinker* class_linker, + const char* descriptor) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> klass = + class_linker->LookupClass(self, descriptor, /* class_loader= */ nullptr); + DCHECK(klass != nullptr); + DCHECK(klass->IsInitialized()); + return klass; } -// Function that returns whether an intrinsic can throw exceptions. -static inline IntrinsicExceptions GetExceptions(Intrinsics i) { - switch (i) { - case Intrinsics::kNone: - return kCanThrow; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - case Intrinsics::k ## Name: \ - return Exceptions; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return kCanThrow; +static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerCacheArray( + ObjPtr<mirror::Class> cache_class) REQUIRES_SHARED(Locks::mutator_lock_) { + ArtField* cache_field = cache_class->FindDeclaredStaticField("cache", kIntegerArrayDescriptor); + DCHECK(cache_field != nullptr); + return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(cache_field->GetObject(cache_class)); } -static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) +static int32_t GetIntegerCacheField(ObjPtr<mirror::Class> cache_class, const char* field_name) REQUIRES_SHARED(Locks::mutator_lock_) { - // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual. - // - // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization - // failure occured. We might be in a situation where we have inlined a method that calls an - // intrinsic, but that method is in a different dex file on which we do not have a - // verified_method that would have helped the compiler driver sharpen the call. In that case, - // make sure that the intrinsic is actually for some final method (or in a final class), as - // otherwise the intrinsics setup is broken. - // - // For the last direction, we have intrinsics for virtual functions that will perform a check - // inline. If the precise type is known, however, the instruction will be sharpened to an - // InvokeStaticOrDirect. - InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); - InvokeType invoke_type = invoke->GetInvokeType(); - - switch (intrinsic_type) { - case kStatic: - return (invoke_type == kStatic); - - case kDirect: - if (invoke_type == kDirect) { - return true; - } - if (invoke_type == kVirtual) { - ArtMethod* art_method = invoke->GetResolvedMethod(); - return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); - } - return false; - - case kVirtual: - // Call might be devirtualized. 
- return (invoke_type == kVirtual || invoke_type == kDirect || invoke_type == kInterface); - - case kSuper: - case kInterface: - case kPolymorphic: - return false; - } - LOG(FATAL) << "Unknown intrinsic invoke type: " << intrinsic_type; - UNREACHABLE(); + ArtField* field = cache_class->FindDeclaredStaticField(field_name, "I"); + DCHECK(field != nullptr); + return field->GetInt(cache_class); } -bool IntrinsicsRecognizer::Recognize(HInvoke* invoke, - ArtMethod* art_method, - /*out*/ bool* wrong_invoke_type) { - if (art_method == nullptr) { - art_method = invoke->GetResolvedMethod(); - } - *wrong_invoke_type = false; - if (art_method == nullptr || !art_method->IsIntrinsic()) { - return false; - } - - // TODO: b/65872996 The intent is that polymorphic signature methods should - // be compiler intrinsics. At present, they are only interpreter intrinsics. - if (art_method->IsPolymorphicSignature()) { - return false; +static bool CheckIntegerCache(Thread* self, + ClassLinker* class_linker, + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects, + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_cache) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(boot_image_cache != nullptr); + + // Since we have a cache in the boot image, both java.lang.Integer and + // java.lang.Integer$IntegerCache must be initialized in the boot image. + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + ObjPtr<mirror::Class> integer_class = + LookupInitializedClass(self, class_linker, kIntegerDescriptor); + + // Check that the current cache is the same as the `boot_image_cache`. + ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); + if (current_cache != boot_image_cache) { + return false; // Messed up IntegerCache.cache. } - Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); - if (CheckInvokeType(intrinsic, invoke) == false) { - *wrong_invoke_type = true; - return false; + // Check that the range matches the boot image cache length. + int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + if (boot_image_cache->GetLength() != high - low + 1) { + return false; // Messed up IntegerCache.low or IntegerCache.high. } - invoke->SetIntrinsic(intrinsic, - NeedsEnvironmentOrCache(intrinsic), - GetSideEffects(intrinsic), - GetExceptions(intrinsic)); - return true; -} - -void IntrinsicsRecognizer::Run() { - ScopedObjectAccess soa(Thread::Current()); - for (HBasicBlock* block : graph_->GetReversePostOrder()) { - for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); - inst_it.Advance()) { - HInstruction* inst = inst_it.Current(); - if (inst->IsInvoke()) { - bool wrong_invoke_type = false; - if (Recognize(inst->AsInvoke(), /* art_method */ nullptr, &wrong_invoke_type)) { - MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized); - } else if (wrong_invoke_type) { - LOG(WARNING) - << "Found an intrinsic with unexpected invoke type: " - << inst->AsInvoke()->GetResolvedMethod()->PrettyMethod() << " " - << inst->DebugName(); - } - } + // Check that the elements match the boot image intrinsic objects and check their values as well. 
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + for (int32_t i = 0, len = boot_image_cache->GetLength(); i != len; ++i) { + ObjPtr<mirror::Object> boot_image_object = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, i); + DCHECK(Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boot_image_object)); + // No need for read barrier for comparison with a boot image object. + ObjPtr<mirror::Object> current_object = + boot_image_cache->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i); + if (boot_image_object != current_object) { + return false; // Messed up IntegerCache.cache[i] + } + if (value_field->GetInt(boot_image_object) != low + i) { + return false; // Messed up IntegerCache.cache[i].value. } } -} -std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { - switch (intrinsic) { - case Intrinsics::kNone: - os << "None"; - break; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - case Intrinsics::k ## Name: \ - os << # Name; \ - break; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef STATIC_INTRINSICS_LIST -#undef VIRTUAL_INTRINSICS_LIST -#undef OPTIMIZING_INTRINSICS - } - return os; + return true; } void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, CodeGenerator* codegen, Location return_location, Location first_argument_location) { - if (Runtime::Current()->IsAotCompiler()) { - if (codegen->GetCompilerOptions().IsBootImage() || - codegen->GetCompilerOptions().GetCompilePic()) { - // TODO(ngeoffray): Support boot image compilation. + // The intrinsic will call if it needs to allocate a j.l.Integer. + LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly; + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { + // Piggyback on the method load kind to determine whether we can use PC-relative addressing. + // This should cover both the testing config (non-PIC boot image) and codegens that reject + // PC-relative load kinds and fall back to the runtime call. 
+ if (!invoke->AsInvokeStaticOrDirect()->HasPcRelativeMethodLoadKind()) { + return; + } + if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) || + !compiler_options.IsImageClass(kIntegerDescriptor)) { + return; + } + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + ObjPtr<mirror::Class> cache_class = class_linker->LookupClass( + self, kIntegerCacheDescriptor, /* class_loader= */ nullptr); + DCHECK(cache_class != nullptr); + if (UNLIKELY(!cache_class->IsInitialized())) { + LOG(WARNING) << "Image class " << cache_class->PrettyDescriptor() << " is uninitialized."; + return; + } + ObjPtr<mirror::Class> integer_class = + class_linker->LookupClass(self, kIntegerDescriptor, /* class_loader= */ nullptr); + DCHECK(integer_class != nullptr); + if (UNLIKELY(!integer_class->IsInitialized())) { + LOG(WARNING) << "Image class " << integer_class->PrettyDescriptor() << " is uninitialized."; return; } + int32_t low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + if (kIsDebugBuild) { + ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class); + CHECK(current_cache != nullptr); + CHECK_EQ(current_cache->GetLength(), high - low + 1); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + CHECK(value_field != nullptr); + for (int32_t i = 0, len = current_cache->GetLength(); i != len; ++i) { + ObjPtr<mirror::Object> current_object = current_cache->GetWithoutChecks(i); + CHECK(current_object != nullptr); + CHECK_EQ(value_field->GetInt(current_object), low + i); + } + } + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < + static_cast<uint32_t>(high - low + 1)) { + // No call, we shall use direct pointer to the Integer object. + call_kind = LocationSummary::kNoCall; + } + } + } else { + Runtime* runtime = Runtime::Current(); + if (runtime->GetHeap()->GetBootImageSpaces().empty()) { + return; // Running without boot image, cannot use required boot image objects. + } + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); + ObjPtr<mirror::ObjectArray<mirror::Object>> cache = + IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects); + if (cache == nullptr) { + return; // No cache in the boot image. + } + if (runtime->UseJitCompilation()) { + if (!CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)) { + return; // The cache was somehow messed up, probably by using reflection. + } + } else { + DCHECK(runtime->IsAotCompiler()); + DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)); + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + // Retrieve the `value` from the lowest cached Integer. 
+ ObjPtr<mirror::Object> low_integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); + ObjPtr<mirror::Class> integer_class = + low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + int32_t low = value_field->GetInt(low_integer); + if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < + static_cast<uint32_t>(cache->GetLength())) { + // No call, we shall use direct pointer to the Integer object. Note that we cannot + // do this for JIT as the "low" can change through reflection before emitting the code. + call_kind = LocationSummary::kNoCall; + } + } + } } - IntegerValueOfInfo info = ComputeIntegerValueOfInfo(); - - // Most common case is that we have found all we needed (classes are initialized - // and in the boot image). Bail if not. - if (info.integer_cache == nullptr || - info.integer == nullptr || - info.cache == nullptr || - info.value_offset == 0 || - // low and high cannot be 0, per the spec. - info.low == 0 || - info.high == 0) { - LOG(INFO) << "Integer.valueOf will not be optimized"; - return; + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified); + if (call_kind == LocationSummary::kCallOnMainOnly) { + locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0))); + locations->AddTemp(first_argument_location); + locations->SetOut(return_location); + } else { + locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); } +} - // The intrinsic will call if it needs to allocate a j.l.Integer. - LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( - invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); - if (!invoke->InputAt(0)->IsConstant()) { - locations->SetInAt(0, Location::RequiresRegister()); - } - locations->AddTemp(first_argument_location); - locations->SetOut(return_location); +static int32_t GetIntegerCacheLowFromIntegerCache(Thread* self, ClassLinker* class_linker) + REQUIRES_SHARED(Locks::mutator_lock_) { + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + return GetIntegerCacheField(cache_class, kLowFieldName); } -IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() { +static uint32_t CalculateBootImageOffset(ObjPtr<mirror::Object> object) + REQUIRES_SHARED(Locks::mutator_lock_) { + gc::Heap* heap = Runtime::Current()->GetHeap(); + DCHECK(heap->ObjectIsInBootImageSpace(object)); + return reinterpret_cast<const uint8_t*>(object.Ptr()) - heap->GetBootImageSpaces()[0]->Begin(); +} + +inline IntrinsicVisitor::IntegerValueOfInfo::IntegerValueOfInfo() + : value_offset(0), + low(0), + length(0u), + integer_boot_image_offset(kInvalidReference), + value_boot_image_reference(kInvalidReference) {} + +IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo( + HInvoke* invoke, const CompilerOptions& compiler_options) { // Note that we could cache all of the data looked up here. but there's no good // location for it. We don't want to add it to WellKnownClasses, to avoid creating global // jni values. Adding it as state to the compiler singleton seems like wrong // separation of concerns. // The need for this data should be pretty rare though. 
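Editor's aside (not part of this change): the unsigned comparison used above, static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length, is a one-compare form of the range test low <= value && value <= high, with length == high - low + 1; a value below low wraps around to a large unsigned number and fails the test. A minimal sketch of the behavior the intrinsic implements, using hypothetical stand-in types rather than ART APIs:

    #include <cstdint>

    struct BoxedInteger { int32_t value; };  // Hypothetical stand-in for a cached j.l.Integer.

    BoxedInteger* ValueOfSketch(int32_t value,
                                BoxedInteger** cache,  // cache[0] boxes `low`.
                                int32_t low,
                                uint32_t length) {     // length == high - low + 1.
      // If value < low, the subtraction wraps around, so a single unsigned
      // compare covers both the lower and the upper bound.
      uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low);
      if (index < length) {
        return cache[index];            // In range: return the cached object directly.
      }
      return new BoxedInteger{value};   // Out of range: allocate a new boxed value.
    }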
- // The most common case is that the classes are in the boot image and initialized, - // which is easy to generate code for. We bail if not. - Thread* self = Thread::Current(); - ScopedObjectAccess soa(self); + // Note that at this point we can no longer abort the code generation. Therefore, + // we need to provide data that shall not lead to a crash even if the fields were + // modified through reflection since ComputeIntegerValueOfLocations() when JITting. + Runtime* runtime = Runtime::Current(); ClassLinker* class_linker = runtime->GetClassLinker(); - gc::Heap* heap = runtime->GetHeap(); - IntegerValueOfInfo info; - info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;"); - if (info.integer_cache == nullptr) { - self->ClearException(); - return info; - } - if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) { - // Optimization only works if the class is initialized and in the boot image. - return info; - } - info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;"); - if (info.integer == nullptr) { - self->ClearException(); - return info; - } - if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) { - // Optimization only works if the class is initialized and in the boot image. - return info; - } - - ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); - if (field == nullptr) { - return info; - } - info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>( - field->GetObject(info.integer_cache).Ptr()); - if (info.cache == nullptr) { - return info; - } - - if (!heap->ObjectIsInBootImageSpace(info.cache)) { - // Optimization only works if the object is in the boot image. - return info; - } + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); - field = info.integer->FindDeclaredInstanceField("value", "I"); - if (field == nullptr) { - return info; + IntegerValueOfInfo info; + if (compiler_options.IsBootImage()) { + ObjPtr<mirror::Class> integer_class = + LookupInitializedClass(self, class_linker, kIntegerDescriptor); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + info.value_offset = value_field->GetOffset().Uint32Value(); + ObjPtr<mirror::Class> cache_class = + LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor); + info.low = GetIntegerCacheField(cache_class, kLowFieldName); + int32_t high = GetIntegerCacheField(cache_class, kHighFieldName); + info.length = dchecked_integral_cast<uint32_t>(high - info.low + 1); + + info.integer_boot_image_offset = IntegerValueOfInfo::kInvalidReference; + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); + if (index < static_cast<uint32_t>(info.length)) { + info.value_boot_image_reference = IntrinsicObjects::EncodePatch( + IntrinsicObjects::PatchType::kIntegerValueOfObject, index); + } else { + // Not in the cache. 
+ info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + } + } else { + info.array_data_boot_image_reference = + IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kIntegerValueOfArray); + } + } else { + ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects(); + ObjPtr<mirror::Object> low_integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u); + ObjPtr<mirror::Class> integer_class = low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>(); + ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I"); + DCHECK(value_field != nullptr); + info.value_offset = value_field->GetOffset().Uint32Value(); + if (runtime->UseJitCompilation()) { + // Use the current `IntegerCache.low` for JIT to avoid truly surprising behavior if the + // code messes up the `value` field in the lowest cached Integer using reflection. + info.low = GetIntegerCacheLowFromIntegerCache(self, class_linker); + } else { + // For app AOT, the `low_integer->value` should be the same as `IntegerCache.low`. + info.low = value_field->GetInt(low_integer); + DCHECK_EQ(info.low, GetIntegerCacheLowFromIntegerCache(self, class_linker)); + } + // Do not look at `IntegerCache.high`, use the immutable length of the cache array instead. + info.length = dchecked_integral_cast<uint32_t>( + IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects)->GetLength()); + + info.integer_boot_image_offset = CalculateBootImageOffset(integer_class); + if (invoke->InputAt(0)->IsIntConstant()) { + int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low); + if (index < static_cast<uint32_t>(info.length)) { + ObjPtr<mirror::Object> integer = + IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, index); + info.value_boot_image_reference = CalculateBootImageOffset(integer); + } else { + // Not in the cache. + info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference; + } + } else { + info.array_data_boot_image_reference = + CalculateBootImageOffset(boot_image_live_objects) + + IntrinsicObjects::GetIntegerValueOfArrayDataOffset(boot_image_live_objects).Uint32Value(); + } } - info.value_offset = field->GetOffset().Int32Value(); - field = info.integer_cache->FindDeclaredStaticField("low", "I"); - if (field == nullptr) { - return info; - } - info.low = field->GetInt(info.integer_cache); + return info; +} - field = info.integer_cache->FindDeclaredStaticField("high", "I"); - if (field == nullptr) { - return info; +void IntrinsicVisitor::AssertNonMovableStringClass() { + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> string_class = GetClassRoot<art::mirror::String>(); + CHECK(!art::Runtime::Current()->GetHeap()->IsMovableObject(string_class)); } - info.high = field->GetInt(info.integer_cache); - - DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1); - return info; } } // namespace art diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 62991435c7..ab68cce304 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -24,7 +24,6 @@ namespace art { -class CompilerDriver; class DexFile; // Positive floating-point infinities. 
@@ -34,28 +33,6 @@ static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000) static constexpr uint32_t kNanFloat = 0x7fc00000U; static constexpr uint64_t kNanDouble = 0x7ff8000000000000; -// Recognize intrinsics from HInvoke nodes. -class IntrinsicsRecognizer : public HOptimization { - public: - IntrinsicsRecognizer(HGraph* graph, - OptimizingCompilerStats* stats, - const char* name = kIntrinsicsRecognizerPassName) - : HOptimization(graph, name, stats) {} - - void Run() OVERRIDE; - - // Static helper that recognizes intrinsic call. Returns true on success. - // If it fails due to invoke type mismatch, wrong_invoke_type is set. - // Useful to recognize intrinsics on individual calls outside this full pass. - static bool Recognize(HInvoke* invoke, ArtMethod* method, /*out*/ bool* wrong_invoke_type) - REQUIRES_SHARED(Locks::mutator_lock_); - - static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition"; - - private: - DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer); -}; - class IntrinsicVisitor : public ValueObject { public: virtual ~IntrinsicVisitor() {} @@ -126,37 +103,47 @@ class IntrinsicVisitor : public ValueObject { Location return_location, Location first_argument_location); - // Temporary data structure for holding Integer.valueOf useful data. We only - // use it if the mirror::Class* are in the boot image, so it is fine to keep raw - // mirror::Class pointers in this structure. + // Temporary data structure for holding Integer.valueOf data for generating code. + // We only use it if the boot image contains the IntegerCache objects. struct IntegerValueOfInfo { - IntegerValueOfInfo() - : integer_cache(nullptr), - integer(nullptr), - cache(nullptr), - low(0), - high(0), - value_offset(0) {} - - // The java.lang.IntegerCache class. - mirror::Class* integer_cache; - // The java.lang.Integer class. - mirror::Class* integer; - // Value of java.lang.IntegerCache#cache. - mirror::ObjectArray<mirror::Object>* cache; - // Value of java.lang.IntegerCache#low. + static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1); + + IntegerValueOfInfo(); + + // Offset of the Integer.value field for initializing a newly allocated instance. + uint32_t value_offset; + // The low value in the cache. int32_t low; - // Value of java.lang.IntegerCache#high. - int32_t high; - // The offset of java.lang.Integer.value. - int32_t value_offset; + // The length of the cache array. + uint32_t length; + + // Boot image offset of java.lang.Integer for allocating an instance. + uint32_t integer_boot_image_offset; // Set to kInvalidReference when compiling the boot image. + + // This union contains references to the boot image. For app AOT or JIT compilation, + // these are the boot image offsets of the target. For boot image compilation, the + // location shall be known only at link time, so we encode a symbolic reference using + // IntrinsicObjects::EncodePatch(). + union { + // The target value for a constant input in the cache range. If the constant input + // is out of range (use `low` and `length` to check), this value is bogus (set to + // kInvalidReference) and the code must allocate a new Integer. + uint32_t value_boot_image_reference; + + // The cache array data used for a non-constant input in the cache range. + // If the input is out of range, the code must allocate a new Integer. 
+ uint32_t array_data_boot_image_reference; + }; }; - static IntegerValueOfInfo ComputeIntegerValueOfInfo(); + static IntegerValueOfInfo ComputeIntegerValueOfInfo( + HInvoke* invoke, const CompilerOptions& compiler_options); protected: IntrinsicVisitor() {} + static void AssertNonMovableStringClass(); + private: DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor); }; @@ -211,7 +198,6 @@ class StringEqualsOptimizations : public IntrinsicOptimizations { INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0); INTRINSIC_OPTIMIZATION(ArgumentIsString, 1); - INTRINSIC_OPTIMIZATION(NoReadBarrierForStringClass, 2); private: DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations); @@ -255,17 +241,33 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU // Defines a list of unreached intrinsics: that is, method calls that are recognized as // an intrinsic, and then always converted into HIR instructions before they reach any -// architecture-specific intrinsics code generator. +// architecture-specific intrinsics code generator. This only applies to non-baseline +// compilation. #define UNREACHABLE_INTRINSIC(Arch, Name) \ void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke) { \ - LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ - << " should have been converted to HIR"; \ + if (Runtime::Current()->IsAotCompiler() && \ + !codegen_->GetCompilerOptions().IsBaseline()) { \ + LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ + << " should have been converted to HIR"; \ + } \ } \ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ << " should have been converted to HIR"; \ } #define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \ +UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \ UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 0e6485be9f..ec5d17a443 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -112,7 +112,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { explicit IntrinsicSlowPathARM64(HInvoke* invoke) : SlowPathCodeARM64(invoke), invoke_(invoke) { } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); __ Bind(GetEntryLabel()); @@ -145,7 +145,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; } + const char* GetDescription() const override { return "IntrinsicSlowPathARM64"; } private: // The instruction where this slow path is happening. 
@@ -163,7 +163,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -216,7 +216,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; } + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; } private: Location tmp_; @@ -272,10 +272,10 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke } void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -286,10 +286,10 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); } void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenNumberOfLeadingZeros(LocationSummary* locations, DataType::Type type, MacroAssembler* masm) { @@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { - Location in = locations->InAt(0); - Location out = locations->Out(); - - FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in); - FPRegister out_reg = is64bit ? 
DRegisterFrom(out) : SRegisterFrom(out); - - __ Fabs(out_reg, in_reg); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - MacroAssembler* masm) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in); - Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output); - - __ Cmp(in_reg, Operand(0)); - __ Cneg(out_reg, in_reg, lt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1); - FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2); - FPRegister out_reg = is_double ? 
DRegisterFrom(out) : SRegisterFrom(out); - if (is_min) { - __ Fmin(out_reg, op1_reg, op2_reg); - } else { - __ Fmax(out_reg, op1_reg, op2_reg); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - bool is_long, - MacroAssembler* masm) { - Location op1 = locations->InAt(0); - Location op2 = locations->InAt(1); - Location out = locations->Out(); - - Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1); - Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2); - Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out); - - __ Cmp(op1_reg, op2_reg); - __ Csel(out_reg, op1_reg, op2_reg, is_min ? 
lt : gt); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler()); -} - -void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler()); -} - void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } @@ -788,7 +618,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { - GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler()); + GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { @@ -796,7 +626,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { - GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler()); + GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) { @@ -915,20 +745,20 @@ static void GenUnsafeGet(HInvoke* invoke, if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. Register temp = WRegisterFrom(locations->GetTemp(0)); - codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, - trg_loc, - base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, - temp, - /* needs_null_check */ false, - is_volatile); + MacroAssembler* masm = codegen->GetVIXLAssembler(); + // Piggy-back on the field load path using introspection for the Baker read barrier. + __ Add(temp, base, offset.W()); // Offset should not exceed 32 bits. + codegen->GenerateFieldLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + MemOperand(temp.X()), + /* needs_null_check= */ false, + is_volatile); } else { // Other cases. MemOperand mem_op(base.X(), offset); if (is_volatile) { - codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true); + codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true); } else { codegen->Load(type, trg, mem_op); } @@ -952,9 +782,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* in kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
- // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + // We need a temporary register for the read barrier load in order to use + // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(). + locations->AddTemp(FixedTempLocation()); } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); @@ -983,22 +813,22 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invok } void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { @@ -1066,7 +896,7 @@ static void GenUnsafePut(HInvoke* invoke, } if (is_volatile || is_ordered) { - codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false); + codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false); } else { codegen->Store(type, source, mem_op); } @@ -1081,64 +911,64 @@ static void GenUnsafePut(HInvoke* invoke, void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void 
IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke, DataType::Type::kInt64, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1154,106 +984,155 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. Likewise when - // emitting a (Baker) read barrier, which may call. - Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap; - locations->SetOut(Location::RequiresRegister(), overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // Temporary register for (Baker) read barrier. + // We need two non-scratch temporary registers for (Baker) read barrier. + locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); } } +class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke) + : SlowPathCodeARM64(invoke) {} + + const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + Arm64Assembler* assembler = arm64_codegen->GetAssembler(); + MacroAssembler* masm = assembler->GetVIXLAssembler(); + __ Bind(GetEntryLabel()); + + // Get the locations. + LocationSummary* locations = instruction_->GetLocations(); + Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. + Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Register expected = WRegisterFrom(locations->InAt(3)); // Expected. + Register value = WRegisterFrom(locations->InAt(4)); // Value. 
+ + Register old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. + Register marked = WRegisterFrom(locations->GetTemp(1)); // The marked old value. + + // Mark the `old_value` from the main path and compare with `expected`. This clobbers the + // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary. + arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value); + __ Cmp(marked, expected); + __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. + + // The `old_value` we have read did not match `expected` (which is always a to-space reference) + // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked + // to-space value matched, so the `old_value` must be a from-space reference to the same + // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked + // old value representing the to-space and from-space references for the same object. + + UseScratchRegisterScope temps(masm); + Register tmp_ptr = temps.AcquireX(); + Register tmp = temps.AcquireSameSizeAs(value); + + // Recalculate the `tmp_ptr` clobbered above. + __ Add(tmp_ptr, base.X(), Operand(offset)); + + // do { + // tmp_value = [tmp_ptr]; + // } while ((tmp_value == expected || tmp == old_value) && failure([tmp_ptr] <- r_new_value)); + // result = (tmp_value == expected || tmp == old_value); + + vixl::aarch64::Label loop_head; + __ Bind(&loop_head); + __ Ldaxr(tmp, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(tmp); + __ Cmp(tmp, expected); + __ Ccmp(tmp, old_value, ZFlag, ne); + __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. + assembler->MaybePoisonHeapReference(value); + __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(value); + __ Cbnz(tmp.W(), &loop_head); + + // Z=true from the above CMP+CCMP indicates success. + __ B(GetExitLabel()); + } +}; + static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) { - MacroAssembler* masm = codegen->GetVIXLAssembler(); + Arm64Assembler* assembler = codegen->GetAssembler(); + MacroAssembler* masm = assembler->GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out_loc = locations->Out(); - Register out = WRegisterFrom(out_loc); // Boolean result. - - Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. - Location offset_loc = locations->InAt(2); - Register offset = XRegisterFrom(offset_loc); // Long offset. - Register expected = RegisterFrom(locations->InAt(3), type); // Expected. - Register value = RegisterFrom(locations->InAt(4), type); // Value. + Register out = WRegisterFrom(locations->Out()); // Boolean result. + Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. + Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. + Register expected = RegisterFrom(locations->InAt(3), type); // Expected. + Register value = RegisterFrom(locations->InAt(4), type); // Value. // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps. if (type == DataType::Type::kReference) { // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); - - // The only read barrier implementation supporting the - // UnsafeCASObject intrinsic is the Baker-style read barriers. 
- DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Register temp = WRegisterFrom(locations->GetTemp(0)); - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->UpdateReferenceFieldWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* field_offset */ offset_loc, - temp, - /* needs_null_check */ false, - /* use_load_acquire */ false); - } } UseScratchRegisterScope temps(masm); Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. - Register tmp_value = temps.AcquireSameSizeAs(value); // Value in memory. + Register old_value; // Value in memory. - Register tmp_32 = tmp_value.W(); + vixl::aarch64::Label exit_loop_label; + vixl::aarch64::Label* exit_loop = &exit_loop_label; + vixl::aarch64::Label* failure = &exit_loop_label; - __ Add(tmp_ptr, base.X(), Operand(offset)); + if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(kUseBakerReadBarrier); - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - codegen->GetAssembler()->PoisonHeapReference(value); - } + BakerReadBarrierCasSlowPathARM64* slow_path = + new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke); + codegen->AddSlowPath(slow_path); + exit_loop = slow_path->GetExitLabel(); + failure = slow_path->GetEntryLabel(); + // We need to store the `old_value` in a non-scratch register to make sure + // the Baker read barrier in the slow path does not clobber it. + old_value = WRegisterFrom(locations->GetTemp(0)); + } else { + old_value = temps.AcquireSameSizeAs(value); } + __ Add(tmp_ptr, base.X(), Operand(offset)); + // do { - // tmp_value = [tmp_ptr] - expected; - // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); - // result = tmp_value != 0; + // tmp_value = [tmp_ptr]; + // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value)); + // result = tmp_value == expected; - vixl::aarch64::Label loop_head, exit_loop; + vixl::aarch64::Label loop_head; __ Bind(&loop_head); - __ Ldaxr(tmp_value, MemOperand(tmp_ptr)); - __ Cmp(tmp_value, expected); - __ B(&exit_loop, ne); - __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); - __ Cbnz(tmp_32, &loop_head); - __ Bind(&exit_loop); - __ Cset(out, eq); - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - codegen->GetAssembler()->UnpoisonHeapReference(value); - } + __ Ldaxr(old_value, MemOperand(tmp_ptr)); + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(old_value); + } + __ Cmp(old_value, expected); + __ B(failure, ne); + if (type == DataType::Type::kReference) { + assembler->MaybePoisonHeapReference(value); } + __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr)); // Reuse `old_value` for STLXR result. 
+ if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(value); + } + __ Cbnz(old_value.W(), &loop_head); + __ Bind(exit_loop); + __ Cset(out, eq); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { @@ -1519,13 +1398,6 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt } void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1592,8 +1464,16 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp = str->klass_ __ Ldr(temp, MemOperand(str.X(), class_offset)); + // /* HeapReference<Class> */ temp1 = arg->klass_ __ Ldr(temp1, MemOperand(arg.X(), class_offset)); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. __ Cmp(temp, temp1); __ B(&return_false, ne); } @@ -1766,7 +1646,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true); + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { @@ -1782,7 +1662,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false); + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2584,8 +2464,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // Bail out if the source is not a non primitive array. 
// /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, @@ -2593,8 +2473,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2610,8 +2490,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { dest.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. @@ -2627,8 +2507,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2646,8 +2526,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. __ Cmp(temp1, temp2); @@ -2660,8 +2540,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // /* HeapReference<Class> */ temp1 = temp1->super_class_ // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a @@ -2744,16 +2624,16 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { src.W(), class_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); // /* HeapReference<Class> */ temp2 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, temp2_loc, temp1, component_offset, temp3_loc, - /* needs_null_check */ false, - /* use_load_acquire */ false); + /* needs_null_check= */ false, + /* use_load_acquire= */ false); __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2860,7 +2740,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { codegen_->AddSlowPath(read_barrier_slow_path); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); @@ -2907,7 +2787,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false); + codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -2940,7 +2820,7 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2948,7 +2828,7 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { @@ -2961,33 +2841,27 @@ void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); MacroAssembler* masm = GetVIXLAssembler(); Register out = RegisterFrom(locations->Out(), DataType::Type::kReference); UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); - InvokeRuntimeCallingConvention calling_convention; - Register argument = calling_convention.GetRegisterAt(0); if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
- uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Mov(temp.W(), value); __ Str(temp.W(), HeapOperand(out.W(), info.value_offset)); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation @@ -2995,16 +2869,15 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32); // Check bounds of our cache. __ Add(out.W(), in.W(), -info.low); - __ Cmp(out.W(), info.high - info.low + 1); + __ Cmp(out.W(), info.length); vixl::aarch64::Label allocate, done; __ B(&allocate, hs); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); MemOperand source = HeapOperand( temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference)); codegen_->Load(DataType::Type::kReference, out, source); @@ -3012,10 +2885,8 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { __ B(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Str(in.W(), HeapOperand(out.W(), info.value_offset)); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. @@ -3053,6 +2924,251 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +// Lower the invoke of CRC32.update(int crc, int b). 
+void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + MacroAssembler* masm = GetVIXLAssembler(); + + Register crc = InputRegisterAt(invoke, 0); + Register val = InputRegisterAt(invoke, 1); + Register out = OutputRegister(invoke); + + // The general algorithm of the CRC32 calculation is: + // crc = ~crc + // result = crc32_for_byte(crc, b) + // crc = ~result + // It is directly lowered to three instructions. + + UseScratchRegisterScope temps(masm); + Register tmp = temps.AcquireSameSizeAs(out); + + __ Mvn(tmp, crc); + __ Crc32b(tmp, tmp, val); + __ Mvn(out, tmp); +} + +// Generate code using CRC32 instructions which calculates +// a CRC32 value of a byte. +// +// Parameters: +// masm - VIXL macro assembler +// crc - a register holding an initial CRC value +// ptr - a register holding a memory address of bytes +// length - a register holding a number of bytes to process +// out - a register to put a result of calculation +static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm, + const Register& crc, + const Register& ptr, + const Register& length, + const Register& out) { + // The algorithm of CRC32 of bytes is: + // crc = ~crc + // process a few first bytes to make the array 8-byte aligned + // while array has 8 bytes do: + // crc = crc32_of_8bytes(crc, 8_bytes(array)) + // if array has 4 bytes: + // crc = crc32_of_4bytes(crc, 4_bytes(array)) + // if array has 2 bytes: + // crc = crc32_of_2bytes(crc, 2_bytes(array)) + // if array has a byte: + // crc = crc32_of_byte(crc, 1_byte(array)) + // crc = ~crc + + vixl::aarch64::Label loop, done; + vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte; + vixl::aarch64::Label aligned2, aligned4, aligned8; + + // Use VIXL scratch registers as the VIXL macro assembler won't use them in + // instructions below. + UseScratchRegisterScope temps(masm); + Register len = temps.AcquireW(); + Register array_elem = temps.AcquireW(); + + __ Mvn(out, crc); + __ Mov(len, length); + + __ Tbz(ptr, 0, &aligned2); + __ Subs(len, len, 1); + __ B(&done, lo); + __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex)); + __ Crc32b(out, out, array_elem); + + __ Bind(&aligned2); + __ Tbz(ptr, 1, &aligned4); + __ Subs(len, len, 2); + __ B(&process_1byte, lo); + __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex)); + __ Crc32h(out, out, array_elem); + + __ Bind(&aligned4); + __ Tbz(ptr, 2, &aligned8); + __ Subs(len, len, 4); + __ B(&process_2bytes, lo); + __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex)); + __ Crc32w(out, out, array_elem); + + __ Bind(&aligned8); + __ Subs(len, len, 8); + // If len < 8 go to process data by 4 bytes, 2 bytes and a byte. + __ B(&process_4bytes, lo); + + // The main loop processing data by 8 bytes. + __ Bind(&loop); + __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex)); + __ Subs(len, len, 8); + __ Crc32x(out, out, array_elem.X()); + // if len >= 8, process the next 8 bytes. + __ B(&loop, hs); + + // Process the data which is less than 8 bytes. + // The code generated below works with values of len + // which come in the range [-8, 0]. + // The first three bits are used to detect whether 4 bytes or 2 bytes or + // a byte can be processed. 
+ // The checking order is from bit 2 to bit 0: + // bit 2 is set: at least 4 bytes available + // bit 1 is set: at least 2 bytes available + // bit 0 is set: at least a byte available + __ Bind(&process_4bytes); + // Goto process_2bytes if less than four bytes available + __ Tbz(len, 2, &process_2bytes); + __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex)); + __ Crc32w(out, out, array_elem); + + __ Bind(&process_2bytes); + // Goto process_1bytes if less than two bytes available + __ Tbz(len, 1, &process_1byte); + __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex)); + __ Crc32h(out, out, array_elem); + + __ Bind(&process_1byte); + // Goto done if no bytes available + __ Tbz(len, 0, &done); + __ Ldrb(array_elem, MemOperand(ptr)); + __ Crc32b(out, out, array_elem); + + __ Bind(&done); + __ Mvn(out, out); +} + +// The threshold for sizes of arrays to use the library provided implementation +// of CRC32.updateBytes instead of the intrinsic. +static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024; + +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2))); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len) +// +// Note: The intrinsic is not used if len exceeds a threshold. 
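The helper GenerateCodeForCalculationCRC32ValueOfBytes above follows the shape sketched in its comment: invert, consume unaligned leading bytes until the pointer is 8-byte aligned, run the 8-byte main loop, finish with the 4/2/1-byte tail, and invert again; arrays above the 64 KiB threshold are handed to the library implementation via the slow path. A rough C++ model of that control flow — byte-at-a-time software updates stand in for CRC32B/H/W/X, and where the real code jumps straight to the tail when too few bytes remain, this model simply skips the step, which yields the same checksum:

    #include <cstddef>
    #include <cstdint>

    // Byte-at-a-time software update, standing in for CRC32B/CRC32H/CRC32W/CRC32X.
    static uint32_t Crc32Acc(uint32_t acc, const uint8_t* p, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        acc ^= p[i];
        for (int bit = 0; bit < 8; ++bit) {
          acc = (acc >> 1) ^ ((acc & 1u) ? 0xEDB88320u : 0u);
        }
      }
      return acc;
    }

    // Control-flow model of the emitted code: invert, align to 8 bytes, consume
    // 8-byte blocks, then the 4/2/1-byte tail, and invert again.
    static uint32_t Crc32Bytes(uint32_t crc, const uint8_t* ptr, size_t len) {
      uint32_t acc = ~crc;
      // Leading 1/2/4 bytes until `ptr` is 8-byte aligned (Tbz on bits 0, 1, 2).
      const size_t kPrefixSteps[3] = {1, 2, 4};
      for (size_t step : kPrefixSteps) {
        if ((reinterpret_cast<uintptr_t>(ptr) & step) != 0 && len >= step) {
          acc = Crc32Acc(acc, ptr, step);
          ptr += step;
          len -= step;
        }
      }
      // Main loop: 8 bytes per iteration (CRC32X in the generated code).
      for (; len >= 8; ptr += 8, len -= 8) {
        acc = Crc32Acc(acc, ptr, 8);
      }
      // Tail: at most 4 + 2 + 1 remaining bytes (CRC32W/CRC32H/CRC32B).
      const size_t kTailSteps[3] = {4, 2, 1};
      for (size_t step : kTailSteps) {
        if (len >= step) {
          acc = Crc32Acc(acc, ptr, step);
          ptr += step;
          len -= step;
        }
      }
      return ~acc;
    }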
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + + Register length = WRegisterFrom(locations->InAt(3)); + __ Cmp(length, kCRC32UpdateBytesThreshold); + __ B(slow_path->GetEntryLabel(), hi); + + const uint32_t array_data_offset = + mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value(); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + Register array = XRegisterFrom(locations->InAt(1)); + Location offset = locations->InAt(2); + if (offset.IsConstant()) { + int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue(); + __ Add(ptr, array, array_data_offset + offset_value); + } else { + __ Add(ptr, array, array_data_offset); + __ Add(ptr, ptr, XRegisterFrom(offset)); + } + + Register crc = WRegisterFrom(locations->InAt(0)); + Register out = WRegisterFrom(locations->Out()); + + GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); + + __ Bind(slow_path->GetExitLabel()); +} + +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len) +// +// There is no need to generate code checking if addr is 0. +// The method updateByteBuffer is a private method of java.util.zip.CRC32. +// This guarantees no calls outside of the CRC32 class. +// An address of DirectBuffer is always passed to the call of updateByteBuffer. +// It might be an implementation of an empty DirectBuffer which can use a zero +// address but it must have the length to be zero. The current generated code +// correctly works with the zero length. 
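Both entry points feed the same helper; the updateBytes path above and the updateByteBuffer path that follows differ only in how the data pointer is formed. A small sketch of the two address computations — parameter names are illustrative, and `array_data_offset` corresponds to mirror::Array::DataOffset for byte arrays:

    #include <cstdint>

    // CRC32.updateBytes(crc, byte[] b, off, len): the data lives inside the array
    // object, after the array header.
    static const uint8_t* UpdateBytesPtr(const uint8_t* array_obj,
                                         uint32_t array_data_offset,  // header size
                                         int32_t off) {
      return array_obj + array_data_offset + off;
    }

    // CRC32.updateByteBuffer(crc, long addr, off, len): `addr` is already a raw
    // native address (from a DirectBuffer), so only the offset is added.
    static const uint8_t* UpdateByteBufferPtr(uint64_t addr, int32_t off) {
      return reinterpret_cast<const uint8_t*>(addr + static_cast<uint64_t>(off));
    }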
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + MacroAssembler* masm = GetVIXLAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register addr = XRegisterFrom(locations->InAt(1)); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + __ Add(ptr, addr, XRegisterFrom(locations->InAt(2))); + + Register crc = WRegisterFrom(locations->InAt(0)); + Register length = WRegisterFrom(locations->InAt(3)); + Register out = WRegisterFrom(locations->Out()); + GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); +} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 033a644f34..9c46efddec 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -37,7 +37,7 @@ namespace arm64 { class CodeGeneratorARM64; -class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) : allocator_(allocator), codegen_(codegen) {} @@ -45,7 +45,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -63,14 +63,14 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64); }; -class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorARM64(CodeGeneratorARM64* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 97a145664c..f0aa92e981 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -25,7 +25,7 @@ #include "mirror/array-inl.h" #include "mirror/object_array-inl.h" #include "mirror/reference.h" -#include "mirror/string.h" +#include "mirror/string-inl.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" @@ -41,19 +41,15 @@ using helpers::HighRegisterFrom; using helpers::InputDRegisterAt; using helpers::InputRegisterAt; using helpers::InputSRegisterAt; -using helpers::InputVRegisterAt; using helpers::Int32ConstantFrom; using helpers::LocationFrom; using helpers::LowRegisterFrom; using helpers::LowSRegisterFrom; using helpers::HighSRegisterFrom; using helpers::OutputDRegister; -using helpers::OutputSRegister; using helpers::OutputRegister; -using helpers::OutputVRegister; using helpers::RegisterFrom; using helpers::SRegisterFrom; -using helpers::DRegisterFromS; using namespace vixl::aarch32; // NOLINT(build/namespaces) @@ -89,7 +85,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { return calling_convention_visitor.GetMethodLocation(); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler()); __ Bind(GetEntryLabel()); @@ -115,7 +111,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } + const char* GetDescription() const override { return "IntrinsicSlowPath"; } private: // The instruction where this slow path is happening. 
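The intrinsics_arm64.h hunks above only swap ART's FINAL and OVERRIDE macro spellings for the plain C++11 keywords. A tiny compilable sketch of what the keywords provide, assuming the macros expanded directly to `final` and `override`:

    struct IntrinsicVisitorBase {
      virtual ~IntrinsicVisitorBase() = default;
      virtual void Visit() {}
    };

    // `final` forbids further derivation; `override` catches signature drift at
    // compile time - the same diagnostics the macro spellings used to give.
    struct ExampleVisitor final : IntrinsicVisitorBase {
      void Visit() override {}
    };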
@@ -177,7 +173,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); LocationSummary* locations = instruction_->GetLocations(); @@ -233,11 +229,11 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { assembler->MaybePoisonHeapReference(tmp); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); __ Cmp(src_curr_addr, src_stop_addr); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARMVIXL"; } @@ -302,10 +298,10 @@ void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invo } void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -316,10 +312,10 @@ void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -359,7 +355,7 @@ static void GenNumberOfLeadingZeros(HInvoke* invoke, vixl32::Label end; vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false); __ Clz(out, in_reg_lo); __ Add(out, out, 32); if (end.IsReferenced()) { @@ -402,7 +398,7 @@ static void GenNumberOfTrailingZeros(HInvoke* invoke, vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Rbit(out, in_reg_lo); __ Clz(out, out); - __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false); __ Rbit(out, in_reg_hi); __ Clz(out, out); __ Add(out, out, 32); @@ -432,341 +428,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } -static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { - __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0)); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - 
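The removed GenAbsInteger a little further below computes integer Math.abs without a branch, using the sign-mask identity: with mask = x >> 31 (arithmetic shift), abs(x) = (x + mask) ^ mask, extended to 64-bit register pairs with ADDS/ADC. A standalone sketch of the same trick, using unsigned arithmetic to mirror the hardware wrap-around (so abs(INT_MIN) == INT_MIN, as in Java):

    #include <cstdint>

    // mask is 0 for non-negative x and all-ones for negative x, so
    // (x + mask) ^ mask is x itself for x >= 0 and ~(x - 1) == -x for x < 0.
    static int32_t AbsInt32(int32_t x) {
      const uint32_t mask = static_cast<uint32_t>(x >> 31);
      return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
    }

    static int64_t AbsInt64(int64_t x) {
      const uint64_t mask = static_cast<uint64_t>(x >> 63);
      return static_cast<int64_t>((static_cast<uint64_t>(x) + mask) ^ mask);
    }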
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, GetAssembler()); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, - bool is64bit, - ArmVIXLAssembler* assembler) { - Location in = locations->InAt(0); - Location output = locations->Out(); - - vixl32::Register mask = RegisterFrom(locations->GetTemp(0)); - - if (is64bit) { - vixl32::Register in_reg_lo = LowRegisterFrom(in); - vixl32::Register in_reg_hi = HighRegisterFrom(in); - vixl32::Register out_reg_lo = LowRegisterFrom(output); - vixl32::Register out_reg_hi = HighRegisterFrom(output); - - DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected."; - - __ Asr(mask, in_reg_hi, 31); - __ Adds(out_reg_lo, in_reg_lo, mask); - __ Adc(out_reg_hi, in_reg_hi, mask); - __ Eor(out_reg_lo, mask, out_reg_lo); - __ Eor(out_reg_hi, mask, out_reg_hi); - } else { - vixl32::Register in_reg = RegisterFrom(in); - vixl32::Register out_reg = RegisterFrom(output); - - __ Asr(mask, in_reg, 31); - __ Add(out_reg, in_reg, mask); - __ Eor(out_reg, mask, out_reg); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - - -void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::SRegister op1 = SRegisterFrom(op1_loc); - vixl32::SRegister op2 = SRegisterFrom(op2_loc); - vixl32::SRegister out = OutputSRegister(invoke); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp1 = temps.Acquire(); - vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); - vixl32::Label nan, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? 
gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F32, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). - __ Vmov(temp1, op1); - __ Vmov(temp2, op2); - if (is_min) { - __ Orr(temp1, temp1, temp2); - } else { - __ And(temp1, temp1, temp2); - } - __ Vmov(out, temp1); - __ B(final_label); - - // handle NaN input. - __ Bind(&nan); - __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. - __ Vmov(out, temp1); - - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); - invoke->GetLocations()->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { - ArmVIXLAssembler* assembler = codegen->GetAssembler(); - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in. - return; - } - - vixl32::DRegister op1 = DRegisterFrom(op1_loc); - vixl32::DRegister op2 = DRegisterFrom(op2_loc); - vixl32::DRegister out = OutputDRegister(invoke); - vixl32::Label handle_nan_eq, done; - vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); - - DCHECK(op1.Is(out)); - - __ Vcmp(op1, op2); - __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); - __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling. - - // op1 <> op2 - vixl32::ConditionType cond = is_min ? gt : lt; - { - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cond); - __ vmov(cond, F64, out, op2); - } - // for <>(not equal), we've done min/max calculation. - __ B(ne, final_label, /* far_target */ false); - - // handle op1 == op2, max(+0.0,-0.0). - if (!is_min) { - __ Vand(F64, out, op1, op2); - __ B(final_label); - } - - // handle op1 == op2, min(+0.0,-0.0), NaN input. - __ Bind(&handle_nan_eq); - __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. 
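The equal-operands and NaN handling in the removed GenMinMaxFloat/GenMinMaxDouble above works on raw bit patterns: OR-ing the operands makes -0.0 win for min, AND-ing makes +0.0 win for max, and the double path OR-s a NaN operand through (the float path instead materializes a canonical 0x7FC0xxxx NaN). A small self-contained check of those identities:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    static uint32_t Bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
    static float FromBits(uint32_t u) { float f; std::memcpy(&f, &u, sizeof(f)); return f; }

    int main() {
      const uint32_t pz = Bits(0.0f);   // +0.0f -> 0x00000000
      const uint32_t nz = Bits(-0.0f);  // -0.0f -> 0x80000000
      // min path (Orr): OR of the two patterns is -0.0, so min(+0.0, -0.0) == -0.0.
      assert(std::signbit(FromBits(pz | nz)));
      // max path (And): AND of the two patterns is +0.0, so max(+0.0, -0.0) == +0.0.
      assert(!std::signbit(FromBits(pz & nz)));
      // NaN path (Vorr with a NaN operand): the exponent stays all-ones and the
      // mantissa stays non-zero, so the result is still a NaN.
      const uint32_t nan = Bits(std::numeric_limits<float>::quiet_NaN());
      assert(std::isnan(FromBits(nan | nz)));
      return 0;
    }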
- - if (done.IsReferenced()) { - __ Bind(&done); - } -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , codegen_); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, codegen_); -} - -static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - Location op1_loc = invoke->GetLocations()->InAt(0); - Location op2_loc = invoke->GetLocations()->InAt(1); - Location out_loc = invoke->GetLocations()->Out(); - - // Optimization: don't generate any code if inputs are the same. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder. - return; - } - - vixl32::Register op1_lo = LowRegisterFrom(op1_loc); - vixl32::Register op1_hi = HighRegisterFrom(op1_loc); - vixl32::Register op2_lo = LowRegisterFrom(op2_loc); - vixl32::Register op2_hi = HighRegisterFrom(op2_loc); - vixl32::Register out_lo = LowRegisterFrom(out_loc); - vixl32::Register out_hi = HighRegisterFrom(out_loc); - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp = temps.Acquire(); - - DCHECK(op1_lo.Is(out_lo)); - DCHECK(op1_hi.Is(out_hi)); - - // Compare op1 >= op2, or op1 < op2. - __ Cmp(out_lo, op2_lo); - __ Sbcs(temp, out_hi, op2_hi); - - // Now GE/LT condition code is correct for the long comparison. - { - vixl32::ConditionType cond = is_min ? ge : lt; - ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ itt(cond); - __ mov(cond, out_lo, op2_lo); - __ mov(cond, out_hi, op2_hi); - } -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMaxLong(invoke, /* is_min */ false, GetAssembler()); -} - -static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { - vixl32::Register op1 = InputRegisterAt(invoke, 0); - vixl32::Register op2 = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); - - __ Cmp(op1, op2); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ ite(is_min ? lt : gt); - __ mov(is_min ? lt : gt, out, op1); - __ mov(is_min ? 
ge : le, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke, /* is_min */ false, GetAssembler()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { CreateFPToFPLocations(allocator_, invoke); } @@ -785,7 +446,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) { void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); ArmVIXLAssembler* assembler = GetAssembler(); - __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); + __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { @@ -815,12 +476,12 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { // For positive, zero or NaN inputs, rounding is done. __ Cmp(out_reg, 0); - __ B(ge, final_label, /* far_target */ false); + __ B(ge, final_label, /* is_far_target= */ false); // Handle input < 0 cases. // If input is negative but not a tie, previous result (round to nearest) is valid. // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1. - __ Vrinta(F32, F32, temp1, in_reg); + __ Vrinta(F32, temp1, in_reg); __ Vmov(temp2, 0.5); __ Vsub(F32, temp1, in_reg, temp1); __ Vcmp(F32, temp1, temp2); @@ -977,8 +638,11 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); + // Piggy-back on the field load path using introspection for the Baker read barrier. 
+ __ Add(RegisterFrom(temp), base, Operand(offset)); + MemOperand src(RegisterFrom(temp), 0); + codegen->GenerateFieldLoadWithBakerReadBarrier( + invoke, trg_loc, base, src, /* needs_null_check= */ false); if (is_volatile) { __ Dmb(vixl32::ISH); } @@ -1069,22 +733,22 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* inv } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, @@ -1114,39 +778,39 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ true, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kReference, /* 
is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kReference, /* is_volatile= */ true, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ true, invoke); } static void GenUnsafePut(LocationSummary* locations, @@ -1180,7 +844,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg)); __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg)); __ Cmp(temp_lo, 0); - __ B(ne, &loop_head, /* far_target */ false); + __ B(ne, &loop_head, /* is_far_target= */ false); } else { __ Strd(value_lo, value_hi, MemOperand(base, offset)); } @@ -1211,70 +875,68 @@ static void GenUnsafePut(LocationSummary* locations, void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } void 
IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, - HInvoke* invoke, - DataType::Type type) { +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); @@ -1284,20 +946,16 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall, kIntrinsified); + if (can_call) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // If heap poisoning is enabled, we don't want the unpoisoning - // operations to potentially clobber the output. Likewise when - // emitting a (Baker) read barrier, which may call. - Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) - ? Location::kOutputOverlap - : Location::kNoOutputOverlap; - locations->SetOut(Location::RequiresRegister(), overlaps); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); // Temporary registers used in CAS. In the object case // (UnsafeCASObject intrinsic), these are also used for @@ -1306,24 +964,92 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, locations->AddTemp(Location::RequiresRegister()); // Temp 1. } +class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke) + : SlowPathCodeARMVIXL(invoke) {} + + const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARMVIXL"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + ArmVIXLAssembler* assembler = arm_codegen->GetAssembler(); + __ Bind(GetEntryLabel()); + + LocationSummary* locations = instruction_->GetLocations(); + vixl32::Register base = InputRegisterAt(instruction_, 1); // Object pointer. + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B). + vixl32::Register expected = InputRegisterAt(instruction_, 3); // Expected. + vixl32::Register value = InputRegisterAt(instruction_, 4); // Value. + + vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory. + vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary. + + // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct + // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the + // process due to lack of other temps suitable for the read barrier. 
+ arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected); + __ Cmp(tmp_ptr, expected); + __ B(ne, GetExitLabel()); + + // The old value we have read did not match `expected` (which is always a to-space reference) + // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked + // to-space value matched, so the old value must be a from-space reference to the same object. + // Do the same CAS loop as the main path but check for both `expected` and the unmarked + // old value representing the to-space and from-space references for the same object. + + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register adjusted_old_value = temps.Acquire(); // For saved `tmp` from main path. + + // Recalculate the `tmp_ptr` clobbered above and store the `adjusted_old_value`, i.e. IP. + __ Add(tmp_ptr, base, offset); + __ Mov(adjusted_old_value, tmp); + + // do { + // tmp = [r_ptr] - expected; + // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value)); + // result = (tmp == 0 || tmp == adjusted_old_value); + + vixl32::Label loop_head; + __ Bind(&loop_head); + __ Ldrex(tmp, MemOperand(tmp_ptr)); // This can now load null stored by another thread. + assembler->MaybeUnpoisonHeapReference(tmp); + __ Subs(tmp, tmp, expected); // Use SUBS to get non-zero value if both compares fail. + { + // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`. + ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes); + __ it(ne); + __ cmp(ne, tmp, adjusted_old_value); + } + __ B(ne, GetExitLabel()); + assembler->MaybePoisonHeapReference(value); + __ Strex(tmp, value, MemOperand(tmp_ptr)); + assembler->MaybeUnpoisonHeapReference(value); + __ Cmp(tmp, 0); + __ B(ne, &loop_head, /* is_far_target= */ false); + __ B(GetExitLabel()); + } +}; + static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) { DCHECK_NE(type, DataType::Type::kInt64); ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(invoke); // Boolean result. vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer. - Location offset_loc = locations->InAt(2); - vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B). + vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B). vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected. vixl32::Register value = InputRegisterAt(invoke, 4); // Value. - Location tmp_ptr_loc = locations->GetTemp(0); - vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory. - vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory. + vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory. + vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary. 
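Stripped of heap poisoning and read barriers, the LDREX/SUBS/STREX sequence GenCas emits below behaves like a weak compare-and-swap that is retried on spurious store failures, while the Baker slow path above additionally accepts a from-space alias of `expected`. A rough C++ model, with std::atomic standing in for the exclusive monitor; `from_space_alias` is an illustrative parameter, not an ART API:

    #include <atomic>
    #include <cstdint>

    // Main path: result = (old value at ptr == expected), with the store retried
    // while the exclusive store (STREX) fails spuriously.
    static bool CasMainPath(std::atomic<int32_t>* ptr, int32_t expected, int32_t value) {
      int32_t old = expected;
      while (!ptr->compare_exchange_weak(old, value)) {
        if (old != expected) {
          return false;  // genuine mismatch -> fail (or enter the slow path for references)
        }
        old = expected;  // spurious failure -> retry, as the STREX/CMP/B(ne) loop does
      }
      return true;
    }

    // Baker slow path for UnsafeCASObject: the field may still hold the from-space
    // address of the same object, so the CAS succeeds if the current value is
    // either `expected` (to-space) or its unmarked from-space alias.
    static bool CasSlowPath(std::atomic<uintptr_t>* field,
                            uintptr_t expected,
                            uintptr_t from_space_alias,
                            uintptr_t value) {
      uintptr_t old = field->load();
      while (old == expected || old == from_space_alias) {
        if (field->compare_exchange_weak(old, value)) {
          return true;
        }
        // `old` now holds the freshly observed value; the loop re-checks it.
      }
      return false;
    }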
+ + vixl32::Label loop_exit_label; + vixl32::Label* loop_exit = &loop_exit_label; + vixl32::Label* failure = &loop_exit_label; if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the @@ -1336,87 +1062,63 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* c codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null); if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // Need to make sure the reference stored in the field is a to-space - // one before attempting the CAS or the CAS could fail incorrectly. - codegen->UpdateReferenceFieldWithBakerReadBarrier( - invoke, - out_loc, // Unused, used only as a "temporary" within the read barrier. - base, - /* field_offset */ offset_loc, - tmp_ptr_loc, - /* needs_null_check */ false, - tmp); + // If marking, check if the stored reference is a from-space reference to the same + // object as the to-space reference `expected`. If so, perform a custom CAS loop. + BakerReadBarrierCasSlowPathARMVIXL* slow_path = + new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke); + codegen->AddSlowPath(slow_path); + failure = slow_path->GetEntryLabel(); + loop_exit = slow_path->GetExitLabel(); } } // Prevent reordering with prior memory operations. // Emit a DMB ISH instruction instead of an DMB ISHST one, as the - // latter allows a preceding load to be delayed past the STXR + // latter allows a preceding load to be delayed past the STREX // instruction below. __ Dmb(vixl32::ISH); __ Add(tmp_ptr, base, offset); - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->PoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not poison `value`, as it is the same register as - // `expected`, which has just been poisoned. - } else { - codegen->GetAssembler()->PoisonHeapReference(value); - } - } - // do { // tmp = [r_ptr] - expected; // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - // result = tmp != 0; + // result = tmp == 0; vixl32::Label loop_head; __ Bind(&loop_head); - __ Ldrex(tmp, MemOperand(tmp_ptr)); - + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(tmp); + } __ Subs(tmp, tmp, expected); - - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 3 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ itt(eq); - __ strex(eq, tmp, value, MemOperand(tmp_ptr)); - __ cmp(eq, tmp, 1); + static_cast<vixl32::MacroAssembler*>(assembler->GetVIXLAssembler())-> + B(ne, failure, /* hint= */ (failure == loop_exit) ? kNear : kBranchWithoutHint); + if (type == DataType::Type::kReference) { + assembler->MaybePoisonHeapReference(value); + } + __ Strex(tmp, value, MemOperand(tmp_ptr)); + if (type == DataType::Type::kReference) { + assembler->MaybeUnpoisonHeapReference(value); } + __ Cmp(tmp, 0); + __ B(ne, &loop_head, /* is_far_target= */ false); - __ B(eq, &loop_head, /* far_target */ false); + __ Bind(loop_exit); __ Dmb(vixl32::ISH); - __ Rsbs(out, tmp, 1); + // out = tmp == 0. 
+ __ Clz(out, tmp); + __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits())); - { - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ it(cc); - __ mov(cc, out, 0); - } - - if (kPoisonHeapReferences && type == DataType::Type::kReference) { - codegen->GetAssembler()->UnpoisonHeapReference(expected); - if (value.Is(expected)) { - // Do not unpoison `value`, as it is the same register as - // `expected`, which has just been unpoisoned. - } else { - codegen->GetAssembler()->UnpoisonHeapReference(value); - } + if (type == DataType::Type::kReference) { + codegen->MaybeGenerateMarkingRegisterCheck(/* code= */ 128); } } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the @@ -1425,7 +1127,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { GenCas(invoke, DataType::Type::kInt32, codegen_); @@ -1606,23 +1308,23 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff, /* far_target */ false); + __ B(ne, &find_char_diff, /* is_far_target= */ false); __ Add(temp1, temp1, char_size * 2); __ Ldr(temp_reg, MemOperand(str, temp1)); __ Ldr(temp2, MemOperand(arg, temp1)); __ Cmp(temp_reg, temp2); - __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false); + __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false); __ Add(temp1, temp1, char_size * 2); // With string compression, we have compared 8 bytes, otherwise 4 chars. __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); - __ B(hi, &loop, /* far_target */ false); + __ B(hi, &loop, /* is_far_target= */ false); __ B(end); __ Bind(&find_char_diff_2nd_cmp); if (mirror::kUseStringCompression) { __ Subs(temp0, temp0, 4); // 4 bytes previously compared. - __ B(ls, end, /* far_target */ false); // Was the second comparison fully beyond the end? + __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end? } else { // Without string compression, we can start treating temp0 as signed // and rely on the signed comparison below. @@ -1650,7 +1352,7 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, // the remaining string data, so just return length diff (out). // The comparison is unsigned for string compression, otherwise signed. __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); - __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false); + __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false); // Extract the characters and calculate the difference. 
if (mirror::kUseStringCompression) { @@ -1717,9 +1419,9 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex)); __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex)); __ Cmp(temp_reg, temp3); - __ B(ne, &different_compression_diff, /* far_target */ false); + __ B(ne, &different_compression_diff, /* is_far_target= */ false); __ Subs(temp0, temp0, 2); - __ B(hi, &different_compression_loop, /* far_target */ false); + __ B(hi, &different_compression_loop, /* is_far_target= */ false); __ B(end); // Calculate the difference. @@ -1757,13 +1459,6 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt } void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -1822,22 +1517,30 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { StringEqualsOptimizations optimizations(invoke); if (!optimizations.GetArgumentNotNull()) { // Check if input is null, return false if it is. - __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false); + __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false); } // Reference equality check, return true if same reference. __ Cmp(str, arg); - __ B(eq, &return_true, /* far_target */ false); + __ B(eq, &return_true, /* is_far_target= */ false); if (!optimizations.GetArgumentIsString()) { // Instanceof check for the argument by comparing class fields. // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp = str->klass_ __ Ldr(temp, MemOperand(str, class_offset)); + // /* HeapReference<Class> */ out = arg->klass_ __ Ldr(out, MemOperand(arg, class_offset)); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. __ Cmp(temp, out); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } // Check if one of the inputs is a const string. Do not special-case both strings @@ -1860,7 +1563,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Also compares the compression style, if differs return false. __ Ldr(temp, MemOperand(arg, count_offset)); __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } else { // Load `count` fields of this and argument strings. 
__ Ldr(temp, MemOperand(str, count_offset)); @@ -1868,7 +1571,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Check if `count` fields are equal, return false if they're not. // Also compares the compression style, if differs return false. __ Cmp(temp, out); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } // Assertions that must hold in order to compare strings 4 bytes at a time. @@ -1891,9 +1594,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldrd(temp, temp1, MemOperand(str, offset)); __ Ldrd(temp2, out, MemOperand(arg, offset)); __ Cmp(temp, temp2); - __ B(ne, &return_false, /* far_label */ false); + __ B(ne, &return_false, /* is_far_target= */ false); __ Cmp(temp1, out); - __ B(ne, &return_false, /* far_label */ false); + __ B(ne, &return_false, /* is_far_target= */ false); offset += 2u * sizeof(uint32_t); remaining_bytes -= 2u * sizeof(uint32_t); } @@ -1901,13 +1604,13 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp, MemOperand(str, offset)); __ Ldr(out, MemOperand(arg, offset)); __ Cmp(temp, out); - __ B(ne, &return_false, /* far_label */ false); + __ B(ne, &return_false, /* is_far_target= */ false); } } else { // Return true if both strings are empty. Even with string compression `count == 0` means empty. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); - __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); + __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false); if (mirror::kUseStringCompression) { // For string compression, calculate the number of bytes to compare (not chars). @@ -1933,10 +1636,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { __ Ldr(temp2, MemOperand(arg, temp1)); __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); __ Cmp(out, temp2); - __ B(ne, &return_false, /* far_target */ false); + __ B(ne, &return_false, /* is_far_target= */ false); // With string compression, we have compared 4 bytes, otherwise 2 chars. __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); - __ B(hi, &loop, /* far_target */ false); + __ B(hi, &loop, /* is_far_target= */ false); } // Return true and exit the function. @@ -2017,7 +1720,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { @@ -2033,7 +1736,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2137,8 +1840,6 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. 
locations->AddTemp(Location::RequiresRegister()); - CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_); - arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } @@ -2257,7 +1958,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated, /* far_target */ false); + __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false); } __ Cmp(RegisterFrom(dest_pos), src_pos_constant); __ B(gt, intrinsic_slow_path->GetEntryLabel()); @@ -2265,7 +1966,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); - __ B(ne, &conditions_on_positions_validated, /* far_target */ false); + __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false); } if (dest_pos.IsConstant()) { int32_t dest_pos_constant = Int32ConstantFrom(dest_pos); @@ -2325,11 +2026,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (!optimizations.GetSourceIsNonPrimitiveArray()) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); // Bail out if the source is not a non primitive array. // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2341,7 +2042,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false); if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. @@ -2353,7 +2054,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // temporaries such a `temp1`. // /* HeapReference<Class> */ temp2 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false); + invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2367,16 +2068,16 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. 
// /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false); + invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. __ Cmp(temp1, temp2); if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy, /* far_target */ false); + __ B(eq, &do_copy, /* is_far_target= */ false); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); // /* HeapReference<Class> */ temp1 = temp1->super_class_ // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a @@ -2433,7 +2134,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (optimizations.GetDestinationIsTypedObjectArray()) { vixl32::Label do_copy; - __ B(eq, &do_copy, /* far_target */ false); + __ B(eq, &do_copy, /* is_far_target= */ false); if (!did_unpoison) { assembler->MaybeUnpoisonHeapReference(temp1); } @@ -2455,10 +2156,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false); // /* HeapReference<Class> */ temp3 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false); + invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false); __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp3` has been unpoisoned // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. @@ -2486,7 +2187,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { if (length.IsRegister()) { // Don't enter the copy loop if the length is null. - __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false); + __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false); } if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { @@ -2543,7 +2244,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // Given the numeric representation, it's enough to check the low bit of the // rb_state. We do that by shifting the bit out of the lock word with LSRS // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); // Carry flag is the last bit shifted out by LSRS. 
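The LSRS trick above moves the single read-barrier-state bit of the lock word into the carry flag instead of testing it with a TST immediate (non-gray is 0, gray is 1, per the static_asserts). The check it implements is simply the following; the shift parameter stands in for LockWord::kReadBarrierStateShift:

    #include <cstdint>

    // Model of the gray-bit test on the lock word: the slow path is taken when
    // the read-barrier-state bit is set (gray), i.e. when LSRS #(shift + 1)
    // leaves a set carry flag.
    static bool IsGray(uint32_t lock_word, uint32_t read_barrier_state_shift) {
      return ((lock_word >> read_barrier_state_shift) & 1u) != 0u;
    }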
@@ -2563,7 +2264,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); } __ Cmp(temp1, temp3); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); __ Bind(read_barrier_slow_path->GetExitLabel()); } else { @@ -2585,13 +2286,13 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); } __ Cmp(temp1, temp3); - __ B(ne, &loop, /* far_target */ false); + __ B(ne, &loop, /* is_far_target= */ false); } __ Bind(&done); } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -3121,7 +2822,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // src range to copy. __ Add(src_ptr, srcObj, value_offset); @@ -3137,7 +2838,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(srcObj, count_offset)); __ Tst(temp, 1); temps.Release(temp); - __ B(eq, &compressed_string_preloop, /* far_target */ false); + __ B(eq, &compressed_string_preloop, /* is_far_target= */ false); } __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1)); @@ -3147,7 +2848,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) temp = temps.Acquire(); // Save repairing the value of num_chr on the < 4 character path. __ Subs(temp, num_chr, 4); - __ B(lt, &remainder, /* far_target */ false); + __ B(lt, &remainder, /* is_far_target= */ false); // Keep the result of the earlier subs, we are going to fetch at least 4 characters. __ Mov(num_chr, temp); @@ -3162,10 +2863,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex)); __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex)); temps.Release(temp); - __ B(ge, &loop, /* far_target */ false); + __ B(ge, &loop, /* is_far_target= */ false); __ Adds(num_chr, num_chr, 4); - __ B(eq, final_label, /* far_target */ false); + __ B(eq, final_label, /* is_far_target= */ false); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. 
@@ -3175,7 +2876,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Subs(num_chr, num_chr, 1); __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); - __ B(gt, &remainder, /* far_target */ false); + __ B(gt, &remainder, /* is_far_target= */ false); if (mirror::kUseStringCompression) { __ B(final_label); @@ -3191,7 +2892,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex)); temps.Release(temp); __ Subs(num_chr, num_chr, 1); - __ B(gt, &compressed_string_loop, /* far_target */ false); + __ B(gt, &compressed_string_loop, /* is_far_target= */ false); } if (done.IsReferenced()) { @@ -3252,7 +2953,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) { ArmVIXLAssembler* assembler = GetAssembler(); DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); - __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); + __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) { @@ -3264,7 +2965,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) { void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) { ArmVIXLAssembler* assembler = GetAssembler(); DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); - __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); + __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { @@ -3277,33 +2978,27 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); ArmVIXLAssembler* const assembler = GetAssembler(); vixl32::Register out = RegisterFrom(locations->Out()); UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - InvokeRuntimeCallingConventionARMVIXL calling_convention; - vixl32::Register argument = calling_convention.GetRegisterAt(0); if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
- uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ Mov(temp, value); assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation @@ -3311,25 +3006,22 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); vixl32::Register in = RegisterFrom(locations->InAt(0)); // Check bounds of our cache. __ Add(out, in, -info.low); - __ Cmp(out, info.high - info.low + 1); + __ Cmp(out, info.length); vixl32::Label allocate, done; - __ B(hs, &allocate, /* is_far_target */ false); + __ B(hs, &allocate, /* is_far_target= */ false); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference); codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out); assembler->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); assembler->StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. @@ -3353,7 +3045,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { vixl32::Register temp = temps.Acquire(); vixl32::Label done; vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done); - __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false); __ Dmb(vixl32::ISH); __ Mov(temp, 0); assembler->StoreToOffset(kStoreWord, temp, tr, offset); @@ -3375,6 +3067,9 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
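Editor's note: the rewritten VisitIntegerValueOf above switches from reading the IntegerCache through runtime pointers to boot-image references, and it replaces the pair of signed bound checks with a single unsigned comparison, `static_cast<uint32_t>(value - info.low) < info.length`. A hedged sketch of that fast path; the struct loosely mirrors IntegerValueOfInfo and its field names are assumptions:

    #include <cstdint>

    struct IntegerCacheInfoSketch {
      int32_t low;       // lowest cached value (typically -128)
      uint32_t length;   // number of cached boxes
    };

    // Returns true when `value` hits the boot-image Integer cache. If value < low,
    // the subtraction wraps to a large unsigned number and the comparison fails,
    // so one unsigned compare covers both the lower and the upper bound.
    inline bool InIntegerCache(int32_t value, const IntegerCacheInfoSketch& info) {
      return static_cast<uint32_t>(value - info.low) < info.length;
    }

On a cache hit the generated code loads the boxed object straight from boot-image data; otherwise it allocates and initializes a new java.lang.Integer via AllocateInstanceForIntrinsic, as the hunks show.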
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 9c02d0a4ad..1fea776f0d 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -27,14 +27,14 @@ namespace arm { class ArmVIXLAssembler; class CodeGeneratorARMVIXL; -class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -54,14 +54,14 @@ class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL); }; -class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index b7936b9c8e..3da0e578bf 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); } +inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -104,7 +108,7 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { public: explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in); __ Bind(GetEntryLabel()); @@ -133,7 +137,7 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { __ B(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS"; } + const char* GetDescription() const override { return "IntrinsicSlowPathMIPS"; } private: // The instruction where this slow path is happening. 
@@ -181,7 +185,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // int java.lang.Float.floatToRawIntBits(float) @@ -190,7 +194,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -222,7 +226,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // float java.lang.Float.intBitsToFloat(int) @@ -231,7 +235,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, @@ -407,7 +411,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { DataType::Type::kInt32, IsR2OrNewer(), IsR6(), - /* reverseBits */ false, + /* reverseBits= */ false, GetAssembler()); } @@ -421,7 +425,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { DataType::Type::kInt64, IsR2OrNewer(), IsR6(), - /* reverseBits */ false, + /* reverseBits= */ false, GetAssembler()); } @@ -435,7 +439,7 @@ void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { DataType::Type::kInt16, IsR2OrNewer(), IsR6(), - /* reverseBits */ false, + /* reverseBits= */ false, GetAssembler()); } @@ -475,7 +479,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -484,7 +488,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invok } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -562,7 +566,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) 
@@ -571,7 +575,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invo } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler()); } // int java.lang.Integer.reverse(int) @@ -584,7 +588,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { DataType::Type::kInt32, IsR2OrNewer(), IsR6(), - /* reverseBits */ true, + /* reverseBits= */ true, GetAssembler()); } @@ -598,7 +602,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { DataType::Type::kInt64, IsR2OrNewer(), IsR6(), - /* reverseBits */ true, + /* reverseBits= */ true, GetAssembler()); } @@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, DataType::Type type, bool isR6, + bool hasMsa, MipsAssembler* assembler) { Register out = locations->Out().AsRegister<Register>(); @@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations, // instructions compared to a loop-based algorithm which required 47 // instructions. - if (type == DataType::Type::kInt32) { - Register in = locations->InAt(0).AsRegister<Register>(); - - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - if (isR6) { - __ MulR6(out, out, TMP); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } else { - __ MulR2(out, out, TMP); + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + __ Mtc1(in_lo, FTMP); + __ Mthc1(in_hi, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); } - __ Srl(out, out, 24); } else { - DCHECK_EQ(type, DataType::Type::kInt64); - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); - Register out_hi = locations->GetTemp(1).AsRegister<Register>(); - Register tmp_lo = TMP; - Register out_lo = out; + if (type == DataType::Type::kInt32) { + Register in = locations->InAt(0).AsRegister<Register>(); - __ Srl(tmp_lo, in_lo, 1); - __ Srl(tmp_hi, in_hi, 1); + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + if (isR6) { + __ MulR6(out, out, TMP); + } else { + __ MulR2(out, out, TMP); + } + __ Srl(out, out, 24); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + Register in_lo = 
locations->InAt(0).AsRegisterPairLow<Register>(); + Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); + Register out_hi = locations->GetTemp(1).AsRegister<Register>(); + Register tmp_lo = TMP; + Register out_lo = out; - __ LoadConst32(AT, 0x55555555); + __ Srl(tmp_lo, in_lo, 1); + __ Srl(tmp_hi, in_hi, 1); - __ And(tmp_lo, tmp_lo, AT); - __ Subu(tmp_lo, in_lo, tmp_lo); + __ LoadConst32(AT, 0x55555555); - __ And(tmp_hi, tmp_hi, AT); - __ Subu(tmp_hi, in_hi, tmp_hi); + __ And(tmp_lo, tmp_lo, AT); + __ Subu(tmp_lo, in_lo, tmp_lo); - __ LoadConst32(AT, 0x33333333); + __ And(tmp_hi, tmp_hi, AT); + __ Subu(tmp_hi, in_hi, tmp_hi); - __ And(out_lo, tmp_lo, AT); - __ Srl(tmp_lo, tmp_lo, 2); - __ And(tmp_lo, tmp_lo, AT); - __ Addu(tmp_lo, out_lo, tmp_lo); + __ LoadConst32(AT, 0x33333333); - __ And(out_hi, tmp_hi, AT); - __ Srl(tmp_hi, tmp_hi, 2); - __ And(tmp_hi, tmp_hi, AT); - __ Addu(tmp_hi, out_hi, tmp_hi); + __ And(out_lo, tmp_lo, AT); + __ Srl(tmp_lo, tmp_lo, 2); + __ And(tmp_lo, tmp_lo, AT); + __ Addu(tmp_lo, out_lo, tmp_lo); - // Here we deviate from the original algorithm a bit. We've reached - // the stage where the bitfields holding the subtotals are large - // enough to hold the combined subtotals for both the low word, and - // the high word. This means that we can add the subtotals for the - // the high, and low words into a single word, and compute the final - // result for both the high, and low words using fewer instructions. - __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out_hi, tmp_hi, AT); + __ Srl(tmp_hi, tmp_hi, 2); + __ And(tmp_hi, tmp_hi, AT); + __ Addu(tmp_hi, out_hi, tmp_hi); - __ Addu(TMP, tmp_hi, tmp_lo); + // Here we deviate from the original algorithm a bit. We've reached + // the stage where the bitfields holding the subtotals are large + // enough to hold the combined subtotals for both the low word, and + // the high word. This means that we can add the subtotals for the + // the high, and low words into a single word, and compute the final + // result for both the high, and low words using fewer instructions. 
+ __ LoadConst32(AT, 0x0F0F0F0F); - __ Srl(out, TMP, 4); - __ And(out, out, AT); - __ And(TMP, TMP, AT); - __ Addu(out, out, TMP); + __ Addu(TMP, tmp_hi, tmp_lo); - __ LoadConst32(AT, 0x01010101); + __ Srl(out, TMP, 4); + __ And(out, out, AT); + __ And(TMP, TMP, AT); + __ Addu(out, out, TMP); - if (isR6) { - __ MulR6(out, out, AT); - } else { - __ MulR2(out, out, AT); - } + __ LoadConst32(AT, 0x01010101); - __ Srl(out, out, 24); + if (isR6) { + __ MulR6(out, out, AT); + } else { + __ MulR2(out, out, AT); + } + + __ Srl(out, out, 24); + } } } @@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(int) @@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - bool isR2OrNewer, - bool isR6, - MipsAssembler* assembler) { - FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - - // Note, as a "quality of implementation", rather than pure "spec compliance", we require that - // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN - // (signaling NaN may become quiet though). - // - // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case, - // both regular floating point numbers and NAN values are treated alike, only the sign bit is - // affected by this instruction. - // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any - // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be - // changed when doing abs(NaN). Because of that, we clear sign bit in a different way. - if (isR6) { - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, in); - } - } else { - if (is64bit) { - if (in != out) { - __ MovD(out, in); - } - __ MoveFromFpuHigh(TMP, in); - // ins instruction is not available for R1. - if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ MoveToFpuHigh(TMP, out); - } else { - __ Mfc1(TMP, in); - // ins instruction is not available for R1. 
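Editor's note: the scalar path kept in GenBitCount is the classic SWAR population count (masks 0x55555555, 0x33333333, 0x0F0F0F0F, multiply by 0x01010101, shift right by 24); the new MSA path replaces the whole sequence with a single PCNT.W/PCNT.D. A C++ rendering of the scalar algorithm the masks implement:

    #include <cstdint>

    inline uint32_t PopCount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit sub-totals
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit sub-totals
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit sub-totals per byte
      return (x * 0x01010101u) >> 24;                    // sum the four bytes
    }

The 64-bit variant in the MIPS code works the same way on register pairs, merging the high- and low-word sub-totals once the per-byte fields are wide enough to hold the combined counts.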
- if (isR2OrNewer) { - __ Ins(TMP, ZERO, 31, 1); - } else { - __ Sll(TMP, TMP, 1); - __ Srl(TMP, TMP, 1); - } - __ Mtc1(TMP, out); - } - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) { - if (is64bit) { - Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - // The comments in this section show the analogous operations which would - // be performed if we had 64-bit registers "in", and "out". - // __ Dsra32(AT, in, 31); - __ Sra(AT, in_hi, 31); - // __ Xor(out, in, AT); - __ Xor(TMP, in_lo, AT); - __ Xor(out_hi, in_hi, AT); - // __ Dsubu(out, out, AT); - __ Subu(out_lo, TMP, AT); - __ Sltu(TMP, out_lo, TMP); - __ Addu(out_hi, out_hi, TMP); - } else { - Register in = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); - FRegister a = locations->InAt(0).AsFpuRegister<FRegister>(); - FRegister b = locations->InAt(1).AsFpuRegister<FRegister>(); - - if (is_R6) { - MipsLabel noNaNs; - MipsLabel done; - FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
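Editor's note: the deleted MathAbsFP helper above is careful to clear only the sign bit so that NaN payloads survive abs() even where ABS.fmt cannot be used (NAN2008=0 on pre-R6 cores). A portable sketch of that bit manipulation, shown for the 64-bit case:

    #include <cstdint>
    #include <cstring>

    inline double AbsKeepNaNPayload(double in) {
      uint64_t bits;
      std::memcpy(&bits, &in, sizeof(bits));
      bits &= ~(UINT64_C(1) << 63);  // clear the sign bit, change nothing else
      double out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }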
- __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ B(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); - } else { - MipsLabel ordered; - MipsLabel compare; - MipsLabel select; - MipsLabel done; - - if (type == DataType::Type::kFloat64) { - __ CunD(a, b); - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CunS(a, b); - } - __ Bc1f(&ordered); - - // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == DataType::Type::kFloat64) { - __ CeqD(b, b); - } else { - __ CeqS(b, b); - } - __ B(&select); - - __ Bind(&ordered); - - // Neither is a NaN. - // a == b? (-0.0 compares equal with +0.0) - // If equal, handle zeroes, else compare further. - if (type == DataType::Type::kFloat64) { - __ CeqD(a, b); - } else { - __ CeqS(a, b); - } - __ Bc1f(&compare); - - // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == DataType::Type::kFloat64) { - __ MoveFromFpuHigh(TMP, a); - __ MoveFromFpuHigh(AT, b); - } else { - __ Mfc1(TMP, a); - __ Mfc1(AT, b); - } - - if (is_min) { - // -0.0 prevails over +0.0. - __ Or(TMP, TMP, AT); - } else { - // +0.0 prevails over -0.0. - __ And(TMP, TMP, AT); - } - - if (type == DataType::Type::kFloat64) { - __ Mfc1(AT, a); - __ Mtc1(AT, out); - __ MoveToFpuHigh(TMP, out); - } else { - __ Mtc1(TMP, out); - } - __ B(&done); - - __ Bind(&compare); - - if (type == DataType::Type::kFloat64) { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeD(a, b); - } else { - // return (a >= b) ? a : b; - __ ColeD(b, a); // b <= a - } - } else { - if (is_min) { - // return (a <= b) ? a : b; - __ ColeS(a, b); - } else { - // return (a >= b) ? 
a : b; - __ ColeS(b, a); // b <= a - } - } - - __ Bind(&select); - - if (type == DataType::Type::kFloat64) { - __ MovtD(out, a); - __ MovfD(out, b); - } else { - __ MovtS(out, a); - __ MovfS(out, b); - } - - __ Bind(&done); - } -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat64, - IsR6(), - GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kFloat32, - IsR6(), - GetAssembler()); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - DataType::Type type, - bool is_R6, - MipsAssembler* assembler) { - if (is_R6) { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions - // always change the target (output) register. If the condition is - // true the output register gets the contents of the "rs" register; - // otherwise, the output register is set to zero. One consequence - // of this is that to implement something like "rd = c==0 ? rs : rt" - // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions. - // After executing this pair of instructions one of the output - // registers from the pair will necessarily contain zero. Then the - // code ORs the output registers from the SELEQZ/SELNEZ instructions - // to get the final result. 
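Editor's note: the removed comment above describes the MIPS R6 conditional-select idiom, where SELEQZ/SELNEZ always write their destination (source value or zero) and a conditional move is therefore synthesized from two selects plus an OR. A small C++ model of that idiom applied to min, with the helpers standing in for the instructions:

    #include <cstdint>

    inline uint32_t SelEqz(uint32_t rs, uint32_t rt) { return rt == 0 ? rs : 0; }  // rs if rt == 0
    inline uint32_t SelNez(uint32_t rs, uint32_t rt) { return rt != 0 ? rs : 0; }  // rs if rt != 0

    inline uint32_t MinU32(uint32_t a, uint32_t b) {
      uint32_t a_lt_b = (a < b) ? 1u : 0u;  // what SLTU computes
      // Exactly one of the two selects is zero, so OR-ing them yields the minimum.
      return SelNez(a, a_lt_b) | SelEqz(b, a_lt_b);
    }

Swapping which operand goes through SELNEZ versus SELEQZ produces max instead of min, which is exactly the `is_min` branching visible in the deleted GenMinMax helper.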
- // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, b_hi, a_hi); - __ Bne(b_hi, a_hi, &compare_done); - - __ Sltu(TMP, b_lo, a_lo); - - __ Bind(&compare_done); - - if (is_min) { - __ Seleqz(AT, a_lo, TMP); - __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo - // because at this point we're - // done using a_lo/b_lo. - } else { - __ Selnez(AT, a_lo, TMP); - __ Seleqz(out_lo, b_lo, TMP); // ditto - } - __ Or(out_lo, out_lo, AT); - if (is_min) { - __ Seleqz(AT, a_hi, TMP); - __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } else { - __ Selnez(AT, a_hi, TMP); - __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi - } - __ Or(out_hi, out_hi, AT); - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, b, a); - if (is_min) { - __ Seleqz(TMP, a, AT); - __ Selnez(AT, b, AT); - } else { - __ Selnez(TMP, a, AT); - __ Seleqz(AT, b, AT); - } - __ Or(out, TMP, AT); - } - } - } else { - if (type == DataType::Type::kInt64) { - Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); - Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); - Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); - Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>(); - Register out_lo = locations->Out().AsRegisterPairLow<Register>(); - Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); - - MipsLabel compare_done; - - if (a_lo == b_lo) { - if (out_lo != a_lo) { - __ Move(out_lo, a_lo); - __ Move(out_hi, a_hi); - } - } else { - __ Slt(TMP, a_hi, b_hi); - __ Bne(a_hi, b_hi, &compare_done); - - __ Sltu(TMP, a_lo, b_lo); - - __ Bind(&compare_done); - - if (is_min) { - if (out_lo != a_lo) { - __ Movn(out_hi, a_hi, TMP); - __ Movn(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movz(out_hi, b_hi, TMP); - __ Movz(out_lo, b_lo, TMP); - } - } else { - if (out_lo != a_lo) { - __ Movz(out_hi, a_hi, TMP); - __ Movz(out_lo, a_lo, TMP); - } - if (out_lo != b_lo) { - __ Movn(out_hi, b_hi, TMP); - __ Movn(out_lo, b_lo, TMP); - } - } - } - } else { - DCHECK_EQ(type, DataType::Type::kInt32); - Register a = locations->InAt(0).AsRegister<Register>(); - Register b = 
locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - if (a == b) { - if (out != a) { - __ Move(out, a); - } - } else { - __ Slt(AT, a, b); - if (is_min) { - if (out != a) { - __ Movn(out, a, AT); - } - if (out != b) { - __ Movz(out, b, AT); - } - } else { - if (out != a) { - __ Movz(out, a, AT); - } - if (out != b) { - __ Movn(out, b, AT); - } - } - } - } - } -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ true, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt32, - IsR6(), - GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), - /* is_min */ false, - DataType::Type::kInt64, - IsR6(), - GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) @@ -1601,11 +1055,11 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, - /* index */ offset_loc, + /* offset= */ 0U, + /* index= */ offset_loc, TIMES_1, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); if (is_volatile) { __ Sync(0); } @@ -1623,8 +1077,8 @@ static void GenUnsafeGet(HInvoke* invoke, trg_loc, trg_loc, base_loc, - /* offset */ 0U, - /* index */ offset_loc); + /* offset= */ 0U, + /* index= */ offset_loc); } } else { if (is_R6) { @@ -1653,7 +1107,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, IsR6(), codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) @@ -1662,7 +1116,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, IsR6(), codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) @@ -1671,7 +1125,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - 
GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, IsR6(), codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) @@ -1680,7 +1134,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, IsR6(), codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) @@ -1689,7 +1143,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, IsR6(), codegen_); } static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -1771,8 +1225,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePut(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1785,8 +1239,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, IsR6(), codegen_); } @@ -1799,8 +1253,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1813,8 +1267,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObject(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1827,8 +1281,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, IsR6(), codegen_); } @@ -1841,8 +1295,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1855,8 +1309,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLong(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), 
DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, IsR6(), codegen_); } @@ -1869,8 +1323,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, IsR6(), codegen_); } @@ -1934,12 +1388,12 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS* code invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, + /* offset= */ 0u, + /* index= */ offset_loc, ScaleFactor::TIMES_1, temp, - /* needs_null_check */ false, - /* always_update_field */ true); + /* needs_null_check= */ false, + /* always_update_field= */ true); } } @@ -2062,13 +1516,6 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -2128,8 +1575,16 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp1 = str->klass_ __ Lw(temp1, str, class_offset); + // /* HeapReference<Class> */ temp2 = arg->klass_ __ Lw(temp2, arg, class_offset); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. 
__ Bne(temp1, temp2, &return_false); } @@ -2259,7 +1714,7 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, /* start_at_zero */ true, GetAssembler(), codegen_); + GenerateStringIndexOf(invoke, /* start_at_zero= */ true, GetAssembler(), codegen_); } // int java.lang.String.indexOf(int ch, int fromIndex) @@ -2280,7 +1735,7 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, /* start_at_zero */ false, GetAssembler(), codegen_); + GenerateStringIndexOf(invoke, /* start_at_zero= */ false, GetAssembler(), codegen_); } // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) @@ -3147,59 +2602,50 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); MipsAssembler* assembler = GetAssembler(); InstructionCodeGeneratorMIPS* icodegen = down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor()); Register out = locations->Out().AsRegister<Register>(); - InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ LoadConst32(out, address); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst32(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); Register in = locations->InAt(0).AsRegister<Register>(); MipsLabel allocate, done; - int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; - // Is (info.low <= in) && (in <= info.high)? __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < (info.high - info.low + 1)? 
- if (IsInt<16>(count)) { - __ Sltiu(AT, out, count); + // As unsigned quantities is out < info.length ? + if (IsUint<15>(info.length)) { + __ Sltiu(AT, out, info.length); } else { - __ LoadConst32(AT, count); + __ LoadConst32(AT, info.length); __ Sltu(AT, out, AT); } - // Branch if out >= (info.high - info.low + 1). - // This means that "in" is outside of the range [info.low, info.high]. + // Branch if out >= info.length. This means that "in" is outside of the valid range. __ Beqz(AT, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ LoadConst32(TMP, data_offset + address); + codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); __ ShiftAndAdd(out, out, TMP, TIMES_4); __ Lw(out, out, 0); __ MaybeUnpoisonHeapReference(out); @@ -3207,10 +2653,8 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst32(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. @@ -3260,6 +2704,10 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer) + UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend); diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 13397f11d4..08d4e82139 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -30,14 +30,14 @@ namespace mips { class CodeGeneratorMIPS; class MipsAssembler; -class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderMIPS final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS); }; -class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorMIPS final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorMIPS(CodeGeneratorMIPS* codegen) : codegen_(codegen) {} // Define visitor methods. 
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { bool IsR2OrNewer() const; bool IsR6() const; bool Is32BitFPU() const; + bool HasMsa() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 4668c561ed..3e687652d3 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { return codegen_->GetGraph()->GetAllocator(); } +inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const { + return codegen_->GetInstructionSetFeatures().HasMsa(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -93,7 +97,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { explicit IntrinsicSlowPathMIPS64(HInvoke* invoke) : SlowPathCodeMIPS64(invoke), invoke_(invoke) { } - void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen_in) override { CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in); __ Bind(GetEntryLabel()); @@ -122,7 +126,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { __ Bc(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; } + const char* GetDescription() const override { return "IntrinsicSlowPathMIPS64"; } private: // The instruction where this slow path is happening. 
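Editor's note: the OVERRIDE → override churn here touches the intrinsic slow-path classes. As a reminder of the contract they implement (a hedged sketch, not the real MIPS64 code): the inline fast path branches to the slow path's entry label when it cannot handle a case, and the slow path performs the general call before jumping back to the exit label.

    #include <functional>

    struct SlowPathSketch {
      std::function<bool()> fast_path;  // returns false when it cannot handle the case
      std::function<void()> slow_path;  // out-of-line fallback, e.g. calling the Java method

      void Run() const {
        if (!fast_path()) {  // "branch to GetEntryLabel()"
          slow_path();       // save live registers, move arguments, call, restore
        }
        // "Bind(GetExitLabel())": execution continues here either way.
      }
    };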
@@ -165,7 +169,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* in } void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // int java.lang.Float.floatToRawIntBits(float) @@ -174,7 +178,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -201,7 +205,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invok } void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // float java.lang.Float.intBitsToFloat(int) @@ -210,7 +214,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -291,7 +295,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } // int java.lang.Long.numberOfLeadingZeros(long i) @@ -300,7 +304,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } static void GenNumberOfTrailingZeroes(LocationSummary* locations, @@ -328,7 +332,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } // int java.lang.Long.numberOfTrailingZeros(long i) @@ -337,7 +341,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } static void GenReverse(LocationSummary* locations, @@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { static void GenBitCount(LocationSummary* locations, const DataType::Type type, + const bool hasMsa, Mips64Assembler* assembler) { GpuRegister 
out = locations->Out().AsRegister<GpuRegister>(); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); @@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations, // bits are set but the algorithm here attempts to minimize the total // number of instructions executed even when a large number of bits // are set. - - if (type == DataType::Type::kInt32) { - __ Srl(TMP, in, 1); - __ LoadConst32(AT, 0x55555555); - __ And(TMP, TMP, AT); - __ Subu(TMP, in, TMP); - __ LoadConst32(AT, 0x33333333); - __ And(out, TMP, AT); - __ Srl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Addu(TMP, out, TMP); - __ Srl(out, TMP, 4); - __ Addu(out, out, TMP); - __ LoadConst32(AT, 0x0F0F0F0F); - __ And(out, out, AT); - __ LoadConst32(TMP, 0x01010101); - __ MulR6(out, out, TMP); - __ Srl(out, out, 24); - } else if (type == DataType::Type::kInt64) { - __ Dsrl(TMP, in, 1); - __ LoadConst64(AT, 0x5555555555555555L); - __ And(TMP, TMP, AT); - __ Dsubu(TMP, in, TMP); - __ LoadConst64(AT, 0x3333333333333333L); - __ And(out, TMP, AT); - __ Dsrl(TMP, TMP, 2); - __ And(TMP, TMP, AT); - __ Daddu(TMP, out, TMP); - __ Dsrl(out, TMP, 4); - __ Daddu(out, out, TMP); - __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); - __ And(out, out, AT); - __ LoadConst64(TMP, 0x0101010101010101L); - __ Dmul(out, out, TMP); - __ Dsrl32(out, out, 24); + if (hasMsa) { + if (type == DataType::Type::kInt32) { + __ Mtc1(in, FTMP); + __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Mfc1(out, FTMP); + } else { + __ Dmtc1(in, FTMP); + __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP)); + __ Dmfc1(out, FTMP); + } + } else { + if (type == DataType::Type::kInt32) { + __ Srl(TMP, in, 1); + __ LoadConst32(AT, 0x55555555); + __ And(TMP, TMP, AT); + __ Subu(TMP, in, TMP); + __ LoadConst32(AT, 0x33333333); + __ And(out, TMP, AT); + __ Srl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Addu(TMP, out, TMP); + __ Srl(out, TMP, 4); + __ Addu(out, out, TMP); + __ LoadConst32(AT, 0x0F0F0F0F); + __ And(out, out, AT); + __ LoadConst32(TMP, 0x01010101); + __ MulR6(out, out, TMP); + __ Srl(out, out, 24); + } else { + __ Dsrl(TMP, in, 1); + __ LoadConst64(AT, 0x5555555555555555L); + __ And(TMP, TMP, AT); + __ Dsubu(TMP, in, TMP); + __ LoadConst64(AT, 0x3333333333333333L); + __ And(out, TMP, AT); + __ Dsrl(TMP, TMP, 2); + __ And(TMP, TMP, AT); + __ Daddu(TMP, out, TMP); + __ Dsrl(out, TMP, 4); + __ Daddu(out, out, TMP); + __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL); + __ And(out, out, AT); + __ LoadConst64(TMP, 0x0101010101010101L); + __ Dmul(out, out, TMP); + __ Dsrl32(out, out, 24); + } } } @@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler()); } // int java.lang.Long.bitCount(long) @@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); -} - -static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - if (is64bit) { - __ AbsD(out, in); - } else { - __ AbsS(out, 
in); - } -} - -// double java.lang.Math.abs(double) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -// float java.lang.Math.abs(float) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { - GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (is64bit) { - __ Dsra32(AT, in, 31); - __ Xor(out, in, AT); - __ Dsubu(out, out, AT); - } else { - __ Sra(AT, in, 31); - __ Xor(out, in, AT); - __ Subu(out, out, AT); - } -} - -// int java.lang.Math.abs(int) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -// long java.lang.Math.abs(long) -void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - DataType::Type type, - Mips64Assembler* assembler) { - FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); - FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); - - Mips64Label noNaNs; - Mips64Label done; - FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP; - - // When Java computes min/max it prefers a NaN to a number; the - // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of - // the inputs is a NaN and the other is a valid number, the MIPS - // instruction will return the number; Java wants the NaN value - // returned. This is why there is extra logic preceding the use of - // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a - // NaN, return the NaN, otherwise return the min/max. - if (type == DataType::Type::kFloat64) { - __ CmpUnD(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqD(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. - __ SelD(ftmp, a, b); - - if (ftmp != out) { - __ MovD(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinD(out, a, b); - } else { - __ MaxD(out, a, b); - } - } else { - DCHECK_EQ(type, DataType::Type::kFloat32); - __ CmpUnS(FTMP, a, b); - __ Bc1eqz(FTMP, &noNaNs); - - // One of the inputs is a NaN - __ CmpEqS(ftmp, a, a); - // If a == a then b is the NaN, otherwise a is the NaN. 
- __ SelS(ftmp, a, b); - - if (ftmp != out) { - __ MovS(out, ftmp); - } - - __ Bc(&done); - - __ Bind(&noNaNs); - - if (is_min) { - __ MinS(out, a, b); - } else { - __ MaxS(out, a, b); - } - } - - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); -} - -// double java.lang.Math.min(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.min(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); -} - -// double java.lang.Math.max(double, double) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); -} - -// float java.lang.Math.max(float, float) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); -} - -static void GenMinMax(LocationSummary* locations, - bool is_min, - Mips64Assembler* assembler) { - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - - if (lhs == rhs) { - if (out != lhs) { - __ Move(out, lhs); - } - } else { - // Some architectures, such as ARM and MIPS (prior to r6), have a - // conditional move instruction which only changes the target - // (output) register if the condition is true (MIPS prior to r6 had - // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always - // change the target (output) register. If the condition is true the - // output register gets the contents of the "rs" register; otherwise, - // the output register is set to zero. One consequence of this is - // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6 - // needs to use a pair of SELEQZ/SELNEZ instructions. After - // executing this pair of instructions one of the output registers - // from the pair will necessarily contain zero. Then the code ORs the - // output registers from the SELEQZ/SELNEZ instructions to get the - // final result. - // - // The initial test to see if the output register is same as the - // first input register is needed to make sure that value in the - // first input register isn't clobbered before we've finished - // computing the output value. 
The logic in the corresponding else - // clause performs the same task but makes sure the second input - // register isn't clobbered in the event that it's the same register - // as the output register; the else clause also handles the case - // where the output register is distinct from both the first, and the - // second input registers. - if (out == lhs) { - __ Slt(AT, rhs, lhs); - if (is_min) { - __ Seleqz(out, lhs, AT); - __ Selnez(AT, rhs, AT); - } else { - __ Selnez(out, lhs, AT); - __ Seleqz(AT, rhs, AT); - } - } else { - __ Slt(AT, lhs, rhs); - if (is_min) { - __ Seleqz(out, rhs, AT); - __ Selnez(AT, lhs, AT); - } else { - __ Selnez(out, rhs, AT); - __ Seleqz(AT, lhs, AT); - } - } - __ Or(out, out, AT); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - -// int java.lang.Math.min(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// long java.lang.Math.min(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler()); -} - -// int java.lang.Math.max(int, int) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); -} - -// long java.lang.Math.max(long, long) -void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler()); } // double java.lang.Math.sqrt(double) @@ -1179,11 +911,11 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, trg_loc, base, - /* offset */ 0U, - /* index */ offset_loc, + /* offset= */ 0U, + /* index= */ offset_loc, TIMES_1, temp, - /* needs_null_check */ false); + /* needs_null_check= */ false); if (is_volatile) { __ Sync(0); } @@ -1196,8 +928,8 @@ static void GenUnsafeGet(HInvoke* invoke, trg_loc, trg_loc, base_loc, - /* offset */ 0U, - /* index */ offset_loc); + /* offset= */ 0U, + /* index= */ offset_loc); } } else { __ Lwu(trg, TMP, 0); @@ -1220,7 +952,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) @@ -1229,7 +961,7 @@ void 
IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) @@ -1238,7 +970,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } // long sun.misc.Unsafe.getLongVolatile(Object o, long offset) @@ -1247,7 +979,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) @@ -1256,7 +988,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) @@ -1265,7 +997,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invo } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { @@ -1335,8 +1067,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } @@ -1348,8 +1080,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } @@ -1361,8 +1093,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1374,8 +1106,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } @@ -1387,8 +1119,8 @@ void 
IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invok void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } @@ -1400,8 +1132,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kReference, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1413,8 +1145,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) { void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ false, + /* is_volatile= */ false, + /* is_ordered= */ false, codegen_); } @@ -1426,8 +1158,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ false, - /* is_ordered */ true, + /* is_volatile= */ false, + /* is_ordered= */ true, codegen_); } @@ -1439,8 +1171,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, - /* is_volatile */ true, - /* is_ordered */ false, + /* is_volatile= */ true, + /* is_ordered= */ false, codegen_); } @@ -1502,12 +1234,12 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS64* co invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, + /* offset= */ 0u, + /* index= */ offset_loc, ScaleFactor::TIMES_1, temp, - /* needs_null_check */ false, - /* always_update_field */ true); + /* needs_null_check= */ false, + /* always_update_field= */ true); } } @@ -1637,13 +1369,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1704,8 +1429,16 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. 
+ AssertNonMovableStringClass(); + // /* HeapReference<Class> */ temp1 = str->klass_ __ Lw(temp1, str, class_offset); + // /* HeapReference<Class> */ temp2 = arg->klass_ __ Lw(temp2, arg, class_offset); + // Also, because we use the previously loaded class references only in the + // following comparison, we don't need to unpoison them. __ Bnec(temp1, temp2, &return_false); } @@ -1823,7 +1556,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } // int java.lang.String.indexOf(int ch, int fromIndex) @@ -1841,7 +1574,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) @@ -1942,7 +1675,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } // boolean java.lang.Double.isInfinite(double) @@ -1951,7 +1684,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) @@ -2535,54 +2268,45 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); Mips64Assembler* assembler = GetAssembler(); InstructionCodeGeneratorMIPS64* icodegen = down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor()); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. 
- ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ LoadConst64(out, address); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = - dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst64(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { + DCHECK(locations->CanCall()); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); Mips64Label allocate, done; - int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; - // Is (info.low <= in) && (in <= info.high)? __ Addiu32(out, in, -info.low); - // As unsigned quantities is out < (info.high - info.low + 1)? - __ LoadConst32(AT, count); - // Branch if out >= (info.high - info.low + 1). - // This means that "in" is outside of the range [info.low, info.high]. + // As unsigned quantities is out < info.length ? + __ LoadConst32(AT, info.length); + // Branch if out >= info.length . This means that "in" is outside of the valid range. __ Bgeuc(out, AT, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ LoadConst64(TMP, data_offset + address); + codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference); __ Dlsa(out, out, TMP, TIMES_4); __ Lwu(out, out, 0); __ MaybeUnpoisonHeapReference(out); @@ -2590,10 +2314,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ LoadConst64(calling_convention.GetRegisterAt(0), address); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ StoreToOffset(kStoreWord, in, out, info.value_offset); // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation // one. 
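The VisitIntegerValueOf rewrite above replaces the pair of comparisons against info.low and info.high with one unsigned comparison against info.length, loads the boxed object from the boot-image cache when the value is in range, and only calls into the runtime allocator otherwise. A small standalone C++ sketch of that shape, where low and length stand in for info.low and info.length and a plain heap allocation stands in for the AllocateInstanceForIntrinsic slow path:

// Standalone sketch, not ART code: cached boxing with a single unsigned range check.
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

struct Boxed {
  int32_t value;  // models java.lang.Integer.value
};

class IntegerCache {
 public:
  IntegerCache(int32_t low, int32_t length) : low_(low), cache_(static_cast<size_t>(length)) {
    for (int32_t i = 0; i < length; ++i) {
      cache_[static_cast<size_t>(i)] = std::make_shared<Boxed>(Boxed{low + i});
    }
  }

  std::shared_ptr<Boxed> ValueOf(int32_t value) const {
    // One unsigned comparison covers low <= value && value < low + length:
    // if value < low, the subtraction wraps around to a large unsigned index.
    uint32_t index = static_cast<uint32_t>(value - low_);
    if (index < cache_.size()) {
      return cache_[index];                          // fast path: reuse the cached box
    }
    return std::make_shared<Boxed>(Boxed{value});    // slow path: allocate a new box
  }

 private:
  int32_t low_;
  std::vector<std::shared_ptr<Boxed>> cache_;
};

int main() {
  IntegerCache cache(/*low=*/ -128, /*length=*/ 256);
  std::cout << (cache.ValueOf(5) == cache.ValueOf(5)) << ' '       // 1: same cached object
            << (cache.ValueOf(500) == cache.ValueOf(500)) << '\n'; // 0: two fresh allocations
}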
@@ -2632,6 +2354,9 @@ void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIB UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 6f40d90ddb..ca8bc8f55a 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -30,14 +30,14 @@ namespace mips64 { class CodeGeneratorMIPS64; class Mips64Assembler; -class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderMIPS64 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,19 +55,21 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); }; -class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorMIPS64 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS + bool HasMsa() const; + private: Mips64Assembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index 8c69d9b643..41947f1ccd 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -47,7 +47,7 @@ class IntrinsicSlowPath : public SlowPathCode { return calling_convention_visitor.GetMethodLocation(); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { Assembler* assembler = codegen->GetAssembler(); assembler->Bind(GetEntryLabel()); @@ -73,7 +73,7 @@ class IntrinsicSlowPath : public SlowPathCode { assembler->Jump(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; } + const char* GetDescription() const override { return "IntrinsicSlowPath"; } private: // The instruction where this slow path is happening. 
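Like the other intrinsics headers in this change, intrinsics_mips64.h declares one Visit method per intrinsic by handing the OPTIMIZING_INTRINSICS macro to INTRINSICS_LIST, and the edit swaps the old OVERRIDE/FINAL macros for the C++11 keywords so mismatched signatures fail to compile. A compilable standalone sketch of that X-macro pattern, with a made-up three-entry list standing in for intrinsics_list.h and simplified stand-ins for HInvoke and the visitor base class:

// Standalone sketch, not ART code: X-macro generation of visitor methods.
#include <iostream>

// Hypothetical stand-in for intrinsics_list.h: each entry names one intrinsic.
#define DEMO_INTRINSICS_LIST(V) \
  V(IntegerBitCount)            \
  V(LongBitCount)               \
  V(MathSqrt)

struct HInvoke {};  // placeholder for the HIR invoke node

class DemoIntrinsicVisitor {
 public:
  virtual ~DemoIntrinsicVisitor() = default;
// Declare one pure virtual Visit method per list entry.
#define DECLARE_VISIT(Name) virtual void Visit##Name(HInvoke* invoke) = 0;
  DEMO_INTRINSICS_LIST(DECLARE_VISIT)
#undef DECLARE_VISIT
};

class PrintingVisitor final : public DemoIntrinsicVisitor {
 public:
// Using the override keyword (rather than an OVERRIDE macro) makes the compiler
// reject any Visit method whose signature no longer matches the base class.
#define DEFINE_VISIT(Name) \
  void Visit##Name(HInvoke*) override { std::cout << #Name << '\n'; }
  DEMO_INTRINSICS_LIST(DEFINE_VISIT)
#undef DEFINE_VISIT
};

int main() {
  PrintingVisitor visitor;
  HInvoke invoke;
  visitor.VisitIntegerBitCount(&invoke);
  visitor.VisitMathSqrt(&invoke);
  return 0;
}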
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 0763ef2352..de697f0f96 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -40,11 +40,6 @@ namespace art { namespace x86 { -static constexpr int kDoubleNaNHigh = 0x7FF80000; -static constexpr int kDoubleNaNLow = 0x00000000; -static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); -static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); - IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { @@ -87,7 +82,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -165,7 +160,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; } + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; } private: DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86); @@ -228,31 +223,31 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* } void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke, /* is64bit */ true); + CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true); } void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke, /* is64bit */ true); + CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true); } void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(allocator_, invoke, /* is64bit */ false); + CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false); } void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(allocator_, invoke, /* is64bit */ false); + CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false); } void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), 
DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. - -static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - // We need addressibility for the constant area. - locations->SetInAt(1, Location::RequiresRegister()); - // We need a temporary to hold the constant. - locations->AddTemp(Location::RequiresFpuRegister()); - } -} - -static void MathAbsFP(HInvoke* invoke, - bool is64bit, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(1)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(1).IsRegister()); - // We also have a constant area pointer. - Register constant_area = locations->InAt(1).AsRegister<Register>(); - XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (is64bit) { - __ movsd(temp, codegen->LiteralInt64Address( - INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area)); - __ andpd(output.AsFpuRegister<XmmRegister>(), temp); - } else { - __ movss(temp, codegen->LiteralInt32Address( - INT32_C(0x7FFFFFFF), method_address, constant_area)); - __ andps(output.AsFpuRegister<XmmRegister>(), temp); - } - } else { - // Create the right constant on an aligned stack. 
- if (is64bit) { - __ subl(ESP, Immediate(8)); - __ pushl(Immediate(0x7FFFFFFF)); - __ pushl(Immediate(0xFFFFFFFF)); - __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } else { - __ subl(ESP, Immediate(12)); - __ pushl(Immediate(0x7FFFFFFF)); - __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); - } - __ addl(ESP, Immediate(16)); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RegisterLocation(EAX)); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RegisterLocation(EDX)); -} - -static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { - Location output = locations->Out(); - Register out = output.AsRegister<Register>(); - DCHECK_EQ(out, EAX); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(temp, EDX); - - // Sign extend EAX into EDX. - __ cdq(); - - // XOR EAX with sign. - __ xorl(EAX, EDX); - - // Subtract out sign to correct. - __ subl(EAX, EDX); - - // The result is in EAX. -} - -static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { - Location input = locations->InAt(0); - Register input_lo = input.AsRegisterPairLow<Register>(); - Register input_hi = input.AsRegisterPairHigh<Register>(); - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Compute the sign into the temporary. - __ movl(temp, input_hi); - __ sarl(temp, Immediate(31)); - - // Store the sign into the output. - __ movl(output_lo, temp); - __ movl(output_hi, temp); - - // XOR the input to the output. - __ xorl(output_lo, input_lo); - __ xorl(output_hi, input_hi); - - // Subtract the sign. 
- __ subl(output_lo, temp); - __ sbbl(output_hi, temp); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { - GenAbsLong(invoke->GetLocations(), GetAssembler()); -} - -static void GenMinMaxFP(HInvoke* invoke, - bool is_min, - bool is_double, - X86Assembler* assembler, - CodeGeneratorX86* codegen) { - LocationSummary* locations = invoke->GetLocations(); - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - // Do we have a constant area pointer? - if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) { - HX86ComputeBaseMethodAddress* method_address = - invoke->InputAt(2)->AsX86ComputeBaseMethodAddress(); - DCHECK(locations->InAt(2).IsRegister()); - Register constant_area = locations->InAt(2).AsRegister<Register>(); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area)); - } else { - __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area)); - } - } else { - if (is_double) { - __ pushl(Immediate(kDoubleNaNHigh)); - __ pushl(Immediate(kDoubleNaNLow)); - __ movsd(out, Address(ESP, 0)); - __ addl(ESP, Immediate(8)); - } else { - __ pushl(Immediate(kFloatNaN)); - __ movss(out, Address(ESP, 0)); - __ addl(ESP, Immediate(4)); - } - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
- locations->SetOut(Location::SameAsFirstInput()); - HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); - if (static_or_direct->HasSpecialInput() && - invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) { - locations->SetInAt(2, Location::RequiresRegister()); - } -} - -void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ true, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ true, - GetAssembler(), - codegen_); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke, - /* is_min */ false, - /* is_double */ false, - GetAssembler(), - codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - if (is_long) { - // Need to perform a subtract to get the sign right. - // op1 is already in the same location as the output. - Location output = locations->Out(); - Register output_lo = output.AsRegisterPairLow<Register>(); - Register output_hi = output.AsRegisterPairHigh<Register>(); - - Register op2_lo = op2_loc.AsRegisterPairLow<Register>(); - Register op2_hi = op2_loc.AsRegisterPairHigh<Register>(); - - // Spare register to compute the subtraction to set condition code. - Register temp = locations->GetTemp(0).AsRegister<Register>(); - - // Subtract off op2_low. - __ movl(temp, output_lo); - __ subl(temp, op2_lo); - - // Now use the same tempo and the borrow to finish the subtraction of op2_hi. - __ movl(temp, output_hi); - __ sbbl(temp, op2_hi); - - // Now the condition code is correct. - Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess; - __ cmovl(cond, output_lo, op2_lo); - __ cmovl(cond, output_hi, op2_hi); - } else { - Register out = locations->Out().AsRegister<Register>(); - Register op2 = op2_loc.AsRegister<Register>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - __ cmpl(out, op2); - Condition cond = is_min ? 
Condition::kGreater : Condition::kLess; - __ cmovl(cond, out, op2); - } -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - // Register to use to perform a long subtract to set cc. - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -976,6 +545,96 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry __ cfi().AdjustCFAOffset(-16); } +static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + if (is_long) { + locations->SetInAt(0, Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +static void GenLowestOneBit(X86Assembler* assembler, + CodeGeneratorX86* codegen, + bool is_long, + HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Location src = locations->InAt(0); + Location out_loc = locations->Out(); + + if (invoke->InputAt(0)->IsConstant()) { + // Evaluate this at compile time. 
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); + if (value == 0) { + if (is_long) { + __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>()); + __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>()); + } else { + __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>()); + } + return; + } + // Nonzero value. + value = is_long ? CTZ(static_cast<uint64_t>(value)) + : CTZ(static_cast<uint32_t>(value)); + if (is_long) { + if (value >= 32) { + int shift = value-32; + codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0); + codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift); + } else { + codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value); + codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0); + } + } else { + codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value); + } + return; + } + // Handle non constant case + if (is_long) { + DCHECK(src.IsRegisterPair()); + Register src_lo = src.AsRegisterPairLow<Register>(); + Register src_hi = src.AsRegisterPairHigh<Register>(); + + Register out_lo = out_loc.AsRegisterPairLow<Register>(); + Register out_hi = out_loc.AsRegisterPairHigh<Register>(); + + __ movl(out_lo, src_lo); + __ movl(out_hi, src_hi); + + __ negl(out_lo); + __ adcl(out_hi, Immediate(0)); + __ negl(out_hi); + + __ andl(out_lo, src_lo); + __ andl(out_hi, src_hi); + } else { + if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) { + Register out = out_loc.AsRegister<Register>(); + __ blsi(out, src.AsRegister<Register>()); + } else { + Register out = out_loc.AsRegister<Register>(); + // Do tmp & -tmp + if (src.IsRegister()) { + __ movl(out, src.AsRegister<Register>()); + } else { + DCHECK(src.IsStackSlot()); + __ movl(out, Address(ESP, src.GetStackIndex())); + } + __ negl(out); + + if (src.IsRegister()) { + __ andl(out, src.AsRegister<Register>()); + } else { + __ andl(out, Address(ESP, src.GetStackIndex())); + } + } + } +} + void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { CreateFPToFPCallLocations(allocator_, invoke); } @@ -1088,6 +747,21 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickTanh); } +void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke); +} +void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke); +} + +void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) { + CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke); +} + static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); @@ -1353,13 +1027,6 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). 
- return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1405,7 +1072,15 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // Also, because we use the loaded class references only to compare them, we + // don't need to unpoison them. + // /* HeapReference<Class> */ ecx = str->klass_ __ movl(ecx, Address(str, class_offset)); + // if (ecx != /* HeapReference<Class> */ arg->klass_) return false __ cmpl(ecx, Address(arg, class_offset)); __ j(kNotEqual, &return_false); } @@ -1650,19 +1325,19 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true); } void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false); } void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2024,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check= */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -2095,45 +1770,45 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ false); + allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ true); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ false); + allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false); } void 
IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ true); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /* is_volatile */ false); + allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { CreateIntIntIntToIntLocations( - allocator_, invoke, DataType::Type::kReference, /* is_volatile */ true); + allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true); } void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } @@ -2160,39 +1835,39 @@ static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 
CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kReference, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true); } // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 @@ -2244,34 +1919,34 @@ static void GenUnsafePut(LocationSummary* locations, } void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + 
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, @@ -2368,8 +2043,8 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg temp1_loc, // Unused, used only as a "temporary" within the read barrier. base, field_addr, - /* needs_null_check */ false, - /* always_update_field */ true, + /* needs_null_check= */ false, + /* always_update_field= */ true, &temp2); } @@ -2600,19 +2275,19 @@ static void GenBitCount(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { - CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ false); + CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { - CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ true); + CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true); } void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { @@ -2704,19 +2379,19 @@ static void GenLeadingZeros(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ false); + CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ true); + CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true); } void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { @@ -2795,19 +2470,19 @@ static void GenTrailingZeros(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ false); + CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void 
IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ true); + CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true); } void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { @@ -3015,11 +2690,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); // Bail out if the source is not a non primitive array. // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(temp1, temp1); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned @@ -3052,7 +2727,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); if (!optimizations.GetDestinationIsNonPrimitiveArray()) { // Bail out if the destination is not a non primitive array. @@ -3064,7 +2739,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // temporaries such a `temp1`. // /* HeapReference<Class> */ temp2 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(temp2, temp2); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp2` has been unpoisoned @@ -3077,7 +2752,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false); // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 
__ cmpl(temp1, temp2); @@ -3086,7 +2761,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &do_copy); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a // comparison with null below, and this reference is not @@ -3140,10 +2815,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(temp1, temp1); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `temp1` has been unpoisoned @@ -3212,7 +2887,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -3276,22 +2951,36 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
- codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) { + DCHECK(invoke->IsInvokeStaticOrDirect()); InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( invoke, codegen_, Location::RegisterLocation(EAX), Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + + LocationSummary* locations = invoke->GetLocations(); + if (locations != nullptr) { + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); + if (invoke_static_or_direct->HasSpecialInput() && + invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex()) + ->IsX86ComputeBaseMethodAddress()) { + locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(), + Location::RequiresRegister()); + } + } } void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + DCHECK(invoke->IsInvokeStaticOrDirect()); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86Assembler* assembler = GetAssembler(); @@ -3299,42 +2988,58 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; if (invoke->InputAt(0)->IsConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ movl(out, Immediate(address)); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress( + out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect()); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), Immediate(value)); } } else { + DCHECK(locations->CanCall()); Register in = locations->InAt(0).AsRegister<Register>(); // Check bounds of our cache. __ leal(out, Address(in, -info.low)); - __ cmpl(out, Immediate(info.high - info.low + 1)); + __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. 
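// ---- [Editor's aside: illustration only, not part of the patch] ----
// The new bounds check in the VisitIntegerValueOf hunk above replaces the
// two-comparison test `value >= info.low && value <= info.high` with a single
// unsigned compare. A minimal standalone sketch of that idiom; the low/length
// values below are assumptions mirroring the usual Integer cache window, not
// taken from ART.
#include <cassert>
#include <cstdint>

// True iff value lies in [low, low + length). Doing the subtraction in
// unsigned arithmetic makes out-of-range values wrap to something >= length,
// so one comparison covers both bounds.
bool InCacheRange(int32_t value, int32_t low, uint32_t length) {
  return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length;
}

int main() {
  const int32_t low = -128;     // hypothetical cache bounds
  const uint32_t length = 256;
  assert(InCacheRange(-128, low, length));
  assert(InCacheRange(127, low, length));
  assert(!InCacheRange(128, low, length));
  assert(!InCacheRange(-129, low, length));
  return 0;
}
// ---- [end editor's aside] ----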
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - __ movl(out, Address(out, TIMES_4, data_offset + address)); + constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>); + static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), + "Check heap reference size."); + if (codegen_->GetCompilerOptions().IsBootImage()) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex(); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress(); + DCHECK(method_address != nullptr); + Register method_address_reg = + invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>(); + __ movl(out, Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference); + } else { + // Note: We're about to clobber the index in `out`, so we need to use `in` and + // adjust the offset accordingly. + uint32_t mid_array_boot_image_offset = + info.array_data_boot_image_reference - info.low * kElementSize; + codegen_->LoadBootImageAddress( + out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect()); + DCHECK_NE(out, in); + __ movl(out, Address(out, in, TIMES_4, 0)); + } __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), in); __ Bind(&done); } @@ -3373,8 +3078,9 @@ UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit) +UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) +UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index e3555e78fc..ae150dad43 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -30,14 +30,14 @@ namespace x86 { class CodeGeneratorX86; class X86Assembler; -class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; -class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorX86(CodeGeneratorX86* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 91a505ede1..e79c0c9adf 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -80,7 +80,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { DCHECK(kUseBakerReadBarrier); } - void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); @@ -118,7 +118,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { __ jmp(GetExitLabel()); } - const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } + const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; } private: DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64); @@ -162,10 +162,10 @@ void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invok } void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -176,10 +176,10 @@ void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) } void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); + MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler()); } static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } - -// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we -// need is 64b. 
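// ---- [Editor's aside: illustration only, not part of the patch] ----
// The helper deleted in the lines below implemented Math.abs for float/double
// by ANDing away the IEEE-754 sign bit (andps/andpd against 0x7FFFFFFF and
// 0x7FFFFFFFFFFFFFFF). A standalone sketch of the same bit trick in portable
// C++; the masks are the constants visible in the removed code.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

double AbsViaMask(double x) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof(bits));     // bit_cast to integer
  bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);     // clear the sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

float AbsViaMask(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits &= UINT32_C(0x7FFFFFFF);
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

int main() {
  assert(AbsViaMask(-1.5) == 1.5);
  assert(!std::signbit(AbsViaMask(-0.0)));  // -0.0 maps to +0.0, as Math.abs requires
  assert(AbsViaMask(-2.25f) == 2.25f);
  return 0;
}
// ---- [end editor's aside] ----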
- -static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { - // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. -} - -static void MathAbsFP(LocationSummary* locations, - bool is64bit, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location output = locations->Out(); - - DCHECK(output.IsFpuRegister()); - XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - - // TODO: Can mask directly with constant area using pand if we can guarantee - // that the literal is aligned on a 16 byte boundary. This will avoid a - // temporary. - if (is64bit) { - __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); - __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } else { - __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF))); - __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { - MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); -} - -static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) { - Location output = locations->Out(); - CpuRegister out = output.AsRegister<CpuRegister>(); - CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); - - if (is64bit) { - // Create mask. - __ movq(mask, out); - __ sarq(mask, Immediate(63)); - // Add mask. - __ addq(out, mask); - __ xorq(out, mask); - } else { - // Create mask. - __ movl(mask, out); - __ sarl(mask, Immediate(31)); - // Add mask. 
- __ addl(out, mask); - __ xorl(out, mask); - } -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { - GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); -} - -static void GenMinMaxFP(LocationSummary* locations, - bool is_min, - bool is_double, - X86_64Assembler* assembler, - CodeGeneratorX86_64* codegen) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - Location out_loc = locations->Out(); - XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - DCHECK(out_loc.Equals(op1_loc)); - return; - } - - // (out := op1) - // out <=? op2 - // if Nan jmp Nan_label - // if out is min jmp done - // if op2 is min jmp op2_label - // handle -0/+0 - // jmp done - // Nan_label: - // out := NaN - // op2_label: - // out := op2 - // done: - // - // This removes one jmp, but needs to copy one input (op1) to out. - // - // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? - - XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); - - NearLabel nan, done, op2_label; - if (is_double) { - __ ucomisd(out, op2); - } else { - __ ucomiss(out, op2); - } - - __ j(Condition::kParityEven, &nan); - - __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); - __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); - - // Handle 0.0/-0.0. - if (is_min) { - if (is_double) { - __ orpd(out, op2); - } else { - __ orps(out, op2); - } - } else { - if (is_double) { - __ andpd(out, op2); - } else { - __ andps(out, op2); - } - } - __ jmp(&done); - - // NaN handling. - __ Bind(&nan); - if (is_double) { - __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000))); - } else { - __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000))); - } - __ jmp(&done); - - // out := op2; - __ Bind(&op2_label); - if (is_double) { - __ movsd(out, op2); - } else { - __ movss(out, op2); - } - - // Done. - __ Bind(&done); -} - -static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - // The following is sub-optimal, but all we can do for now. It would be fine to also accept - // the second input to be the output (we can simply swap inputs). 
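// ---- [Editor's aside: illustration only, not part of the patch] ----
// The GenAbsInteger helper removed just above computes |x| without a branch:
// sar produces a mask of 0 (x >= 0) or -1 (x < 0), and (x + mask) ^ mask
// negates x exactly when the mask is -1. A standalone sketch of that sequence.
#include <cassert>
#include <cstdint>

int32_t AbsBranchless(int32_t x) {
  // Mirrors the movl/sarl(31)/addl/xorl sequence from the removed code. The
  // shift assumes arithmetic right shift for negative values, which sarl
  // guarantees on x86 and which mainstream C++ compilers provide.
  uint32_t u = static_cast<uint32_t>(x);
  uint32_t mask = static_cast<uint32_t>(x >> 31);  // 0 or 0xFFFFFFFF
  return static_cast<int32_t>((u + mask) ^ mask);
}

int main() {
  assert(AbsBranchless(5) == 5);
  assert(AbsBranchless(-5) == 5);
  assert(AbsBranchless(0) == 0);
  assert(AbsBranchless(INT32_MIN) == INT32_MIN);   // same edge case as Math.abs
  return 0;
}
// ---- [end editor's aside] ----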
- locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFP( - invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_); -} - -static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, - X86_64Assembler* assembler) { - Location op1_loc = locations->InAt(0); - Location op2_loc = locations->InAt(1); - - // Shortcut for same input locations. - if (op1_loc.Equals(op2_loc)) { - // Can return immediately, as op1_loc == out_loc. - // Note: if we ever support separate registers, e.g., output into memory, we need to check for - // a copy here. - DCHECK(locations->Out().Equals(op1_loc)); - return; - } - - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); - - // (out := op1) - // out <=? op2 - // if out is min jmp done - // out := op2 - // done: - - if (is_long) { - __ cmpq(out, op2); - } else { - __ cmpl(out, op2); - } - - __ cmov(is_min ? 
Condition::kGreater : Condition::kLess, out, op2, is_long); -} - -static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { - LocationSummary* locations = - new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler()); -} - -void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(allocator_, invoke); -} - -void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); -} - static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -728,12 +430,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { // direct x86 instruction, since NaN should map to 0 and large positive // values need to be clipped to the extreme value. codegen_->Load64BitValue(out, kPrimLongMax); - __ cvtsi2sd(t2, out, /* is64bit */ true); + __ cvtsi2sd(t2, out, /* is64bit= */ true); __ comisd(t1, t2); __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out) - __ cvttsd2si(out, t1, /* is64bit */ true); + __ cvttsd2si(out, t1, /* is64bit= */ true); __ Bind(&done); } @@ -1277,7 +979,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = dest->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false); // Register `temp1` is not trashed by the read barrier emitted // by GenerateFieldLoadWithBakerReadBarrier below, as that // method produces a call to a ReadBarrierMarkRegX entry point, @@ -1285,7 +987,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // temporaries such a `temp1`. 
// /* HeapReference<Class> */ temp2 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp2_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false); // If heap poisoning is enabled, `temp1` and `temp2` have been // unpoisoned by the the previous calls to // GenerateFieldLoadWithBakerReadBarrier. @@ -1309,7 +1011,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `TMP` has been unpoisoned by @@ -1332,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. // /* HeapReference<Class> */ TMP = temp2->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false); + invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); // If heap poisoning is enabled, `TMP` has been unpoisoned by @@ -1356,7 +1058,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false); // We do not need to emit a read barrier for the following // heap reference load, as `temp1` is only used in a // comparison with null below, and this reference is not @@ -1384,10 +1086,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // /* HeapReference<Class> */ temp1 = src->klass_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, temp1_loc, src, class_offset, /* needs_null_check */ false); + invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false); // /* HeapReference<Class> */ TMP = temp1->component_type_ codegen_->GenerateFieldLoadWithBakerReadBarrier( - invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false); + invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false); __ testl(CpuRegister(TMP), CpuRegister(TMP)); __ j(kEqual, intrinsic_slow_path->GetEntryLabel()); } else { @@ -1441,7 +1143,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kEqual, &done); // Given the numeric representation, it's enough to check the low bit of the rb_state. 
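// ---- [Editor's aside: illustration only, not part of the patch] ----
// The WhiteState -> NonGrayState rename in the assertions just below (and in
// the x86 copy of this intrinsic earlier in the patch) keeps the same
// encoding: the read-barrier state sits at LockWord::kReadBarrierStateShift
// and only the "gray" state has the low bit set, so a single bit test
// suffices. A standalone sketch; the shift value here is a placeholder, not
// the real LockWord layout.
#include <cassert>
#include <cstdint>

constexpr uint32_t kReadBarrierStateShift = 28;  // placeholder constant
constexpr uint32_t kNonGrayState = 0;
constexpr uint32_t kGrayState = 1;
static_assert(kNonGrayState == 0 && kGrayState == 1,
              "the low bit alone distinguishes gray from non-gray");

// True iff the read-barrier state encoded in the lock word is "gray", i.e. the
// concurrent copying collector has not finished scanning the object yet.
bool IsGray(uint32_t lock_word) {
  return ((lock_word >> kReadBarrierStateShift) & 1u) == kGrayState;
}

int main() {
  assert(!IsGray(0u));
  assert(IsGray(kGrayState << kReadBarrierStateShift));
  return 0;
}
// ---- [end editor's aside] ----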
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; @@ -1496,7 +1198,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -1528,13 +1230,6 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) { - if (kEmitCompilerReadBarrier && - !StringEqualsOptimizations(invoke).GetArgumentIsString() && - !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { - // No support for this odd case (String class is moveable, not in the boot image). - return; - } - LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); @@ -1580,7 +1275,15 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) { // All string objects must have the same type since String cannot be subclassed. // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. + // + // As the String class is expected to be non-movable, we can read the class + // field from String.equals' arguments without read barriers. + AssertNonMovableStringClass(); + // Also, because we use the loaded class references only to compare them, we + // don't need to unpoison them. + // /* HeapReference<Class> */ rcx = str->klass_ __ movl(rcx, Address(str, class_offset)); + // if (rcx != /* HeapReference<Class> */ arg->klass_) return false __ cmpl(rcx, Address(arg, class_offset)); __ j(kNotEqual, &return_false); } @@ -1749,7 +1452,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Ensure we have a start index >= 0; __ xorl(counter, counter); __ cmpl(start_index, Immediate(0)); - __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough. + __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough. 
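// ---- [Editor's aside: illustration only, not part of the patch] ----
// The xorl/cmpl/cmov sequence in the GenerateStringIndexOf hunk above clamps a
// possibly negative fromIndex to zero, which is how String.indexOf treats
// negative start indices, without taking a branch. A trivial sketch of the
// same clamp; compilers typically lower it to the same cmp + cmov pattern.
#include <algorithm>
#include <cassert>
#include <cstdint>

int32_t ClampStartIndex(int32_t start_index) {
  return std::max<int32_t>(start_index, 0);  // negative fromIndex behaves like 0
}

int main() {
  assert(ClampStartIndex(-7) == 0);
  assert(ClampStartIndex(0) == 0);
  assert(ClampStartIndex(42) == 42);
  return 0;
}
// ---- [end editor's aside] ----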
if (mirror::kUseStringCompression) { NearLabel modify_counter, offset_uncompressed_label; @@ -1811,19 +1514,19 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true); } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false); } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -2137,7 +1840,7 @@ void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(), - /* no_rip */ true)); + /* no_rip= */ true)); } static void GenUnsafeGet(HInvoke* invoke, @@ -2163,7 +1866,7 @@ static void GenUnsafeGet(HInvoke* invoke, if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); codegen->GenerateReferenceLoadWithBakerReadBarrier( - invoke, output_loc, base, src, /* needs_null_check */ false); + invoke, output_loc, base, src, /* needs_null_check= */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -2227,22 +1930,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, 
DataType::Type::kReference, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); } @@ -2325,34 +2028,34 @@ static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool i } void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut( - invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_); } static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, @@ -2437,8 +2140,8 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* co out_loc, // Unused, used only as a "temporary" within the read barrier. 
base, field_addr, - /* needs_null_check */ false, - /* always_update_field */ true, + /* needs_null_check= */ false, + /* always_update_field= */ true, &temp1, &temp2); } @@ -2666,7 +2369,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { @@ -2674,7 +2377,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) { @@ -2718,93 +2421,98 @@ static void GenOneBit(X86_64Assembler* assembler, } // Handle the non-constant cases. - CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (is_high) { - // Use architectural support: basically 1 << bsr. - if (src.IsRegister()) { + if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() && + src.IsRegister()) { + __ blsi(out, src.AsRegister<CpuRegister>()); + } else { + CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); + if (is_high) { + // Use architectural support: basically 1 << bsr. + if (src.IsRegister()) { + if (is_long) { + __ bsrq(tmp, src.AsRegister<CpuRegister>()); + } else { + __ bsrl(tmp, src.AsRegister<CpuRegister>()); + } + } else if (is_long) { + DCHECK(src.IsDoubleStackSlot()); + __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } else { + DCHECK(src.IsStackSlot()); + __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } + // BSR sets ZF if the input was zero. + NearLabel is_zero, done; + __ j(kEqual, &is_zero); + __ movl(out, Immediate(1)); // Clears upper bits too. if (is_long) { - __ bsrq(tmp, src.AsRegister<CpuRegister>()); + __ shlq(out, tmp); } else { - __ bsrl(tmp, src.AsRegister<CpuRegister>()); + __ shll(out, tmp); } - } else if (is_long) { - DCHECK(src.IsDoubleStackSlot()); - __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } else { - DCHECK(src.IsStackSlot()); - __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } - // BSR sets ZF if the input was zero. - NearLabel is_zero, done; - __ j(kEqual, &is_zero); - __ movl(out, Immediate(1)); // Clears upper bits too. - if (is_long) { - __ shlq(out, tmp); - } else { - __ shll(out, tmp); - } - __ jmp(&done); - __ Bind(&is_zero); - __ xorl(out, out); // Clears upper bits too. - __ Bind(&done); - } else { - // Copy input into temporary. - if (src.IsRegister()) { + __ jmp(&done); + __ Bind(&is_zero); + __ xorl(out, out); // Clears upper bits too. + __ Bind(&done); + } else { + // Copy input into temporary. 
+ if (src.IsRegister()) { + if (is_long) { + __ movq(tmp, src.AsRegister<CpuRegister>()); + } else { + __ movl(tmp, src.AsRegister<CpuRegister>()); + } + } else if (is_long) { + DCHECK(src.IsDoubleStackSlot()); + __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } else { + DCHECK(src.IsStackSlot()); + __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); + } + // Do the bit twiddling: basically tmp & -tmp; if (is_long) { - __ movq(tmp, src.AsRegister<CpuRegister>()); + __ movq(out, tmp); + __ negq(tmp); + __ andq(out, tmp); } else { - __ movl(tmp, src.AsRegister<CpuRegister>()); + __ movl(out, tmp); + __ negl(tmp); + __ andl(out, tmp); } - } else if (is_long) { - DCHECK(src.IsDoubleStackSlot()); - __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } else { - DCHECK(src.IsStackSlot()); - __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex())); - } - // Do the bit twiddling: basically tmp & -tmp; - if (is_long) { - __ movq(out, tmp); - __ negq(tmp); - __ andq(out, tmp); - } else { - __ movl(out, tmp); - __ negl(tmp); - __ andl(out, tmp); } } } void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ true); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ true); } void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ true); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ true); } void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true); } void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ false); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ false); } void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) { - CreateOneBitLocations(allocator_, invoke, /* is_high */ false); + CreateOneBitLocations(allocator_, invoke, /* is_high= */ false); } void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) { - GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true); + GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true); } static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -2869,7 +2577,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -2877,7 +2585,7 @@ void 
IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) { @@ -2937,7 +2645,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* } void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false); } void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -2945,7 +2653,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); + GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true); } void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { @@ -2958,58 +2666,49 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { - IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + IntrinsicVisitor::IntegerValueOfInfo info = + IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions()); LocationSummary* locations = invoke->GetLocations(); X86_64Assembler* assembler = GetAssembler(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); InvokeRuntimeCallingConvention calling_convention; - if (invoke->InputAt(0)->IsConstant()) { + CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); + if (invoke->InputAt(0)->IsIntConstant()) { int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); - if (value >= info.low && value <= info.high) { + if (static_cast<uint32_t>(value - info.low) < info.length) { // Just embed the j.l.Integer in the code. - ScopedObjectAccess soa(Thread::Current()); - mirror::Object* boxed = info.cache->Get(value + (-info.low)); - DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); - __ movl(out, Immediate(static_cast<int32_t>(address))); + DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference); + codegen_->LoadBootImageAddress(out, info.value_boot_image_reference); } else { + DCHECK(locations->CanCall()); // Allocate and initialize a new j.l.Integer. // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the // JIT object table. 
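// ---- [Editor's aside: illustration only, not part of the patch] ----
// For orientation: the surrounding x86_64 VisitIntegerValueOf changes
// implement the Integer.valueOf fast path -- return the pre-allocated boxed
// object from the boot-image cache when the value is in range, otherwise
// allocate a fresh j.l.Integer. A hedged standalone sketch of that control
// flow; the cache bounds and the lazy initialization are simplifications, not
// ART's actual boot-image layout.
#include <cassert>
#include <cstdint>
#include <memory>

struct BoxedInt {
  explicit BoxedInt(int32_t v) : value(v) {}
  int32_t value;
};

constexpr int32_t kLow = -128;     // assumed cache window [-128, 127]
constexpr uint32_t kLength = 256;
static std::unique_ptr<BoxedInt> gCache[kLength];  // stand-in for the boot-image array

// Returns the canonical object for cached values, a caller-owned one otherwise.
const BoxedInt* ValueOf(int32_t v) {
  uint32_t index = static_cast<uint32_t>(v) - static_cast<uint32_t>(kLow);
  if (index < kLength) {                       // single-comparison bounds check
    if (gCache[index] == nullptr) {
      gCache[index] = std::make_unique<BoxedInt>(v);
    }
    return gCache[index].get();
  }
  return new BoxedInt(v);                      // slow path: allocate
}

int main() {
  assert(ValueOf(100) == ValueOf(100));        // in range: same identity
  assert(ValueOf(100)->value == 100);
  const BoxedInt* big = ValueOf(1000);         // out of range: fresh object
  assert(big->value == 1000);
  delete big;
  return 0;
}
// ---- [end editor's aside] ----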
- CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(argument, Immediate(static_cast<int32_t>(address))); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), Immediate(value)); } } else { + DCHECK(locations->CanCall()); CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>(); // Check bounds of our cache. __ leal(out, Address(in, -info.low)); - __ cmpl(out, Immediate(info.high - info.low + 1)); + __ cmpl(out, Immediate(info.length)); NearLabel allocate, done; __ j(kAboveEqual, &allocate); // If the value is within the bounds, load the j.l.Integer directly from the array. - uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); - if (data_offset + address <= std::numeric_limits<int32_t>::max()) { - __ movl(out, Address(out, TIMES_4, data_offset + address)); - } else { - CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0)); - __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address))); - __ movl(out, Address(temp, out, TIMES_4, 0)); - } + DCHECK_NE(out.AsRegister(), argument.AsRegister()); + codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference); + static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>), + "Check heap reference size."); + __ movl(out, Address(argument, out, TIMES_4, 0)); __ MaybeUnpoisonHeapReference(out); __ jmp(&done); __ Bind(&allocate); // Otherwise allocate and initialize a new j.l.Integer. 
- CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0)); - address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); - __ movl(argument, Immediate(static_cast<int32_t>(address))); - codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); - CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(), + info.integer_boot_image_offset); __ movl(Address(out, info.value_offset), in); __ Bind(&done); } @@ -3025,7 +2724,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>(); Address address = Address::Absolute - (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true); + (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true); NearLabel done; __ gs()->movl(out, address); __ testl(out, out); @@ -3046,6 +2745,9 @@ void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIB UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 5cb601edfe..199cfede1a 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -30,14 +30,14 @@ namespace x86_64 { class CodeGeneratorX86_64; class X86_64Assembler; -class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { +class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor { public: explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen); // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST @@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; -class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { +class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor { public: explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ - void Visit ## Name(HInvoke* invoke) OVERRIDE; + void Visit ## Name(HInvoke* invoke) override; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index d3a0376e9c..0edb23b857 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -78,7 +78,8 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info) } } -void LICM::Run() { +bool LICM::Run() { + bool didLICM = false; DCHECK(side_effects_.HasRun()); // Only used during debug. 
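LICM is one of several passes in this change whose Run() now reports whether the pass actually changed the graph. A stand-alone sketch of one way such a return value can be used, e.g. driving a pipeline to a fixed point; this is illustrative, not ART's actual pass manager:

    #include <memory>
    #include <vector>

    // Minimal analog of the new contract: Run() returns true iff the pass
    // modified the graph it was run on.
    struct Pass {
      virtual ~Pass() = default;
      virtual bool Run() = 0;
    };

    // A driver can then repeat a pipeline until no pass reports a change.
    bool RunToFixedPoint(const std::vector<std::unique_ptr<Pass>>& passes) {
      bool changed_once = false;
      bool changed;
      do {
        changed = false;
        for (const std::unique_ptr<Pass>& pass : passes) {
          changed = pass->Run() || changed;
        }
        changed_once = changed_once || changed;
      } while (changed);
      return changed_once;
    }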
@@ -157,6 +158,7 @@ void LICM::Run() { } instruction->MoveBefore(pre_header->GetLastInstruction()); MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved); + didLICM = true; } if (!can_move && (instruction->CanThrow() || instruction->DoesAnyWrite())) { @@ -167,6 +169,7 @@ void LICM::Run() { } } } + return didLICM; } } // namespace art diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h index ee567aeb20..9cafddb05a 100644 --- a/compiler/optimizing/licm.h +++ b/compiler/optimizing/licm.h @@ -33,7 +33,7 @@ class LICM : public HOptimization { : HOptimization(graph, name, stats), side_effects_(side_effects) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoopInvariantCodeMotionPassName = "licm"; diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 9fa5b74c62..50bfe843b5 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -16,11 +16,9 @@ #include <fstream> -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -43,10 +41,8 @@ template <size_t number_of_blocks> void LinearizeTest::TestCode(const std::vector<uint16_t>& data, const uint32_t (&expected_order)[number_of_blocks]) { HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); ASSERT_EQ(graph->GetLinearOrder().size(), number_of_blocks); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 66660662e4..60f513ca48 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -14,11 +14,9 @@ * limitations under the License. */ -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -40,7 +38,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { // on how instructions are ordered. RemoveSuspendChecks(graph); // `Inline` conditions into ifs. 
- PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, *compiler_options_).Run(); return graph; } @@ -63,10 +61,8 @@ TEST_F(LiveRangesTest, CFG1) { HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -109,10 +105,8 @@ TEST_F(LiveRangesTest, CFG2) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -158,10 +152,8 @@ TEST_F(LiveRangesTest, CFG3) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 4 constant. @@ -235,10 +227,8 @@ TEST_F(LiveRangesTest, Loop1) { HGraph* graph = BuildGraph(data); RemoveSuspendChecks(graph); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. @@ -312,10 +302,8 @@ TEST_F(LiveRangesTest, Loop2) { Instruction::RETURN | 0 << 8); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. 
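These tests now obtain their code generator through the CodeGenerator::Create(graph, compiler_options) factory instead of constructing an x86 backend directly; presumably the factory picks the backend from the instruction set configured in the compiler options. A stand-alone sketch of that shape, with purely illustrative names:

    #include <memory>

    // Illustrative shapes only; the real factory returns a CodeGenerator
    // subclass (or nullptr for an unsupported target).
    enum class Isa { kArm, kArm64, kX86, kX86_64 };

    struct Backend {
      virtual ~Backend() = default;
    };
    struct ArmBackend : Backend {};
    struct X86Backend : Backend {};

    std::unique_ptr<Backend> CreateBackend(Isa isa) {
      switch (isa) {
        case Isa::kArm:
        case Isa::kArm64:
          return std::make_unique<ArmBackend>();
        case Isa::kX86:
        case Isa::kX86_64:
          return std::make_unique<X86Backend>();
      }
      return nullptr;
    }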
@@ -388,10 +376,8 @@ TEST_F(LiveRangesTest, CFG4) { Instruction::RETURN); HGraph* graph = BuildGraph(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 6621a03568..f11f7a9779 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -14,11 +14,9 @@ * limitations under the License. */ -#include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" -#include "code_generator_x86.h" #include "dex/dex_file.h" #include "dex/dex_instruction.h" #include "driver/compiler_options.h" @@ -49,11 +47,9 @@ static void DumpBitVector(BitVector* vector, void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { HGraph* graph = CreateCFG(data); // `Inline` conditions into ifs. - PrepareForRegisterAllocation(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + PrepareForRegisterAllocation(graph, *compiler_options_).Run(); + std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_); + SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator()); liveness.Analyze(); std::ostringstream buffer; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index 8b1812a6de..7d7bb94933 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -152,7 +152,7 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, return true; } -void LoadStoreAnalysis::Run() { +bool LoadStoreAnalysis::Run() { for (HBasicBlock* block : graph_->GetReversePostOrder()) { heap_location_collector_.VisitBasicBlock(block); } @@ -160,22 +160,23 @@ void LoadStoreAnalysis::Run() { if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) { // Bail out if there are too many heap locations to deal with. heap_location_collector_.CleanUp(); - return; + return false; } if (!heap_location_collector_.HasHeapStores()) { // Without heap stores, this pass would act mostly as GVN on heap accesses. heap_location_collector_.CleanUp(); - return; + return false; } if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) { // Don't do load/store elimination if the method has volatile field accesses or // monitor operations, for now. // TODO: do it right. 
heap_location_collector_.CleanUp(); - return; + return false; } heap_location_collector_.BuildAliasingMatrix(); + return true; } } // namespace art diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index 437e6be418..08d9309a3e 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -94,11 +94,13 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; HeapLocation(ReferenceInfo* ref_info, + DataType::Type type, size_t offset, HInstruction* index, size_t vector_length, int16_t declaring_class_def_index) : ref_info_(ref_info), + type_(DataType::ToSigned(type)), offset_(offset), index_(index), vector_length_(vector_length), @@ -116,6 +118,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } + DataType::Type GetType() const { return type_; } size_t GetOffset() const { return offset_; } HInstruction* GetIndex() const { return index_; } size_t GetVectorLength() const { return vector_length_; } @@ -149,6 +152,10 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> { private: // Reference for instance/static field, array element or vector data. ReferenceInfo* const ref_info_; + // Type of data residing at HeapLocation (always signed for integral + // data since e.g. a[i] and a[i] & 0xff are represented by differently + // signed types; char vs short are disambiguated through the reference). + const DataType::Type type_; // Offset of static/instance field. // Invalid when this HeapLocation is not field. const size_t offset_; @@ -237,19 +244,31 @@ class HeapLocationCollector : public HGraphVisitor { DCHECK(object != nullptr); DCHECK(field != nullptr); return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(object)), + field->GetFieldType(), field->GetFieldOffset().SizeValue(), nullptr, HeapLocation::kScalar, field->GetDeclaringClassDefIndex()); } - size_t GetArrayHeapLocation(HInstruction* array, - HInstruction* index, - size_t vector_length = HeapLocation::kScalar) const { - DCHECK(array != nullptr); - DCHECK(index != nullptr); - DCHECK_GE(vector_length, HeapLocation::kScalar); + size_t GetArrayHeapLocation(HInstruction* instruction) const { + DCHECK(instruction != nullptr); + HInstruction* array = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + DataType::Type type = instruction->GetType(); + size_t vector_length = HeapLocation::kScalar; + if (instruction->IsArraySet()) { + type = instruction->AsArraySet()->GetComponentType(); + } else if (instruction->IsVecStore() || + instruction->IsVecLoad()) { + HVecOperation* vec_op = instruction->AsVecOperation(); + type = vec_op->GetPackedType(); + vector_length = vec_op->GetVectorLength(); + } else { + DCHECK(instruction->IsArrayGet()); + } return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(array)), + type, HeapLocation::kInvalidFieldOffset, index, vector_length, @@ -279,13 +298,16 @@ class HeapLocationCollector : public HGraphVisitor { // In later analysis, ComputeMayAlias() and MayAlias() compute and tell whether // these indexes alias. 
size_t FindHeapLocationIndex(ReferenceInfo* ref_info, + DataType::Type type, size_t offset, HInstruction* index, size_t vector_length, int16_t declaring_class_def_index) const { + DataType::Type lookup_type = DataType::ToSigned(type); for (size_t i = 0; i < heap_locations_.size(); i++) { HeapLocation* loc = heap_locations_[i]; if (loc->GetReferenceInfo() == ref_info && + loc->GetType() == lookup_type && loc->GetOffset() == offset && loc->GetIndex() == index && loc->GetVectorLength() == vector_length && @@ -425,6 +447,7 @@ class HeapLocationCollector : public HGraphVisitor { } HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, + DataType::Type type, size_t offset, HInstruction* index, size_t vector_length, @@ -432,10 +455,10 @@ class HeapLocationCollector : public HGraphVisitor { HInstruction* original_ref = HuntForOriginalReference(ref); ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); size_t heap_location_idx = FindHeapLocationIndex( - ref_info, offset, index, vector_length, declaring_class_def_index); + ref_info, type, offset, index, vector_length, declaring_class_def_index); if (heap_location_idx == kHeapLocationNotFound) { HeapLocation* heap_loc = new (GetGraph()->GetAllocator()) - HeapLocation(ref_info, offset, index, vector_length, declaring_class_def_index); + HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index); heap_locations_.push_back(heap_loc); return heap_loc; } @@ -446,29 +469,35 @@ class HeapLocationCollector : public HGraphVisitor { if (field_info.IsVolatile()) { has_volatile_ = true; } + DataType::Type type = field_info.GetFieldType(); const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); return GetOrCreateHeapLocation(ref, + type, offset, nullptr, HeapLocation::kScalar, declaring_class_def_index); } - void VisitArrayAccess(HInstruction* array, HInstruction* index, size_t vector_length) { + void VisitArrayAccess(HInstruction* array, + HInstruction* index, + DataType::Type type, + size_t vector_length) { GetOrCreateHeapLocation(array, + type, HeapLocation::kInvalidFieldOffset, index, vector_length, HeapLocation::kDeclaringClassDefIndexForArrays); } - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); CreateReferenceInfoForReferenceType(instruction); } - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; if (location->GetReferenceInfo()->IsSingleton()) { @@ -494,12 +523,12 @@ class HeapLocationCollector : public HGraphVisitor { } } - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { + void VisitStaticFieldGet(HStaticFieldGet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); CreateReferenceInfoForReferenceType(instruction); } - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; } @@ -507,35 +536,39 @@ class HeapLocationCollector : public HGraphVisitor { // We intentionally don't collect 
HUnresolvedInstanceField/HUnresolvedStaticField accesses // since we cannot accurately track the fields. - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { + void VisitArrayGet(HArrayGet* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, HeapLocation::kScalar); + DataType::Type type = instruction->GetType(); + VisitArrayAccess(array, index, type, HeapLocation::kScalar); CreateReferenceInfoForReferenceType(instruction); } - void VisitArraySet(HArraySet* instruction) OVERRIDE { + void VisitArraySet(HArraySet* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, HeapLocation::kScalar); + DataType::Type type = instruction->GetComponentType(); + VisitArrayAccess(array, index, type, HeapLocation::kScalar); has_heap_stores_ = true; } - void VisitVecLoad(HVecLoad* instruction) OVERRIDE { + void VisitVecLoad(HVecLoad* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, instruction->GetVectorLength()); + DataType::Type type = instruction->GetPackedType(); + VisitArrayAccess(array, index, type, instruction->GetVectorLength()); CreateReferenceInfoForReferenceType(instruction); } - void VisitVecStore(HVecStore* instruction) OVERRIDE { + void VisitVecStore(HVecStore* instruction) override { HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); - VisitArrayAccess(array, index, instruction->GetVectorLength()); + DataType::Type type = instruction->GetPackedType(); + VisitArrayAccess(array, index, type, instruction->GetVectorLength()); has_heap_stores_ = true; } - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { // Any new-instance or new-array cannot alias with references that // pre-exist the new-instance/new-array. We append entries into // ref_info_array_ which keeps track of the order of creation @@ -547,7 +580,7 @@ class HeapLocationCollector : public HGraphVisitor { CreateReferenceInfoForReferenceType(instruction); } - void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE { + void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override { has_monitor_operations_ = true; } @@ -572,7 +605,7 @@ class LoadStoreAnalysis : public HOptimization { return heap_location_collector_; } - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis"; diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 56361a8c90..bfe7a4f72f 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -78,12 +78,16 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { // Test queries on HeapLocationCollector's ref info and index records. 
ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array); + DataType::Type type = DataType::Type::kInt32; size_t field = HeapLocation::kInvalidFieldOffset; size_t vec = HeapLocation::kScalar; size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; - size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field, c1, vec, class_def); - size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field, c2, vec, class_def); - size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field, index, vec, class_def); + size_t loc1 = heap_location_collector.FindHeapLocationIndex( + ref, type, field, c1, vec, class_def); + size_t loc2 = heap_location_collector.FindHeapLocationIndex( + ref, type, field, c2, vec, class_def); + size_t loc3 = heap_location_collector.FindHeapLocationIndex( + ref, type, field, index, vec, class_def); // must find this reference info for array in HeapLocationCollector. ASSERT_TRUE(ref != nullptr); // must find these heap locations; @@ -246,28 +250,28 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test alias: array[0] and array[1] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set2); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0] and array[i-0] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set3); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set5); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[i-1] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set6); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[1-i] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, rev_sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set7); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[i-(-1)] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_neg1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); } @@ -409,70 +413,75 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { size_t loc1, loc2; // Test alias: array[0] and array[0,1,2,3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + // Test alias: array[0] and array[1,2,3,4] + loc1 = 
heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_1); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + // Test alias: array[0] and array[8,9,10,11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[1] and array[8,9,10,11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[1] and array[0,1,2,3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[0,1,2,3] and array[8,9,10,11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[0,1,2,3] and array[1,2,3,4] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c1, 4); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_1); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[0] and array[i,i+1,i+2,i+3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i] and array[0,1,2,3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, index); - loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i] and array[i,i+1,i+2,i+3] - loc1 = heap_location_collector.GetArrayHeapLocation(array, index); - loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i] and array[i+8,i+9,i+10,i+11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, index); - loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8); 
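The type argument threaded through FindHeapLocationIndex and GetArrayHeapLocation here is normalized with DataType::ToSigned, so that signed and unsigned accesses of the same width (e.g. a[i] vs a[i] & 0xff) map to the same heap location. A stand-alone sketch of that normalization over a simplified type enum:

    #include <cassert>

    // Simplified stand-in for DataType::Type; only the integral cases that
    // matter for the signed-ness normalization are modeled.
    enum class Type { kInt8, kUint8, kInt16, kUint16, kInt32, kInt64, kReference };

    Type ToSigned(Type type) {
      switch (type) {
        case Type::kUint8:  return Type::kInt8;
        case Type::kUint16: return Type::kInt16;
        default:            return type;
      }
    }

    int main() {
      // Same width, different signedness: must collapse to one location key.
      assert(ToSigned(Type::kUint8) == ToSigned(Type::kInt8));
      // Different widths stay distinct (char[] vs short[] are further told
      // apart by having different reference infos).
      assert(ToSigned(Type::kInt16) != ToSigned(Type::kInt32));
      return 0;
    }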
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+6,i+7,i+8,i+9] and array[i+8,i+9,i+10,i+11] // Test partial overlap. - loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 4); - loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+6,i+7] and array[i,i+1,i+2,i+3] // Test different vector lengths. - loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2); - loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6_vlen2); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+6,i+7] and array[i+8,i+9,i+10,i+11] - loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2); - loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6_vlen2); + loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); } @@ -563,33 +572,33 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test alias: array[i+0x80000000] and array[i-0x80000000] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x80000000); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_2); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0x10] and array[i-0xFFFFFFF0] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x10); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0xFFFFFFF0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_3); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_4); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0x7FFFFFFF] and array[i-0x80000001] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x7FFFFFFF); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_5); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_6); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0] and array[i-0] - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_7); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_8); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Should not alias: - loc1 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); - loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_2); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_6); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Should not alias: - loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0); - loc2 = 
heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_7); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_2); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); } @@ -647,10 +656,10 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) { // times the original reference has been transformed by BoundType, // NullCheck, IntermediateAddress, etc. ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 1U); - size_t loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); - size_t loc2 = heap_location_collector.GetArrayHeapLocation(bound_type, c1); - size_t loc3 = heap_location_collector.GetArrayHeapLocation(null_check, c1); - size_t loc4 = heap_location_collector.GetArrayHeapLocation(inter_addr, c1); + size_t loc1 = heap_location_collector.GetArrayHeapLocation(array_get1); + size_t loc2 = heap_location_collector.GetArrayHeapLocation(array_get2); + size_t loc3 = heap_location_collector.GetArrayHeapLocation(array_get3); + size_t loc4 = heap_location_collector.GetArrayHeapLocation(array_get4); ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); ASSERT_EQ(loc1, loc2); ASSERT_EQ(loc1, loc3); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 237ecd3c10..b33d0f488e 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -107,7 +107,7 @@ class LSEVisitor : public HGraphDelegateVisitor { singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { // Populate the heap_values array for this block. // TODO: try to reuse the heap_values array from one predecessor if possible. if (block->IsLoopHeader()) { @@ -160,7 +160,7 @@ class LSEVisitor : public HGraphDelegateVisitor { // Scan the list of removed loads to see if we can reuse `type_conversion`, if // the other removed load has the same substitute and type and is dominated - // by `type_conversioni`. + // by `type_conversion`. void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) { size_t size = removed_loads_.size(); HInstruction* load = removed_loads_[index]; @@ -458,8 +458,13 @@ class LSEVisitor : public HGraphDelegateVisitor { } if (from_all_predecessors) { if (ref_info->IsSingletonAndRemovable() && - block->IsSingleReturnOrReturnVoidAllowingPhis()) { - // Values in the singleton are not needed anymore. + (block->IsSingleReturnOrReturnVoidAllowingPhis() || + (block->EndsWithReturn() && (merged_value != kUnknownHeapValue || + merged_store_value != kUnknownHeapValue)))) { + // Values in the singleton are not needed anymore: + // (1) if this block consists of a sole return, or + // (2) if this block returns and a usable merged value is obtained + // (loads prior to the return will always use that value). } else if (!IsStore(merged_value)) { // We don't track merged value as a store anymore. We have to // hold the stores in predecessors live here. 
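The widened condition above lets stores into a removable singleton be dropped when the block returns and the merge of the predecessors' heap values produced something usable (merged_value or merged_store_value is known). Roughly, a merged value is only kept when every predecessor agrees on the same known value; a stand-alone sketch of that rule, with nullptr standing in for kUnknownHeapValue (the real code also distinguishes store values and default values):

    #include <cassert>
    #include <vector>

    // A per-location value merged at block entry survives only if every
    // predecessor provides the same, known value; any disagreement or
    // unknown input makes the merged value unknown.
    const void* MergeHeapValue(const std::vector<const void*>& predecessor_values) {
      if (predecessor_values.empty()) {
        return nullptr;  // unknown
      }
      const void* merged = predecessor_values[0];
      for (const void* value : predecessor_values) {
        if (value == nullptr || value != merged) {
          return nullptr;
        }
      }
      return merged;
    }

    int main() {
      int stored = 42;
      assert(MergeHeapValue({&stored, &stored}) == &stored);  // usable merged value
      assert(MergeHeapValue({&stored, nullptr}) == nullptr);  // one predecessor unknown
      return 0;
    }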
@@ -542,16 +547,7 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitGetLocation(HInstruction* instruction, - HInstruction* ref, - size_t offset, - HInstruction* index, - size_t vector_length, - int16_t declaring_class_def_index) { - HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); - ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); - size_t idx = heap_location_collector_.FindHeapLocationIndex( - ref_info, offset, index, vector_length, declaring_class_def_index); + void VisitGetLocation(HInstruction* instruction, size_t idx) { DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; @@ -569,23 +565,7 @@ class LSEVisitor : public HGraphDelegateVisitor { heap_values[idx] = instruction; KeepStoresIfAliasedToLocation(heap_values, idx); } else { - if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { - // The only situation where the same heap location has different type is when - // we do an array get on an instruction that originates from the null constant - // (the null could be behind a field access, an array access, a null check or - // a bound type). - // In order to stay properly typed on primitive types, we do not eliminate - // the array gets. - if (kIsDebugBuild) { - DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); - DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); - } - // Load isn't eliminated. Put the load as the value into the HeapLocation. - // This acts like GVN but with better aliasing analysis. - heap_values[idx] = instruction; - KeepStoresIfAliasedToLocation(heap_values, idx); - return; - } + // Load is eliminated. AddRemovedLoad(instruction, heap_value); TryRemovingNullCheck(instruction); } @@ -610,21 +590,11 @@ class LSEVisitor : public HGraphDelegateVisitor { return false; } - void VisitSetLocation(HInstruction* instruction, - HInstruction* ref, - size_t offset, - HInstruction* index, - size_t vector_length, - int16_t declaring_class_def_index, - HInstruction* value) { + void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) { + DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); DCHECK(!IsStore(value)) << value->DebugName(); // value may already have a substitute. value = FindSubstitute(value); - HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); - ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); - size_t idx = heap_location_collector_.FindHeapLocationIndex( - ref_info, offset, index, vector_length, declaring_class_def_index); - DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; @@ -644,7 +614,8 @@ class LSEVisitor : public HGraphDelegateVisitor { } else if (!loop_info->IsIrreducible()) { // instruction is a store in the loop so the loop must do write. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo(); + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(ref_info->GetReference())) { // original_ref is created inside the loop. 
Value stored to it isn't needed at // the loop header. This is true for outer loops also. possibly_redundant = true; @@ -685,83 +656,43 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { - HInstruction* obj = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - VisitGetLocation(instruction, - obj, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index); + void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override { + HInstruction* object = instruction->InputAt(0); + const FieldInfo& field = instruction->GetFieldInfo(); + VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field)); } - void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - HInstruction* obj = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override { + HInstruction* object = instruction->InputAt(0); + const FieldInfo& field = instruction->GetFieldInfo(); HInstruction* value = instruction->InputAt(1); - VisitSetLocation(instruction, - obj, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index, - value); + size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field); + VisitSetLocation(instruction, idx, value); } - void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { + void VisitStaticFieldGet(HStaticFieldGet* instruction) override { HInstruction* cls = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - VisitGetLocation(instruction, - cls, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index); + const FieldInfo& field = instruction->GetFieldInfo(); + VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field)); } - void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + void VisitStaticFieldSet(HStaticFieldSet* instruction) override { HInstruction* cls = instruction->InputAt(0); - size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); - int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - HInstruction* value = instruction->InputAt(1); - VisitSetLocation(instruction, - cls, - offset, - nullptr, - HeapLocation::kScalar, - declaring_class_def_index, - value); - } - - void VisitArrayGet(HArrayGet* instruction) OVERRIDE { - HInstruction* array = instruction->InputAt(0); - HInstruction* index = instruction->InputAt(1); - VisitGetLocation(instruction, - array, - HeapLocation::kInvalidFieldOffset, - index, - HeapLocation::kScalar, - HeapLocation::kDeclaringClassDefIndexForArrays); - } - - void VisitArraySet(HArraySet* instruction) OVERRIDE { - HInstruction* array = instruction->InputAt(0); - HInstruction* index = instruction->InputAt(1); - HInstruction* value = instruction->InputAt(2); - VisitSetLocation(instruction, - array, - HeapLocation::kInvalidFieldOffset, - index, - HeapLocation::kScalar, - HeapLocation::kDeclaringClassDefIndexForArrays, - value); - } - - void VisitDeoptimize(HDeoptimize* instruction) { + const 
FieldInfo& field = instruction->GetFieldInfo(); + size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field); + VisitSetLocation(instruction, idx, instruction->InputAt(1)); + } + + void VisitArrayGet(HArrayGet* instruction) override { + VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); + } + + void VisitArraySet(HArraySet* instruction) override { + size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); + VisitSetLocation(instruction, idx, instruction->InputAt(2)); + } + + void VisitDeoptimize(HDeoptimize* instruction) override { const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { @@ -812,15 +743,15 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitReturn(HReturn* instruction) OVERRIDE { + void VisitReturn(HReturn* instruction) override { HandleExit(instruction->GetBlock()); } - void VisitReturnVoid(HReturnVoid* return_void) OVERRIDE { + void VisitReturnVoid(HReturnVoid* return_void) override { HandleExit(return_void->GetBlock()); } - void VisitThrow(HThrow* throw_instruction) OVERRIDE { + void VisitThrow(HThrow* throw_instruction) override { HandleExit(throw_instruction->GetBlock()); } @@ -846,35 +777,35 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitInvoke(HInvoke* invoke) OVERRIDE { + void VisitInvoke(HInvoke* invoke) override { HandleInvoke(invoke); } - void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + void VisitClinitCheck(HClinitCheck* clinit) override { HandleInvoke(clinit); } - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override { // Conservatively treat it as an invocation. HandleInvoke(instruction); } - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { + void VisitNewInstance(HNewInstance* new_instance) override { ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance); if (ref_info == nullptr) { // new_instance isn't used for field accesses. No need to process it. @@ -898,7 +829,7 @@ class LSEVisitor : public HGraphDelegateVisitor { } } - void VisitNewArray(HNewArray* new_array) OVERRIDE { + void VisitNewArray(HNewArray* new_array) override { ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array); if (ref_info == nullptr) { // new_array isn't used for array accesses. No need to process it. 
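After this refactoring, every field and array visitor first maps its access to a heap-location index and then funnels into the shared VisitGetLocation/VisitSetLocation logic. A toy model of that per-location bookkeeping for straight-line code (plain ints stand in for HInstructions; merges, loops and singletons are ignored):

    #include <cassert>
    #include <optional>
    #include <unordered_map>

    // Toy model: every access has already been mapped to a heap-location
    // index; a load is eliminable when the location's last known value is
    // still tracked.
    struct ToyLse {
      std::unordered_map<size_t, int> heap_values;  // location index -> value

      void Store(size_t idx, int value) { heap_values[idx] = value; }

      // Returns the replacement value if the load can be eliminated.
      std::optional<int> Load(size_t idx) const {
        auto it = heap_values.find(idx);
        if (it == heap_values.end()) {
          return std::nullopt;
        }
        return it->second;
      }
    };

    int main() {
      ToyLse lse;
      lse.Store(/* idx= */ 3, 42);
      assert(lse.Load(3).has_value() && lse.Load(3).value() == 42);  // eliminated
      assert(!lse.Load(5).has_value());  // unknown location: keep the load
      return 0;
    }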
@@ -948,22 +879,22 @@ class LSEVisitor : public HGraphDelegateVisitor { DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; -void LoadStoreElimination::Run() { +bool LoadStoreElimination::Run() { if (graph_->IsDebuggable() || graph_->HasTryCatch()) { // Debugger may set heap values or trigger deoptimization of callers. // Try/catch support not implemented yet. // Skip this optimization. - return; + return false; } const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector(); if (heap_location_collector.GetNumberOfHeapLocations() == 0) { // No HeapLocation information from LSA, skip this optimization. - return; + return false; } // TODO: analyze VecLoad/VecStore better. if (graph_->HasSIMD()) { - return; + return false; } LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_); @@ -971,6 +902,8 @@ void LoadStoreElimination::Run() { lse_visitor.VisitBasicBlock(block); } lse_visitor.RemoveInstructions(); + + return true; } } // namespace art diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index 7153541baf..f7ba41a1af 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -35,7 +35,7 @@ class LoadStoreElimination : public HOptimization { side_effects_(side_effects), lsa_(lsa) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoadStoreEliminationPassName = "load_store_elimination"; diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc new file mode 100644 index 0000000000..2ae3683ffa --- /dev/null +++ b/compiler/optimizing/loop_analysis.cc @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_analysis.h" + +#include "base/bit_vector-inl.h" +#include "induction_var_range.h" + +namespace art { + +void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results, + int64_t trip_count) { + analysis_results->trip_count_ = trip_count; + + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + // Check whether one of the successor is loop exit. + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + analysis_results->exits_num_++; + + // We track number of invariant loop exits which correspond to HIf instruction and + // can be eliminated by loop peeling; other control flow instruction are ignored and will + // not cause loop peeling to happen as they either cannot be inside a loop, or by + // definition cannot be loop exits (unconditional instructions), or are not beneficial for + // the optimization. 
+ HIf* hif = block->GetLastInstruction()->AsIf(); + if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { + analysis_results->invariant_exits_num_++; + } + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (it.Current()->GetType() == DataType::Type::kInt64) { + analysis_results->has_long_type_instructions_ = true; + } + if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) { + analysis_results->has_instructions_preventing_scalar_peeling_ = true; + analysis_results->has_instructions_preventing_scalar_unrolling_ = true; + } + analysis_results->instr_num_++; + } + analysis_results->bb_num_++; + } +} + +int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range) { + int64_t trip_count; + if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) { + trip_count = LoopAnalysisInfo::kUnknownTripCount; + } + return trip_count; +} + +// Default implementation of loop helper; used for all targets unless a custom implementation +// is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics. +class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper { + public: + // Scalar loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kScalarMaxUnrollFactor = 2; + // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17; + // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6; + // Maximum number of instructions to be created as a result of full unrolling. + static constexpr uint32_t kScalarHeuristicFullyUnrolledMaxInstrThreshold = 35; + + bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const override { + return analysis_info->HasLongTypeInstructions() || + IsLoopTooBig(analysis_info, + kScalarHeuristicMaxBodySizeInstr, + kScalarHeuristicMaxBodySizeBlocks); + } + + uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const override { + int64_t trip_count = analysis_info->GetTripCount(); + // Unroll only loops with known trip count. + if (trip_count == LoopAnalysisInfo::kUnknownTripCount) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor; + if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + + return desired_unrolling_factor; + } + + bool IsLoopPeelingEnabled() const override { return true; } + + bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info) const override { + int64_t trip_count = analysis_info->GetTripCount(); + // We assume that trip count is known. 
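GetScalarUnrollingFactor above only unrolls when the trip count is known and divisible by the maximum factor of 2, so no remainder loop is needed. A stand-alone restatement of that heuristic, with the constants copied from the default helper (kNoUnrollingFactor is 1, i.e. leave the loop untouched):

    #include <cassert>
    #include <cstdint>

    constexpr int64_t kUnknownTripCount = -1;
    constexpr uint32_t kNoUnrollingFactor = 1;
    constexpr uint32_t kScalarMaxUnrollFactor = 2;

    uint32_t ScalarUnrollingFactor(int64_t trip_count) {
      if (trip_count == kUnknownTripCount) {
        return kNoUnrollingFactor;  // only unroll loops with a known trip count
      }
      if (trip_count < kScalarMaxUnrollFactor ||
          trip_count % kScalarMaxUnrollFactor != 0) {
        return kNoUnrollingFactor;  // avoid needing a remainder loop
      }
      return kScalarMaxUnrollFactor;
    }

    int main() {
      assert(ScalarUnrollingFactor(kUnknownTripCount) == 1);
      assert(ScalarUnrollingFactor(7) == 1);  // odd trip count: would need a tail loop
      assert(ScalarUnrollingFactor(8) == 2);  // unrolled by 2, four iterations remain
      return 0;
    }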
+ DCHECK_NE(trip_count, LoopAnalysisInfo::kUnknownTripCount); + size_t instr_num = analysis_info->GetNumberOfInstructions(); + return (trip_count * instr_num < kScalarHeuristicFullyUnrolledMaxInstrThreshold); + } + + protected: + bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info, + size_t instr_threshold, + size_t bb_threshold) const { + size_t instr_num = loop_analysis_info->GetNumberOfInstructions(); + size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks(); + return (instr_num >= instr_threshold || bb_num >= bb_threshold); + } +}; + +// Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop +// peeling and unrolling and supports SIMD loop unrolling. +class Arm64LoopHelper : public ArchDefaultLoopHelper { + public: + // SIMD loop unrolling parameters and heuristics. + // + // Maximum possible unrolling factor. + static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8; + // Loop's maximum instruction count. Loops with higher count will not be unrolled. + static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50; + + // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40; + // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled. + static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8; + + bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const override { + return IsLoopTooBig(loop_analysis_info, + kArm64ScalarHeuristicMaxBodySizeInstr, + kArm64ScalarHeuristicMaxBodySizeBlocks); + } + + uint32_t GetSIMDUnrollingFactor(HBasicBlock* block, + int64_t trip_count, + uint32_t max_peel, + uint32_t vector_length) const override { + // Don't unroll with insufficient iterations. + // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length, 0u); + if (trip_count < (2 * vector_length + max_peel)) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + // Don't unroll for large loop body size. + uint32_t instruction_count = block->GetInstructions().CountSize(); + if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + + uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count; + uint32_t uf2 = (trip_count - max_peel) / vector_length; + uint32_t unroll_factor = + TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor})); + DCHECK_GE(unroll_factor, 1u); + return unroll_factor; + } +}; + +ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(InstructionSet isa, + ArenaAllocator* allocator) { + switch (isa) { + case InstructionSet::kArm64: { + return new (allocator) Arm64LoopHelper; + } + default: { + return new (allocator) ArchDefaultLoopHelper; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h new file mode 100644 index 0000000000..57509ee410 --- /dev/null +++ b/compiler/optimizing/loop_analysis.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ + +#include "nodes.h" + +namespace art { + +class InductionVarRange; +class LoopAnalysis; + +// Class to hold cached information on properties of the loop. +class LoopAnalysisInfo : public ValueObject { + public: + // No loop unrolling factor (just one copy of the loop-body). + static constexpr uint32_t kNoUnrollingFactor = 1; + // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount). + static constexpr int64_t kUnknownTripCount = -1; + + explicit LoopAnalysisInfo(HLoopInformation* loop_info) + : trip_count_(kUnknownTripCount), + bb_num_(0), + instr_num_(0), + exits_num_(0), + invariant_exits_num_(0), + has_instructions_preventing_scalar_peeling_(false), + has_instructions_preventing_scalar_unrolling_(false), + has_long_type_instructions_(false), + loop_info_(loop_info) {} + + int64_t GetTripCount() const { return trip_count_; } + size_t GetNumberOfBasicBlocks() const { return bb_num_; } + size_t GetNumberOfInstructions() const { return instr_num_; } + size_t GetNumberOfExits() const { return exits_num_; } + size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; } + + bool HasInstructionsPreventingScalarPeeling() const { + return has_instructions_preventing_scalar_peeling_; + } + + bool HasInstructionsPreventingScalarUnrolling() const { + return has_instructions_preventing_scalar_unrolling_; + } + + bool HasInstructionsPreventingScalarOpts() const { + return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling(); + } + + bool HasLongTypeInstructions() const { + return has_long_type_instructions_; + } + + HLoopInformation* GetLoopInfo() const { return loop_info_; } + + private: + // Trip count of the loop if known, kUnknownTripCount otherwise. + int64_t trip_count_; + // Number of basic blocks in the loop body. + size_t bb_num_; + // Number of instructions in the loop body. + size_t instr_num_; + // Number of loop's exits. + size_t exits_num_; + // Number of "if" loop exits (with HIf instruction) whose condition is loop-invariant. + size_t invariant_exits_num_; + // Whether the loop has instructions which make scalar loop peeling non-beneficial. + bool has_instructions_preventing_scalar_peeling_; + // Whether the loop has instructions which make scalar loop unrolling non-beneficial. + bool has_instructions_preventing_scalar_unrolling_; + // Whether the loop has instructions of primitive long type; unrolling these loop will + // likely introduce spill/fills on 32-bit targets. + bool has_long_type_instructions_; + + // Corresponding HLoopInformation. + HLoopInformation* loop_info_; + + friend class LoopAnalysis; +}; + +// Placeholder class for methods and routines used to analyse loops, calculate loop properties +// and characteristics. +class LoopAnalysis : public ValueObject { + public: + // Calculates loops basic properties like body size, exits number, etc. and fills + // 'analysis_results' with this information. 
+ static void CalculateLoopBasicProperties(HLoopInformation* loop_info, + LoopAnalysisInfo* analysis_results, + int64_t trip_count); + + // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise. + static int64_t GetLoopTripCount(HLoopInformation* loop_info, + const InductionVarRange* induction_range); + + private: + // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial. + // + // If in the loop body we have a dex/runtime call then its contribution to the whole + // loop performance will probably prevail. So peeling/unrolling optimization will not bring + // any noticeable performance improvement. It will increase the code size. + static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) { + return (instruction->IsNewArray() || + instruction->IsNewInstance() || + instruction->IsUnresolvedInstanceFieldGet() || + instruction->IsUnresolvedInstanceFieldSet() || + instruction->IsUnresolvedStaticFieldGet() || + instruction->IsUnresolvedStaticFieldSet() || + // TODO: Support loops with intrinsified invokes. + instruction->IsInvoke()); + } +}; + +// +// Helper class which holds target-dependent methods and constants needed for loop optimizations. +// +// To support peeling/unrolling for a new architecture one needs to create new helper class, +// inherit it from this and add implementation for the following methods. +// +class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { + public: + virtual ~ArchNoOptsLoopHelper() {} + + // Creates an instance of specialised helper for the target or default helper if the target + // doesn't support loop peeling and unrolling. + static ArchNoOptsLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator); + + // Returns whether the loop is not beneficial for loop peeling/unrolling. + // + // For example, if the loop body has too many instructions then peeling/unrolling optimization + // will not bring any noticeable performance improvement however will increase the code size. + // + // Returns 'true' by default, should be overridden by particular target loop helper. + virtual bool IsLoopNonBeneficialForScalarOpts( + LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + + // Returns optimal scalar unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. + virtual uint32_t GetScalarUnrollingFactor( + const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + return LoopAnalysisInfo::kNoUnrollingFactor; + } + + // Returns whether scalar loop peeling is enabled, + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsLoopPeelingEnabled() const { return false; } + + // Returns whether it is beneficial to fully unroll the loop. + // + // Returns 'false' by default, should be overridden by particular target loop helper. + virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + return false; + } + + // Returns optimal SIMD unrolling factor for the loop. + // + // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. 
+ virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, + int64_t trip_count ATTRIBUTE_UNUSED, + uint32_t max_peel ATTRIBUTE_UNUSED, + uint32_t vector_length ATTRIBUTE_UNUSED) const { + return LoopAnalysisInfo::kNoUnrollingFactor; + } +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_ diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 9f278a9f4e..6c76ab858b 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -23,7 +23,7 @@ #include "arch/mips64/instruction_set_features_mips64.h" #include "arch/x86/instruction_set_features_x86.h" #include "arch/x86_64/instruction_set_features_x86_64.h" -#include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "linear_order.h" #include "mirror/array-inl.h" #include "mirror/string.h" @@ -33,9 +33,6 @@ namespace art { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; -// No loop unrolling factor (just one copy of the loop-body). -static constexpr uint32_t kNoUnrollingFactor = 1; - // // Static helpers. // @@ -227,6 +224,7 @@ static bool IsNarrowerOperands(HInstruction* a, /*out*/ HInstruction** r, /*out*/ HInstruction** s, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr && b != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { @@ -247,6 +245,7 @@ static bool IsNarrowerOperand(HInstruction* a, DataType::Type type, /*out*/ HInstruction** r, /*out*/ bool* is_unsigned) { + DCHECK(a != nullptr); // Look for a matching sign extension. DataType::Type stype = HVecOperation::ToSignedType(type); if (IsSignExtensionAndGet(a, stype, r)) { @@ -270,20 +269,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type)); } -// Detect up to two instructions a and b, and an acccumulated constant c. -static bool IsAddConstHelper(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c, - int32_t depth) { - static constexpr int32_t kMaxDepth = 8; // don't search too deep +// Detect up to two added operands a and b and an acccumulated constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c, + int32_t depth = 8) { // don't search too deep int64_t value = 0; + // Enter add/sub while still within reasonable depth. + if (depth > 0) { + if (instruction->IsAdd()) { + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) && + IsAddConst(instruction->InputAt(1), a, b, c, depth - 1); + } else if (instruction->IsSub() && + IsInt64AndGet(instruction->InputAt(1), &value)) { + *c -= value; + return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1); + } + } + // Otherwise, deal with leaf nodes. 
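  // Worked example (illustrative): starting from a = b = nullptr and c = 0, the
  // expression (x + 5) - 2 yields a = x, b = nullptr, c = 3, while x + y + 1 yields
  // a = x, b = y, c = 1. IsAddConst2 below then materializes a missing operand as a
  // graph constant, and IsSubConst2 recognizes x + (-3) as the subtraction pair (x, 3).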
if (IsInt64AndGet(instruction, &value)) { *c += value; return true; - } else if (instruction->IsAdd() && depth <= kMaxDepth) { - return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1); } else if (*a == nullptr) { *a = instruction; return true; @@ -291,42 +298,40 @@ static bool IsAddConstHelper(HInstruction* instruction, *b = instruction; return true; } - return false; // too many non-const operands + return false; // too many operands } -// Detect a + b + c for an optional constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ HInstruction** b, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - // Try to find a + b and accumulated c. - if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) && - IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) && - *b != nullptr) { - return true; +// Detect a + b + c with optional constant c. +static bool IsAddConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b, + /*out*/ int64_t* c) { + if (IsAddConst(instruction, a, b, c) && *a != nullptr) { + if (*b == nullptr) { + // Constant is usually already present, unless accumulated. + *b = graph->GetConstant(instruction->GetType(), (*c)); + *c = 0; } - // Found a + b. - *a = instruction->InputAt(0); - *b = instruction->InputAt(1); - *c = 0; return true; } return false; } -// Detect a + c for constant c. -static bool IsAddConst(HInstruction* instruction, - /*out*/ HInstruction** a, - /*out*/ int64_t* c) { - if (instruction->IsAdd()) { - if (IsInt64AndGet(instruction->InputAt(0), c)) { - *a = instruction->InputAt(1); - return true; - } else if (IsInt64AndGet(instruction->InputAt(1), c)) { - *a = instruction->InputAt(0); - return true; - } +// Detect a direct a - b or a hidden a - (-c). +static bool IsSubConst2(HGraph* graph, + HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ HInstruction** b) { + int64_t c = 0; + if (instruction->IsSub()) { + *a = instruction->InputAt(0); + *b = instruction->InputAt(1); + return true; + } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) { + // Constant for the hidden subtraction. + *b = graph->GetConstant(instruction->GetType(), -c); + return true; } return false; } @@ -346,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { // Translates vector operation to reduction kind. static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { + if (reduction->IsVecAdd() || + reduction->IsVecSub() || + reduction->IsVecSADAccumulate() || + reduction->IsVecDotProd()) { return HVecReduce::kSum; } LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); @@ -380,17 +388,82 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) { return true; } +// Tries to statically evaluate condition of the specified "HIf" for other condition checks. +static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { + HInstruction* cond = instruction->InputAt(0); + + // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the + // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in + // FALSE_SUCC). 
Using that we can replace another evaluation (use) EVAL of the same 'cond' + // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the + // edge HIF_BLOCK->TRUE_SUCC (HIF_BLOCK->FALSE_SUCC). + // if (cond) { if(cond) { + // if (cond) {} if (1) {} + // } else { =======> } else { + // if (cond) {} if (0) {} + // } } + if (!cond->IsConstant()) { + HBasicBlock* true_succ = instruction->IfTrueSuccessor(); + HBasicBlock* false_succ = instruction->IfFalseSuccessor(); + + DCHECK_EQ(true_succ->GetPredecessors().size(), 1u); + DCHECK_EQ(false_succ->GetPredecessors().size(), 1u); + + const HUseList<HInstruction*>& uses = cond->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + HBasicBlock* user_block = user->GetBlock(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (true_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(1), index); + } else if (false_succ->Dominates(user_block)) { + user->ReplaceInput(graph->GetIntConstant(0), index); + } + } + } +} + +// Peel the first 'count' iterations of the loop. +static void PeelByCount(HLoopInformation* loop_info, + int count, + InductionVarRange* induction_range) { + for (int i = 0; i < count; i++) { + // Perform peeling. + PeelUnrollSimpleHelper helper(loop_info, induction_range); + helper.DoPeeling(); + } +} + +// Returns the narrower type out of instructions a and b types. +static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { + DataType::Type type = a->GetType(); + if (DataType::Size(b->GetType()) < DataType::Size(type)) { + type = b->GetType(); + } + if (a->IsTypeConversion() && + DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) { + type = a->InputAt(0)->GetType(); + } + if (b->IsTypeConversion() && + DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) { + type = b->InputAt(0)->GetType(); + } + return type; +} + // // Public methods. // HLoopOptimization::HLoopOptimization(HGraph* graph, - CompilerDriver* compiler_driver, + const CompilerOptions* compiler_options, HInductionVarAnalysis* induction_analysis, OptimizingCompilerStats* stats, const char* name) : HOptimization(graph, name, stats), - compiler_driver_(compiler_driver), + compiler_options_(compiler_options), induction_range_(induction_analysis), loop_allocator_(nullptr), global_allocator_(graph_->GetAllocator()), @@ -411,14 +484,18 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_preheader_(nullptr), vector_header_(nullptr), vector_body_(nullptr), - vector_index_(nullptr) { + vector_index_(nullptr), + arch_loop_helper_(ArchNoOptsLoopHelper::Create(compiler_options_ != nullptr + ? compiler_options_->GetInstructionSet() + : InstructionSet::kNone, + global_allocator_)) { } -void HLoopOptimization::Run() { +bool HLoopOptimization::Run() { // Skip if there is no loop or the graph has try-catch/irreducible loops. // TODO: make this less of a sledgehammer. if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) { - return; + return false; } // Phase-local allocator. @@ -426,7 +503,7 @@ void HLoopOptimization::Run() { loop_allocator_ = &allocator; // Perform loop optimizations. - LocalRun(); + bool didLoopOpt = LocalRun(); if (top_loop_ == nullptr) { graph_->SetHasLoops(false); // no more loops } @@ -434,13 +511,16 @@ void HLoopOptimization::Run() { // Detach. 
loop_allocator_ = nullptr; last_loop_ = top_loop_ = nullptr; + + return didLoopOpt; } // // Loop setup and traversal. // -void HLoopOptimization::LocalRun() { +bool HLoopOptimization::LocalRun() { + bool didLoopOpt = false; // Build the linear order using the phase-local allocator. This step enables building // a loop hierarchy that properly reflects the outer-inner and previous-next relation. ScopedArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder)); @@ -472,7 +552,7 @@ void HLoopOptimization::LocalRun() { vector_map_ = ↦ vector_permanent_map_ = &perm; // Traverse. - TraverseLoopsInnerToOuter(top_loop_); + didLoopOpt = TraverseLoopsInnerToOuter(top_loop_); // Detach. iset_ = nullptr; reductions_ = nullptr; @@ -480,6 +560,7 @@ void HLoopOptimization::LocalRun() { vector_map_ = nullptr; vector_permanent_map_ = nullptr; } + return didLoopOpt; } void HLoopOptimization::AddLoop(HLoopInformation* loop_info) { @@ -536,6 +617,7 @@ bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { // loop if the induction of any inner loop has changed. if (TraverseLoopsInnerToOuter(node->inner)) { induction_range_.ReVisit(node->loop_info); + changed = true; } // Repeat simplifications in the loop-body until no more changes occur. // Note that since each simplification consists of eliminating code (without @@ -622,7 +704,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. @@ -692,6 +774,146 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { return false; } +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { + return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node); +} + + + +// +// Scalar loop peeling and unrolling: generic part methods. +// + +bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code) { + if (analysis_info->GetNumberOfExits() > 1) { + return false; + } + + uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info); + if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) { + return false; + } + + if (generate_code) { + // TODO: support other unrolling factors. + DCHECK_EQ(unrolling_factor, 2u); + + // Perform unrolling. + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + PeelUnrollSimpleHelper helper(loop_info, &induction_range_); + helper.DoUnrolling(); + + // Remove the redundant loop check after unrolling. + HIf* copy_hif = + helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0; + copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + } + return true; +} + +bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code) { + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + if (!arch_loop_helper_->IsLoopPeelingEnabled()) { + return false; + } + + if (analysis_info->GetNumberOfInvariantExits() == 0) { + return false; + } + + if (generate_code) { + // Perform peeling. 
+ PeelUnrollSimpleHelper helper(loop_info, &induction_range_); + helper.DoPeeling(); + + // Statically evaluate loop check after peeling for loop invariant condition. + const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap(); + for (auto entry : *hir_map) { + HInstruction* copy = entry.second; + if (copy->IsIf()) { + TryToEvaluateIfCondition(copy->AsIf(), graph_); + } + } + } + + return true; +} + +bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code) { + // Fully unroll loops with a known and small trip count. + int64_t trip_count = analysis_info->GetTripCount(); + if (!arch_loop_helper_->IsLoopPeelingEnabled() || + trip_count == LoopAnalysisInfo::kUnknownTripCount || + !arch_loop_helper_->IsFullUnrollingBeneficial(analysis_info)) { + return false; + } + + if (generate_code) { + // Peeling of the N first iterations (where N equals to the trip count) will effectively + // eliminate the loop: after peeling we will have N sequential iterations copied into the loop + // preheader and the original loop. The trip count of this loop will be 0 as the sequential + // iterations are executed first and there are exactly N of them. Thus we can statically + // evaluate the loop exit condition to 'false' and fully eliminate it. + // + // Here is an example of full unrolling of a loop with a trip count 2: + // + // loop_cond_1 + // loop_body_1 <- First iteration. + // | + // \ v + // ==\ loop_cond_2 + // ==/ loop_body_2 <- Second iteration. + // / | + // <- v <- + // loop_cond \ loop_cond \ <- This cond is always false. + // loop_body _/ loop_body _/ + // + HLoopInformation* loop_info = analysis_info->GetLoopInfo(); + PeelByCount(loop_info, trip_count, &induction_range_); + HIf* loop_hif = loop_info->GetHeader()->GetLastInstruction()->AsIf(); + int32_t constant = loop_info->Contains(*loop_hif->IfTrueSuccessor()) ? 0 : 1; + loop_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u); + } + + return true; +} + +bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) { + // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., running under tests) + // as InstructionSet is needed. + if (compiler_options_ == nullptr) { + return false; + } + + HLoopInformation* loop_info = node->loop_info; + int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_); + LoopAnalysisInfo analysis_info(loop_info); + LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count); + + if (analysis_info.HasInstructionsPreventingScalarOpts() || + arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) { + return false; + } + + if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) && + !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) && + !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) { + return false; + } + + // Run 'IsLoopClonable' the last as it might be time-consuming. + if (!PeelUnrollHelper::IsLoopClonable(loop_info)) { + return false; + } + + return TryFullUnrolling(&analysis_info) || + TryPeelingForLoopInvariantExitsElimination(&analysis_info) || + TryUnrollingForBranchPenaltyReduction(&analysis_info); +} + // // Loop vectorization. The implementation is based on the book by Aart J.C. Bik: // "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance." 
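The SIMD unrolling factor queried in the next hunk comes from the Arm64LoopHelper::GetSIMDUnrollingFactor heuristic introduced above; a minimal worked example, assuming the new constants (body-size cap of 50 instructions, maximum factor 8) and purely illustrative loop numbers:

    // Illustrative only: trip_count = 100, max_peel = 0, vector_length = 4,
    // vector-loop body of 12 instructions (the early-exit checks pass).
    uint32_t uf1 = 50u / 12u;                                        // 4: body-size budget
    uint32_t uf2 = (100u - 0u) / 4u;                                 // 25: iterations available
    uint32_t unroll = TruncToPowerOfTwo(std::min({uf1, uf2, 8u}));   // 4

Loops with fewer than 2 * vector_length + max_peel iterations, or with a body of 50 or more instructions, keep LoopAnalysisInfo::kNoUnrollingFactor instead.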
@@ -822,7 +1044,8 @@ void HLoopOptimization::Vectorize(LoopNode* node, HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Pick a loop unrolling factor for the vector loop. - uint32_t unroll = GetUnrollingFactor(block, trip_count); + uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor( + block, trip_count, MaxNumberPeeled(), vector_length_); uint32_t chunk = vector_length_ * unroll; DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk)); @@ -927,7 +1150,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, ptc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -954,7 +1177,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_index_, stc, graph_->GetConstant(induc_type, 1), - kNoUnrollingFactor); + LoopAnalysisInfo::kNoUnrollingFactor); } // Link reductions to their final uses. @@ -1061,6 +1284,11 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* index = instruction->InputAt(1); HInstruction* value = instruction->InputAt(2); HInstruction* offset = nullptr; + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. + if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } if (TrySetVectorType(type, &restrictions) && node->loop_info->IsDefinedOutOfTheLoop(base) && induction_range_.IsUnitStride(instruction, index, graph_, &offset) && @@ -1083,6 +1311,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, DataType::Type type = instruction->GetType(); // Recognize SAD idiom or direct reduction. if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { if (generate_code) { @@ -1275,49 +1504,37 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } - } else if (instruction->IsInvokeStaticOrDirect()) { - // Accept particular intrinsics. - HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect(); - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* r = opa; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoAbs)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { - return false; // reject, unless operand is sign-extension narrower - } - // Accept ABS(x) for vectorizable operand. - DCHECK(r != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - } - if (VectorizeUse(node, r, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp(instruction, - vector_map_->Get(r), - nullptr, - HVecOperation::ToProperType(type, is_unsigned)); - } - return true; - } - return false; + } else if (instruction->IsAbs()) { + // Deal with vector restrictions. 
+ HInstruction* opa = instruction->InputAt(0); + HInstruction* r = opa; + bool is_unsigned = false; + if (HasVectorRestrictions(restrictions, kNoAbs)) { + return false; + } else if (HasVectorRestrictions(restrictions, kNoHiBits) && + (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) { + return false; // reject, unless operand is sign-extension narrower + } + // Accept ABS(x) for vectorizable operand. + DCHECK(r != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = opa; + } + if (VectorizeUse(node, r, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } - default: - return false; - } // switch + return true; + } } return false; } uint32_t HLoopOptimization::GetVectorSizeInBytes() { - switch (compiler_driver_->GetInstructionSet()) { + switch (compiler_options_->GetInstructionSet()) { case InstructionSet::kArm: case InstructionSet::kThumb2: return 8; // 64-bit SIMD @@ -1327,8 +1544,8 @@ uint32_t HLoopOptimization::GetVectorSizeInBytes() { } bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) { - const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); - switch (compiler_driver_->GetInstructionSet()) { + const InstructionSetFeatures* features = compiler_options_->GetInstructionSetFeatures(); + switch (compiler_options_->GetInstructionSet()) { case InstructionSet::kArm: case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that @@ -1337,11 +1554,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd; return TrySetVectorLength(4); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoWideSAD; @@ -1386,12 +1603,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoMul | + kNoDiv | + kNoShift | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD| + kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoSAD; @@ -1416,11 +1644,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ 
-1445,11 +1673,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1751,57 +1979,11 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); - case HInstruction::kInvokeStaticOrDirect: { - HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); - if (vector_mode_ == kVector) { - switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsInt: - case Intrinsics::kMathAbsLong: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathAbsDouble: - DCHECK(opb == nullptr); - vector = new (global_allocator_) - HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); - break; - default: - LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); - UNREACHABLE(); - } // switch invoke - } else { - // In scalar code, simply clone the method invoke, and replace its operands with the - // corresponding new scalar instructions in the loop. The instruction will get an - // environment while being inserted from the instruction map in original program order. - DCHECK(vector_mode_ == kSequential); - size_t num_args = invoke->GetNumberOfArguments(); - HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( - global_allocator_, - num_args, - invoke->GetType(), - invoke->GetDexPc(), - invoke->GetDexMethodIndex(), - invoke->GetResolvedMethod(), - invoke->GetDispatchInfo(), - invoke->GetInvokeType(), - invoke->GetTargetMethod(), - invoke->GetClinitCheckRequirement()); - HInputsRef inputs = invoke->GetInputs(); - size_t num_inputs = inputs.size(); - DCHECK_LE(num_args, num_inputs); - DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree - for (size_t index = 0; index < num_inputs; ++index) { - HInstruction* new_input = index < num_args - ? vector_map_->Get(inputs[index]) - : inputs[index]; // beyond arguments: just pass through - new_invoke->SetArgumentAt(index, new_input); - } - new_invoke->SetIntrinsic(invoke->GetIntrinsic(), - kNeedsEnvironmentOrCache, - kNoSideEffects, - kNoThrow); - vector = new_invoke; - } - break; - } + case HInstruction::kAbs: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HAbs(org_type, opa, dex_pc)); default: break; } // switch @@ -1838,8 +2020,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* a = nullptr; HInstruction* b = nullptr; int64_t c = 0; - if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { - DCHECK(a != nullptr && b != nullptr); + if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) { // Accept c == 1 (rounded) or c == 0 (not rounded). bool is_rounded = false; if (c == 1) { @@ -1861,8 +2042,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. 
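  // For instance (illustrative): for (a + b + 1) >> 1 the IsAddConst2 call above reports
  // both operands plus c == 1, so the idiom is accepted as a rounded halving add; c == 0
  // gives the truncated form, and any other accumulated constant is rejected.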
Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -1912,21 +2092,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* v = instruction->InputAt(1); HInstruction* a = nullptr; HInstruction* b = nullptr; - if (v->IsInvokeStaticOrDirect() && - (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || - v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { - HInstruction* x = v->InputAt(0); - if (x->GetType() == reduction_type) { - int64_t c = 0; - if (x->IsSub()) { - a = x->InputAt(0); - b = x->InputAt(1); - } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { - b = graph_->GetConstant(reduction_type, -c); // hidden SUB! - } - } - } - if (a == nullptr || b == nullptr) { + if (v->IsAbs() && + v->GetType() == reduction_type && + IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) { + DCHECK(a != nullptr && b != nullptr); + } else { return false; } // Accept same-type or consistent sign extension for narrower-type on operands a and b. @@ -1935,18 +2105,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* r = a; HInstruction* s = b; bool is_unsigned = false; - DataType::Type sub_type = a->GetType(); - if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { - sub_type = b->GetType(); - } - if (a->IsTypeConversion() && - DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = a->InputAt(0)->GetType(); - } - if (b->IsTypeConversion() && - DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = b->InputAt(0)->GetType(); - } + DataType::Type sub_type = GetNarrowerType(a, b); if (reduction_type != sub_type && (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { return false; @@ -1959,8 +2118,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, } // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand // idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr); - DCHECK(s != nullptr); + DCHECK(r != nullptr && s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = s = v->InputAt(0); } @@ -1968,14 +2126,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, VectorizeUse(node, r, generate_code, sub_type, restrictions) && VectorizeUse(node, s, generate_code, sub_type, restrictions)) { if (generate_code) { - reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); if (vector_mode_ == kVector) { vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( global_allocator_, vector_map_->Get(q), vector_map_->Get(r), vector_map_->Get(s), - reduction_type, + HVecOperation::ToProperType(reduction_type, is_unsigned), GetOtherVL(reduction_type, sub_type, vector_length_), kNoDexPc)); MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); @@ -1989,6 +2146,75 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, return false; } +// Method recognises the following dot product idiom: +// q += a * b for operands a, b whose type is narrower than the reduction one. +// Provided that the operands have the same type or are promoted to a wider form. 
+// Since this may involve a vector length change, the idiom is handled by going directly +// to a dot product node (rather than relying combining finer grained nodes later). +bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type reduction_type, + uint64_t restrictions) { + if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + return false; + } + + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + if (!v->IsMul() || v->GetType() != reduction_type) { + return false; + } + + HInstruction* a = v->InputAt(0); + HInstruction* b = v->InputAt(1); + HInstruction* r = a; + HInstruction* s = b; + DataType::Type op_type = GetNarrowerType(a, b); + bool is_unsigned = false; + + if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { + return false; + } + op_type = HVecOperation::ToProperType(op_type, is_unsigned); + + if (!TrySetVectorType(op_type, &restrictions) || + HasVectorRestrictions(restrictions, kNoDotProd)) { + return false; + } + + DCHECK(r != nullptr && s != nullptr); + // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = a; + s = b; + } + if (VectorizeUse(node, q, generate_code, op_type, restrictions) && + VectorizeUse(node, r, generate_code, op_type, restrictions) && + VectorizeUse(node, s, generate_code, op_type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + is_unsigned, + GetOtherVL(reduction_type, op_type, vector_length_), + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // @@ -2048,41 +2274,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { return true; } -static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; -static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; - -uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { - uint32_t max_peel = MaxNumberPeeled(); - switch (compiler_driver_->GetInstructionSet()) { - case InstructionSet::kArm64: { - // Don't unroll with insufficient iterations. - // TODO: Unroll loops with unknown trip count. - DCHECK_NE(vector_length_, 0u); - if (trip_count < (2 * vector_length_ + max_peel)) { - return kNoUnrollingFactor; - } - // Don't unroll for large loop body size. - uint32_t instruction_count = block->GetInstructions().CountSize(); - if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { - return kNoUnrollingFactor; - } - // Find a beneficial unroll factor with the following restrictions: - // - At least one iteration of the transformed loop should be executed. - // - The loop body shouldn't be "too big" (heuristic). 
- uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; - uint32_t uf2 = (trip_count - max_peel) / vector_length_; - uint32_t unroll_factor = - TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); - DCHECK_GE(unroll_factor, 1u); - return unroll_factor; - } - case InstructionSet::kX86: - case InstructionSet::kX86_64: - default: - return kNoUnrollingFactor; - } -} - // // Helpers. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index d70751037b..1a842c4bf3 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -20,12 +20,15 @@ #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "induction_var_range.h" +#include "loop_analysis.h" #include "nodes.h" #include "optimization.h" +#include "superblock_cloner.h" namespace art { -class CompilerDriver; +class CompilerOptions; +class ArchNoOptsLoopHelper; /** * Loop optimizations. Builds a loop hierarchy and applies optimizations to @@ -35,12 +38,12 @@ class CompilerDriver; class HLoopOptimization : public HOptimization { public: HLoopOptimization(HGraph* graph, - CompilerDriver* compiler_driver, + const CompilerOptions* compiler_options, HInductionVarAnalysis* induction_analysis, OptimizingCompilerStats* stats, const char* name = kLoopOptimizationPassName); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; @@ -79,6 +82,7 @@ class HLoopOptimization : public HOptimization { kNoReduction = 1 << 9, // no reduction kNoSAD = 1 << 10, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening + kNoDotProd = 1 << 12, // no dot product }; /* @@ -118,7 +122,7 @@ class HLoopOptimization : public HOptimization { // Loop setup and traversal. // - void LocalRun(); + bool LocalRun(); void AddLoop(HLoopInformation* loop_info); void RemoveLoop(LoopNode* node); @@ -133,10 +137,34 @@ class HLoopOptimization : public HOptimization { void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - // Performs optimizations specific to inner loop (empty loop removal, + // Performs optimizations specific to inner loop with finite header logic (empty loop removal, // unrolling, vectorization). Returns true if anything changed. + bool TryOptimizeInnerLoopFinite(LoopNode* node); + + // Performs optimizations specific to inner loop. Returns true if anything changed. bool OptimizeInnerLoop(LoopNode* node); + // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling + // opportunities. Returns whether transformation happened. 'generate_code' determines whether the + // optimization should be actually applied. + bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to apply loop peeling for loop invariant exits elimination. Returns whether + // transformation happened. 'generate_code' determines whether the optimization should be + // actually applied. + bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, + bool generate_code = true); + + // Tries to perform whole loop unrolling for a small loop with a small trip count to eliminate + // the loop check overhead and to have more opportunities for inter-iteration optimizations. + // Returns whether transformation happened. 
'generate_code' determines whether the optimization + // should be actually applied. + bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); + + // Tries to apply scalar loop peeling and unrolling. + bool TryPeelingAndUnrolling(LoopNode* node); + // // Vectorization analysis and synthesis. // @@ -175,6 +203,11 @@ class HLoopOptimization : public HOptimization { DataType::Type type); // Vectorization idioms. + bool VectorizeSaturationIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); bool VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* instruction, bool generate_code, @@ -185,6 +218,11 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); // Vectorization heuristics. Alignment ComputeAlignment(HInstruction* offset, @@ -195,7 +233,6 @@ class HLoopOptimization : public HOptimization { const ArrayReference* peeling_candidate); uint32_t MaxNumberPeeled(); bool IsVectorizationProfitable(int64_t trip_count); - uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count); // // Helpers. @@ -225,8 +262,8 @@ class HLoopOptimization : public HOptimization { void RemoveDeadInstructions(const HInstructionList& list); bool CanRemoveCycle(); // Whether the current 'iset_' is removable. - // Compiler driver (to query ISA features). - const CompilerDriver* compiler_driver_; + // Compiler options (to query ISA features). + const CompilerOptions* compiler_options_; // Range information based on prior induction variable analysis. InductionVarRange induction_range_; @@ -289,6 +326,9 @@ class HLoopOptimization : public HOptimization { HBasicBlock* vector_body_; // body of the new loop HInstruction* vector_index_; // normalized index of the new loop + // Helper for target-specific behaviour for loop optimizations. + ArchNoOptsLoopHelper* arch_loop_helper_; + friend class LoopOptimizationTest; DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index db8368986c..310d98b5b0 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -29,7 +29,8 @@ class LoopOptimizationTest : public OptimizingUnitTest { LoopOptimizationTest() : graph_(CreateGraph()), iva_(new (GetAllocator()) HInductionVarAnalysis(graph_)), - loop_opt_(new (GetAllocator()) HLoopOptimization(graph_, nullptr, iva_, nullptr)) { + loop_opt_(new (GetAllocator()) HLoopOptimization( + graph_, /* compiler_options= */ nullptr, iva_, /* stats= */ nullptr)) { BuildGraph(); } @@ -227,11 +228,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { graph_->ClearDominanceInformation(); graph_->BuildDominatorTree(); + // BuildDominatorTree inserts a block beetween loop header and entry block. + EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_); + // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs // are still mapped correctly to the block predecessors. 
for (size_t i = 0, e = phi->InputCount(); i < e; i++) { HInstruction* input = phi->InputAt(i); - ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); + EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); } } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f6ba19f22a..1940d55a9d 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -20,8 +20,10 @@ #include "art_method-inl.h" #include "base/bit_utils.h" #include "base/bit_vector-inl.h" +#include "base/logging.h" #include "base/stl_util.h" #include "class_linker-inl.h" +#include "class_root.h" #include "code_generator.h" #include "common_dominator.h" #include "intrinsics.h" @@ -40,10 +42,9 @@ static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) { ScopedObjectAccess soa(Thread::Current()); // Create the inexact Object reference type and store it in the HGraph. - ClassLinker* linker = Runtime::Current()->GetClassLinker(); inexact_object_rti_ = ReferenceTypeInfo::Create( - handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)), - /* is_exact */ false); + handles->NewHandle(GetClassRoot<mirror::Object>()), + /* is_exact= */ false); } void HGraph::AddBlock(HBasicBlock* block) { @@ -59,7 +60,7 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { ScopedArenaAllocator allocator(GetArenaStack()); // Nodes that we're currently visiting, indexed by block id. ArenaBitVector visiting( - &allocator, blocks_.size(), /* expandable */ false, kArenaAllocGraphBuilder); + &allocator, blocks_.size(), /* expandable= */ false, kArenaAllocGraphBuilder); visiting.ClearAllBits(); // Number of successors visited from a given node, indexed by block id. ScopedArenaVector<size_t> successors_visited(blocks_.size(), @@ -146,7 +147,9 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit if (!visited.IsBitSet(i)) { HBasicBlock* block = blocks_[i]; if (block == nullptr) continue; - DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage"; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + RemoveAsUser(it.Current()); + } for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { RemoveAsUser(it.Current()); } @@ -688,7 +691,7 @@ HCurrentMethod* HGraph::GetCurrentMethod() { } const char* HGraph::GetMethodName() const { - const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_); + const dex::MethodId& method_id = dex_file_.GetMethodId(method_idx_); return dex_file_.GetMethodName(method_id); } @@ -825,7 +828,7 @@ void HLoopInformation::Populate() { ScopedArenaAllocator allocator(graph->GetArenaStack()); ArenaBitVector visited(&allocator, graph->GetBlocks().size(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocGraphBuilder); visited.ClearAllBits(); // Stop marking blocks at the loop header. @@ -1121,6 +1124,23 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node); } +void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) { + const HUserRecord<HEnvironment*>& env_use_record = vregs_[index]; + HInstruction* orig_instr = env_use_record.GetInstruction(); + + DCHECK(orig_instr != replacement); + + HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode(); + // Note: fixup_end remains valid across splice_after(). 
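  // That is (illustrative): the spliced node becomes the new front of replacement->env_uses_,
  // so the [begin, fixup_end) fix-up below covers both the moved node and, when the list was
  // non-empty, the old front node, whose before-use link must be updated to point at the
  // moved node.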
+ auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin() + : ++replacement->env_uses_.begin(); + replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(), + env_use_record.GetInstruction()->env_uses_, + before_use_node); + replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end); + orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node); +} + HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { @@ -1213,7 +1233,7 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, } } LOG(FATAL) << "Did not find an order between two instructions of the same block."; - return true; + UNREACHABLE(); } bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { @@ -1236,7 +1256,7 @@ bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const { } else { // There is no order among phis. LOG(FATAL) << "There is no dominance between phis of a same block."; - return false; + UNREACHABLE(); } } else { // `this` is not a phi. @@ -1284,6 +1304,28 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* ++it; if (dominator->StrictlyDominates(user)) { user->ReplaceInput(replacement, index); + } else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) { + // If the input flows from a block dominated by `dominator`, we can replace it. + // We do not perform this for catch phis as we don't have control flow support + // for their inputs. + const ArenaVector<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors(); + HBasicBlock* predecessor = predecessors[index]; + if (dominator->GetBlock()->Dominates(predecessor)) { + user->ReplaceInput(replacement, index); + } + } + } +} + +void HInstruction::ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) { + const HUseList<HEnvironment*>& uses = GetEnvUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). 
+ ++it; + if (dominator->StrictlyDominates(user->GetHolder())) { + user->ReplaceInput(replacement, index); } } } @@ -1680,10 +1722,9 @@ bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { } bool HInstruction::Equals(const HInstruction* other) const { - if (!InstructionTypeEquals(other)) return false; - DCHECK_EQ(GetKind(), other->GetKind()); - if (!InstructionDataEquals(other)) return false; + if (GetKind() != other->GetKind()) return false; if (GetType() != other->GetType()) return false; + if (!InstructionDataEquals(other)) return false; HConstInputsRef inputs = GetInputs(); HConstInputsRef other_inputs = other->GetInputs(); if (inputs.size() != other_inputs.size()) return false; @@ -1698,7 +1739,7 @@ bool HInstruction::Equals(const HInstruction* other) const { std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs) { #define DECLARE_CASE(type, super) case HInstruction::k##type: os << #type; break; switch (rhs) { - FOR_EACH_INSTRUCTION(DECLARE_CASE) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_CASE) default: os << "Unknown instruction kind " << static_cast<int>(rhs); break; @@ -1952,6 +1993,11 @@ bool HBasicBlock::EndsWithControlFlowInstruction() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsControlFlow(); } +bool HBasicBlock::EndsWithReturn() const { + return !GetInstructions().IsEmpty() && + (GetLastInstruction()->IsReturn() || GetLastInstruction()->IsReturnVoid()); +} + bool HBasicBlock::EndsWithIf() const { return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf(); } @@ -2483,7 +2529,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { current->SetGraph(outer_graph); outer_graph->AddBlock(current); outer_graph->reverse_post_order_[++index_of_at] = current; - UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge */ false); + UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false); } } @@ -2493,7 +2539,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { outer_graph->reverse_post_order_[++index_of_at] = to; // Only `to` can become a back edge, as the inlined blocks // are predecessors of `to`. - UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge */ true); + UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge= */ true); // Update all predecessors of the exit block (now the `to` block) // to not `HReturn` but `HGoto` instead. 
Special case throwing blocks @@ -2667,13 +2713,13 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { DCHECK((old_pre_header->GetLoopInformation() == nullptr) || !old_pre_header->GetLoopInformation()->IsBackEdge(*old_pre_header)); UpdateLoopAndTryInformationOfNewBlock( - if_block, old_pre_header, /* replace_if_back_edge */ false); + if_block, old_pre_header, /* replace_if_back_edge= */ false); UpdateLoopAndTryInformationOfNewBlock( - true_block, old_pre_header, /* replace_if_back_edge */ false); + true_block, old_pre_header, /* replace_if_back_edge= */ false); UpdateLoopAndTryInformationOfNewBlock( - false_block, old_pre_header, /* replace_if_back_edge */ false); + false_block, old_pre_header, /* replace_if_back_edge= */ false); UpdateLoopAndTryInformationOfNewBlock( - new_pre_header, old_pre_header, /* replace_if_back_edge */ false); + new_pre_header, old_pre_header, /* replace_if_back_edge= */ false); } HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, @@ -2765,6 +2811,14 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact()); } +bool HBoundType::InstructionDataEquals(const HInstruction* other) const { + const HBoundType* other_bt = other->AsBoundType(); + ScopedObjectAccess soa(Thread::Current()); + return GetUpperBound().IsEqual(other_bt->GetUpperBound()) && + GetUpperCanBeNull() == other_bt->GetUpperCanBeNull() && + CanBeNull() == other_bt->CanBeNull(); +} + void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) { if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); @@ -2850,8 +2904,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, } bool HNewInstance::IsStringAlloc() const { - ScopedObjectAccess soa(Thread::Current()); - return GetReferenceTypeInfo().IsStringClass(); + return GetEntrypoint() == kQuickAllocStringObject; } bool HInvoke::NeedsEnvironment() const { @@ -2889,10 +2942,12 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "Recursive"; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; - case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: - return os << "DirectAddress"; + case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: return os << "BssEntry"; + case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: + return os << "JitDirectAddress"; case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: return os << "RuntimeCall"; default: @@ -2924,8 +2979,8 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { return false; } switch (GetLoadKind()) { - case LoadKind::kBootImageAddress: - case LoadKind::kBootImageClassTable: + case LoadKind::kBootImageRelRo: + case LoadKind::kJitBootImageAddress: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetClass().Get() == other_load_class->GetClass().Get(); @@ -2942,12 +2997,12 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "ReferrersClass"; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; - case HLoadClass::LoadKind::kBootImageAddress: - return os << "BootImageAddress"; - case HLoadClass::LoadKind::kBootImageClassTable: - return os << "BootImageClassTable"; + case HLoadClass::LoadKind::kBootImageRelRo: + return os << 
"BootImageRelRo"; case HLoadClass::LoadKind::kBssEntry: return os << "BssEntry"; + case HLoadClass::LoadKind::kJitBootImageAddress: + return os << "JitBootImageAddress"; case HLoadClass::LoadKind::kJitTableAddress: return os << "JitTableAddress"; case HLoadClass::LoadKind::kRuntimeCall: @@ -2967,8 +3022,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { return false; } switch (GetLoadKind()) { - case LoadKind::kBootImageAddress: - case LoadKind::kBootImageInternTable: + case LoadKind::kBootImageRelRo: + case LoadKind::kJitBootImageAddress: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2982,12 +3037,12 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { switch (rhs) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; - case HLoadString::LoadKind::kBootImageAddress: - return os << "BootImageAddress"; - case HLoadString::LoadKind::kBootImageInternTable: - return os << "BootImageInternTable"; + case HLoadString::LoadKind::kBootImageRelRo: + return os << "BootImageRelRo"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; + case HLoadString::LoadKind::kJitBootImageAddress: + return os << "JitBootImageAddress"; case HLoadString::LoadKind::kJitTableAddress: return os << "JitTableAddress"; case HLoadString::LoadKind::kRuntimeCall: @@ -3101,6 +3156,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { return os << "array_object_check"; case TypeCheckKind::kArrayCheck: return os << "array_check"; + case TypeCheckKind::kBitstringCheck: + return os << "bitstring_check"; default: LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -3126,4 +3183,77 @@ std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) { } } +// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags. +#define CHECK_INTRINSICS_ENUM_VALUES(Name, InvokeType, _, SideEffects, Exceptions, ...) \ + static_assert( \ + static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ + "Instrinsics enumeration space overflow."); +#include "intrinsics_list.h" + INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) +#undef INTRINSICS_LIST +#undef CHECK_INTRINSICS_ENUM_VALUES + +// Function that returns whether an intrinsic needs an environment or not. +static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCacheIntrinsic(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. +#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \ + case Intrinsics::k ## Name: \ + return NeedsEnvOrCache; +#include "intrinsics_list.h" + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kNeedsEnvironmentOrCache; +} + +// Function that returns whether an intrinsic has side effects. +static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kAllSideEffects; +#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) 
\ + case Intrinsics::k ## Name: \ + return SideEffects; +#include "intrinsics_list.h" + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kAllSideEffects; +} + +// Function that returns whether an intrinsic can throw exceptions. +static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kCanThrow; +#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \ + case Intrinsics::k ## Name: \ + return Exceptions; +#include "intrinsics_list.h" + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kCanThrow; +} + +void HInvoke::SetResolvedMethod(ArtMethod* method) { + // TODO: b/65872996 The intent is that polymorphic signature methods should + // be compiler intrinsics. At present, they are only interpreter intrinsics. + if (method != nullptr && + method->IsIntrinsic() && + !method->IsPolymorphicSignature()) { + Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic()); + SetIntrinsic(intrinsic, + NeedsEnvironmentOrCacheIntrinsic(intrinsic), + GetSideEffectsIntrinsic(intrinsic), + GetExceptionsIntrinsic(intrinsic)); + } + resolved_method_ = method; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fe992a7f39..fedad0c69a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -26,9 +26,11 @@ #include "base/arena_object.h" #include "base/array_ref.h" #include "base/iteration_range.h" +#include "base/mutex.h" #include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" +#include "art_method.h" #include "data_type.h" #include "deoptimization_kind.h" #include "dex/dex_file.h" @@ -41,6 +43,7 @@ #include "intrinsics_enum.h" #include "locations.h" #include "mirror/class.h" +#include "mirror/method_type.h" #include "offsets.h" #include "utils/intrusive_forward_list.h" @@ -127,6 +130,7 @@ enum GraphAnalysisResult { kAnalysisInvalidBytecode, kAnalysisFailThrowCatchLoop, kAnalysisFailAmbiguousArrayOp, + kAnalysisFailIrreducibleLoopAndStringInit, kAnalysisSuccess, }; @@ -313,6 +317,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { uint32_t method_idx, InstructionSet instruction_set, InvokeType invoke_type = kInvalidInvokeType, + bool dead_reference_safe = false, bool debuggable = false, bool osr = false, int start_instruction_id = 0) @@ -332,6 +337,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_simd_(false), has_loops_(false), has_irreducible_loops_(false), + dead_reference_safe_(dead_reference_safe), debuggable_(debuggable), current_instruction_id_(start_instruction_id), dex_file_(dex_file), @@ -522,6 +528,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_bounds_checks_ = value; } + // Is the code known to be robust against eliminating dead references + // and the effects of early finalization? + bool IsDeadReferenceSafe() const { return dead_reference_safe_; } + + void MarkDeadReferenceUnsafe() { dead_reference_safe_ = false; } + bool IsDebuggable() const { return debuggable_; } // Returns a constant of the given type and value. If it does not exist @@ -700,6 +712,14 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // so there might be false positives. bool has_irreducible_loops_; + // Is the code known to be robust against eliminating dead references + // and the effects of early finalization? 
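NeedsEnvironmentOrCacheIntrinsic, GetSideEffectsIntrinsic and GetExceptionsIntrinsic above all expand the same INTRINSICS_LIST X-macro with a different per-entry body, so each property switch stays in sync with the single list of intrinsics, and the CHECK_INTRINSICS_ENUM_VALUES static_assert can bound every enumerator in one place. Below is a minimal, self-contained sketch of that X-macro technique; the list, enum and property names (EXAMPLE_INTRINSICS_LIST, ExampleIntrinsics and so on) are illustrative stand-ins, not ART's actual intrinsics_list.h entries.

#include <cstdint>
#include <iostream>

// One central list (illustrative, not ART's intrinsics_list.h); each property
// query expands it with a different macro body.
#define EXAMPLE_INTRINSICS_LIST(V)           \
  V(MathAbsInt, kNoSideEffects, kNoThrow)    \
  V(StringCharAt, kReadSideEffects, kCanThrow)

enum ExampleSideEffects { kNoSideEffects, kReadSideEffects, kAllSideEffects };
enum ExampleExceptions { kNoThrow, kCanThrow };

enum class ExampleIntrinsics : uint8_t {
  kNone,
#define EXAMPLE_DECLARE_ENUM(Name, SideEffects, Exceptions) k##Name,
  EXAMPLE_INTRINSICS_LIST(EXAMPLE_DECLARE_ENUM)
#undef EXAMPLE_DECLARE_ENUM
};

ExampleSideEffects GetExampleSideEffects(ExampleIntrinsics i) {
  switch (i) {
    case ExampleIntrinsics::kNone:
      return kAllSideEffects;  // Conservative default for "not an intrinsic".
#define EXAMPLE_CASE(Name, SideEffects, Exceptions) \
    case ExampleIntrinsics::k##Name:                \
      return SideEffects;
    EXAMPLE_INTRINSICS_LIST(EXAMPLE_CASE)
#undef EXAMPLE_CASE
  }
  return kAllSideEffects;
}

int main() {
  std::cout << (GetExampleSideEffects(ExampleIntrinsics::kStringCharAt) == kReadSideEffects)
            << std::endl;
  return 0;
}

Adding an entry to the list extends the enum and every switch automatically, which is what keeps the per-intrinsic property tables from drifting apart.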
If false, dead reference variables + // are kept if they might be visible to the garbage collector. + // Currently this means that the class was declared to be dead-reference-safe, + // the method accesses no reachability-sensitive fields or data, and the same + // is true for any methods that were inlined into the current one. + bool dead_reference_safe_; + // Indicates whether the graph should be compiled in a way that // ensures full debuggability. If false, we can apply more // aggressive optimizations that may limit the level of debugging. @@ -891,7 +911,7 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { explicit TryCatchInformation(const HTryBoundary& try_entry) : try_entry_(&try_entry), catch_dex_file_(nullptr), - catch_type_index_(DexFile::kDexNoIndex16) { + catch_type_index_(dex::TypeIndex::Invalid()) { DCHECK(try_entry_ != nullptr); } @@ -910,9 +930,9 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { bool IsCatchBlock() const { return catch_dex_file_ != nullptr; } - bool IsCatchAllTypeIndex() const { + bool IsValidTypeIndex() const { DCHECK(IsCatchBlock()); - return !catch_type_index_.IsValid(); + return catch_type_index_.IsValid(); } dex::TypeIndex GetCatchTypeIndex() const { @@ -925,6 +945,10 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { return *catch_dex_file_; } + void SetInvalidTypeIndex() { + catch_type_index_ = dex::TypeIndex::Invalid(); + } + private: // One of possibly several TryBoundary instructions entering the block's try. // Only set for try blocks. @@ -932,7 +956,7 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> { // Exception type information. Only set for catch blocks. const DexFile* catch_dex_file_; - const dex::TypeIndex catch_type_index_; + dex::TypeIndex catch_type_index_; }; static constexpr size_t kNoLifetime = -1; @@ -1284,6 +1308,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { void SetLifetimeEnd(size_t end) { lifetime_end_ = end; } bool EndsWithControlFlowInstruction() const; + bool EndsWithReturn() const; bool EndsWithIf() const; bool EndsWithTryBoundary() const; bool HasSinglePhi() const; @@ -1338,6 +1363,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ M(Above, Condition) \ M(AboveOrEqual, Condition) \ + M(Abs, UnaryOperation) \ M(Add, BinaryOperation) \ M(And, BinaryOperation) \ M(ArrayGet, Instruction) \ @@ -1377,13 +1403,18 @@ class HLoopInformationOutwardIterator : public ValueObject { M(InvokeStaticOrDirect, Invoke) \ M(InvokeVirtual, Invoke) \ M(InvokePolymorphic, Invoke) \ + M(InvokeCustom, Invoke) \ M(LessThan, Condition) \ M(LessThanOrEqual, Condition) \ M(LoadClass, Instruction) \ M(LoadException, Instruction) \ + M(LoadMethodHandle, Instruction) \ + M(LoadMethodType, Instruction) \ M(LoadString, Instruction) \ M(LongConstant, Constant) \ + M(Max, Instruction) \ M(MemoryBarrier, Instruction) \ + M(Min, BinaryOperation) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ M(NativeDebugInfo, Instruction) \ @@ -1437,12 +1468,15 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecAndNot, VecBinaryOperation) \ M(VecOr, VecBinaryOperation) \ M(VecXor, VecBinaryOperation) \ + M(VecSaturationAdd, VecBinaryOperation) \ + M(VecSaturationSub, VecBinaryOperation) \ M(VecShl, VecBinaryOperation) \ M(VecShr, VecBinaryOperation) \ M(VecUShr, VecBinaryOperation) \ M(VecSetScalars, VecOperation) \ M(VecMultiplyAccumulate, 
VecOperation) \ M(VecSADAccumulate, VecOperation) \ + M(VecDotProd, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ @@ -1484,6 +1518,14 @@ class HLoopInformationOutwardIterator : public ValueObject { M(X86PackedSwitch, Instruction) #endif +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) \ + M(X86AndNot, Instruction) \ + M(X86MaskOrResetLeastSetBit, Instruction) +#else +#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) +#endif + #define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) #define FOR_EACH_CONCRETE_INSTRUCTION(M) \ @@ -1494,7 +1536,8 @@ class HLoopInformationOutwardIterator : public ValueObject { FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ - FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) + FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) \ + FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) #define FOR_EACH_ABSTRACT_INSTRUCTION(M) \ M(Condition, BinaryOperation) \ @@ -1519,23 +1562,17 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) private: \ H##type& operator=(const H##type&) = delete; \ public: \ - const char* DebugName() const OVERRIDE { return #type; } \ - bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE { \ - return other->Is##type(); \ - } \ - HInstruction* Clone(ArenaAllocator* arena) const OVERRIDE { \ + const char* DebugName() const override { return #type; } \ + HInstruction* Clone(ArenaAllocator* arena) const override { \ DCHECK(IsClonable()); \ return new (arena) H##type(*this->As##type()); \ } \ - void Accept(HGraphVisitor* visitor) OVERRIDE + void Accept(HGraphVisitor* visitor) override #define DECLARE_ABSTRACT_INSTRUCTION(type) \ private: \ H##type& operator=(const H##type&) = delete; \ - public: \ - bool Is##type() const { return As##type() != nullptr; } \ - const H##type* As##type() const { return this; } \ - H##type* As##type() { return this; } + public: #define DEFAULT_COPY_CONSTRUCTOR(type) \ explicit H##type(const H##type& other) = default; @@ -1622,6 +1659,21 @@ using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInp * the same, and any reference read depends on any reference read without * further regard of its type). * + * kDependsOnGCBit is defined in the following way: instructions with kDependsOnGCBit must not be + * alive across the point where garbage collection might happen. + * + * Note: Instructions with kCanTriggerGCBit do not depend on each other. + * + * kCanTriggerGCBit must be used for instructions for which GC might happen on the path across + * those instructions from the compiler perspective (between this instruction and the next one + * in the IR). + * + * Note: Instructions which can cause GC only on a fatal slow path do not need + * kCanTriggerGCBit as the execution never returns to the instruction next to the exceptional + * one. However the execution may return to compiled code if there is a catch block in the + * current method; for this purpose the TryBoundary exit instruction has kCanTriggerGCBit + * set. + * * The internal representation uses 38-bit and is described in the table below. 
* The first line indicates the side effect, and for field/array accesses the * second line indicates the type of the access (in the order of the @@ -1694,10 +1746,17 @@ class SideEffects : public ValueObject { return SideEffects(TypeFlag(type, kArrayReadOffset)); } + // Returns whether GC might happen across this instruction from the compiler perspective so + // the next instruction in the IR would see that. + // + // See the SideEffect class comments. static SideEffects CanTriggerGC() { return SideEffects(1ULL << kCanTriggerGCBit); } + // Returns whether the instruction must not be alive across a GC point. + // + // See the SideEffect class comments. static SideEffects DependsOnGC() { return SideEffects(1ULL << kDependsOnGCBit); } @@ -1906,6 +1965,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { void RemoveAsUserOfInput(size_t index) const; + // Replaces the input at the position 'index' with the replacement; the replacement and old + // input instructions' env_uses_ lists are adjusted. The function works similar to + // HInstruction::ReplaceInput. + void ReplaceInput(HInstruction* replacement, size_t index); + size_t Size() const { return vregs_.size(); } HEnvironment* GetParent() const { return parent_; } @@ -1954,12 +2018,15 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: #define DECLARE_KIND(type, super) k##type, enum InstructionKind { - FOR_EACH_INSTRUCTION(DECLARE_KIND) + FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_KIND) kLastInstructionKind }; #undef DECLARE_KIND HInstruction(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, DataType::Type::kVoid, side_effects, dex_pc) {} + + HInstruction(InstructionKind kind, DataType::Type type, SideEffects side_effects, uint32_t dex_pc) : previous_(nullptr), next_(nullptr), block_(nullptr), @@ -1974,6 +2041,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { side_effects_(side_effects), reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) { SetPackedField<InstructionKindField>(kind); + SetPackedField<TypeField>(type); SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact()); } @@ -2031,7 +2099,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { virtual void Accept(HGraphVisitor* visitor) = 0; virtual const char* DebugName() const = 0; - virtual DataType::Type GetType() const { return DataType::Type::kVoid; } + DataType::Type GetType() const { + return TypeField::Decode(GetPackedFields()); + } virtual bool NeedsEnvironment() const { return false; } @@ -2064,6 +2134,19 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return false; } + // If this instruction will do an implicit null check, return the `HNullCheck` associated + // with it. Otherwise return null. + HNullCheck* GetImplicitNullCheck() const { + // Find the first previous instruction which is not a move. 
+ HInstruction* first_prev_not_move = GetPreviousDisregardingMoves(); + if (first_prev_not_move != nullptr && + first_prev_not_move->IsNullCheck() && + first_prev_not_move->IsEmittedAtUseSite()) { + return first_prev_not_move->AsNullCheck(); + } + return nullptr; + } + virtual bool IsActualObject() const { return GetType() == DataType::Type::kReference; } @@ -2202,6 +2285,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void ReplaceWith(HInstruction* instruction); void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); + void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. But in this helper, the @@ -2223,19 +2307,17 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void MoveBeforeFirstUserAndOutOfLoops(); #define INSTRUCTION_TYPE_CHECK(type, super) \ - bool Is##type() const; \ - const H##type* As##type() const; \ - H##type* As##type(); + bool Is##type() const; - FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) + FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK -#define INSTRUCTION_TYPE_CHECK(type, super) \ - bool Is##type() const { return (As##type() != nullptr); } \ - virtual const H##type* As##type() const { return nullptr; } \ - virtual H##type* As##type() { return nullptr; } - FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) -#undef INSTRUCTION_TYPE_CHECK +#define INSTRUCTION_TYPE_CAST(type, super) \ + const H##type* As##type() const; \ + H##type* As##type(); + + FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) +#undef INSTRUCTION_TYPE_CAST // Return a clone of the instruction if it is clonable (shallow copy by default, custom copy // if a custom copy-constructor is provided for a particular type). If IsClonable() is false for @@ -2261,11 +2343,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // meanings? split and rename? virtual bool CanBeMoved() const { return false; } - // Returns whether the two instructions are of the same kind. - virtual bool InstructionTypeEquals(const HInstruction* other ATTRIBUTE_UNUSED) const { - return false; - } - // Returns whether any data encoded in the two instructions is equal. // This method does not look at the inputs. Both instructions must be // of the same type, otherwise the method has undefined behavior. @@ -2278,10 +2355,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // 2) Their inputs are identical. bool Equals(const HInstruction* other) const; - // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744) - // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide - // the virtual function because the __attribute__((__pure__)) doesn't really - // apply the strong requirement for virtual functions, preventing optimizations. 
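The INSTRUCTION_TYPE_CHECK / INSTRUCTION_TYPE_CAST change just above turns the per-class virtual As##type() overrides into declarations generated for every instruction from the FOR_EACH_INSTRUCTION list and implemented non-virtually from GetKind(). A rough sketch of that pattern, with hypothetical names rather than ART's:

#include <cassert>

// Hypothetical instruction list; ART generates these from FOR_EACH_INSTRUCTION.
#define EXAMPLE_FOR_EACH_INSTRUCTION(M) \
  M(Add)                                \
  M(Mul)

class ExampleAdd;
class ExampleMul;

class ExampleInstruction {
 public:
  enum Kind {
#define EXAMPLE_DECLARE_KIND(type) k##type,
    EXAMPLE_FOR_EACH_INSTRUCTION(EXAMPLE_DECLARE_KIND)
#undef EXAMPLE_DECLARE_KIND
  };

  explicit ExampleInstruction(Kind kind) : kind_(kind) {}
  Kind GetKind() const { return kind_; }

  // One Is/As pair per instruction kind, declared from the list.
#define EXAMPLE_DECLARE_CASTS(type) \
  bool Is##type() const;            \
  const Example##type* As##type() const;
  EXAMPLE_FOR_EACH_INSTRUCTION(EXAMPLE_DECLARE_CASTS)
#undef EXAMPLE_DECLARE_CASTS

 private:
  Kind kind_;
};

class ExampleAdd : public ExampleInstruction {
 public:
  ExampleAdd() : ExampleInstruction(kAdd) {}
};

class ExampleMul : public ExampleInstruction {
 public:
  ExampleMul() : ExampleInstruction(kMul) {}
};

// Definitions are emitted once the subclasses are complete, so the downcast
// can be a plain static_cast guarded by the kind check.
#define EXAMPLE_DEFINE_CASTS(type)                                          \
  inline bool ExampleInstruction::Is##type() const {                        \
    return GetKind() == k##type;                                            \
  }                                                                         \
  inline const Example##type* ExampleInstruction::As##type() const {        \
    return Is##type() ? static_cast<const Example##type*>(this) : nullptr;  \
  }
EXAMPLE_FOR_EACH_INSTRUCTION(EXAMPLE_DEFINE_CASTS)
#undef EXAMPLE_DEFINE_CASTS

int main() {
  ExampleAdd add;
  const ExampleInstruction* insn = &add;
  assert(insn->IsAdd());
  assert(insn->AsMul() == nullptr);
  return insn->AsAdd() != nullptr ? 0 : 1;
}

Because the kind check is a plain field comparison, these helpers can be inlined and constant-folded, unlike the virtual overrides they replace.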
InstructionKind GetKind() const { return GetPackedField<InstructionKindField>(); } virtual size_t ComputeHashCode() const { @@ -2337,13 +2410,18 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { static constexpr size_t kFieldInstructionKind = kFlagReferenceTypeIsExact + 1; static constexpr size_t kFieldInstructionKindSize = MinimumBitsToStore(static_cast<size_t>(InstructionKind::kLastInstructionKind - 1)); - static constexpr size_t kNumberOfGenericPackedBits = + static constexpr size_t kFieldType = kFieldInstructionKind + kFieldInstructionKindSize; + static constexpr size_t kFieldTypeSize = + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); + static constexpr size_t kNumberOfGenericPackedBits = kFieldType + kFieldTypeSize; static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte; static_assert(kNumberOfGenericPackedBits <= kMaxNumberOfPackedBits, "Too many generic packed fields"); + using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const { return GetInputRecords()[i]; } @@ -2568,7 +2646,7 @@ class HBackwardInstructionIterator : public ValueObject { class HVariableInputSizeInstruction : public HInstruction { public: using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override { return ArrayRef<HUserRecord<HInstruction*>>(inputs_); } @@ -2590,6 +2668,15 @@ class HVariableInputSizeInstruction : public HInstruction { ArenaAllocKind kind) : HInstruction(inst_kind, side_effects, dex_pc), inputs_(number_of_inputs, allocator->Adapter(kind)) {} + HVariableInputSizeInstruction(InstructionKind inst_kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc, + ArenaAllocator* allocator, + size_t number_of_inputs, + ArenaAllocKind kind) + : HInstruction(inst_kind, type, side_effects, dex_pc), + inputs_(number_of_inputs, allocator->Adapter(kind)) {} DEFAULT_COPY_CONSTRUCTOR(VariableInputSizeInstruction); @@ -2597,19 +2684,24 @@ class HVariableInputSizeInstruction : public HInstruction { }; template<size_t N> -class HTemplateInstruction: public HInstruction { +class HExpression : public HInstruction { public: - HTemplateInstruction<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + HExpression<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) : HInstruction(kind, side_effects, dex_pc), inputs_() {} - virtual ~HTemplateInstruction() {} + HExpression<N>(InstructionKind kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc) + : HInstruction(kind, type, side_effects, dex_pc), inputs_() {} + virtual ~HExpression() {} using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>(inputs_); } protected: - DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<N>); + DEFAULT_COPY_CONSTRUCTOR(Expression<N>); private: std::array<HUserRecord<HInstruction*>, N> inputs_; @@ -2617,64 +2709,35 @@ class HTemplateInstruction: public HInstruction { friend class SsaBuilder; }; -// HTemplateInstruction specialization for N=0. +// HExpression specialization for N=0. 
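The kFieldType / TypeField addition above moves the instruction's DataType::Type out of subclass-specific packed fields and into HInstruction's shared 32-bit packed storage, sized via MinimumBitsToStore. A minimal sketch of that packing idea, using hypothetical helpers rather than ART's BitField template:

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for ART's MinimumBitsToStore utility.
constexpr size_t ExampleMinimumBitsToStore(size_t max_value) {
  size_t bits = 0;
  while (max_value != 0) {
    ++bits;
    max_value >>= 1;
  }
  return bits;
}

enum class ExampleType : uint32_t { kVoid, kInt32, kInt64, kReference, kLast = kReference };

class ExamplePacked {
 public:
  // Earlier fields (e.g. the instruction kind) occupy the low bits.
  static constexpr size_t kFieldKind = 0;
  static constexpr size_t kFieldKindSize = 7;
  static constexpr size_t kFieldType = kFieldKind + kFieldKindSize;
  static constexpr size_t kFieldTypeSize =
      ExampleMinimumBitsToStore(static_cast<size_t>(ExampleType::kLast));
  static_assert(kFieldType + kFieldTypeSize <= 32, "Too many packed fields");

  void SetType(ExampleType type) {
    uint32_t mask = ((1u << kFieldTypeSize) - 1u) << kFieldType;
    bits_ = (bits_ & ~mask) | (static_cast<uint32_t>(type) << kFieldType);
  }
  ExampleType GetType() const {
    return static_cast<ExampleType>((bits_ >> kFieldType) & ((1u << kFieldTypeSize) - 1u));
  }

 private:
  uint32_t bits_ = 0u;
};

int main() {
  ExamplePacked packed;
  packed.SetType(ExampleType::kReference);
  assert(packed.GetType() == ExampleType::kReference);
  return 0;
}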
template<> -class HTemplateInstruction<0>: public HInstruction { +class HExpression<0> : public HInstruction { public: - explicit HTemplateInstruction<0>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) - : HInstruction(kind, side_effects, dex_pc) {} + using HInstruction::HInstruction; - virtual ~HTemplateInstruction() {} + virtual ~HExpression() {} using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>(); } protected: - DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<0>); + DEFAULT_COPY_CONSTRUCTOR(Expression<0>); private: friend class SsaBuilder; }; -template<intptr_t N> -class HExpression : public HTemplateInstruction<N> { - public: - using HInstruction::InstructionKind; - HExpression<N>(InstructionKind kind, - DataType::Type type, - SideEffects side_effects, - uint32_t dex_pc) - : HTemplateInstruction<N>(kind, side_effects, dex_pc) { - this->template SetPackedField<TypeField>(type); - } - virtual ~HExpression() {} - - DataType::Type GetType() const OVERRIDE { - return TypeField::Decode(this->GetPackedFields()); - } - - protected: - static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kNumberOfExpressionPackedBits = kFieldType + kFieldTypeSize; - static_assert(kNumberOfExpressionPackedBits <= HInstruction::kMaxNumberOfPackedBits, - "Too many packed fields."); - using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; - DEFAULT_COPY_CONSTRUCTOR(Expression<N>); -}; - // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow // instruction that branches to the exit block. -class HReturnVoid FINAL : public HTemplateInstruction<0> { +class HReturnVoid final : public HExpression<0> { public: explicit HReturnVoid(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kReturnVoid, SideEffects::None(), dex_pc) { + : HExpression(kReturnVoid, SideEffects::None(), dex_pc) { } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } DECLARE_INSTRUCTION(ReturnVoid); @@ -2684,14 +2747,14 @@ class HReturnVoid FINAL : public HTemplateInstruction<0> { // Represents dex's RETURN opcodes. A HReturn is a control flow // instruction that branches to the exit block. 
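The hunk above merges HTemplateInstruction into HExpression<N>: the N fixed inputs live inline in a std::array, with an explicit <0> specialization serving the zero-input instructions. A small illustrative shape of that design (hypothetical names; the real class carries much more state):

#include <array>
#include <cstddef>
#include <iostream>

// Toy input record, standing in for HUserRecord<HInstruction*>.
struct ExampleInput {
  int value = 0;
};

// Fixed-arity node: inputs live inline in a std::array, no heap allocation.
template <size_t N>
class ExampleFixedInputs {
 public:
  size_t InputCount() const { return inputs_.size(); }
  void SetInput(size_t i, ExampleInput input) { inputs_[i] = input; }
  ExampleInput GetInput(size_t i) const { return inputs_[i]; }

 private:
  std::array<ExampleInput, N> inputs_{};
};

// N == 0 specialization: the zero-input case drops the member entirely and
// simply reports an empty input list.
template <>
class ExampleFixedInputs<0> {
 public:
  size_t InputCount() const { return 0u; }
};

int main() {
  ExampleFixedInputs<2> binary;
  binary.SetInput(0, ExampleInput{1});
  binary.SetInput(1, ExampleInput{2});
  ExampleFixedInputs<0> nullary;
  std::cout << binary.InputCount() + nullary.InputCount() << std::endl;  // 2
  return 0;
}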
-class HReturn FINAL : public HTemplateInstruction<1> { +class HReturn final : public HExpression<1> { public: explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kReturn, SideEffects::None(), dex_pc) { + : HExpression(kReturn, SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } DECLARE_INSTRUCTION(Return); @@ -2699,7 +2762,7 @@ class HReturn FINAL : public HTemplateInstruction<1> { DEFAULT_COPY_CONSTRUCTOR(Return); }; -class HPhi FINAL : public HVariableInputSizeInstruction { +class HPhi final : public HVariableInputSizeInstruction { public: HPhi(ArenaAllocator* allocator, uint32_t reg_number, @@ -2708,13 +2771,13 @@ class HPhi FINAL : public HVariableInputSizeInstruction { uint32_t dex_pc = kNoDexPc) : HVariableInputSizeInstruction( kPhi, + ToPhiType(type), SideEffects::None(), dex_pc, allocator, number_of_inputs, kArenaAllocPhiInputs), reg_number_(reg_number) { - SetPackedField<TypeField>(ToPhiType(type)); DCHECK_NE(GetType(), DataType::Type::kVoid); // Phis are constructed live and marked dead if conflicting or unused. // Individual steps of SsaBuilder should assume that if a phi has been @@ -2723,7 +2786,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction { SetPackedFlag<kFlagCanBeNull>(true); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. static DataType::Type ToPhiType(DataType::Type type) { @@ -2732,7 +2795,6 @@ class HPhi FINAL : public HVariableInputSizeInstruction { bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } - DataType::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } void SetType(DataType::Type new_type) { // Make sure that only valid type changes occur. The following are allowed: // (1) int -> float/ref (primitive type propagation), @@ -2744,7 +2806,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction { SetPackedField<TypeField>(new_type); } - bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } + bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } uint32_t GetRegNumber() const { return reg_number_; } @@ -2791,14 +2853,10 @@ class HPhi FINAL : public HVariableInputSizeInstruction { DEFAULT_COPY_CONSTRUCTOR(Phi); private: - static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize; + static constexpr size_t kFlagIsLive = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1; static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; const uint32_t reg_number_; }; @@ -2806,13 +2864,13 @@ class HPhi FINAL : public HVariableInputSizeInstruction { // The exit instruction is the only instruction of the exit block. // Instructions aborting the method (HThrow and HReturn) must branch to the // exit block. 
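Much of this file's churn is the mechanical swap of the OVERRIDE/FINAL macros for the C++11 override and final keywords, as in the HReturn and HReturnVoid hunks above. The payoff of override is that a signature mismatch becomes a compile error rather than a silently unrelated virtual; a tiny illustration with toy classes (not ART types):

// Toy classes, not ART types.
struct ExampleBase {
  virtual bool CanBeMoved() const { return false; }
  virtual ~ExampleBase() = default;
};

struct ExampleGood final : ExampleBase {
  // OK: matches the base signature exactly, so this really overrides it.
  bool CanBeMoved() const override { return true; }
};

struct ExampleBad : ExampleBase {
  // With 'override' the missing 'const' is rejected at compile time:
  //   bool CanBeMoved() override { return true; }   // error: does not override
  // Without it, this quietly declares a new, unrelated function.
  bool CanBeMoved() { return true; }
};

int main() {
  ExampleGood good;
  const ExampleBase& base = good;
  return base.CanBeMoved() ? 0 : 1;  // Virtual dispatch reaches ExampleGood.
}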
-class HExit FINAL : public HTemplateInstruction<0> { +class HExit final : public HExpression<0> { public: explicit HExit(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kExit, SideEffects::None(), dex_pc) { + : HExpression(kExit, SideEffects::None(), dex_pc) { } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } DECLARE_INSTRUCTION(Exit); @@ -2821,14 +2879,14 @@ class HExit FINAL : public HTemplateInstruction<0> { }; // Jumps from one block to another. -class HGoto FINAL : public HTemplateInstruction<0> { +class HGoto final : public HExpression<0> { public: explicit HGoto(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kGoto, SideEffects::None(), dex_pc) { + : HExpression(kGoto, SideEffects::None(), dex_pc) { } - bool IsClonable() const OVERRIDE { return true; } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool IsControlFlow() const override { return true; } HBasicBlock* GetSuccessor() const { return GetBlock()->GetSingleSuccessor(); @@ -2846,7 +2904,7 @@ class HConstant : public HExpression<0> { : HExpression(kind, type, SideEffects::None(), dex_pc) { } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } // Is this constant -1 in the arithmetic sense? virtual bool IsMinusOne() const { return false; } @@ -2865,18 +2923,18 @@ class HConstant : public HExpression<0> { DEFAULT_COPY_CONSTRUCTOR(Constant); }; -class HNullConstant FINAL : public HConstant { +class HNullConstant final : public HConstant { public: - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - uint64_t GetValueAsUint64() const OVERRIDE { return 0; } + uint64_t GetValueAsUint64() const override { return 0; } - size_t ComputeHashCode() const OVERRIDE { return 0; } + size_t ComputeHashCode() const override { return 0; } // The null constant representation is a 0-bit pattern. - virtual bool IsZeroBitPattern() const { return true; } + bool IsZeroBitPattern() const override { return true; } DECLARE_INSTRUCTION(NullConstant); @@ -2893,25 +2951,25 @@ class HNullConstant FINAL : public HConstant { // Constants of the type int. Those can be from Dex instructions, or // synthesized (for example with the if-eqz instruction). 
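HNullConstant above keeps its InstructionDataEquals and ComputeHashCode overrides, which feed the general HInstruction::Equals contract described earlier in this diff: two instructions are equal when they have the same kind, equal instruction-specific data, and identical inputs. A schematic sketch of that test, with a hypothetical node type far simpler than HInstruction:

#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical node type, far simpler than HInstruction.
struct ExampleNode {
  int kind;                               // Instruction kind, e.g. constant vs. add.
  uint64_t data;                          // Instruction-specific payload.
  std::vector<const ExampleNode*> inputs; // Identity of the input nodes.
};

// Equal iff: same kind, same payload, and the *same* input nodes (by identity).
bool ExampleEquals(const ExampleNode& a, const ExampleNode& b) {
  return a.kind == b.kind && a.data == b.data && a.inputs == b.inputs;
}

int main() {
  ExampleNode c1{1, 42, {}};
  ExampleNode c2{1, 42, {}};
  ExampleNode add1{2, 0, {&c1, &c1}};
  ExampleNode add2{2, 0, {&c1, &c1}};
  ExampleNode add3{2, 0, {&c1, &c2}};
  assert(ExampleEquals(c1, c2));      // Same kind and payload, no inputs.
  assert(ExampleEquals(add1, add2));  // Same inputs by identity.
  assert(!ExampleEquals(add1, add3)); // c2 is a distinct node, so not equal.
  return 0;
}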
-class HIntConstant FINAL : public HConstant { +class HIntConstant final : public HConstant { public: int32_t GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { + uint64_t GetValueAsUint64() const override { return static_cast<uint64_t>(static_cast<uint32_t>(value_)); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsIntConstant()) << other->DebugName(); return other->AsIntConstant()->value_ == value_; } - size_t ComputeHashCode() const OVERRIDE { return GetValue(); } + size_t ComputeHashCode() const override { return GetValue(); } - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } - bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; } - bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; } - bool IsOne() const OVERRIDE { return GetValue() == 1; } + bool IsMinusOne() const override { return GetValue() == -1; } + bool IsArithmeticZero() const override { return GetValue() == 0; } + bool IsZeroBitPattern() const override { return GetValue() == 0; } + bool IsOne() const override { return GetValue() == 1; } // Integer constants are used to encode Boolean values as well, // where 1 means true and 0 means false. @@ -2939,23 +2997,23 @@ class HIntConstant FINAL : public HConstant { ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); }; -class HLongConstant FINAL : public HConstant { +class HLongConstant final : public HConstant { public: int64_t GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { return value_; } + uint64_t GetValueAsUint64() const override { return value_; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsLongConstant()) << other->DebugName(); return other->AsLongConstant()->value_ == value_; } - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); } - bool IsMinusOne() const OVERRIDE { return GetValue() == -1; } - bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; } - bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; } - bool IsOne() const OVERRIDE { return GetValue() == 1; } + bool IsMinusOne() const override { return GetValue() == -1; } + bool IsArithmeticZero() const override { return GetValue() == 0; } + bool IsZeroBitPattern() const override { return GetValue() == 0; } + bool IsOne() const override { return GetValue() == 1; } DECLARE_INSTRUCTION(LongConstant); @@ -2973,25 +3031,25 @@ class HLongConstant FINAL : public HConstant { friend class HGraph; }; -class HFloatConstant FINAL : public HConstant { +class HFloatConstant final : public HConstant { public: float GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { + uint64_t GetValueAsUint64() const override { return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_)); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsFloatConstant()) << other->DebugName(); return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64(); } - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); } - bool 
IsMinusOne() const OVERRIDE { + bool IsMinusOne() const override { return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f)); } - bool IsArithmeticZero() const OVERRIDE { + bool IsArithmeticZero() const override { return std::fpclassify(value_) == FP_ZERO; } bool IsArithmeticPositiveZero() const { @@ -3000,10 +3058,10 @@ class HFloatConstant FINAL : public HConstant { bool IsArithmeticNegativeZero() const { return IsArithmeticZero() && std::signbit(value_); } - bool IsZeroBitPattern() const OVERRIDE { + bool IsZeroBitPattern() const override { return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(0.0f); } - bool IsOne() const OVERRIDE { + bool IsOne() const override { return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f); } bool IsNaN() const { @@ -3032,23 +3090,23 @@ class HFloatConstant FINAL : public HConstant { friend class HGraph; }; -class HDoubleConstant FINAL : public HConstant { +class HDoubleConstant final : public HConstant { public: double GetValue() const { return value_; } - uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); } + uint64_t GetValueAsUint64() const override { return bit_cast<uint64_t, double>(value_); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsDoubleConstant()) << other->DebugName(); return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64(); } - size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); } + size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); } - bool IsMinusOne() const OVERRIDE { + bool IsMinusOne() const override { return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0)); } - bool IsArithmeticZero() const OVERRIDE { + bool IsArithmeticZero() const override { return std::fpclassify(value_) == FP_ZERO; } bool IsArithmeticPositiveZero() const { @@ -3057,10 +3115,10 @@ class HDoubleConstant FINAL : public HConstant { bool IsArithmeticNegativeZero() const { return IsArithmeticZero() && std::signbit(value_); } - bool IsZeroBitPattern() const OVERRIDE { + bool IsZeroBitPattern() const override { return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((0.0)); } - bool IsOne() const OVERRIDE { + bool IsOne() const override { return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0); } bool IsNaN() const { @@ -3091,15 +3149,15 @@ class HDoubleConstant FINAL : public HConstant { // Conditional branch. A block ending with an HIf instruction must have // two successors. -class HIf FINAL : public HTemplateInstruction<1> { +class HIf final : public HExpression<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kIf, SideEffects::None(), dex_pc) { + : HExpression(kIf, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } - bool IsClonable() const OVERRIDE { return true; } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool IsControlFlow() const override { return true; } HBasicBlock* IfTrueSuccessor() const { return GetBlock()->GetSuccessors()[0]; @@ -3121,7 +3179,7 @@ class HIf FINAL : public HTemplateInstruction<1> { // non-exceptional control flow. // Normal-flow successor is stored at index zero, exception handlers under // higher indices in no particular order. 
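The HFloatConstant and HDoubleConstant hunks above distinguish IsArithmeticZero (fpclassify reports FP_ZERO, so +0.0 and -0.0 both qualify) from IsZeroBitPattern (an exact bit_cast match with +0.0), and they compare constants by bit pattern so that -0.0 and NaN payloads are not conflated by floating-point ==. A small self-contained illustration of why the two notions differ; FloatBits is a hypothetical stand-in for ART's bit_cast:

#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>

// Portable stand-in for ART's bit_cast<uint32_t, float>.
uint32_t FloatBits(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}

int main() {
  float pos_zero = 0.0f;
  float neg_zero = -0.0f;
  float nan = std::nanf("");

  // Arithmetic view: both zeros compare equal and classify as FP_ZERO.
  std::cout << (pos_zero == neg_zero) << ' '
            << (std::fpclassify(neg_zero) == FP_ZERO) << '\n';   // 1 1

  // Bit-pattern view: -0.0 is not the zero bit pattern, NaN != NaN under
  // operator==, but an identical NaN payload does match by bits.
  std::cout << (FloatBits(neg_zero) == FloatBits(pos_zero)) << ' '
            << (nan == nan) << ' '
            << (FloatBits(nan) == FloatBits(nan)) << '\n';       // 0 0 1
  return 0;
}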
-class HTryBoundary FINAL : public HTemplateInstruction<0> { +class HTryBoundary final : public HExpression<0> { public: enum class BoundaryKind { kEntry, @@ -3129,12 +3187,19 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { kLast = kExit }; + // SideEffects::CanTriggerGC prevents instructions with SideEffects::DependOnGC to be alive + // across the catch block entering edges as GC might happen during throwing an exception. + // TryBoundary with BoundaryKind::kExit is conservatively used for that as there is no + // HInstruction which a catch block must start from. explicit HTryBoundary(BoundaryKind kind, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kTryBoundary, SideEffects::None(), dex_pc) { + : HExpression(kTryBoundary, + (kind == BoundaryKind::kExit) ? SideEffects::CanTriggerGC() + : SideEffects::None(), + dex_pc) { SetPackedField<BoundaryKindField>(kind); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } // Returns the block's non-exceptional successor (index zero). HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; } @@ -3180,7 +3245,7 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; // Deoptimize to interpreter, upon checking a condition. -class HDeoptimize FINAL : public HVariableInputSizeInstruction { +class HDeoptimize final : public HVariableInputSizeInstruction { public: // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move // across. @@ -3193,14 +3258,14 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { SideEffects::All(), dex_pc, allocator, - /* number_of_inputs */ 1, + /* number_of_inputs= */ 1, kArenaAllocMisc) { SetPackedFlag<kFieldCanBeMoved>(false); SetPackedField<DeoptimizeKindField>(kind); SetRawInputAt(0, cond); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } // Use this constructor when the `HDeoptimize` guards an instruction, and any user // that relies on the deoptimization to pass should have its input be the `HDeoptimize` @@ -3214,10 +3279,11 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { uint32_t dex_pc) : HVariableInputSizeInstruction( kDeoptimize, + guard->GetType(), SideEffects::CanTriggerGC(), dex_pc, allocator, - /* number_of_inputs */ 2, + /* number_of_inputs= */ 2, kArenaAllocMisc) { SetPackedFlag<kFieldCanBeMoved>(true); SetPackedField<DeoptimizeKindField>(kind); @@ -3225,22 +3291,18 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { SetRawInputAt(1, guard); } - bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); } + bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); } - DataType::Type GetType() const OVERRIDE { - return GuardsAnInput() ? 
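The HTryBoundary change below attaches SideEffects::CanTriggerGC to the exit boundary so that, per the kCanTriggerGCBit/kDependsOnGCBit comments earlier in this diff, values flagged as DependsOnGC are not kept live across an edge where a collection may run. A schematic two-bit sketch of that interference test; the class below is a toy, not ART's SideEffects:

#include <cassert>
#include <cstdint>

// Toy two-bit version of ART's SideEffects GC flags.
class ExampleSideEffects {
 public:
  static constexpr uint64_t kCanTriggerGCBit = 1u << 0;
  static constexpr uint64_t kDependsOnGCBit  = 1u << 1;

  static ExampleSideEffects CanTriggerGC() { return ExampleSideEffects(kCanTriggerGCBit); }
  static ExampleSideEffects DependsOnGC()  { return ExampleSideEffects(kDependsOnGCBit); }

  // 'this' (a user of a GC-sensitive value) must not stay live across 'other'
  // if 'other' can trigger a collection.
  bool MayDependOn(ExampleSideEffects other) const {
    return (flags_ & kDependsOnGCBit) != 0 && (other.flags_ & kCanTriggerGCBit) != 0;
  }

 private:
  explicit ExampleSideEffects(uint64_t flags) : flags_(flags) {}
  uint64_t flags_;
};

int main() {
  ExampleSideEffects gc_sensitive_user = ExampleSideEffects::DependsOnGC();
  ExampleSideEffects try_exit_boundary = ExampleSideEffects::CanTriggerGC();
  assert(gc_sensitive_user.MayDependOn(try_exit_boundary));
  assert(!try_exit_boundary.MayDependOn(gc_sensitive_user));
  return 0;
}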
GuardedInput()->GetType() : DataType::Type::kVoid; - } - bool GuardsAnInput() const { return InputCount() == 2; } @@ -3277,12 +3339,13 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { // if it's true, starts to do deoptimization. // It has a 4-byte slot on stack. // TODO: allocate a register for this flag. -class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { +class HShouldDeoptimizeFlag final : public HVariableInputSizeInstruction { public: // CHA guards are only optimized in a separate pass and it has no side effects // with regard to other passes. HShouldDeoptimizeFlag(ArenaAllocator* allocator, uint32_t dex_pc) : HVariableInputSizeInstruction(kShouldDeoptimizeFlag, + DataType::Type::kInt32, SideEffects::None(), dex_pc, allocator, @@ -3290,13 +3353,11 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { kArenaAllocCHA) { } - DataType::Type GetType() const OVERRIDE { return DataType::Type::kInt32; } - // We do all CHA guard elimination/motion in a single pass, after which there is no // further guard elimination/motion since a guard might have been used for justification // of the elimination of another guard. Therefore, we pretend this guard cannot be moved // to avoid other optimizations trying to move it. - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); @@ -3307,7 +3368,7 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { // Represents the ArtMethod that was passed as a first argument to // the method. It is used by instructions that depend on it, like // instructions that work with the dex cache. -class HCurrentMethod FINAL : public HExpression<0> { +class HCurrentMethod final : public HExpression<0> { public: explicit HCurrentMethod(DataType::Type type, uint32_t dex_pc = kNoDexPc) : HExpression(kCurrentMethod, type, SideEffects::None(), dex_pc) { @@ -3321,7 +3382,7 @@ class HCurrentMethod FINAL : public HExpression<0> { // Fetches an ArtMethod from the virtual table or the interface method table // of a class. -class HClassTableGet FINAL : public HExpression<1> { +class HClassTableGet final : public HExpression<1> { public: enum class TableKind { kVTable, @@ -3339,9 +3400,9 @@ class HClassTableGet FINAL : public HExpression<1> { SetRawInputAt(0, cls); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other) const override { return other->AsClassTableGet()->GetIndex() == index_ && other->AsClassTableGet()->GetPackedFields() == GetPackedFields(); } @@ -3355,7 +3416,7 @@ class HClassTableGet FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(ClassTableGet); private: - static constexpr size_t kFieldTableKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTableKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTableKindSize = MinimumBitsToStore(static_cast<size_t>(TableKind::kLast)); static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize; @@ -3370,21 +3431,21 @@ class HClassTableGet FINAL : public HExpression<1> { // PackedSwitch (jump table). 
A block ending with a PackedSwitch instruction will // have one successor for each entry in the switch table, and the final successor // will be the block containing the next Dex opcode. -class HPackedSwitch FINAL : public HTemplateInstruction<1> { +class HPackedSwitch final : public HExpression<1> { public: HPackedSwitch(int32_t start_value, uint32_t num_entries, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kPackedSwitch, SideEffects::None(), dex_pc), + : HExpression(kPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } int32_t GetStartValue() const { return start_value_; } @@ -3415,13 +3476,13 @@ class HUnaryOperation : public HExpression<1> { } // All of the UnaryOperation instructions are clonable. - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } HInstruction* GetInput() const { return InputAt(0); } DataType::Type GetResultType() const { return GetType(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } @@ -3456,7 +3517,7 @@ class HBinaryOperation : public HExpression<2> { } // All of the BinaryOperation instructions are clonable. - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } HInstruction* GetLeft() const { return InputAt(0); } HInstruction* GetRight() const { return InputAt(1); } @@ -3496,8 +3557,8 @@ class HBinaryOperation : public HExpression<2> { } } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } @@ -3578,7 +3639,7 @@ class HCondition : public HBinaryOperation { ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); } void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return GetPackedFields() == other->AsCondition()->GetPackedFields(); } @@ -3606,7 +3667,7 @@ class HCondition : public HBinaryOperation { protected: // Needed if we merge a HCompare into a HCondition. - static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldComparisonBias = kNumberOfGenericPackedBits; static constexpr size_t kFieldComparisonBiasSize = MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast)); static constexpr size_t kNumberOfConditionPackedBits = @@ -3635,42 +3696,42 @@ class HCondition : public HBinaryOperation { }; // Instruction to check if two inputs are equal to each other. 
-class HEqual FINAL : public HCondition { +class HEqual final : public HCondition { public: HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kEqual, first, second, dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HNullConstant* y ATTRIBUTE_UNUSED) const override { return MakeConstantCondition(true, GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HEqual instruction; evaluate it as // `Compare(x, y) == 0`. - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(Equal); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondEQ; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondNE; } @@ -3681,42 +3742,42 @@ class HEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x == y; } }; -class HNotEqual FINAL : public HCondition { +class HNotEqual final : public HCondition { public: HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kNotEqual, first, second, dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HNullConstant* y ATTRIBUTE_UNUSED) const override { return MakeConstantCondition(false, GetDexPc()); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HNotEqual instruction; evaluate it as // `Compare(x, y) != 0`. 
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(NotEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondNE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondEQ; } @@ -3727,36 +3788,36 @@ class HNotEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x != y; } }; -class HLessThan FINAL : public HCondition { +class HLessThan final : public HCondition { public: HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kLessThan, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HLessThan instruction; evaluate it as // `Compare(x, y) < 0`. - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(LessThan); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondLT; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondGE; } @@ -3767,36 +3828,36 @@ class HLessThan FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x < y; } }; -class HLessThanOrEqual FINAL : public HCondition { +class HLessThanOrEqual final : public HCondition { public: HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kLessThanOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HLessThanOrEqual instruction; evaluate it as // 
`Compare(x, y) <= 0`. - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(LessThanOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondLE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondGT; } @@ -3807,35 +3868,35 @@ class HLessThanOrEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x <= y; } }; -class HGreaterThan FINAL : public HCondition { +class HGreaterThan final : public HCondition { public: HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kGreaterThan, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HGreaterThan instruction; evaluate it as // `Compare(x, y) > 0`. 
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(GreaterThan); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondGT; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondLE; } @@ -3846,35 +3907,35 @@ class HGreaterThan FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x > y; } }; -class HGreaterThanOrEqual FINAL : public HCondition { +class HGreaterThanOrEqual final : public HCondition { public: HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kGreaterThanOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } // In the following Evaluate methods, a HCompare instruction has // been merged into this HGreaterThanOrEqual instruction; evaluate it as // `Compare(x, y) >= 0`. 
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc()); } DECLARE_INSTRUCTION(GreaterThanOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondGE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondLT; } @@ -3885,36 +3946,36 @@ class HGreaterThanOrEqual FINAL : public HCondition { template <typename T> static bool Compute(T x, T y) { return x >= y; } }; -class HBelow FINAL : public HCondition { +class HBelow final : public HCondition { public: HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kBelow, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(Below); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondB; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondAE; } @@ -3927,36 +3988,36 @@ class HBelow FINAL : public HCondition { } }; -class HBelowOrEqual FINAL : public HCondition { +class HBelowOrEqual final : public HCondition { public: HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kBelowOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) 
const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(BelowOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondBE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondA; } @@ -3969,36 +4030,36 @@ class HBelowOrEqual FINAL : public HCondition { } }; -class HAbove FINAL : public HCondition { +class HAbove final : public HCondition { public: HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kAbove, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(Above); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return kCondA; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondBE; } @@ -4011,36 +4072,36 @@ class HAbove FINAL : public HCondition { } }; -class HAboveOrEqual FINAL : public HCondition { +class HAboveOrEqual final : public HCondition { public: HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HCondition(kAboveOrEqual, first, second, dex_pc) { } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } DECLARE_INSTRUCTION(AboveOrEqual); - IfCondition GetCondition() const OVERRIDE { + IfCondition GetCondition() const override { return 
kCondAE; } - IfCondition GetOppositeCondition() const OVERRIDE { + IfCondition GetOppositeCondition() const override { return kCondB; } @@ -4055,7 +4116,7 @@ class HAboveOrEqual FINAL : public HCondition { // Instruction to check how two inputs compare to each other. // Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1. -class HCompare FINAL : public HBinaryOperation { +class HCompare final : public HBinaryOperation { public: // Note that `comparison_type` is the type of comparison performed // between the comparison's inputs, not the type of the instantiated @@ -4087,7 +4148,7 @@ class HCompare FINAL : public HBinaryOperation { return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { // Note that there is no "cmp-int" Dex instruction so we shouldn't // reach this code path when processing a freshly built HIR // graph. However HCompare integer instructions can be synthesized @@ -4095,17 +4156,17 @@ class HCompare FINAL : public HBinaryOperation { // IntegerSignum intrinsics, so we have to handle this case. return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return GetPackedFields() == other->AsCompare()->GetPackedFields(); } @@ -4126,7 +4187,7 @@ class HCompare FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Compare); protected: - static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldComparisonBias = kNumberOfGenericPackedBits; static constexpr size_t kFieldComparisonBiasSize = MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast)); static constexpr size_t kNumberOfComparePackedBits = @@ -4144,7 +4205,7 @@ class HCompare FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Compare); }; -class HNewInstance FINAL : public HExpression<1> { +class HNewInstance final : public HExpression<1> { public: HNewInstance(HInstruction* cls, uint32_t dex_pc, @@ -4163,16 +4224,16 @@ class HNewInstance FINAL : public HExpression<1> { SetRawInputAt(0, cls); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } dex::TypeIndex GetTypeIndex() const { return type_index_; } const DexFile& GetDexFile() const { return dex_file_; } // Calls runtime so needs an environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } // Can throw errors when out-of-memory or if it's not instantiable/accessible. 
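Several hunks above rebase packed-field offsets such as kFieldComparisonBias and kFlagFinalizable from kNumberOfExpressionPackedBits onto kNumberOfGenericPackedBits. A minimal sketch of the underlying BitField idea (simplified, not the ART template):

#include <cstddef>
#include <cstdint>
#include <iostream>

template <typename T, size_t kPosition, size_t kSize>
struct BitField {
  static constexpr uint32_t Mask() { return (1u << kSize) - 1u; }
  static uint32_t Encode(T value, uint32_t fields) {
    return (fields & ~(Mask() << kPosition)) |
           ((static_cast<uint32_t>(value) & Mask()) << kPosition);
  }
  static T Decode(uint32_t fields) {
    return static_cast<T>((fields >> kPosition) & Mask());
  }
};

enum class ComparisonBias : uint32_t { kNoBias, kGtBias, kLtBias };

int main() {
  // Position 3 and size 2 are arbitrary example values, standing in for the
  // offsets computed from kNumberOfGenericPackedBits in the diff above.
  using BiasField = BitField<ComparisonBias, /*kPosition=*/3, /*kSize=*/2>;
  uint32_t packed = 0u;
  packed = BiasField::Encode(ComparisonBias::kGtBias, packed);
  std::cout << static_cast<uint32_t>(BiasField::Decode(packed)) << '\n';  // 1
  return 0;
}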
- bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } bool NeedsChecks() const { return entrypoint_ == kQuickAllocObjectWithChecks; @@ -4180,7 +4241,7 @@ class HNewInstance FINAL : public HExpression<1> { bool IsFinalizable() const { return GetPackedFlag<kFlagFinalizable>(); } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; } @@ -4205,7 +4266,7 @@ class HNewInstance FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(NewInstance); private: - static constexpr size_t kFlagFinalizable = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagFinalizable = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1; static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); @@ -4234,7 +4295,7 @@ enum IntrinsicExceptions { class HInvoke : public HVariableInputSizeInstruction { public: - bool NeedsEnvironment() const OVERRIDE; + bool NeedsEnvironment() const override; void SetArgumentAt(size_t index, HInstruction* argument) { SetRawInputAt(index, argument); @@ -4246,8 +4307,6 @@ class HInvoke : public HVariableInputSizeInstruction { // inputs at the end of their list of inputs. uint32_t GetNumberOfArguments() const { return number_of_arguments_; } - DataType::Type GetType() const OVERRIDE { return GetPackedField<ReturnTypeField>(); } - uint32_t GetDexMethodIndex() const { return dex_method_index_; } InvokeType GetInvokeType() const { @@ -4269,15 +4328,15 @@ class HInvoke : public HVariableInputSizeInstruction { void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); } - bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); } + bool CanThrow() const override { return GetPackedFlag<kFlagCanThrow>(); } void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } - bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); } - bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); } + bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_; } @@ -4292,7 +4351,7 @@ class HInvoke : public HVariableInputSizeInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } ArtMethod* GetResolvedMethod() const { return resolved_method_; } - void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; } + void SetResolvedMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_); DECLARE_ABSTRACT_INSTRUCTION(Invoke); @@ -4300,16 +4359,11 @@ class HInvoke : public HVariableInputSizeInstruction { static constexpr size_t kFieldInvokeType = kNumberOfGenericPackedBits; static constexpr size_t kFieldInvokeTypeSize = MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType)); - static constexpr size_t kFieldReturnType = - kFieldInvokeType + kFieldInvokeTypeSize; - static constexpr size_t kFieldReturnTypeSize = - MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize; + static constexpr size_t 
kFlagCanThrow = kFieldInvokeType + kFieldInvokeTypeSize; static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1; static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1; static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; - using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; HInvoke(InstructionKind kind, ArenaAllocator* allocator, @@ -4322,19 +4376,21 @@ class HInvoke : public HVariableInputSizeInstruction { InvokeType invoke_type) : HVariableInputSizeInstruction( kind, + return_type, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc, allocator, number_of_arguments + number_of_other_inputs, kArenaAllocInvokeInputs), number_of_arguments_(number_of_arguments), - resolved_method_(resolved_method), dex_method_index_(dex_method_index), intrinsic_(Intrinsics::kNone), intrinsic_optimizations_(0) { - SetPackedField<ReturnTypeField>(return_type); SetPackedField<InvokeTypeField>(invoke_type); SetPackedFlag<kFlagCanThrow>(true); + // Check mutator lock, constructors lack annotalysis support. + Locks::mutator_lock_->AssertNotExclusiveHeld(Thread::Current()); + SetResolvedMethod(resolved_method); } DEFAULT_COPY_CONSTRUCTOR(Invoke); @@ -4348,7 +4404,7 @@ class HInvoke : public HVariableInputSizeInstruction { uint32_t intrinsic_optimizations_; }; -class HInvokeUnresolved FINAL : public HInvoke { +class HInvokeUnresolved final : public HInvoke { public: HInvokeUnresolved(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4359,7 +4415,7 @@ class HInvokeUnresolved FINAL : public HInvoke { : HInvoke(kInvokeUnresolved, allocator, number_of_arguments, - 0u /* number_of_other_inputs */, + /* number_of_other_inputs= */ 0u, return_type, dex_pc, dex_method_index, @@ -4367,7 +4423,7 @@ class HInvokeUnresolved FINAL : public HInvoke { invoke_type) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } DECLARE_INSTRUCTION(InvokeUnresolved); @@ -4375,7 +4431,7 @@ class HInvokeUnresolved FINAL : public HInvoke { DEFAULT_COPY_CONSTRUCTOR(InvokeUnresolved); }; -class HInvokePolymorphic FINAL : public HInvoke { +class HInvokePolymorphic final : public HInvoke { public: HInvokePolymorphic(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4385,7 +4441,7 @@ class HInvokePolymorphic FINAL : public HInvoke { : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, - 0u /* number_of_other_inputs */, + /* number_of_other_inputs= */ 0u, return_type, dex_pc, dex_method_index, @@ -4393,7 +4449,7 @@ class HInvokePolymorphic FINAL : public HInvoke { kVirtual) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } DECLARE_INSTRUCTION(InvokePolymorphic); @@ -4401,7 +4457,39 @@ class HInvokePolymorphic FINAL : public HInvoke { DEFAULT_COPY_CONSTRUCTOR(InvokePolymorphic); }; -class HInvokeStaticOrDirect FINAL : public HInvoke { +class HInvokeCustom final : public HInvoke { + public: + HInvokeCustom(ArenaAllocator* allocator, + uint32_t number_of_arguments, + uint32_t call_site_index, + DataType::Type return_type, + uint32_t dex_pc) + : HInvoke(kInvokeCustom, + allocator, + number_of_arguments, + /* number_of_other_inputs= */ 0u, + return_type, + dex_pc, + /* dex_method_index= */ dex::kDexNoIndex, + /* resolved_method= */ nullptr, + kStatic), + call_site_index_(call_site_index) { + } 
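Throughout this diff the FINAL and OVERRIDE macros are replaced by the C++11 keywords final and override. A tiny standalone example of what the keywords buy (class names are illustrative only):

#include <iostream>

class HInstructionLike {
 public:
  virtual ~HInstructionLike() = default;
  virtual bool CanThrow() const { return false; }
};

// `final` forbids further subclassing; `override` makes the compiler verify
// that the signature really overrides a virtual function in the base class.
class HInvokeLike final : public HInstructionLike {
 public:
  bool CanThrow() const override { return true; }
  // bool CanThrow() override { return true; }  // would not compile: missing const
};

int main() {
  HInvokeLike invoke;
  const HInstructionLike& base = invoke;
  std::cout << base.CanThrow() << '\n';  // 1
  return 0;
}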
+ + uint32_t GetCallSiteIndex() const { return call_site_index_; } + + bool IsClonable() const override { return true; } + + DECLARE_INSTRUCTION(InvokeCustom); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeCustom); + + private: + uint32_t call_site_index_; +}; + +class HInvokeStaticOrDirect final : public HInvoke { public: // Requirements of this method call regarding the class // initialization (clinit) check of its declaring class. @@ -4424,14 +4512,18 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for boot image methods referenced by boot image code. kBootImageLinkTimePcRelative, - // Use ArtMethod* at a known address, embed the direct address in the code. - // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. - kDirectAddress, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for app->boot calls with relocatable image. + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. - // Used for classes outside boot image when .bss is accessible with a PC-relative load. + // Used for methods outside boot image referenced by AOT-compiled app and boot image code. kBssEntry, + // Use ArtMethod* at a known address, embed the direct address in the code. + // Used for for JIT-compiled calls. + kJitDirectAddress, + // Make a runtime call to resolve and call the method. This is the last-resort-kind // used when other kinds are unimplemented on a particular architecture. kRuntimeCall, @@ -4472,8 +4564,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { allocator, number_of_arguments, // There is potentially one extra argument for the HCurrentMethod node, and - // potentially one other if the clinit check is explicit, and potentially - // one other if the method is a string factory. + // potentially one other if the clinit check is explicit. (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u), return_type, @@ -4486,7 +4577,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } void SetDispatchInfo(const DispatchInfo& dispatch_info) { bool had_current_method_input = HasCurrentMethodInput(); @@ -4516,7 +4607,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { } using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override { ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords(); if (kIsDebugBuild && IsStaticWithExplicitClinitCheck()) { DCHECK(!input_records.empty()); @@ -4534,14 +4625,14 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { return input_records; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. 
return false; } - bool CanBeNull() const OVERRIDE { - return GetPackedField<ReturnTypeField>() == DataType::Type::kReference && !IsStringInit(); + bool CanBeNull() const override { + return GetType() == DataType::Type::kReference && !IsStringInit(); } // Get the index of the special input, if any. @@ -4555,11 +4646,12 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; } CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; } bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE; + bool NeedsDexCacheOfDeclaringClass() const override; bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; } - bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; } + bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kJitDirectAddress; } bool HasPcRelativeMethodLoadKind() const { return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative || + GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo || GetMethodLoadKind() == MethodLoadKind::kBssEntry; } bool HasCurrentMethodInput() const { @@ -4655,7 +4747,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs); std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); -class HInvokeVirtual FINAL : public HInvoke { +class HInvokeVirtual final : public HInvoke { public: HInvokeVirtual(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4676,9 +4768,9 @@ class HInvokeVirtual FINAL : public HInvoke { vtable_index_(vtable_index) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool CanBeNull() const OVERRIDE { + bool CanBeNull() const override { switch (GetIntrinsic()) { case Intrinsics::kThreadCurrentThread: case Intrinsics::kStringBufferAppend: @@ -4691,9 +4783,9 @@ class HInvokeVirtual FINAL : public HInvoke { } } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { // TODO: Add implicit null checks in intrinsics. - return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !IsIntrinsic(); } uint32_t GetVTableIndex() const { return vtable_index_; } @@ -4708,7 +4800,7 @@ class HInvokeVirtual FINAL : public HInvoke { const uint32_t vtable_index_; }; -class HInvokeInterface FINAL : public HInvoke { +class HInvokeInterface final : public HInvoke { public: HInvokeInterface(ArenaAllocator* allocator, uint32_t number_of_arguments, @@ -4729,14 +4821,14 @@ class HInvokeInterface FINAL : public HInvoke { imt_index_(imt_index) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { // TODO: Add implicit null checks in intrinsics. - return (obj == InputAt(0)) && !GetLocations()->Intrinsified(); + return (obj == InputAt(0)) && !IsIntrinsic(); } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { + bool NeedsDexCacheOfDeclaringClass() const override { // The assembly stub currently needs it. 
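The HInvokeStaticOrDirect hunk above renames kDirectAddress to kJitDirectAddress, adds kBootImageRelRo, and extends HasPcRelativeMethodLoadKind() accordingly. A compilable sketch of that predicate's shape (enumerators taken from the diff; the free-function form is illustrative, not the ART member function):

#include <iostream>

enum class MethodLoadKind {
  kStringInit,
  kRecursive,
  kBootImageLinkTimePcRelative,
  kBootImageRelRo,
  kBssEntry,
  kJitDirectAddress,
  kRuntimeCall,
};

bool HasPcRelativeMethodLoadKind(MethodLoadKind kind) {
  return kind == MethodLoadKind::kBootImageLinkTimePcRelative ||
         kind == MethodLoadKind::kBootImageRelRo ||
         kind == MethodLoadKind::kBssEntry;
}

int main() {
  std::cout << HasPcRelativeMethodLoadKind(MethodLoadKind::kBootImageRelRo) << '\n';    // 1
  std::cout << HasPcRelativeMethodLoadKind(MethodLoadKind::kJitDirectAddress) << '\n';  // 0
  return 0;
}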
return true; } @@ -4753,7 +4845,7 @@ class HInvokeInterface FINAL : public HInvoke { const uint32_t imt_index_; }; -class HNeg FINAL : public HUnaryOperation { +class HNeg final : public HUnaryOperation { public: HNeg(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) : HUnaryOperation(kNeg, result_type, input, dex_pc) { @@ -4762,16 +4854,16 @@ class HNeg FINAL : public HUnaryOperation { template <typename T> static T Compute(T x) { return -x; } - HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x) const override { return GetBlock()->GetGraph()->GetFloatConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x) const override { return GetBlock()->GetGraph()->GetDoubleConstant(Compute(x->GetValue()), GetDexPc()); } @@ -4781,23 +4873,24 @@ class HNeg FINAL : public HUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(Neg); }; -class HNewArray FINAL : public HExpression<2> { +class HNewArray final : public HExpression<2> { public: - HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc) + HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc, size_t component_size_shift) : HExpression(kNewArray, DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cls); SetRawInputAt(1, length); + SetPackedField<ComponentSizeShiftField>(component_size_shift); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } // Calls runtime so needs an environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } // May throw NegativeArraySizeException, OutOfMemoryError, etc. 
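HNewArray now receives a component_size_shift and stores it in its packed bits. Array element sizes are powers of two (1, 2, 4 or 8 bytes), so only the shift 0..3 needs to be kept, and MinimumBitsToStore(3u) == 2 bits of packed storage suffice. A standalone sketch (these helper implementations are illustrative, not the ART ones):

#include <cstddef>
#include <iostream>

constexpr size_t MinimumBitsToStore(size_t value) {
  size_t bits = 0;
  while (value != 0) {
    ++bits;
    value >>= 1;
  }
  return bits;
}

constexpr size_t ComponentSizeShift(size_t component_size) {
  size_t shift = 0;
  while ((size_t{1} << shift) < component_size) {
    ++shift;
  }
  return shift;
}

int main() {
  std::cout << ComponentSizeShift(8) << '\n';    // 3 (e.g. a long[] or double[])
  std::cout << MinimumBitsToStore(3u) << '\n';   // 2 bits of packed storage
  return 0;
}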
- bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } HLoadClass* GetLoadClass() const { DCHECK(InputAt(0)->IsLoadClass()); @@ -4808,13 +4901,26 @@ class HNewArray FINAL : public HExpression<2> { return InputAt(1); } + size_t GetComponentSizeShift() { + return GetPackedField<ComponentSizeShiftField>(); + } + DECLARE_INSTRUCTION(NewArray); protected: DEFAULT_COPY_CONSTRUCTOR(NewArray); + + private: + static constexpr size_t kFieldComponentSizeShift = kNumberOfGenericPackedBits; + static constexpr size_t kFieldComponentSizeShiftSize = MinimumBitsToStore(3u); + static constexpr size_t kNumberOfNewArrayPackedBits = + kFieldComponentSizeShift + kFieldComponentSizeShiftSize; + static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + using ComponentSizeShiftField = + BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShift>; }; -class HAdd FINAL : public HBinaryOperation { +class HAdd final : public HBinaryOperation { public: HAdd(DataType::Type result_type, HInstruction* left, @@ -4823,23 +4929,23 @@ class HAdd FINAL : public HBinaryOperation { : HBinaryOperation(kAdd, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x + y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4850,7 +4956,7 @@ class HAdd FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Add); }; -class HSub FINAL : public HBinaryOperation { +class HSub final : public HBinaryOperation { public: HSub(DataType::Type result_type, HInstruction* left, @@ -4861,19 +4967,19 @@ class HSub FINAL : public HBinaryOperation { template <typename T> static T Compute(T x, T y) { return x - y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, 
HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4884,7 +4990,7 @@ class HSub FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Sub); }; -class HMul FINAL : public HBinaryOperation { +class HMul final : public HBinaryOperation { public: HMul(DataType::Type result_type, HInstruction* left, @@ -4893,23 +4999,23 @@ class HMul FINAL : public HBinaryOperation { : HBinaryOperation(kMul, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x * y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4920,7 +5026,7 @@ class HMul FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Mul); }; -class HDiv FINAL : public HBinaryOperation { +class HDiv final : public HBinaryOperation { public: HDiv(DataType::Type result_type, HInstruction* left, @@ -4945,19 +5051,19 @@ class HDiv FINAL : public HBinaryOperation { return x / y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -4968,7 +5074,7 @@ class HDiv FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Div); }; -class HRem FINAL : public HBinaryOperation { +class HRem final : public 
HBinaryOperation { public: HRem(DataType::Type result_type, HInstruction* left, @@ -4993,19 +5099,19 @@ class HRem FINAL : public HBinaryOperation { return std::fmod(x, y); } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override { return GetBlock()->GetGraph()->GetFloatConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override { return GetBlock()->GetGraph()->GetDoubleConstant( ComputeFP(x->GetValue(), y->GetValue()), GetDexPc()); } @@ -5016,25 +5122,136 @@ class HRem FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Rem); }; -class HDivZeroCheck FINAL : public HExpression<1> { +class HMin final : public HBinaryOperation { + public: + HMin(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const override { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x <= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + + DECLARE_INSTRUCTION(Min); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Min); +}; + +class HMax final : public HBinaryOperation { + public: + HMax(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc) + : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {} + + bool IsCommutative() const override { return true; } + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x, T y) { + return (x >= y) ? x : y; + } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { + return GetBlock()->GetGraph()->GetIntConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { + return GetBlock()->GetGraph()->GetLongConstant( + ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); + } + // TODO: Evaluation for floating-point values. 
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + + DECLARE_INSTRUCTION(Max); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Max); +}; + +class HAbs final : public HUnaryOperation { + public: + HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kAbs, result_type, input, dex_pc) {} + + // Evaluation for integral values. + template <typename T> static T ComputeIntegral(T x) { + return x < 0 ? -x : x; + } + + // Evaluation for floating-point values. + // Note, as a "quality of implementation", rather than pure "spec compliance", + // we require that Math.abs() clears the sign bit (but changes nothing else) + // for all floating-point numbers, including NaN (signaling NaN may become quiet though). + // http://b/30758343 + template <typename T, typename S> static T ComputeFP(T x) { + S bits = bit_cast<S, T>(x); + return bit_cast<T, S>(bits & std::numeric_limits<S>::max()); + } + + HConstant* Evaluate(HIntConstant* x) const override { + return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const override { + return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x) const override { + return GetBlock()->GetGraph()->GetFloatConstant( + ComputeFP<float, int32_t>(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HDoubleConstant* x) const override { + return GetBlock()->GetGraph()->GetDoubleConstant( + ComputeFP<double, int64_t>(x->GetValue()), GetDexPc()); + } + + DECLARE_INSTRUCTION(Abs); + + protected: + DEFAULT_COPY_CONSTRUCTOR(Abs); +}; + +class HDivZeroCheck final : public HExpression<1> { public: // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` - // constructor. + // constructor. However it can only do it on a fatal slow path so execution never returns to the + // instruction following the current one; thus 'SideEffects::None()' is used. 
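The HAbs floating-point evaluation above computes |x| by clearing the sign bit of the raw bit pattern, which, as its comment notes, also handles -0.0 and NaN. A standalone sketch using memcpy in place of art::bit_cast:

#include <cmath>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <limits>

template <typename T, typename S>
T AbsFP(T x) {
  static_assert(sizeof(T) == sizeof(S), "need a same-width integer type");
  S bits;
  std::memcpy(&bits, &x, sizeof(T));        // stand-in for bit_cast<S, T>
  bits &= std::numeric_limits<S>::max();    // clear only the sign bit
  T result;
  std::memcpy(&result, &bits, sizeof(T));
  return result;
}

int main() {
  std::cout << AbsFP<double, int64_t>(-3.5) << '\n';                        // 3.5
  std::cout << AbsFP<float, int32_t>(-0.0f) << '\n';                        // 0
  std::cout << std::isnan(AbsFP<double, int64_t>(-std::nan(""))) << '\n';   // 1 (still NaN)
  return 0;
}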
HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(kDivZeroCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kDivZeroCheck, value->GetType(), SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } - DataType::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); } + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DECLARE_INSTRUCTION(DivZeroCheck); @@ -5042,7 +5259,7 @@ class HDivZeroCheck FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(DivZeroCheck); }; -class HShl FINAL : public HBinaryOperation { +class HShl final : public HBinaryOperation { public: HShl(DataType::Type result_type, HInstruction* value, @@ -5058,26 +5275,26 @@ class HShl FINAL : public HBinaryOperation { return value << (distance & max_shift_distance); } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5088,7 +5305,7 @@ class HShl FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Shl); }; -class HShr FINAL : public HBinaryOperation { +class HShr final : public HBinaryOperation { public: HShr(DataType::Type result_type, HInstruction* value, @@ -5104,26 +5321,26 @@ class HShr FINAL : public HBinaryOperation { return value >> (distance & max_shift_distance); } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* 
distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5134,7 +5351,7 @@ class HShr FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Shr); }; -class HUShr FINAL : public HBinaryOperation { +class HUShr final : public HBinaryOperation { public: HUShr(DataType::Type result_type, HInstruction* value, @@ -5152,26 +5369,26 @@ class HUShr FINAL : public HBinaryOperation { return static_cast<T>(ux >> (distance & max_shift_distance)); } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5182,7 +5399,7 @@ class HUShr FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(UShr); }; -class HAnd FINAL : public HBinaryOperation { +class HAnd final : public HBinaryOperation { public: HAnd(DataType::Type result_type, HInstruction* left, @@ -5191,25 +5408,25 @@ class HAnd FINAL : public HBinaryOperation { : HBinaryOperation(kAnd, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x & y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return 
GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5220,7 +5437,7 @@ class HAnd FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(And); }; -class HOr FINAL : public HBinaryOperation { +class HOr final : public HBinaryOperation { public: HOr(DataType::Type result_type, HInstruction* left, @@ -5229,25 +5446,25 @@ class HOr FINAL : public HBinaryOperation { : HBinaryOperation(kOr, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x | y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5258,7 +5475,7 @@ class HOr FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Or); }; -class HXor FINAL : public HBinaryOperation { +class HXor final : public HBinaryOperation { public: HXor(DataType::Type result_type, HInstruction* left, @@ -5267,25 +5484,25 @@ class HXor FINAL : public HBinaryOperation { : HBinaryOperation(kXor, result_type, left, right, SideEffects::None(), dex_pc) { } - bool IsCommutative() const OVERRIDE { return true; } + bool IsCommutative() const override { return true; } template <typename T> static T Compute(T x, T y) { return x ^ y; } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x 
ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5296,7 +5513,7 @@ class HXor FINAL : public HBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(Xor); }; -class HRor FINAL : public HBinaryOperation { +class HRor final : public HBinaryOperation { public: HRor(DataType::Type result_type, HInstruction* value, HInstruction* distance) : HBinaryOperation(kRor, result_type, value, distance) { @@ -5317,26 +5534,26 @@ class HRor FINAL : public HBinaryOperation { } } - HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE { + HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HLongConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5349,7 +5566,7 @@ class HRor FINAL : public HBinaryOperation { // The value of a parameter in this method. Its location depends on // the calling convention. -class HParameterValue FINAL : public HExpression<0> { +class HParameterValue final : public HExpression<0> { public: HParameterValue(const DexFile& dex_file, dex::TypeIndex type_index, @@ -5369,7 +5586,7 @@ class HParameterValue FINAL : public HExpression<0> { uint8_t GetIndex() const { return index_; } bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); } - bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } + bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } DECLARE_INSTRUCTION(ParameterValue); @@ -5379,7 +5596,7 @@ class HParameterValue FINAL : public HExpression<0> { private: // Whether or not the parameter value corresponds to 'this' argument. 
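The HShl/HShr/HUShr/HRor evaluations above all mask the shift distance with the maximum distance (31 for int, 63 for long), matching Java shift semantics, and HRor builds the rotation from two masked shifts. A standalone sketch, simplified from the Compute helpers shown above:

#include <cstdint>
#include <iostream>
#include <type_traits>

// Java-style shift: only the low 5 (int) or 6 (long) bits of the distance count.
template <typename T>
T Shl(T value, int32_t distance, int32_t max_shift_distance) {
  return value << (distance & max_shift_distance);
}

// Rotate right built from two masked shifts on the unsigned representation.
template <typename T>
T Ror(T value, int32_t distance, int32_t max_shift_distance) {
  using U = typename std::make_unsigned<T>::type;
  const int32_t d = distance & max_shift_distance;
  if (d == 0) {
    return value;
  }
  const U ux = static_cast<U>(value);
  const int32_t reg_bits = sizeof(T) * 8;
  return static_cast<T>((ux >> d) | (ux << (reg_bits - d)));
}

int main() {
  std::cout << Shl<int32_t>(1, 33, 31) << '\n';                    // 2: 33 & 31 == 1
  std::cout << std::hex << Ror<uint32_t>(0x1u, 4, 31) << '\n';     // 10000000
  return 0;
}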
- static constexpr size_t kFlagIsThis = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsThis = kNumberOfGenericPackedBits; static constexpr size_t kFlagCanBeNull = kFlagIsThis + 1; static constexpr size_t kNumberOfParameterValuePackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfParameterValuePackedBits <= kMaxNumberOfPackedBits, @@ -5392,30 +5609,30 @@ class HParameterValue FINAL : public HExpression<0> { const uint8_t index_; }; -class HNot FINAL : public HUnaryOperation { +class HNot final : public HUnaryOperation { public: HNot(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) : HUnaryOperation(kNot, result_type, input, dex_pc) { } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } template <typename T> static T Compute(T x) { return ~x; } - HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5426,14 +5643,14 @@ class HNot FINAL : public HUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(Not); }; -class HBooleanNot FINAL : public HUnaryOperation { +class HBooleanNot final : public HUnaryOperation { public: explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc) : HUnaryOperation(kBooleanNot, DataType::Type::kBool, input, dex_pc) { } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } @@ -5442,18 +5659,18 @@ class HBooleanNot FINAL : public HUnaryOperation { return !x; } - HConstant* Evaluate(HIntConstant* x) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for long values"; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE { + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ 
-5464,7 +5681,7 @@ class HBooleanNot FINAL : public HUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(BooleanNot); }; -class HTypeConversion FINAL : public HExpression<1> { +class HTypeConversion final : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) @@ -5478,10 +5695,15 @@ class HTypeConversion FINAL : public HExpression<1> { DataType::Type GetInputType() const { return GetInput()->GetType(); } DataType::Type GetResultType() const { return GetType(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } + // Return whether the conversion is implicit. This includes conversion to the same type. + bool IsImplicitConversion() const { + return DataType::IsTypeConversionImplicit(GetInputType(), GetResultType()); + } // Try to statically evaluate the conversion and return a HConstant // containing the result. If the input cannot be converted, return nullptr. @@ -5495,26 +5717,27 @@ class HTypeConversion FINAL : public HExpression<1> { static constexpr uint32_t kNoRegNumber = -1; -class HNullCheck FINAL : public HExpression<1> { +class HNullCheck final : public HExpression<1> { public: // `HNullCheck` can trigger GC, as it may call the `NullPointerException` - // constructor. + // constructor. However it can only do it on a fatal slow path so execution never returns to the + // instruction following the current one; thus 'SideEffects::None()' is used. 
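HTypeConversion gains an IsImplicitConversion() helper above, delegating to DataType::IsTypeConversionImplicit. The sketch below only illustrates the general idea with an integer range check; the Range/RangeOf helpers are hypothetical, and the real rule also covers floating-point types:

#include <cstdint>
#include <iostream>

struct Range { int64_t min; int64_t max; };

constexpr Range RangeOf(int bits, bool is_signed) {
  return is_signed
      ? Range{-(int64_t{1} << (bits - 1)), (int64_t{1} << (bits - 1)) - 1}
      : Range{0, (int64_t{1} << bits) - 1};
}

// Implicit when the result type's value range contains the whole input range,
// which also covers conversion to the same type.
constexpr bool IsImplicit(Range from, Range to) {
  return to.min <= from.min && from.max <= to.max;
}

int main() {
  std::cout << IsImplicit(RangeOf(8, true), RangeOf(32, true)) << '\n';    // 1: int8 -> int32
  std::cout << IsImplicit(RangeOf(32, true), RangeOf(16, true)) << '\n';   // 0: needs a conversion
  std::cout << IsImplicit(RangeOf(16, false), RangeOf(16, true)) << '\n';  // 0: uint16 -> int16
  return 0;
}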
HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(kNullCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNullCheck, value->GetType(), SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } DECLARE_INSTRUCTION(NullCheck); @@ -5559,7 +5782,7 @@ class FieldInfo : public ValueObject { const DexFile& dex_file_; }; -class HInstanceFieldGet FINAL : public HExpression<1> { +class HInstanceFieldGet final : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, ArtField* field, @@ -5584,19 +5807,19 @@ class HInstanceFieldGet FINAL : public HExpression<1> { SetRawInputAt(0, value); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return !IsVolatile(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { const HInstanceFieldGet* other_get = other->AsInstanceFieldGet(); return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } - size_t ComputeHashCode() const OVERRIDE { + size_t ComputeHashCode() const override { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } @@ -5621,7 +5844,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> { const FieldInfo field_info_; }; -class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { +class HInstanceFieldSet final : public HExpression<2> { public: HInstanceFieldSet(HInstruction* object, HInstruction* value, @@ -5633,9 +5856,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(kInstanceFieldSet, - SideEffects::FieldWriteOfType(field_type, is_volatile), - dex_pc), + : HExpression(kInstanceFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5648,9 +5871,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } @@ -5676,7 +5899,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { 
const FieldInfo field_info_; }; -class HArrayGet FINAL : public HExpression<2> { +class HArrayGet final : public HExpression<2> { public: HArrayGet(HInstruction* array, HInstruction* index, @@ -5687,7 +5910,7 @@ class HArrayGet FINAL : public HExpression<2> { type, SideEffects::ArrayReadOfType(type), dex_pc, - /* is_string_char_at */ false) { + /* is_string_char_at= */ false) { } HArrayGet(HInstruction* array, @@ -5702,12 +5925,12 @@ class HArrayGet FINAL : public HExpression<2> { SetRawInputAt(1, index); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { // TODO: We can be smarter here. // Currently, unless the array is the result of NewArray, the array access is always // preceded by some form of null NullCheck necessary for the bounds check, usually @@ -5761,13 +5984,13 @@ class HArrayGet FINAL : public HExpression<2> { // a particular HArrayGet is actually a String.charAt() by looking at the type // of the input but that requires holding the mutator lock, so we prefer to use // a flag, so that code generators don't need to do the locking. - static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1; static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); }; -class HArraySet FINAL : public HTemplateInstruction<3> { +class HArraySet final : public HExpression<3> { public: HArraySet(HInstruction* array, HInstruction* index, @@ -5789,7 +6012,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { DataType::Type expected_component_type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction(kArraySet, side_effects, dex_pc) { + : HExpression(kArraySet, side_effects, dex_pc) { SetPackedField<ExpectedComponentTypeField>(expected_component_type); SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); @@ -5799,17 +6022,17 @@ class HArraySet FINAL : public HTemplateInstruction<3> { SetRawInputAt(2, value); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { // We call a runtime method to throw ArrayStoreException. return NeedsTypeCheck(); } // Can throw ArrayStoreException. - bool CanThrow() const OVERRIDE { return NeedsTypeCheck(); } + bool CanThrow() const override { return NeedsTypeCheck(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { // TODO: Same as for ArrayGet. 
return false; } @@ -5886,7 +6109,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> { BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; }; -class HArrayLength FINAL : public HExpression<1> { +class HArrayLength final : public HExpression<1> { public: HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false) : HExpression(kArrayLength, DataType::Type::kInt32, SideEffects::None(), dex_pc) { @@ -5896,12 +6119,12 @@ class HArrayLength FINAL : public HExpression<1> { SetRawInputAt(0, array); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { + bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { return obj == InputAt(0); } @@ -5918,36 +6141,37 @@ class HArrayLength FINAL : public HExpression<1> { // determine whether a particular HArrayLength is actually a String.length() by // looking at the type of the input but that requires holding the mutator lock, so // we prefer to use a flag, so that code generators don't need to do the locking. - static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsStringLength = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1; static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); }; -class HBoundsCheck FINAL : public HExpression<2> { +class HBoundsCheck final : public HExpression<2> { public: // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` - // constructor. + // constructor. However it can only do it on a fatal slow path so execution never returns to the + // instruction following the current one; thus 'SideEffects::None()' is used. 
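(Editorial aside, not part of the patch.) Several hunks in this file move per-instruction flags such as kFlagIsStringLength and kFlagIsStringCharAt to start at kNumberOfGenericPackedBits and add a static_assert on the packed-bit budget. A minimal sketch of that packing pattern, with invented names and a plain uint32_t standing in for ART's BitField machinery:

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr size_t kNumberOfGenericPackedBits = 2;                    // Bits reserved by a "base class".
constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits;  // Next free bit.
constexpr size_t kNumberOfPackedBits = kFlagIsStringCharAt + 1;
constexpr size_t kMaxNumberOfPackedBits = 32;
static_assert(kNumberOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");

uint32_t SetFlag(uint32_t word, size_t bit, bool value) {
  return value ? (word | (1u << bit)) : (word & ~(1u << bit));
}
bool GetFlag(uint32_t word, size_t bit) { return (word >> bit) & 1u; }

int main() {
  uint32_t packed = 0;
  packed = SetFlag(packed, kFlagIsStringCharAt, true);
  assert(GetFlag(packed, kFlagIsStringCharAt));
  return 0;
}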
HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc, bool is_string_char_at = false) - : HExpression(kBoundsCheck, index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kBoundsCheck, index->GetType(), SideEffects::None(), dex_pc) { DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType())); SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, index); SetRawInputAt(1, length); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); } @@ -5959,19 +6183,22 @@ class HBoundsCheck FINAL : public HExpression<2> { DEFAULT_COPY_CONSTRUCTOR(BoundsCheck); private: - static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits; + static constexpr size_t kNumberOfBoundsCheckPackedBits = kFlagIsStringCharAt + 1; + static_assert(kNumberOfBoundsCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits, + "Too many packed fields."); }; -class HSuspendCheck FINAL : public HTemplateInstruction<0> { +class HSuspendCheck final : public HExpression<0> { public: explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), + : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) { } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { return true; } @@ -5991,13 +6218,13 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> { // Pseudo-instruction which provides the native debugger with mapping information. // It ensures that we can generate line number and local variables at this point. -class HNativeDebugInfo : public HTemplateInstruction<0> { +class HNativeDebugInfo : public HExpression<0> { public: explicit HNativeDebugInfo(uint32_t dex_pc) - : HTemplateInstruction<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { return true; } @@ -6010,7 +6237,7 @@ class HNativeDebugInfo : public HTemplateInstruction<0> { /** * Instruction to load a Class object. */ -class HLoadClass FINAL : public HInstruction { +class HLoadClass final : public HInstruction { public: // Determines how to load the Class. enum class LoadKind { @@ -6024,18 +6251,18 @@ class HLoadClass FINAL : public HInstruction { // Used for boot image classes referenced by boot image code. kBootImageLinkTimePcRelative, - // Use a known boot image Class* address, embedded in the code by the codegen. - // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. 
- kBootImageAddress, - - // Use a PC-relative load from a boot image ClassTable mmapped into the .bss - // of the oat file. - kBootImageClassTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image classes referenced by apps in AOT-compiled code. + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. - // Used for classes outside boot image when .bss is accessible with a PC-relative load. + // Used for classes outside boot image referenced by AOT-compiled app and boot image code. kBssEntry, + // Use a known boot image Class* address, embedded in the code by the codegen. + // Used for boot image classes referenced by apps in JIT-compiled code. + kJitBootImageAddress, + // Load from the root table associated with the JIT compiled method. kJitTableAddress, @@ -6053,12 +6280,14 @@ class HLoadClass FINAL : public HInstruction { bool is_referrers_class, uint32_t dex_pc, bool needs_access_check) - : HInstruction(kLoadClass, SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadClass, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), - klass_(klass), - loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) { + klass_(klass) { // Referrers class should not need access check. We never inline unverified // methods so we can't possibly end up in this situation. DCHECK(!is_referrers_class || !needs_access_check); @@ -6068,9 +6297,10 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); + SetPackedFlag<kFlagValidLoadedClassRTI>(false); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } void SetLoadKind(LoadKind load_kind); @@ -6078,15 +6308,21 @@ class HLoadClass FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } - bool InstructionDataEquals(const HInstruction* other) const; + bool CanBeMoved() const override { return true; } - size_t ComputeHashCode() const OVERRIDE { return type_index_.index_; } + bool InstructionDataEquals(const HInstruction* other) const override; - bool CanBeNull() const OVERRIDE { return false; } + size_t ComputeHashCode() const override { return type_index_.index_; } - bool NeedsEnvironment() const OVERRIDE { + bool CanBeNull() const override { return false; } + + bool NeedsEnvironment() const override { return CanCallRuntime(); } @@ -6104,31 +6340,34 @@ class HLoadClass FINAL : public HInstruction { GetLoadKind() == LoadKind::kBssEntry; } - bool CanThrow() const OVERRIDE { + bool CanThrow() const override { return NeedsAccessCheck() || MustGenerateClinitCheck() || // If the class is in the boot image, the lookup in the runtime call cannot throw. - // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and - // PIC and subsequently avoids a DCE behavior dependency on the PIC option. 
((GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry) && !IsInBootImage()); } ReferenceTypeInfo GetLoadedClassRTI() { - return loaded_class_rti_; + if (GetPackedFlag<kFlagValidLoadedClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact= */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - void SetLoadedClassRTI(ReferenceTypeInfo rti) { - // Make sure we only set exact types (the loaded class should never be merged). - DCHECK(rti.IsExact()); - loaded_class_rti_ = rti; + // Loaded class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidLoadedClassRTI>(true); } dex::TypeIndex GetTypeIndex() const { return type_index_; } const DexFile& GetDexFile() const { return dex_file_; } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { + bool NeedsDexCacheOfDeclaringClass() const override { return GetLoadKind() == LoadKind::kRuntimeCall; } @@ -6141,6 +6380,13 @@ class HLoadClass FINAL : public HInstruction { bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); } bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); } + bool MustResolveTypeOnSlowPath() const { + // Check that this instruction has a slow path. + DCHECK(GetLoadKind() != LoadKind::kRuntimeCall); // kRuntimeCall calls on main path. + DCHECK(GetLoadKind() == LoadKind::kBssEntry || MustGenerateClinitCheck()); + return GetLoadKind() == LoadKind::kBssEntry; + } + void MarkInBootImage() { SetPackedFlag<kFlagIsInBootImage>(true); } @@ -6148,15 +6394,11 @@ class HLoadClass FINAL : public HInstruction { void AddSpecialInput(HInstruction* special_input); using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>( &special_input_, (special_input_.GetInstruction() != nullptr) ? 
1u : 0u); } - DataType::Type GetType() const OVERRIDE { - return DataType::Type::kReference; - } - Handle<mirror::Class> GetClass() const { return klass_; } @@ -6175,14 +6417,14 @@ class HLoadClass FINAL : public HInstruction { static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1; static constexpr size_t kFieldLoadKindSize = MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast)); - static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize; + static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1; static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields."); using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>; static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageClassTable || load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kRuntimeCall; } @@ -6203,8 +6445,6 @@ class HLoadClass FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::Class> klass_; - - ReferenceTypeInfo loaded_class_rti_; }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); @@ -6227,15 +6467,15 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageClassTable || - GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry || + GetLoadKind() == LoadKind::kJitBootImageAddress) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); special_input->AddUseAt(this, 0); } -class HLoadString FINAL : public HInstruction { +class HLoadString final : public HInstruction { public: // Determines how to load the String. enum class LoadKind { @@ -6243,18 +6483,18 @@ class HLoadString FINAL : public HInstruction { // Used for boot image strings referenced by boot image code. kBootImageLinkTimePcRelative, - // Use a known boot image String* address, embedded in the code by the codegen. - // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. - kBootImageAddress, - - // Use a PC-relative load from a boot image InternTable mmapped into the .bss - // of the oat file. - kBootImageInternTable, + // Load from an entry in the .data.bimg.rel.ro using a PC-relative load. + // Used for boot image strings referenced by apps in AOT-compiled code. + kBootImageRelRo, // Load from an entry in the .bss section using a PC-relative load. - // Used for strings outside boot image when .bss is accessible with a PC-relative load. + // Used for strings outside boot image referenced by AOT-compiled app and boot image code. kBssEntry, + // Use a known boot image String* address, embedded in the code by the codegen. + // Used for boot image strings referenced by apps in JIT-compiled code. + kJitBootImageAddress, + // Load from the root table associated with the JIT compiled method. 
kJitTableAddress, @@ -6269,14 +6509,17 @@ class HLoadString FINAL : public HInstruction { dex::StringIndex string_index, const DexFile& dex_file, uint32_t dex_pc) - : HInstruction(kLoadString, SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadString, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } void SetLoadKind(LoadKind load_kind); @@ -6284,6 +6527,12 @@ class HLoadString FINAL : public HInstruction { return GetPackedField<LoadKindField>(); } + bool HasPcRelativeLoadKind() const { + return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry; + } + const DexFile& GetDexFile() const { return dex_file_; } @@ -6300,31 +6549,31 @@ class HLoadString FINAL : public HInstruction { string_ = str; } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE; + bool InstructionDataEquals(const HInstruction* other) const override; - size_t ComputeHashCode() const OVERRIDE { return string_index_.index_; } + size_t ComputeHashCode() const override { return string_index_.index_; } // Will call the runtime if we need to load the string through // the dex cache and the string is not guaranteed to be there yet. - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || - load_kind == LoadKind::kBootImageAddress || - load_kind == LoadKind::kBootImageInternTable || + load_kind == LoadKind::kBootImageRelRo || + load_kind == LoadKind::kJitBootImageAddress || load_kind == LoadKind::kJitTableAddress) { return false; } return true; } - bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { + bool NeedsDexCacheOfDeclaringClass() const override { return GetLoadKind() == LoadKind::kRuntimeCall; } - bool CanBeNull() const OVERRIDE { return false; } - bool CanThrow() const OVERRIDE { return NeedsEnvironment(); } + bool CanBeNull() const override { return false; } + bool CanThrow() const override { return NeedsEnvironment(); } static SideEffects SideEffectsForArchRuntimeCalls() { return SideEffects::CanTriggerGC(); @@ -6333,15 +6582,11 @@ class HLoadString FINAL : public HInstruction { void AddSpecialInput(HInstruction* special_input); using HInstruction::GetInputRecords; // Keep the const version visible. - ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL { + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { return ArrayRef<HUserRecord<HInstruction*>>( &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); } - DataType::Type GetType() const OVERRIDE { - return DataType::Type::kReference; - } - DECLARE_INSTRUCTION(LoadString); protected: @@ -6389,9 +6634,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. 
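(Editorial aside, not part of the patch.) The reworked LoadKind lists above replace kBootImageAddress, kBootImageClassTable and kBootImageInternTable with kBootImageRelRo and kJitBootImageAddress, and group the PC-relative kinds behind the new HasPcRelativeLoadKind() helper. A standalone sketch of that classification, using a local copy of the enum purely for illustration:

#include <iostream>

// Local, simplified copy of the load kinds named above; illustration only.
enum class LoadKind {
  kBootImageLinkTimePcRelative,
  kBootImageRelRo,
  kBssEntry,
  kJitBootImageAddress,
  kJitTableAddress,
  kRuntimeCall,
};

// Mirrors the intent of HasPcRelativeLoadKind(): the first three kinds are
// the ones the helper reports as PC-relative.
bool HasPcRelativeLoadKind(LoadKind kind) {
  return kind == LoadKind::kBootImageLinkTimePcRelative ||
         kind == LoadKind::kBootImageRelRo ||
         kind == LoadKind::kBssEntry;
}

int main() {
  std::cout << HasPcRelativeLoadKind(LoadKind::kBssEntry)         // 1
            << HasPcRelativeLoadKind(LoadKind::kJitTableAddress)  // 0
            << '\n';
}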
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBootImageAddress || - GetLoadKind() == LoadKind::kBootImageInternTable || - GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageRelRo || + GetLoadKind() == LoadKind::kBssEntry || + GetLoadKind() == LoadKind::kJitBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -6399,10 +6644,98 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { special_input->AddUseAt(this, 0); } +class HLoadMethodHandle final : public HInstruction { + public: + HLoadMethodHandle(HCurrentMethod* current_method, + uint16_t method_handle_idx, + const DexFile& dex_file, + uint32_t dex_pc) + : HInstruction(kLoadMethodHandle, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), + special_input_(HUserRecord<HInstruction*>(current_method)), + method_handle_idx_(method_handle_idx), + dex_file_(dex_file) { + } + + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { + return ArrayRef<HUserRecord<HInstruction*>>( + &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); + } + + bool IsClonable() const override { return true; } + + uint16_t GetMethodHandleIndex() const { return method_handle_idx_; } + + const DexFile& GetDexFile() const { return dex_file_; } + + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + + DECLARE_INSTRUCTION(LoadMethodHandle); + + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadMethodHandle); + + private: + // The special input is the HCurrentMethod for kRuntimeCall. + HUserRecord<HInstruction*> special_input_; + + const uint16_t method_handle_idx_; + const DexFile& dex_file_; +}; + +class HLoadMethodType final : public HInstruction { + public: + HLoadMethodType(HCurrentMethod* current_method, + dex::ProtoIndex proto_index, + const DexFile& dex_file, + uint32_t dex_pc) + : HInstruction(kLoadMethodType, + DataType::Type::kReference, + SideEffectsForArchRuntimeCalls(), + dex_pc), + special_input_(HUserRecord<HInstruction*>(current_method)), + proto_index_(proto_index), + dex_file_(dex_file) { + } + + using HInstruction::GetInputRecords; // Keep the const version visible. + ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final { + return ArrayRef<HUserRecord<HInstruction*>>( + &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); + } + + bool IsClonable() const override { return true; } + + dex::ProtoIndex GetProtoIndex() const { return proto_index_; } + + const DexFile& GetDexFile() const { return dex_file_; } + + static SideEffects SideEffectsForArchRuntimeCalls() { + return SideEffects::CanTriggerGC(); + } + + DECLARE_INSTRUCTION(LoadMethodType); + + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadMethodType); + + private: + // The special input is the HCurrentMethod for kRuntimeCall. + HUserRecord<HInstruction*> special_input_; + + const dex::ProtoIndex proto_index_; + const DexFile& dex_file_; +}; + /** * Performs an initialization check on its Class object input. 
*/ -class HClinitCheck FINAL : public HExpression<1> { +class HClinitCheck final : public HExpression<1> { public: HClinitCheck(HLoadClass* constant, uint32_t dex_pc) : HExpression( @@ -6412,19 +6745,18 @@ class HClinitCheck FINAL : public HExpression<1> { dex_pc) { SetRawInputAt(0, constant); } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + // TODO: Make ClinitCheck clonable. + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool NeedsEnvironment() const OVERRIDE { + bool NeedsEnvironment() const override { // May call runtime to initialize the class. return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } HLoadClass* GetLoadClass() const { DCHECK(InputAt(0)->IsLoadClass()); @@ -6438,7 +6770,7 @@ class HClinitCheck FINAL : public HExpression<1> { DEFAULT_COPY_CONSTRUCTOR(ClinitCheck); }; -class HStaticFieldGet FINAL : public HExpression<1> { +class HStaticFieldGet final : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, ArtField* field, @@ -6464,15 +6796,15 @@ class HStaticFieldGet FINAL : public HExpression<1> { } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return !IsVolatile(); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { const HStaticFieldGet* other_get = other->AsStaticFieldGet(); return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue(); } - size_t ComputeHashCode() const OVERRIDE { + size_t ComputeHashCode() const override { return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue(); } @@ -6497,7 +6829,7 @@ class HStaticFieldGet FINAL : public HExpression<1> { const FieldInfo field_info_; }; -class HStaticFieldSet FINAL : public HTemplateInstruction<2> { +class HStaticFieldSet final : public HExpression<2> { public: HStaticFieldSet(HInstruction* cls, HInstruction* value, @@ -6509,9 +6841,9 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(kStaticFieldSet, - SideEffects::FieldWriteOfType(field_type, is_volatile), - dex_pc), + : HExpression(kStaticFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6524,7 +6856,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } @@ -6548,7 +6880,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { const FieldInfo field_info_; }; -class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { +class HUnresolvedInstanceFieldGet final : public HExpression<1> { public: HUnresolvedInstanceFieldGet(HInstruction* obj, DataType::Type field_type, @@ -6562,9 +6894,9 @@ 
class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { SetRawInputAt(0, obj); } - bool IsClonable() const OVERRIDE { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetType(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6578,16 +6910,14 @@ class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { const uint32_t field_index_; }; -class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { +class HUnresolvedInstanceFieldSet final : public HExpression<2> { public: HUnresolvedInstanceFieldSet(HInstruction* obj, HInstruction* value, DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(kUnresolvedInstanceFieldSet, - SideEffects::AllExceptGCDependency(), - dex_pc), + : HExpression(kUnresolvedInstanceFieldSet, SideEffects::AllExceptGCDependency(), dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); @@ -6595,9 +6925,9 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } - bool IsClonable() const OVERRIDE { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6620,7 +6950,7 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { const uint32_t field_index_; }; -class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { +class HUnresolvedStaticFieldGet final : public HExpression<0> { public: HUnresolvedStaticFieldGet(DataType::Type field_type, uint32_t field_index, @@ -6632,9 +6962,9 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { field_index_(field_index) { } - bool IsClonable() const OVERRIDE { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetType(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6648,24 +6978,22 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { const uint32_t field_index_; }; -class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { +class HUnresolvedStaticFieldSet final : public HExpression<1> { public: HUnresolvedStaticFieldSet(HInstruction* value, DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(kUnresolvedStaticFieldSet, - SideEffects::AllExceptGCDependency(), - dex_pc), + : HExpression(kUnresolvedStaticFieldSet, SideEffects::AllExceptGCDependency(), dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); SetRawInputAt(0, value); } - bool IsClonable() const OVERRIDE { return true; } - bool 
NeedsEnvironment() const OVERRIDE { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { return true; } + bool CanThrow() const override { return true; } DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } uint32_t GetFieldIndex() const { return field_index_; } @@ -6689,13 +7017,13 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { }; // Implement the move-exception DEX instruction. -class HLoadException FINAL : public HExpression<0> { +class HLoadException final : public HExpression<0> { public: explicit HLoadException(uint32_t dex_pc = kNoDexPc) : HExpression(kLoadException, DataType::Type::kReference, SideEffects::None(), dex_pc) { } - bool CanBeNull() const OVERRIDE { return false; } + bool CanBeNull() const override { return false; } DECLARE_INSTRUCTION(LoadException); @@ -6705,10 +7033,10 @@ class HLoadException FINAL : public HExpression<0> { // Implicit part of move-exception which clears thread-local exception storage. // Must not be removed because the runtime expects the TLS to get cleared. -class HClearException FINAL : public HTemplateInstruction<0> { +class HClearException final : public HExpression<0> { public: explicit HClearException(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kClearException, SideEffects::AllWrites(), dex_pc) { + : HExpression(kClearException, SideEffects::AllWrites(), dex_pc) { } DECLARE_INSTRUCTION(ClearException); @@ -6717,20 +7045,20 @@ class HClearException FINAL : public HTemplateInstruction<0> { DEFAULT_COPY_CONSTRUCTOR(ClearException); }; -class HThrow FINAL : public HTemplateInstruction<1> { +class HThrow final : public HExpression<1> { public: HThrow(HInstruction* exception, uint32_t dex_pc) - : HTemplateInstruction(kThrow, SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kThrow, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, exception); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const override { return true; } - bool AlwaysThrows() const OVERRIDE { return true; } + bool AlwaysThrows() const override { return true; } DECLARE_INSTRUCTION(Throw); @@ -6750,75 +7078,165 @@ enum class TypeCheckKind { kInterfaceCheck, // No optimization yet when checking against an interface. kArrayObjectCheck, // Can just check if the array is not primitive. kArrayCheck, // No optimization yet when checking against a generic array. + kBitstringCheck, // Compare the type check bitstring. kLast = kArrayCheck }; std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); -class HInstanceOf FINAL : public HExpression<2> { +// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an +// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.) 
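(Editorial aside, not part of the patch.) The new TypeCheckKind::kBitstringCheck carries two extra constant inputs, a path-to-root and a mask (inputs 2 and 3 of HTypeCheckInstruction below). Conceptually, and only as a rough sketch rather than the code the backends emit, the check masks the candidate class's bitstring and compares the result with the target's path to root:

#include <cstdint>
#include <iostream>

// Rough sketch of a bitstring subtype test; the constants are invented.
bool MatchesBitstring(uint32_t candidate_bitstring, uint32_t path_to_root, uint32_t mask) {
  return (candidate_bitstring & mask) == path_to_root;
}

int main() {
  const uint32_t kTargetPathToRoot = 0x0012u;
  const uint32_t kTargetMask = 0x00FFu;
  std::cout << MatchesBitstring(0x3412u, kTargetPathToRoot, kTargetMask)  // 1: subtype.
            << MatchesBitstring(0x3413u, kTargetPathToRoot, kTargetMask)  // 0: not a subtype.
            << '\n';
}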
+class HTypeCheckInstruction : public HVariableInputSizeInstruction { public: - HInstanceOf(HInstruction* object, - HLoadClass* target_class, - TypeCheckKind check_kind, - uint32_t dex_pc) - : HExpression(kInstanceOf, - DataType::Type::kBool, - SideEffectsForArchRuntimeCalls(check_kind), - dex_pc) { + HTypeCheckInstruction(InstructionKind kind, + DataType::Type type, + HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask, + SideEffects side_effects) + : HVariableInputSizeInstruction( + kind, + type, + side_effects, + dex_pc, + allocator, + /* number_of_inputs= */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u, + kArenaAllocTypeCheckInputs), + klass_(klass) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); + SetPackedFlag<kFlagValidTargetClassRTI>(false); SetRawInputAt(0, object); - SetRawInputAt(1, target_class); + SetRawInputAt(1, target_class_or_null); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr); + DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr); + if (check_kind == TypeCheckKind::kBitstringCheck) { + DCHECK(target_class_or_null->IsNullConstant()); + SetRawInputAt(2, bitstring_path_to_root); + SetRawInputAt(3, bitstring_mask); + } else { + DCHECK(target_class_or_null->IsLoadClass()); + } } HLoadClass* GetTargetClass() const { + DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); HInstruction* load_class = InputAt(1); DCHECK(load_class->IsLoadClass()); return load_class->AsLoadClass(); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } + uint32_t GetBitstringPathToRoot() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* path_to_root = InputAt(2); + DCHECK(path_to_root->IsIntConstant()); + return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue()); + } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + uint32_t GetBitstringMask() const { + DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck); + HInstruction* mask = InputAt(3); + DCHECK(mask->IsIntConstant()); + return static_cast<uint32_t>(mask->AsIntConstant()->GetValue()); } - bool NeedsEnvironment() const OVERRIDE { - return CanCallRuntime(GetTypeCheckKind()); + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + + bool InstructionDataEquals(const HInstruction* other) const override { + DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName(); + return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields(); } - // Used only in code generation. bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } - static bool CanCallRuntime(TypeCheckKind check_kind) { - // Mips currently does runtime calls for any other checks. 
- return check_kind != TypeCheckKind::kExactCheck; + ReferenceTypeInfo GetTargetClassRTI() { + if (GetPackedFlag<kFlagValidTargetClassRTI>()) { + // Note: The is_exact flag from the return value should not be used. + return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact= */ true); + } else { + return ReferenceTypeInfo::CreateInvalid(); + } } - static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { - return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None(); + // Target class RTI is marked as valid by RTP if the klass_ is admissible. + void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(klass_ != nullptr); + SetPackedFlag<kFlagValidTargetClassRTI>(true); } - DECLARE_INSTRUCTION(InstanceOf); + Handle<mirror::Class> GetClass() const { + return klass_; + } protected: - DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction); private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; + static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeCheckKindSize = MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; + static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1; + static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1; static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; + + Handle<mirror::Class> klass_; }; -class HBoundType FINAL : public HExpression<1> { +class HInstanceOf final : public HTypeCheckInstruction { + public: + HInstanceOf(HInstruction* object, + HInstruction* target_class_or_null, + TypeCheckKind check_kind, + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kInstanceOf, + DataType::Type::kBool, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffectsForArchRuntimeCalls(check_kind)) {} + + bool IsClonable() const override { return true; } + + bool NeedsEnvironment() const override { + return CanCallRuntime(GetTypeCheckKind()); + } + + static bool CanCallRuntime(TypeCheckKind check_kind) { + // Mips currently does runtime calls for any other checks. + return check_kind != TypeCheckKind::kExactCheck; + } + + static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) { + return CanCallRuntime(check_kind) ? 
SideEffects::CanTriggerGC() : SideEffects::None(); + } + + DECLARE_INSTRUCTION(InstanceOf); + + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceOf); +}; + +class HBoundType final : public HExpression<1> { public: explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) : HExpression(kBoundType, DataType::Type::kReference, SideEffects::None(), dex_pc), @@ -6829,7 +7247,8 @@ class HBoundType FINAL : public HExpression<1> { SetRawInputAt(0, input); } - bool IsClonable() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other) const override; + bool IsClonable() const override { return true; } // {Get,Set}Upper* should only be used in reference type propagation. const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; } @@ -6841,7 +7260,7 @@ class HBoundType FINAL : public HExpression<1> { SetPackedFlag<kFlagCanBeNull>(can_be_null); } - bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } + bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); } DECLARE_INSTRUCTION(BoundType); @@ -6851,7 +7270,7 @@ class HBoundType FINAL : public HExpression<1> { private: // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this // is false then CanBeNull() cannot be true). - static constexpr size_t kFlagUpperCanBeNull = kNumberOfExpressionPackedBits; + static constexpr size_t kFlagUpperCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kFlagCanBeNull = kFlagUpperCanBeNull + 1; static constexpr size_t kNumberOfBoundTypePackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfBoundTypePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); @@ -6865,57 +7284,40 @@ class HBoundType FINAL : public HExpression<1> { ReferenceTypeInfo upper_bound_; }; -class HCheckCast FINAL : public HTemplateInstruction<2> { +class HCheckCast final : public HTypeCheckInstruction { public: HCheckCast(HInstruction* object, - HLoadClass* target_class, + HInstruction* target_class_or_null, TypeCheckKind check_kind, - uint32_t dex_pc) - : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { - SetPackedField<TypeCheckKindField>(check_kind); - SetPackedFlag<kFlagMustDoNullCheck>(true); - SetRawInputAt(0, object); - SetRawInputAt(1, target_class); - } - - HLoadClass* GetTargetClass() const { - HInstruction* load_class = InputAt(1); - DCHECK(load_class->IsLoadClass()); - return load_class->AsLoadClass(); - } - - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } - - bool NeedsEnvironment() const OVERRIDE { + Handle<mirror::Class> klass, + uint32_t dex_pc, + ArenaAllocator* allocator, + HIntConstant* bitstring_path_to_root, + HIntConstant* bitstring_mask) + : HTypeCheckInstruction(kCheckCast, + DataType::Type::kVoid, + object, + target_class_or_null, + check_kind, + klass, + dex_pc, + allocator, + bitstring_path_to_root, + bitstring_mask, + SideEffects::CanTriggerGC()) {} + + bool IsClonable() const override { return true; } + bool NeedsEnvironment() const override { // Instruction may throw a CheckCastError. 
return true; } - bool CanThrow() const OVERRIDE { return true; } - - bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); } - void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); } - TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); } - bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; } + bool CanThrow() const override { return true; } DECLARE_INSTRUCTION(CheckCast); protected: DEFAULT_COPY_CONSTRUCTOR(CheckCast); - - private: - static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeCheckKindSize = - MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast)); - static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize; - static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; - static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; }; /** @@ -6944,17 +7346,16 @@ enum MemBarrierKind { }; std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); -class HMemoryBarrier FINAL : public HTemplateInstruction<0> { +class HMemoryBarrier final : public HExpression<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction( - kMemoryBarrier, - SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. - dex_pc) { + : HExpression(kMemoryBarrier, + SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. + dex_pc) { SetPackedField<BarrierKindField>(barrier_kind); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } MemBarrierKind GetBarrierKind() { return GetPackedField<BarrierKindField>(); } @@ -7027,10 +7428,10 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { // } // // See also: -// * CompilerDriver::RequiresConstructorBarrier +// * DexCompilationUnit::RequiresConstructorBarrier // * QuasiAtomic::ThreadFenceForConstructor // -class HConstructorFence FINAL : public HVariableInputSizeInstruction { +class HConstructorFence final : public HVariableInputSizeInstruction { // A fence has variable inputs because the inputs can be removed // after prepare_for_register_allocation phase. // (TODO: In the future a fence could freeze multiple objects @@ -7073,7 +7474,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { SideEffects::AllReads(), dex_pc, allocator, - /* number_of_inputs */ 1, + /* number_of_inputs= */ 1, kArenaAllocConstructorFenceInputs) { DCHECK(fence_object != nullptr); SetRawInputAt(0, fence_object); @@ -7127,7 +7528,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { DEFAULT_COPY_CONSTRUCTOR(ConstructorFence); }; -class HMonitorOperation FINAL : public HTemplateInstruction<1> { +class HMonitorOperation final : public HExpression<1> { public: enum class OperationKind { kEnter, @@ -7136,18 +7537,17 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { }; HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) - : HTemplateInstruction( - kMonitorOperation, - SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. - dex_pc) { + : HExpression(kMonitorOperation, + SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. 
+ dex_pc) { SetPackedField<OperationKindField>(kind); SetRawInputAt(0, object); } // Instruction may go into runtime, so we need an environment. - bool NeedsEnvironment() const OVERRIDE { return true; } + bool NeedsEnvironment() const override { return true; } - bool CanThrow() const OVERRIDE { + bool CanThrow() const override { // Verifier guarantees that monitor-exit cannot throw. // This is important because it allows the HGraphBuilder to remove // a dead throw-catch loop generated for `synchronized` blocks/methods. @@ -7173,7 +7573,7 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { using OperationKindField = BitField<OperationKind, kFieldOperationKind, kFieldOperationKindSize>; }; -class HSelect FINAL : public HExpression<3> { +class HSelect final : public HExpression<3> { public: HSelect(HInstruction* condition, HInstruction* true_value, @@ -7191,17 +7591,17 @@ class HSelect FINAL : public HExpression<3> { SetRawInputAt(2, condition); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } HInstruction* GetFalseValue() const { return InputAt(0); } HInstruction* GetTrueValue() const { return InputAt(1); } HInstruction* GetCondition() const { return InputAt(2); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool CanBeNull() const OVERRIDE { + bool CanBeNull() const override { return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull(); } @@ -7289,10 +7689,10 @@ std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs); static constexpr size_t kDefaultNumberOfMoves = 4; -class HParallelMove FINAL : public HTemplateInstruction<0> { +class HParallelMove final : public HExpression<0> { public: explicit HParallelMove(ArenaAllocator* allocator, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(kParallelMove, SideEffects::None(), dex_pc), + : HExpression(kParallelMove, SideEffects::None(), dex_pc), moves_(allocator->Adapter(kArenaAllocMoveOperands)) { moves_.reserve(kDefaultNumberOfMoves); } @@ -7351,7 +7751,7 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { // never used across anything that can trigger GC. // The result of this instruction is not a pointer in the sense of `DataType::Type::kreference`. // So we represent it by the type `DataType::Type::kInt`. 
-class HIntermediateAddress FINAL : public HExpression<2> { +class HIntermediateAddress final : public HExpression<2> { public: HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) : HExpression(kIntermediateAddress, @@ -7365,12 +7765,12 @@ class HIntermediateAddress FINAL : public HExpression<2> { SetRawInputAt(1, offset); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool IsActualObject() const OVERRIDE { return false; } + bool IsActualObject() const override { return false; } HInstruction* GetBaseAddress() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } @@ -7392,7 +7792,7 @@ class HIntermediateAddress FINAL : public HExpression<2> { #ifdef ART_ENABLE_CODEGEN_mips #include "nodes_mips.h" #endif -#ifdef ART_ENABLE_CODEGEN_x86 +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "nodes_x86.h" #endif @@ -7443,7 +7843,7 @@ class HGraphDelegateVisitor : public HGraphVisitor { // Visit functions that delegate to to super class. #define DECLARE_VISIT_INSTRUCTION(name, super) \ - void Visit##name(H##name* instr) OVERRIDE { Visit##super(instr); } + void Visit##name(H##name* instr) override { Visit##super(instr); } FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) @@ -7465,7 +7865,7 @@ class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { explicit CloneAndReplaceInstructionVisitor(HGraph* graph) : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { if (instruction->IsClonable()) { ReplaceInstrOrPhiByClone(instruction); instr_replaced_by_clones_count_++; @@ -7584,8 +7984,30 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); } +// Implement HInstruction::Is##type() for concrete instructions. #define INSTRUCTION_TYPE_CHECK(type, super) \ - inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ + inline bool HInstruction::Is##type() const { return GetKind() == k##type; } + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK + +// Implement HInstruction::Is##type() for abstract instructions. +#define INSTRUCTION_TYPE_CHECK_RESULT(type, super) \ + std::is_base_of<BaseType, H##type>::value, +#define INSTRUCTION_TYPE_CHECK(type, super) \ + inline bool HInstruction::Is##type() const { \ + DCHECK_LT(GetKind(), kLastInstructionKind); \ + using BaseType = H##type; \ + static constexpr bool results[] = { \ + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK_RESULT) \ + }; \ + return results[static_cast<size_t>(GetKind())]; \ + } + + FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK +#undef INSTRUCTION_TYPE_CHECK_RESULT + +#define INSTRUCTION_TYPE_CAST(type, super) \ inline const H##type* HInstruction::As##type() const { \ return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ } \ @@ -7593,8 +8015,9 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { return Is##type() ? 
static_cast<H##type*>(this) : nullptr; \ } - FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) -#undef INSTRUCTION_TYPE_CHECK + FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) +#undef INSTRUCTION_TYPE_CAST + // Create space in `blocks` for adding `number_of_new_blocks` entries // starting at location `at`. Blocks after `at` are moved accordingly. diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index d0e0fef946..4993f5737e 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -30,7 +30,7 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { kNoDexPc) { } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); @@ -39,21 +39,21 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { }; // Mips version of HPackedSwitch that holds a pointer to the base method address. -class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { +class HMipsPackedSwitch final : public HExpression<2> { public: HMipsPackedSwitch(int32_t start_value, int32_t num_entries, HInstruction* input, HMipsComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(kMipsPackedSwitch, SideEffects::None(), dex_pc), + : HExpression(kMipsPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); SetRawInputAt(1, method_base); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } int32_t GetStartValue() const { return start_value_; } @@ -91,7 +91,7 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { // // Note: as the instruction doesn't involve base array address into computations it has no side // effects. 
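(Editorial aside, not part of the patch.) Back in the nodes.h hunk above, Is##type() for abstract instruction classes is now answered by indexing a compile-time table of std::is_base_of results with the concrete instruction kind. A small self-contained sketch of that lookup-table trick, with an invented hierarchy in place of the HInstruction classes:

#include <cstddef>
#include <iostream>
#include <type_traits>

struct Animal {};        // Stand-in for an abstract instruction class.
struct Cat : Animal {};  // Stand-ins for concrete kinds.
struct Dog : Animal {};
struct Rock {};

enum Kind { kCat, kDog, kRock };

// One bool per concrete kind, computed at compile time and indexed by kind.
template <typename AbstractType>
bool IsA(Kind kind) {
  static constexpr bool kResults[] = {
      std::is_base_of<AbstractType, Cat>::value,
      std::is_base_of<AbstractType, Dog>::value,
      std::is_base_of<AbstractType, Rock>::value,
  };
  return kResults[static_cast<size_t>(kind)];
}

int main() {
  std::cout << IsA<Animal>(kDog) << IsA<Animal>(kRock) << '\n';  // 10
}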
-class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { +class HIntermediateArrayAddressIndex final : public HExpression<2> { public: HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc) : HExpression(kIntermediateArrayAddressIndex, @@ -102,11 +102,11 @@ class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { SetRawInputAt(1, shift); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool IsActualObject() const OVERRIDE { return false; } + bool IsActualObject() const override { return false; } HInstruction* GetIndex() const { return InputAt(0); } HInstruction* GetShift() const { return InputAt(1); } diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 29358e1141..7dcac1787e 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -24,7 +24,7 @@ namespace art { -class HMultiplyAccumulate FINAL : public HExpression<3> { +class HMultiplyAccumulate final : public HExpression<3> { public: HMultiplyAccumulate(DataType::Type type, InstructionKind op, @@ -39,14 +39,14 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { SetRawInputAt(kInputMulRightIndex, mul_right); } - bool IsClonable() const OVERRIDE { return true; } + bool IsClonable() const override { return true; } static constexpr int kInputAccumulatorIndex = 0; static constexpr int kInputMulLeftIndex = 1; static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other) const override { return op_kind_ == other->AsMultiplyAccumulate()->op_kind_; } @@ -62,7 +62,7 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { const InstructionKind op_kind_; }; -class HBitwiseNegatedRight FINAL : public HBinaryOperation { +class HBitwiseNegatedRight final : public HBinaryOperation { public: HBitwiseNegatedRight(DataType::Type result_type, InstructionKind op, @@ -97,21 +97,21 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { } } - HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { return GetBlock()->GetGraph()->GetIntConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE { + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HFloatConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE { + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -145,7 +145,7 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { // // Note: as the instruction doesn't involve 
base array address into computations it has no side // effects (in comparison of HIntermediateAddress). -class HIntermediateAddressIndex FINAL : public HExpression<3> { +class HIntermediateAddressIndex final : public HExpression<3> { public: HIntermediateAddressIndex( HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) @@ -158,12 +158,12 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { SetRawInputAt(2, shift); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { return true; } - bool IsActualObject() const OVERRIDE { return false; } + bool IsActualObject() const override { return false; } HInstruction* GetIndex() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } @@ -175,7 +175,7 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { DEFAULT_COPY_CONSTRUCTOR(IntermediateAddressIndex); }; -class HDataProcWithShifterOp FINAL : public HExpression<2> { +class HDataProcWithShifterOp final : public HExpression<2> { public: enum OpKind { kLSL, // Logical shift left. @@ -212,9 +212,9 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { SetRawInputAt(1, right); } - bool IsClonable() const OVERRIDE { return true; } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE { + bool IsClonable() const override { return true; } + bool CanBeMoved() const override { return true; } + bool InstructionDataEquals(const HInstruction* other_instr) const override { const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp(); return instr_kind_ == other->instr_kind_ && op_kind_ == other->op_kind_ && diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 20f6cf01ed..efe4d6b000 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -79,13 +79,14 @@ class HVecOperation : public HVariableInputSizeInstruction { size_t vector_length, uint32_t dex_pc) : HVariableInputSizeInstruction(kind, + kSIMDType, side_effects, dex_pc, allocator, number_of_inputs, kArenaAllocVectorNode), vector_length_(vector_length) { - SetPackedField<TypeField>(packed_type); + SetPackedField<PackedTypeField>(packed_type); DCHECK_LT(1u, vector_length); } @@ -99,14 +100,9 @@ class HVecOperation : public HVariableInputSizeInstruction { return vector_length_ * DataType::Size(GetPackedType()); } - // Returns the type of the vector operation. - DataType::Type GetType() const OVERRIDE { - return kSIMDType; - } - // Returns the true component type packed in a vector. DataType::Type GetPackedType() const { - return GetPackedField<TypeField>(); + return GetPackedField<PackedTypeField>(); } // Assumes vector nodes cannot be moved by default. Each concrete implementation @@ -121,12 +117,12 @@ class HVecOperation : public HVariableInputSizeInstruction { // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be // altered to return true if the instruction might reside outside the SIMD loop body since SIMD // registers are not kept alive across vector loop boundaries (yet). 
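In the HVecOperation hunk above, the SIMD type is now handed to the base-class constructor, while the component type stays in the instruction's packed bit fields under the renamed PackedTypeField. A minimal standalone sketch of that storage scheme (the PackedField helper below is hypothetical, not ART's BitField):

#include <cassert>
#include <cstddef>
#include <cstdint>

enum class PackedType : uint32_t { kBool, kInt8, kInt16, kInt32, kInt64 };

// A (position, size) pair selects a bit range inside a 32-bit flags word.
template <typename T, size_t kPosition, size_t kSize>
struct PackedField {
  static constexpr uint32_t kMask = ((1u << kSize) - 1u) << kPosition;
  static uint32_t Set(uint32_t packed, T value) {
    return (packed & ~kMask) | ((static_cast<uint32_t>(value) << kPosition) & kMask);
  }
  static T Get(uint32_t packed) { return static_cast<T>((packed & kMask) >> kPosition); }
};

// Assume the generic instruction bits occupy positions [0, 8); the packed type follows.
using PackedTypeField = PackedField<PackedType, /* kPosition= */ 8, /* kSize= */ 3>;

int main() {
  uint32_t packed = 0;
  packed = PackedTypeField::Set(packed, PackedType::kInt16);
  assert(PackedTypeField::Get(packed) == PackedType::kInt16);
  return 0;
}

Renaming TypeField to PackedTypeField keeps the packed component type clearly distinct from the instruction's own type, which for most vector nodes is now the SIMD type set once in the constructor.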
- bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } // Tests if all data of a vector node (vector length and packed type) is equal. // Each concrete implementation that adds more fields should test equality of // those fields in its own method *and* call all super methods. - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecOperation()); const HVecOperation* o = other->AsVecOperation(); return GetVectorLength() == o->GetVectorLength() && GetPackedType() == o->GetPackedType(); @@ -185,12 +181,12 @@ class HVecOperation : public HVariableInputSizeInstruction { protected: // Additional packed bits. - static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; - static constexpr size_t kFieldTypeSize = + static constexpr size_t kFieldPackedType = HInstruction::kNumberOfGenericPackedBits; + static constexpr size_t kFieldPackedTypeSize = MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); - static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize; + static constexpr size_t kNumberOfVectorOpPackedBits = kFieldPackedType + kFieldPackedTypeSize; static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; + using PackedTypeField = BitField<DataType::Type, kFieldPackedType, kFieldPackedTypeSize>; DEFAULT_COPY_CONSTRUCTOR(VecOperation); @@ -211,7 +207,7 @@ class HVecUnaryOperation : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 1, + /* number_of_inputs= */ 1, vector_length, dex_pc) { SetRawInputAt(0, input); @@ -239,7 +235,7 @@ class HVecBinaryOperation : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 2, + /* number_of_inputs= */ 2, vector_length, dex_pc) { SetRawInputAt(0, left); @@ -284,7 +280,7 @@ class HVecMemoryOperation : public HVecOperation { HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecMemoryOperation()); const HVecMemoryOperation* o = other->AsVecMemoryOperation(); return HVecOperation::InstructionDataEquals(o) && GetAlignment() == o->GetAlignment(); @@ -319,7 +315,7 @@ inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type // Replicates the given scalar into a vector, // viz. replicate(x) = [ x, .. , x ]. -class HVecReplicateScalar FINAL : public HVecUnaryOperation { +class HVecReplicateScalar final : public HVecUnaryOperation { public: HVecReplicateScalar(ArenaAllocator* allocator, HInstruction* scalar, @@ -333,7 +329,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { // A replicate needs to stay in place, since SIMD registers are not // kept alive across vector loop boundaries (yet). - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecReplicateScalar); @@ -345,7 +341,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { // viz. extract[ x1, .. , xn ] = x_i. // // TODO: for now only i == 1 case supported. 
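The FINAL and OVERRIDE macros used throughout these headers are replaced by the plain C++11 specifiers. A minimal standalone example (not ART code) of what the compiler then checks:

#include <iostream>

class HInstructionLike {                      // stand-in for an HIR base class
 public:
  virtual ~HInstructionLike() = default;
  virtual bool CanBeMoved() const { return false; }
};

class HNodeLike final : public HInstructionLike {  // final: no further subclassing
 public:
  bool CanBeMoved() const override { return true; }  // override: signature is verified
  // bool CanBeMoved() override;  // would not compile: the missing const changes the signature
};

int main() {
  HNodeLike node;
  std::cout << node.CanBeMoved() << '\n';  // prints 1
}

With override, a signature mismatch such as a missing const becomes a compile error instead of silently introducing a new virtual function.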
-class HVecExtractScalar FINAL : public HVecUnaryOperation { +class HVecExtractScalar final : public HVecUnaryOperation { public: HVecExtractScalar(ArenaAllocator* allocator, HInstruction* input, @@ -358,16 +354,14 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation { DCHECK(HasConsistentPackedTypes(input, packed_type)); DCHECK_LT(index, vector_length); DCHECK_EQ(index, 0u); - } - - // Yields a single component in the vector. - DataType::Type GetType() const OVERRIDE { - return GetPackedType(); + // Yields a single component in the vector. + // Overrides the kSIMDType set by the VecOperation constructor. + SetPackedField<TypeField>(packed_type); } // An extract needs to stay in place, since SIMD registers are not // kept alive across vector loop boundaries (yet). - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecExtractScalar); @@ -378,7 +372,7 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation { // Reduces the given vector into the first element as sum/min/max, // viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi // and the "-" denotes "don't care" (implementation dependent). -class HVecReduce FINAL : public HVecUnaryOperation { +class HVecReduce final : public HVecUnaryOperation { public: enum ReductionKind { kSum = 1, @@ -390,21 +384,21 @@ class HVecReduce FINAL : public HVecUnaryOperation { HInstruction* input, DataType::Type packed_type, size_t vector_length, - ReductionKind kind, + ReductionKind reduction_kind, uint32_t dex_pc) : HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc), - kind_(kind) { + reduction_kind_(reduction_kind) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - ReductionKind GetKind() const { return kind_; } + ReductionKind GetReductionKind() const { return reduction_kind_; } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecReduce()); const HVecReduce* o = other->AsVecReduce(); - return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind(); + return HVecOperation::InstructionDataEquals(o) && GetReductionKind() == o->GetReductionKind(); } DECLARE_INSTRUCTION(VecReduce); @@ -413,12 +407,12 @@ class HVecReduce FINAL : public HVecUnaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecReduce); private: - const ReductionKind kind_; + const ReductionKind reduction_kind_; }; // Converts every component in the vector, // viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ]. -class HVecCnv FINAL : public HVecUnaryOperation { +class HVecCnv final : public HVecUnaryOperation { public: HVecCnv(ArenaAllocator* allocator, HInstruction* input, @@ -433,7 +427,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { DataType::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); } DataType::Type GetResultType() const { return GetPackedType(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecCnv); @@ -443,7 +437,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { // Negates every component in the vector, // viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ]. 
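The HVecReduce hunk above renames the accessor to GetReductionKind(); the reduction it models folds the whole vector into its first lane as a sum, minimum, or maximum. A scalar stand-in for that semantics (illustrative only, not the generated SIMD code):

#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

enum class ReductionKind { kSum, kMin, kMax };

template <size_t N>
std::array<int32_t, N> Reduce(std::array<int32_t, N> v, ReductionKind kind) {
  int32_t acc = v[0];
  for (size_t i = 1; i < N; ++i) {
    switch (kind) {
      case ReductionKind::kSum: acc += v[i]; break;
      case ReductionKind::kMin: acc = std::min(acc, v[i]); break;
      case ReductionKind::kMax: acc = std::max(acc, v[i]); break;
    }
  }
  v[0] = acc;  // lanes 1..N-1 are "don't care" (implementation dependent)
  return v;
}

int main() {
  std::array<int32_t, 4> v = {3, -1, 7, 2};
  std::cout << Reduce(v, ReductionKind::kSum)[0] << '\n';  // 11
  std::cout << Reduce(v, ReductionKind::kMin)[0] << '\n';  // -1
  std::cout << Reduce(v, ReductionKind::kMax)[0] << '\n';  // 7
}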
-class HVecNeg FINAL : public HVecUnaryOperation { +class HVecNeg final : public HVecUnaryOperation { public: HVecNeg(ArenaAllocator* allocator, HInstruction* input, @@ -454,7 +448,7 @@ class HVecNeg FINAL : public HVecUnaryOperation { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecNeg); @@ -465,7 +459,7 @@ class HVecNeg FINAL : public HVecUnaryOperation { // Takes absolute value of every component in the vector, // viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ] // for signed operand x. -class HVecAbs FINAL : public HVecUnaryOperation { +class HVecAbs final : public HVecUnaryOperation { public: HVecAbs(ArenaAllocator* allocator, HInstruction* input, @@ -476,7 +470,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAbs); @@ -487,7 +481,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { // Bitwise- or boolean-nots every component in the vector, // viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or // not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean. -class HVecNot FINAL : public HVecUnaryOperation { +class HVecNot final : public HVecUnaryOperation { public: HVecNot(ArenaAllocator* allocator, HInstruction* input, @@ -498,7 +492,7 @@ class HVecNot FINAL : public HVecUnaryOperation { DCHECK(input->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecNot); @@ -512,7 +506,7 @@ class HVecNot FINAL : public HVecUnaryOperation { // Adds every component in the two vectors, // viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ]. -class HVecAdd FINAL : public HVecBinaryOperation { +class HVecAdd final : public HVecBinaryOperation { public: HVecAdd(ArenaAllocator* allocator, HInstruction* left, @@ -525,7 +519,7 @@ class HVecAdd FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAdd); @@ -533,11 +527,36 @@ class HVecAdd FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecAdd); }; +// Adds every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 +_sat y1, .. , xn +_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationAdd final : public HVecBinaryOperation { + public: + HVecSaturationAdd(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationAdd, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecSaturationAdd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationAdd); +}; + // Performs halving add on every component in the two vectors, viz. // rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] // truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. 
, (xn + yn ) >> 1 ] // for either both signed or both unsigned operands x, y (reflected in packed_type). -class HVecHalvingAdd FINAL : public HVecBinaryOperation { +class HVecHalvingAdd final : public HVecBinaryOperation { public: HVecHalvingAdd(ArenaAllocator* allocator, HInstruction* left, @@ -555,9 +574,9 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecHalvingAdd()); const HVecHalvingAdd* o = other->AsVecHalvingAdd(); return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded(); @@ -577,7 +596,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { // Subtracts every component in the two vectors, // viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ]. -class HVecSub FINAL : public HVecBinaryOperation { +class HVecSub final : public HVecBinaryOperation { public: HVecSub(ArenaAllocator* allocator, HInstruction* left, @@ -590,7 +609,7 @@ class HVecSub FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecSub); @@ -598,9 +617,34 @@ class HVecSub FINAL : public HVecBinaryOperation { DEFAULT_COPY_CONSTRUCTOR(VecSub); }; +// Subtracts every component in the two vectors using saturation arithmetic, +// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 -_sat y1, .. , xn -_sat yn ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). +class HVecSaturationSub final : public HVecBinaryOperation { + public: + HVecSaturationSub(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecBinaryOperation( + kVecSaturationSub, allocator, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecSaturationSub); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSaturationSub); +}; + // Multiplies every component in the two vectors, // viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. -class HVecMul FINAL : public HVecBinaryOperation { +class HVecMul final : public HVecBinaryOperation { public: HVecMul(ArenaAllocator* allocator, HInstruction* left, @@ -613,7 +657,7 @@ class HVecMul FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecMul); @@ -623,7 +667,7 @@ class HVecMul FINAL : public HVecBinaryOperation { // Divides every component in the two vectors, // viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ]. 
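The new HVecSaturationAdd and HVecSaturationSub nodes above clamp each lane to the packed type's range instead of wrapping, and HVecHalvingAdd implements the (x + y [+ 1]) >> 1 formulas quoted in its comment. A scalar sketch of both semantics (stand-in code, not the generated SIMD):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <limits>

// Saturating add/sub: compute in a wider type, then clamp to T's range.
template <typename T>
T SaturatingAdd(T x, T y) {
  int64_t wide = static_cast<int64_t>(x) + static_cast<int64_t>(y);
  wide = std::min<int64_t>(wide, std::numeric_limits<T>::max());
  return static_cast<T>(std::max<int64_t>(wide, std::numeric_limits<T>::min()));
}

template <typename T>
T SaturatingSub(T x, T y) {
  int64_t wide = static_cast<int64_t>(x) - static_cast<int64_t>(y);
  wide = std::min<int64_t>(wide, std::numeric_limits<T>::max());
  return static_cast<T>(std::max<int64_t>(wide, std::numeric_limits<T>::min()));
}

// Halving add: rounded uses (x + y + 1) >> 1, truncated uses (x + y) >> 1,
// computed in a wider type so the intermediate sum cannot overflow.
uint8_t HalvingAddU8(uint8_t x, uint8_t y, bool rounded) {
  uint32_t sum = static_cast<uint32_t>(x) + static_cast<uint32_t>(y) + (rounded ? 1u : 0u);
  return static_cast<uint8_t>(sum >> 1);
}

int main() {
  std::cout << static_cast<int>(SaturatingAdd<int8_t>(120, 20)) << '\n';           // 127, not -116
  std::cout << static_cast<int>(SaturatingSub<uint8_t>(10, 20)) << '\n';           // 0, not 246
  std::cout << static_cast<int>(HalvingAddU8(5, 6, /* rounded= */ false)) << '\n'; // 5
  std::cout << static_cast<int>(HalvingAddU8(5, 6, /* rounded= */ true)) << '\n';  // 6
}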
-class HVecDiv FINAL : public HVecBinaryOperation { +class HVecDiv final : public HVecBinaryOperation { public: HVecDiv(ArenaAllocator* allocator, HInstruction* left, @@ -636,7 +680,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecDiv); @@ -647,7 +691,7 @@ class HVecDiv FINAL : public HVecBinaryOperation { // Takes minimum of every component in the two vectors, // viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ] // for either both signed or both unsigned operands x, y (reflected in packed_type). -class HVecMin FINAL : public HVecBinaryOperation { +class HVecMin final : public HVecBinaryOperation { public: HVecMin(ArenaAllocator* allocator, HInstruction* left, @@ -660,7 +704,7 @@ class HVecMin FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecMin); @@ -671,7 +715,7 @@ class HVecMin FINAL : public HVecBinaryOperation { // Takes maximum of every component in the two vectors, // viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ] // for either both signed or both unsigned operands x, y (reflected in packed_type). -class HVecMax FINAL : public HVecBinaryOperation { +class HVecMax final : public HVecBinaryOperation { public: HVecMax(ArenaAllocator* allocator, HInstruction* left, @@ -684,7 +728,7 @@ class HVecMax FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(right, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecMax); @@ -694,7 +738,7 @@ class HVecMax FINAL : public HVecBinaryOperation { // Bitwise-ands every component in the two vectors, // viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ]. -class HVecAnd FINAL : public HVecBinaryOperation { +class HVecAnd final : public HVecBinaryOperation { public: HVecAnd(ArenaAllocator* allocator, HInstruction* left, @@ -706,7 +750,7 @@ class HVecAnd FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAnd); @@ -716,7 +760,7 @@ class HVecAnd FINAL : public HVecBinaryOperation { // Bitwise-and-nots every component in the two vectors, // viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ]. -class HVecAndNot FINAL : public HVecBinaryOperation { +class HVecAndNot final : public HVecBinaryOperation { public: HVecAndNot(ArenaAllocator* allocator, HInstruction* left, @@ -729,7 +773,7 @@ class HVecAndNot FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecAndNot); @@ -739,7 +783,7 @@ class HVecAndNot FINAL : public HVecBinaryOperation { // Bitwise-ors every component in the two vectors, // viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ]. 
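HVecMin and HVecMax above note that signedness is "reflected in packed_type": the same lane bits compare differently depending on that choice. A scalar illustration:

#include <algorithm>
#include <cstdint>
#include <iostream>

int main() {
  uint8_t a = 0x01;
  uint8_t b = 0xff;  // 255 when treated as unsigned, -1 when treated as signed
  std::cout << static_cast<int>(std::min<uint8_t>(a, b)) << '\n';  // 1
  // Converting 0xff to int8_t yields -1 on two's-complement targets.
  std::cout << static_cast<int>(std::min<int8_t>(static_cast<int8_t>(a),
                                                 static_cast<int8_t>(b))) << '\n';  // -1
}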
-class HVecOr FINAL : public HVecBinaryOperation { +class HVecOr final : public HVecBinaryOperation { public: HVecOr(ArenaAllocator* allocator, HInstruction* left, @@ -751,7 +795,7 @@ class HVecOr FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecOr); @@ -761,7 +805,7 @@ class HVecOr FINAL : public HVecBinaryOperation { // Bitwise-xors every component in the two vectors, // viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ]. -class HVecXor FINAL : public HVecBinaryOperation { +class HVecXor final : public HVecBinaryOperation { public: HVecXor(ArenaAllocator* allocator, HInstruction* left, @@ -773,7 +817,7 @@ class HVecXor FINAL : public HVecBinaryOperation { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecXor); @@ -783,7 +827,7 @@ class HVecXor FINAL : public HVecBinaryOperation { // Logically shifts every component in the vector left by the given distance, // viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ]. -class HVecShl FINAL : public HVecBinaryOperation { +class HVecShl final : public HVecBinaryOperation { public: HVecShl(ArenaAllocator* allocator, HInstruction* left, @@ -795,7 +839,7 @@ class HVecShl FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(left, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecShl); @@ -805,7 +849,7 @@ class HVecShl FINAL : public HVecBinaryOperation { // Arithmetically shifts every component in the vector right by the given distance, // viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ]. -class HVecShr FINAL : public HVecBinaryOperation { +class HVecShr final : public HVecBinaryOperation { public: HVecShr(ArenaAllocator* allocator, HInstruction* left, @@ -817,7 +861,7 @@ class HVecShr FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(left, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecShr); @@ -827,7 +871,7 @@ class HVecShr FINAL : public HVecBinaryOperation { // Logically shifts every component in the vector right by the given distance, // viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ]. -class HVecUShr FINAL : public HVecBinaryOperation { +class HVecUShr final : public HVecBinaryOperation { public: HVecUShr(ArenaAllocator* allocator, HInstruction* left, @@ -839,7 +883,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { DCHECK(HasConsistentPackedTypes(left, packed_type)); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(VecUShr); @@ -854,7 +898,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { // Assigns the given scalar elements to a vector, // viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m, // set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n. 
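HVecShr and HVecUShr above distinguish arithmetic and logical right shifts, matching Java's >> and >>>. A scalar sketch of the difference:

#include <cstdint>
#include <iostream>

// Sign-propagating shift (Java >>). Right-shifting a negative signed value is
// arithmetic on common two's-complement implementations (guaranteed since C++20).
int32_t ArithmeticShr(int32_t x, int d) { return x >> d; }

// Zero-filling shift (Java >>>), emulated via an unsigned shift.
int32_t LogicalShr(int32_t x, int d) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) >> d);
}

int main() {
  std::cout << ArithmeticShr(-16, 2) << '\n';  // -4
  std::cout << LogicalShr(-16, 2) << '\n';     // 1073741820
}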
-class HVecSetScalars FINAL : public HVecOperation { +class HVecSetScalars final : public HVecOperation { public: HVecSetScalars(ArenaAllocator* allocator, HInstruction* scalars[], @@ -877,7 +921,7 @@ class HVecSetScalars FINAL : public HVecOperation { // Setting scalars needs to stay in place, since SIMD registers are not // kept alive across vector loop boundaries (yet). - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecSetScalars); @@ -887,7 +931,10 @@ class HVecSetScalars FINAL : public HVecOperation { // Multiplies every component in the two vectors, adds the result vector to the accumulator vector, // viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ]. -class HVecMultiplyAccumulate FINAL : public HVecOperation { +// For floating point types, Java rounding behavior must be preserved; the products are rounded to +// the proper precision before being added. "Fused" multiply-add operations available on several +// architectures are not usable since they would violate Java language rules. +class HVecMultiplyAccumulate final : public HVecOperation { public: HVecMultiplyAccumulate(ArenaAllocator* allocator, InstructionKind op, @@ -901,7 +948,7 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 3, + /* number_of_inputs= */ 3, vector_length, dex_pc), op_kind_(op) { @@ -909,14 +956,17 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); + // Remove the following if we add an architecture that supports floating point multiply-add + // with Java-compatible rounding. + DCHECK(DataType::IsIntegralType(packed_type)); SetRawInputAt(0, accumulator); SetRawInputAt(1, mul_left); SetRawInputAt(2, mul_right); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecMultiplyAccumulate()); const HVecMultiplyAccumulate* o = other->AsVecMultiplyAccumulate(); return HVecOperation::InstructionDataEquals(o) && GetOpKind() == o->GetOpKind(); @@ -939,7 +989,7 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { // viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) = // [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ], // for m <= n, non-overlapping sums, and signed operands x, y. -class HVecSADAccumulate FINAL : public HVecOperation { +class HVecSADAccumulate final : public HVecOperation { public: HVecSADAccumulate(ArenaAllocator* allocator, HInstruction* accumulator, @@ -952,7 +1002,7 @@ class HVecSADAccumulate FINAL : public HVecOperation { allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ 3, + /* number_of_inputs= */ 3, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); @@ -971,9 +1021,69 @@ class HVecSADAccumulate FINAL : public HVecOperation { DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate); }; +// Performs dot product of two vectors and adds the result to wider precision components in +// the accumulator. +// +// viz. DOT_PRODUCT([ a1, .. , am], [ x1, .. , xn ], [ y1, .. , yn ]) = +// [ a1 + sum(xi * yi), .. 
, am + sum(xj * yj) ], +// for m <= n, non-overlapping sums, +// for either both signed or both unsigned operands x, y. +// +// Notes: +// - packed type reflects the type of sum reduction, not the type of the operands. +// - IsZeroExtending() is used to determine the kind of signed/zero extension to be +// performed for the operands. +// +// TODO: Support types other than kInt32 for packed type. +class HVecDotProd final : public HVecOperation { + public: + HVecDotProd(ArenaAllocator* allocator, + HInstruction* accumulator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + bool is_zero_extending, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(kVecDotProd, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(DataType::IsIntegralType(packed_type)); + DCHECK(left->IsVecOperation()); + DCHECK(right->IsVecOperation()); + DCHECK_EQ(ToSignedType(left->AsVecOperation()->GetPackedType()), + ToSignedType(right->AsVecOperation()->GetPackedType())); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, left); + SetRawInputAt(2, right); + SetPackedFlag<kFieldHDotProdIsZeroExtending>(is_zero_extending); + } + + bool IsZeroExtending() const { return GetPackedFlag<kFieldHDotProdIsZeroExtending>(); } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecDotProd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecDotProd); + + private: + // Additional packed bits. + static constexpr size_t kFieldHDotProdIsZeroExtending = + HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfHDotProdPackedBits = kFieldHDotProdIsZeroExtending + 1; + static_assert(kNumberOfHDotProdPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. -class HVecLoad FINAL : public HVecMemoryOperation { +class HVecLoad final : public HVecMemoryOperation { public: HVecLoad(ArenaAllocator* allocator, HInstruction* base, @@ -987,7 +1097,7 @@ class HVecLoad FINAL : public HVecMemoryOperation { allocator, packed_type, side_effects, - /* number_of_inputs */ 2, + /* number_of_inputs= */ 2, vector_length, dex_pc) { SetRawInputAt(0, base); @@ -997,9 +1107,9 @@ class HVecLoad FINAL : public HVecMemoryOperation { bool IsStringCharAt() const { return GetPackedFlag<kFieldIsStringCharAt>(); } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + bool InstructionDataEquals(const HInstruction* other) const override { DCHECK(other->IsVecLoad()); const HVecLoad* o = other->AsVecLoad(); return HVecMemoryOperation::InstructionDataEquals(o) && IsStringCharAt() == o->IsStringCharAt(); @@ -1019,7 +1129,7 @@ class HVecLoad FINAL : public HVecMemoryOperation { // Stores a vector to memory, viz. store(m, 1, [x1, .. , xn] ) // sets mem(1) = x1, .. , mem(n) = xn. 
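Two semantic points from the hunks above: the new comment on HVecMultiplyAccumulate explains that "fused" multiply-add cannot be used for floating point because Java requires the product to be rounded before the addition, and the new HVecDotProd accumulates products of narrow lanes into wider accumulator lanes, with IsZeroExtending() selecting unsigned versus signed operands. A scalar, self-contained sketch of both (illustrative values and helper names, not ART code):

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Dot product accumulation: int8 lanes multiplied pairwise, sums added to an
// int32 accumulator; is_zero_extending decides unsigned vs. signed operands.
int32_t DotProdAccumulate(int32_t acc,
                          const std::vector<int8_t>& x,
                          const std::vector<int8_t>& y,
                          bool is_zero_extending) {
  for (size_t i = 0; i < x.size(); ++i) {
    int32_t xi = is_zero_extending ? static_cast<uint8_t>(x[i]) : x[i];
    int32_t yi = is_zero_extending ? static_cast<uint8_t>(y[i]) : y[i];
    acc += xi * yi;
  }
  return acc;
}

int main() {
  std::vector<int8_t> x = {-1, 2, 3, 4};
  std::vector<int8_t> y = { 5, 6, 7, 8};
  std::cout << DotProdAccumulate(0, x, y, /* is_zero_extending= */ false) << ' '  // 60
            << DotProdAccumulate(0, x, y, /* is_zero_extending= */ true) << '\n'; // 1340

  // Why fused multiply-add changes results: x2 * x2 is not exactly representable,
  // so Java semantics round the product first; std::fma rounds only once at the end.
  double x2 = 134217729.0;          // 2^27 + 1
  double a = -18014398777917440.0;  // -(2^54 + 2^28), the rounded value of x2 * x2
  double product = x2 * x2;         // rounded to a double here
  std::cout << product + a << ' ' << std::fma(x2, x2, a) << '\n';  // 0 1 with plain double arithmetic
}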
-class HVecStore FINAL : public HVecMemoryOperation { +class HVecStore final : public HVecMemoryOperation { public: HVecStore(ArenaAllocator* allocator, HInstruction* base, @@ -1033,7 +1143,7 @@ class HVecStore FINAL : public HVecMemoryOperation { allocator, packed_type, side_effects, - /* number_of_inputs */ 3, + /* number_of_inputs= */ 3, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(value, packed_type)); @@ -1043,7 +1153,7 @@ class HVecStore FINAL : public HVecMemoryOperation { } // A store needs to stay in place. - bool CanBeMoved() const OVERRIDE { return false; } + bool CanBeMoved() const override { return false; } DECLARE_INSTRUCTION(VecStore); diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index af13449646..b0a665d704 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -401,9 +401,9 @@ TEST_F(NodesVectorTest, VectorKindMattersOnReduce) { EXPECT_TRUE(v2->CanBeMoved()); EXPECT_TRUE(v3->CanBeMoved()); - EXPECT_EQ(HVecReduce::kSum, v1->GetKind()); - EXPECT_EQ(HVecReduce::kMin, v2->GetKind()); - EXPECT_EQ(HVecReduce::kMax, v3->GetKind()); + EXPECT_EQ(HVecReduce::kSum, v1->GetReductionKind()); + EXPECT_EQ(HVecReduce::kMin, v2->GetReductionKind()); + EXPECT_EQ(HVecReduce::kMax, v3->GetReductionKind()); EXPECT_TRUE(v1->Equals(v1)); EXPECT_TRUE(v2->Equals(v2)); diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 4c32be7d15..8e8fbc1581 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -20,7 +20,7 @@ namespace art { // Compute the address of the method for X86 Constant area support. -class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { +class HX86ComputeBaseMethodAddress final : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. HX86ComputeBaseMethodAddress() @@ -30,7 +30,7 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { kNoDexPc) { } - bool CanBeMoved() const OVERRIDE { return true; } + bool CanBeMoved() const override { return true; } DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress); @@ -39,7 +39,7 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { }; // Load a constant value from the constant table. -class HX86LoadFromConstantTable FINAL : public HExpression<2> { +class HX86LoadFromConstantTable final : public HExpression<2> { public: HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base, HConstant* constant) @@ -66,7 +66,7 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> { }; // Version of HNeg with access to the constant table for FP types. -class HX86FPNeg FINAL : public HExpression<2> { +class HX86FPNeg final : public HExpression<2> { public: HX86FPNeg(DataType::Type result_type, HInstruction* input, @@ -89,21 +89,21 @@ class HX86FPNeg FINAL : public HExpression<2> { }; // X86 version of HPackedSwitch that holds a pointer to the base method address. 
-class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { +class HX86PackedSwitch final : public HExpression<2> { public: HX86PackedSwitch(int32_t start_value, int32_t num_entries, HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(kX86PackedSwitch, SideEffects::None(), dex_pc), + : HExpression(kX86PackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); SetRawInputAt(1, method_base); } - bool IsControlFlow() const OVERRIDE { return true; } + bool IsControlFlow() const override { return true; } int32_t GetStartValue() const { return start_value_; } @@ -128,6 +128,92 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { const int32_t num_entries_; }; +class HX86AndNot final : public HBinaryOperation { + public: + HX86AndNot(DataType::Type result_type, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(kX86AndNot, result_type, left, right, SideEffects::None(), dex_pc) { + } + + bool IsCommutative() const override { return false; } + + template <typename T> static T Compute(T x, T y) { return ~x & y; } + + HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { + return GetBlock()->GetGraph()->GetIntConstant( + Compute(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { + return GetBlock()->GetGraph()->GetLongConstant( + Compute(x->GetValue(), y->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, + HFloatConstant* y ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << " is not defined for float values"; + UNREACHABLE(); + } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, + HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << " is not defined for double values"; + UNREACHABLE(); + } + + DECLARE_INSTRUCTION(X86AndNot); + + protected: + DEFAULT_COPY_CONSTRUCTOR(X86AndNot); +}; + +class HX86MaskOrResetLeastSetBit final : public HUnaryOperation { + public: + HX86MaskOrResetLeastSetBit(DataType::Type result_type, InstructionKind op, + HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kX86MaskOrResetLeastSetBit, result_type, input, dex_pc), + op_kind_(op) { + DCHECK_EQ(result_type, DataType::Kind(input->GetType())); + DCHECK(op == HInstruction::kAnd || op == HInstruction::kXor) << op; + } + template <typename T> + auto Compute(T x) const -> decltype(x & (x-1)) { + static_assert(std::is_same<decltype(x & (x-1)), decltype(x ^(x-1))>::value, + "Inconsistent bitwise types"); + switch (op_kind_) { + case HInstruction::kAnd: + return x & (x-1); + case HInstruction::kXor: + return x ^ (x-1); + default: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } + } + + HConstant* Evaluate(HIntConstant* x) const override { + return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HLongConstant* x) const override { + return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); + } + HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << "is not defined for float values"; + UNREACHABLE(); + } + HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + LOG(FATAL) << DebugName() << "is not defined for double values"; + UNREACHABLE(); + } + InstructionKind GetOpKind() const { return op_kind_; } + + 
DECLARE_INSTRUCTION(X86MaskOrResetLeastSetBit); + + protected: + const InstructionKind op_kind_; + + DEFAULT_COPY_CONSTRUCTOR(X86MaskOrResetLeastSetBit); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_X86_H_ diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 57db7a634c..8864a12301 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -28,10 +28,14 @@ #endif #ifdef ART_ENABLE_CODEGEN_x86 #include "pc_relative_fixups_x86.h" +#include "instruction_simplifier_x86.h" #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) #include "x86_memory_gen.h" #endif +#ifdef ART_ENABLE_CODEGEN_x86_64 +#include "instruction_simplifier_x86_64.h" +#endif #include "bounds_check_elimination.h" #include "cha_guard_optimization.h" @@ -40,6 +44,7 @@ #include "constructor_fence_redundancy_elimination.h" #include "dead_code_elimination.h" #include "dex/code_item_accessors-inl.h" +#include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "gvn.h" #include "induction_var_analysis.h" @@ -83,14 +88,10 @@ const char* OptimizationPassName(OptimizationPass pass) { return HDeadCodeElimination::kDeadCodeEliminationPassName; case OptimizationPass::kInliner: return HInliner::kInlinerPassName; - case OptimizationPass::kSharpening: - return HSharpening::kSharpeningPassName; case OptimizationPass::kSelectGenerator: return HSelectGenerator::kSelectGeneratorPassName; case OptimizationPass::kInstructionSimplifier: return InstructionSimplifier::kInstructionSimplifierPassName; - case OptimizationPass::kIntrinsicsRecognizer: - return IntrinsicsRecognizer::kIntrinsicsRecognizerPassName; case OptimizationPass::kCHAGuardOptimization: return CHAGuardOptimization::kCHAGuardOptimizationPassName; case OptimizationPass::kCodeSinking: @@ -116,17 +117,26 @@ const char* OptimizationPassName(OptimizationPass pass) { #ifdef ART_ENABLE_CODEGEN_x86 case OptimizationPass::kPcRelativeFixupsX86: return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName; + case OptimizationPass::kInstructionSimplifierX86: + return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName; +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + case OptimizationPass::kInstructionSimplifierX86_64: + return x86_64::InstructionSimplifierX86_64::kInstructionSimplifierX86_64PassName; #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) case OptimizationPass::kX86MemoryOperandGeneration: return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName; #endif + case OptimizationPass::kNone: + LOG(FATAL) << "kNone does not represent an actual pass"; + UNREACHABLE(); } } -#define X(x) if (name == OptimizationPassName((x))) return (x) +#define X(x) if (pass_name == OptimizationPassName((x))) return (x) -OptimizationPass OptimizationPassByName(const std::string& name) { +OptimizationPass OptimizationPassByName(const std::string& pass_name) { X(OptimizationPass::kBoundsCheckElimination); X(OptimizationPass::kCHAGuardOptimization); X(OptimizationPass::kCodeSinking); @@ -137,14 +147,12 @@ OptimizationPass OptimizationPassByName(const std::string& name) { X(OptimizationPass::kInductionVarAnalysis); X(OptimizationPass::kInliner); X(OptimizationPass::kInstructionSimplifier); - X(OptimizationPass::kIntrinsicsRecognizer); X(OptimizationPass::kInvariantCodeMotion); X(OptimizationPass::kLoadStoreAnalysis); X(OptimizationPass::kLoadStoreElimination); X(OptimizationPass::kLoopOptimization); 
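Back in the nodes_x86.h hunks above, the new HX86AndNot and HX86MaskOrResetLeastSetBit nodes encode classic bit tricks: ~x & y, x & (x - 1) (clear the lowest set bit) and x ^ (x - 1) (mask up to and including the lowest set bit), presumably so they can be lowered to BMI-style x86 instructions. A scalar sketch of the semantics (stand-in code):

#include <cstdint>
#include <iostream>

uint32_t AndNot(uint32_t x, uint32_t y) { return ~x & y; }              // cf. BMI1 andn
uint32_t ResetLowestSetBit(uint32_t x) { return x & (x - 1); }          // kAnd kind, cf. blsr
uint32_t MaskUpToLowestSetBit(uint32_t x) { return x ^ (x - 1); }       // kXor kind, cf. blsmsk

int main() {
  std::cout << std::hex
            << AndNot(0b1100u, 0b1010u) << ' '        // 2
            << ResetLowestSetBit(0xb0u) << ' '        // a0
            << MaskUpToLowestSetBit(0xb0u) << '\n';   // 1f
}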
X(OptimizationPass::kScheduling); X(OptimizationPass::kSelectGenerator); - X(OptimizationPass::kSharpening); X(OptimizationPass::kSideEffectsAnalysis); #ifdef ART_ENABLE_CODEGEN_arm X(OptimizationPass::kInstructionSimplifierArm); @@ -160,7 +168,7 @@ OptimizationPass OptimizationPassByName(const std::string& name) { X(OptimizationPass::kPcRelativeFixupsX86); X(OptimizationPass::kX86MemoryOperandGeneration); #endif - LOG(FATAL) << "Cannot find optimization " << name; + LOG(FATAL) << "Cannot find optimization " << pass_name; UNREACHABLE(); } @@ -173,7 +181,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( HGraph* graph, OptimizingCompilerStats* stats, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles) { ArenaVector<HOptimization*> optimizations(allocator->Adapter()); @@ -187,9 +194,9 @@ ArenaVector<HOptimization*> ConstructOptimizations( // Loop over the requested optimizations. for (size_t i = 0; i < length; i++) { - OptimizationPass pass = definitions[i].first; - const char* alt_name = definitions[i].second; - const char* name = alt_name != nullptr + OptimizationPass pass = definitions[i].pass; + const char* alt_name = definitions[i].pass_name; + const char* pass_name = alt_name != nullptr ? alt_name : OptimizationPassName(pass); HOptimization* opt = nullptr; @@ -199,47 +206,48 @@ ArenaVector<HOptimization*> ConstructOptimizations( // Analysis passes (kept in most recent for subsequent passes). // case OptimizationPass::kSideEffectsAnalysis: - opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, name); + opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name); break; case OptimizationPass::kInductionVarAnalysis: - opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, name); + opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name); break; case OptimizationPass::kLoadStoreAnalysis: - opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, name); + opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, pass_name); break; // // Passes that need prior analysis. 
// case OptimizationPass::kGlobalValueNumbering: CHECK(most_recent_side_effects != nullptr); - opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, name); + opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, pass_name); break; case OptimizationPass::kInvariantCodeMotion: CHECK(most_recent_side_effects != nullptr); - opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, name); + opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, pass_name); break; case OptimizationPass::kLoopOptimization: CHECK(most_recent_induction != nullptr); - opt = new (allocator) HLoopOptimization(graph, driver, most_recent_induction, stats, name); + opt = new (allocator) HLoopOptimization( + graph, &codegen->GetCompilerOptions(), most_recent_induction, stats, pass_name); break; case OptimizationPass::kBoundsCheckElimination: CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); opt = new (allocator) BoundsCheckElimination( - graph, *most_recent_side_effects, most_recent_induction, name); + graph, *most_recent_side_effects, most_recent_induction, pass_name); break; case OptimizationPass::kLoadStoreElimination: CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); opt = new (allocator) LoadStoreElimination( - graph, *most_recent_side_effects, *most_recent_lsa, stats, name); + graph, *most_recent_side_effects, *most_recent_lsa, stats, pass_name); break; // // Regular passes. // case OptimizationPass::kConstantFolding: - opt = new (allocator) HConstantFolding(graph, name); + opt = new (allocator) HConstantFolding(graph, pass_name); break; case OptimizationPass::kDeadCodeElimination: - opt = new (allocator) HDeadCodeElimination(graph, stats, name); + opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name); break; case OptimizationPass::kInliner: { CodeItemDataAccessor accessor(*dex_compilation_unit.GetDexFile(), @@ -249,40 +257,33 @@ ArenaVector<HOptimization*> ConstructOptimizations( codegen, dex_compilation_unit, // outer_compilation_unit dex_compilation_unit, // outermost_compilation_unit - driver, handles, stats, accessor.RegistersSize(), - /* total_number_of_instructions */ 0, - /* parent */ nullptr, - /* depth */ 0, - name); + /* total_number_of_instructions= */ 0, + /* parent= */ nullptr, + /* depth= */ 0, + pass_name); break; } - case OptimizationPass::kSharpening: - opt = new (allocator) HSharpening(graph, codegen, driver, name); - break; case OptimizationPass::kSelectGenerator: - opt = new (allocator) HSelectGenerator(graph, handles, stats, name); + opt = new (allocator) HSelectGenerator(graph, handles, stats, pass_name); break; case OptimizationPass::kInstructionSimplifier: - opt = new (allocator) InstructionSimplifier(graph, codegen, driver, stats, name); - break; - case OptimizationPass::kIntrinsicsRecognizer: - opt = new (allocator) IntrinsicsRecognizer(graph, stats, name); + opt = new (allocator) InstructionSimplifier(graph, codegen, stats, pass_name); break; case OptimizationPass::kCHAGuardOptimization: - opt = new (allocator) CHAGuardOptimization(graph, name); + opt = new (allocator) CHAGuardOptimization(graph, pass_name); break; case OptimizationPass::kCodeSinking: - opt = new (allocator) CodeSinking(graph, stats, name); + opt = new (allocator) CodeSinking(graph, stats, pass_name); break; case OptimizationPass::kConstructorFenceRedundancyElimination: - opt = new (allocator) ConstructorFenceRedundancyElimination(graph, stats, name); + opt = new (allocator) 
ConstructorFenceRedundancyElimination(graph, stats, pass_name); break; case OptimizationPass::kScheduling: opt = new (allocator) HInstructionScheduling( - graph, driver->GetInstructionSet(), codegen, name); + graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name); break; // // Arch-specific passes. @@ -318,12 +319,23 @@ ArenaVector<HOptimization*> ConstructOptimizations( DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; + case OptimizationPass::kInstructionSimplifierX86: + opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); + break; +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + case OptimizationPass::kInstructionSimplifierX86_64: + opt = new (allocator) x86_64::InstructionSimplifierX86_64(graph, codegen, stats); + break; #endif + case OptimizationPass::kNone: + LOG(FATAL) << "kNone does not represent an actual pass"; + UNREACHABLE(); } // switch // Add each next optimization to result vector. CHECK(opt != nullptr); - DCHECK_STREQ(name, opt->GetPassName()); // sanity + DCHECK_STREQ(pass_name, opt->GetPassName()); // sanity optimizations.push_back(opt); } diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index c170f155fa..b84e03894c 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -24,7 +24,6 @@ namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; /** @@ -47,8 +46,9 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { // 'instruction_simplifier$before_codegen'. const char* GetPassName() const { return pass_name_; } - // Perform the analysis itself. - virtual void Run() = 0; + // Perform the pass or analysis. Returns false if no optimizations occurred or no useful + // information was computed (this is best effort, returning true is always ok). + virtual bool Run() = 0; protected: HGraph* const graph_; @@ -76,14 +76,12 @@ enum class OptimizationPass { kInductionVarAnalysis, kInliner, kInstructionSimplifier, - kIntrinsicsRecognizer, kInvariantCodeMotion, kLoadStoreAnalysis, kLoadStoreElimination, kLoopOptimization, kScheduling, kSelectGenerator, - kSharpening, kSideEffectsAnalysis, #ifdef ART_ENABLE_CODEGEN_arm kInstructionSimplifierArm, @@ -97,25 +95,40 @@ enum class OptimizationPass { #endif #ifdef ART_ENABLE_CODEGEN_x86 kPcRelativeFixupsX86, + kInstructionSimplifierX86, +#endif +#ifdef ART_ENABLE_CODEGEN_x86_64 + kInstructionSimplifierX86_64, #endif #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) kX86MemoryOperandGeneration, #endif + kNone, + kLast = kNone }; // Lookup name of optimization pass. const char* OptimizationPassName(OptimizationPass pass); // Lookup optimization pass by name. -OptimizationPass OptimizationPassByName(const std::string& name); +OptimizationPass OptimizationPassByName(const std::string& pass_name); // Optimization definition consisting of an optimization pass -// and an optional alternative name (nullptr denotes default). -typedef std::pair<OptimizationPass, const char*> OptimizationDef; +// an optional alternative name (nullptr denotes default), and +// an optional pass dependence (kNone denotes no dependence). 
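In the optimization.h hunk above, HOptimization::Run() now returns a bool reporting (best effort) whether the pass changed anything, so later machinery can do less work when nothing changed. A hedged, self-contained sketch of that contract with made-up pass names (not ART code):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

class Pass {
 public:
  virtual ~Pass() = default;
  virtual bool Run() = 0;  // true if the pass changed anything (best effort)
};

class RemoveZeros final : public Pass {
 public:
  explicit RemoveZeros(std::vector<int>* data) : data_(data) {}
  bool Run() override {
    size_t before = data_->size();
    data_->erase(std::remove(data_->begin(), data_->end(), 0), data_->end());
    return data_->size() != before;  // report whether anything was removed
  }
 private:
  std::vector<int>* data_;
};

int main() {
  std::vector<int> data = {1, 0, 2, 0, 3};
  RemoveZeros pass(&data);
  bool first = pass.Run();   // true: two zeros removed
  bool second = pass.Run();  // false: nothing left to do
  std::cout << first << ' ' << second << '\n';  // 1 0
}

The PassScope and GraphChecker changes in optimizing_compiler.cc further down consume the same kind of "did anything change" signal.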
+struct OptimizationDef { + OptimizationDef(OptimizationPass p, const char* pn, OptimizationPass d) + : pass(p), pass_name(pn), depends_on(d) {} + OptimizationPass pass; + const char* pass_name; + OptimizationPass depends_on; +}; // Helper method for optimization definition array entries. -inline OptimizationDef OptDef(OptimizationPass pass, const char* name = nullptr) { - return std::make_pair(pass, name); +inline OptimizationDef OptDef(OptimizationPass pass, + const char* pass_name = nullptr, + OptimizationPass depends_on = OptimizationPass::kNone) { + return OptimizationDef(pass, pass_name, depends_on); } // Helper method to construct series of optimization passes. @@ -133,7 +146,6 @@ ArenaVector<HOptimization*> ConstructOptimizations( HGraph* graph, OptimizingCompilerStats* stats, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles); diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index d20b681b49..a52031cced 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -34,8 +34,6 @@ namespace vixl32 = vixl::aarch32; -using vixl32::r0; - namespace art { // Run the tests only on host. @@ -47,25 +45,20 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { static constexpr bool kGenerateExpected = false; OptimizingCFITest() - : pool_and_allocator_(), - opts_(), - isa_features_(), - graph_(nullptr), + : graph_(nullptr), code_gen_(), blocks_(GetAllocator()->Adapter()) {} - ArenaAllocator* GetAllocator() { return pool_and_allocator_.GetAllocator(); } - void SetUpFrame(InstructionSet isa) { + OverrideInstructionSetFeatures(isa, "default"); + // Ensure that slow-debug is off, so that there is no unexpected read-barrier check emitted. SetRuntimeDebugFlagsEnabled(false); // Setup simple context. - std::string error; - isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); graph_ = CreateGraph(); // Generate simple frame with some spills. - code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); + code_gen_ = CodeGenerator::Create(graph_, *compiler_options_); code_gen_->GetAssembler()->cfi().SetEnabled(true); code_gen_->InitializeCodeGenerationData(); const int frame_size = 64; @@ -105,15 +98,15 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const std::vector<uint8_t>& expected_cfi) { // Get the outputs. 
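The OptimizationDef change above turns the old pair into a struct that can also record which pass an entry depends on (kNone meaning none). A compilable restatement with an illustrative, made-up pipeline (the pass list itself is not taken from the source):

#include <iostream>

enum class OptimizationPass {
  kConstantFolding,
  kSideEffectsAnalysis,
  kGlobalValueNumbering,
  kNone,
};

struct OptimizationDef {
  OptimizationDef(OptimizationPass p, const char* pn, OptimizationPass d)
      : pass(p), pass_name(pn), depends_on(d) {}
  OptimizationPass pass;
  const char* pass_name;
  OptimizationPass depends_on;
};

inline OptimizationDef OptDef(OptimizationPass pass,
                              const char* pass_name = nullptr,
                              OptimizationPass depends_on = OptimizationPass::kNone) {
  return OptimizationDef(pass, pass_name, depends_on);
}

// GVN declares that it needs side-effects analysis to have run first.
const OptimizationDef kPipeline[] = {
    OptDef(OptimizationPass::kConstantFolding),
    OptDef(OptimizationPass::kSideEffectsAnalysis),
    OptDef(OptimizationPass::kGlobalValueNumbering,
           /* pass_name= */ nullptr,
           /* depends_on= */ OptimizationPass::kSideEffectsAnalysis),
};

int main() {
  for (const OptimizationDef& def : kPipeline) {
    std::cout << static_cast<int>(def.pass)
              << " depends_on=" << static_cast<int>(def.depends_on) << '\n';
  }
}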
- const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); Assembler* opt_asm = code_gen_->GetAssembler(); - const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data()); + ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); if (kGenerateExpected) { GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi); } else { - EXPECT_EQ(expected_asm, actual_asm); - EXPECT_EQ(expected_cfi, actual_cfi); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm); + EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi); } } @@ -135,12 +128,12 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { public: InternalCodeAllocator() {} - virtual uint8_t* Allocate(size_t size) { + uint8_t* Allocate(size_t size) override { memory_.resize(size); return memory_.data(); } - const std::vector<uint8_t>& GetMemory() { return memory_; } + ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } private: std::vector<uint8_t> memory_; @@ -148,9 +141,6 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; - ArenaPoolAndAllocator pool_and_allocator_; - CompilerOptions opts_; - std::unique_ptr<const InstructionSetFeatures> isa_features_; HGraph* graph_; std::unique_ptr<CodeGenerator> code_gen_; ArenaVector<HBasicBlock*> blocks_; @@ -202,6 +192,7 @@ TEST_ISA(kMips64) #ifdef ART_ENABLE_CODEGEN_arm TEST_F(OptimizingCFITest, kThumb2Adjust) { + using vixl32::r0; std::vector<uint8_t> expected_asm( expected_asm_kThumb2_adjust, expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust)); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e42dfc10ba..f4bf11d3d3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -26,11 +26,13 @@ #include "base/arena_allocator.h" #include "base/arena_containers.h" #include "base/dumpable.h" +#include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" #include "base/scoped_arena_allocator.h" #include "base/timing_logger.h" #include "builder.h" +#include "class_root.h" #include "code_generator.h" #include "compiled_method.h" #include "compiler.h" @@ -39,7 +41,7 @@ #include "dex/dex_file_types.h" #include "dex/verification_results.h" #include "dex/verified_method.h" -#include "driver/compiler_driver-inl.h" +#include "driver/compiled_method_storage.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "graph_checker.h" @@ -60,6 +62,7 @@ #include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" +#include "stack_map_stream.h" #include "utils/assembler.h" #include "verifier/verifier_compiler_binding.h" @@ -72,25 +75,21 @@ static constexpr const char* kPassNameSeparator = "$"; /** * Used by the code generator, to allocate the code in a vector. 
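The test hunk above compares code buffers through ArrayRef<const uint8_t> views instead of references to a concrete std::vector. A rough stand-in for the idea of such a non-owning (pointer, length) view (ArrayView below is hypothetical, not ART's ArrayRef):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

template <typename T>
class ArrayView {
 public:
  ArrayView(const T* data, size_t size) : data_(data), size_(size) {}
  explicit ArrayView(const std::vector<T>& v) : data_(v.data()), size_(v.size()) {}
  const T* data() const { return data_; }
  size_t size() const { return size_; }
  friend bool operator==(ArrayView a, ArrayView b) {
    return a.size_ == b.size_ && std::equal(a.data_, a.data_ + a.size_, b.data_);
  }
 private:
  const T* data_;
  size_t size_;
};

int main() {
  std::vector<uint8_t> expected = {0x90, 0xc3};
  std::vector<uint8_t> actual = {0x90, 0xc3};
  // Callers compare bytes without caring how the allocator stores them.
  std::cout << (ArrayView<uint8_t>(expected) == ArrayView<uint8_t>(actual)) << '\n';  // 1
}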
*/ -class CodeVectorAllocator FINAL : public CodeAllocator { +class CodeVectorAllocator final : public CodeAllocator { public: explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)), - size_(0) {} + : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} - virtual uint8_t* Allocate(size_t size) { - size_ = size; + uint8_t* Allocate(size_t size) override { memory_.resize(size); return &memory_[0]; } - size_t GetSize() const { return size_; } - const ArenaVector<uint8_t>& GetMemory() const { return memory_; } + ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } uint8_t* GetData() { return memory_.data(); } private: ArenaVector<uint8_t> memory_; - size_t size_; DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); }; @@ -108,21 +107,22 @@ class PassObserver : public ValueObject { PassObserver(HGraph* graph, CodeGenerator* codegen, std::ostream* visualizer_output, - CompilerDriver* compiler_driver, + const CompilerOptions& compiler_options, Mutex& dump_mutex) : graph_(graph), + last_seen_graph_size_(0), cached_method_name_(), - timing_logger_enabled_(compiler_driver->GetCompilerOptions().GetDumpTimings()), + timing_logger_enabled_(compiler_options.GetDumpPassTimings()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), disasm_info_(graph->GetAllocator()), visualizer_oss_(), visualizer_output_(visualizer_output), - visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), + visualizer_enabled_(!compiler_options.GetDumpCfgFileName().empty()), visualizer_(&visualizer_oss_, graph, *codegen), visualizer_dump_mutex_(dump_mutex), graph_in_bad_state_(false) { if (timing_logger_enabled_ || visualizer_enabled_) { - if (!IsVerboseMethod(compiler_driver, GetMethodName())) { + if (!IsVerboseMethod(compiler_options, GetMethodName())) { timing_logger_enabled_ = visualizer_enabled_ = false; } if (visualizer_enabled_) { @@ -162,7 +162,7 @@ class PassObserver : public ValueObject { VLOG(compiler) << "Starting pass: " << pass_name; // Dump graph first, then start timer. if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_); + visualizer_.DumpGraph(pass_name, /* is_after_pass= */ false, graph_in_bad_state_); FlushVisualizer(); } if (timing_logger_enabled_) { @@ -178,13 +178,13 @@ class PassObserver : public ValueObject { visualizer_oss_.clear(); } - void EndPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) { + void EndPass(const char* pass_name, bool pass_change) REQUIRES(!visualizer_dump_mutex_) { // Pause timer first, then dump graph. 
if (timing_logger_enabled_) { timing_logger_.EndTiming(); } if (visualizer_enabled_) { - visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_); + visualizer_.DumpGraph(pass_name, /* is_after_pass= */ true, graph_in_bad_state_); FlushVisualizer(); } @@ -192,7 +192,7 @@ class PassObserver : public ValueObject { if (kIsDebugBuild) { if (!graph_in_bad_state_) { GraphChecker checker(graph_); - checker.Run(); + last_seen_graph_size_ = checker.Run(pass_change, last_seen_graph_size_); if (!checker.IsValid()) { LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker); } @@ -200,11 +200,11 @@ class PassObserver : public ValueObject { } } - static bool IsVerboseMethod(CompilerDriver* compiler_driver, const char* method_name) { + static bool IsVerboseMethod(const CompilerOptions& compiler_options, const char* method_name) { // Test an exact match to --verbose-methods. If verbose-methods is set, this overrides an // empty kStringFilter matching all methods. - if (compiler_driver->GetCompilerOptions().HasVerboseMethods()) { - return compiler_driver->GetCompilerOptions().IsVerboseMethod(method_name); + if (compiler_options.HasVerboseMethods()) { + return compiler_options.IsVerboseMethod(method_name); } // Test the kStringFilter sub-string. constexpr helper variable to silence unreachable-code @@ -218,6 +218,7 @@ class PassObserver : public ValueObject { } HGraph* const graph_; + size_t last_seen_graph_size_; std::string cached_method_name_; @@ -245,60 +246,64 @@ class PassScope : public ValueObject { public: PassScope(const char *pass_name, PassObserver* pass_observer) : pass_name_(pass_name), + pass_change_(true), // assume change pass_observer_(pass_observer) { pass_observer_->StartPass(pass_name_); } + void SetPassNotChanged() { + pass_change_ = false; + } + ~PassScope() { - pass_observer_->EndPass(pass_name_); + pass_observer_->EndPass(pass_name_, pass_change_); } private: const char* const pass_name_; + bool pass_change_; PassObserver* const pass_observer_; }; -class OptimizingCompiler FINAL : public Compiler { +class OptimizingCompiler final : public Compiler { public: - explicit OptimizingCompiler(CompilerDriver* driver); - ~OptimizingCompiler() OVERRIDE; + explicit OptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage); + ~OptimizingCompiler() override; - bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE; + bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override; - CompiledMethod* Compile(const DexFile::CodeItem* code_item, + CompiledMethod* Compile(const dex::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const OVERRIDE; + Handle<mirror::DexCache> dex_cache) const override; CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const OVERRIDE; + Handle<mirror::DexCache> dex_cache) const override; - uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE + uintptr_t GetEntryPointOf(ArtMethod* method) const override REQUIRES_SHARED(Locks::mutator_lock_) { return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize( - InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet()))); + InstructionSetPointerSize(GetCompilerOptions().GetInstructionSet()))); } - void Init() 
OVERRIDE; - - void UnInit() const OVERRIDE; - bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, + bool baseline, bool osr, jit::JitLogger* jit_logger) - OVERRIDE + override REQUIRES_SHARED(Locks::mutator_lock_); private: - void RunOptimizations(HGraph* graph, + bool RunOptimizations(HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, @@ -313,25 +318,41 @@ class OptimizingCompiler FINAL : public Compiler { graph, compilation_stats_.get(), codegen, - GetCompilerDriver(), dex_compilation_unit, handles); DCHECK_EQ(length, optimizations.size()); - // Run the optimization passes one by one. + // Run the optimization passes one by one. Any "depends_on" pass refers back to + // the most recent occurrence of that pass, skipped or executed. + std::bitset<static_cast<size_t>(OptimizationPass::kLast) + 1u> pass_changes; + pass_changes[static_cast<size_t>(OptimizationPass::kNone)] = true; + bool change = false; for (size_t i = 0; i < length; ++i) { - PassScope scope(optimizations[i]->GetPassName(), pass_observer); - optimizations[i]->Run(); + if (pass_changes[static_cast<size_t>(definitions[i].depends_on)]) { + // Execute the pass and record whether it changed anything. + PassScope scope(optimizations[i]->GetPassName(), pass_observer); + bool pass_change = optimizations[i]->Run(); + pass_changes[static_cast<size_t>(definitions[i].pass)] = pass_change; + if (pass_change) { + change = true; + } else { + scope.SetPassNotChanged(); + } + } else { + // Skip the pass and record that nothing changed. + pass_changes[static_cast<size_t>(definitions[i].pass)] = false; + } } + return change; } - template <size_t length> void RunOptimizations( + template <size_t length> bool RunOptimizations( HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles, const OptimizationDef (&definitions)[length]) const { - RunOptimizations( + return RunOptimizations( graph, codegen, dex_compilation_unit, pass_observer, handles, definitions, length); } @@ -346,7 +367,7 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - const DexFile::CodeItem* item) const; + const dex::CodeItem* item) const; // Try compiling a method and return the code generator used for // compiling it. 
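RunOptimizations now reports whether each pass changed the graph and skips any pass whose declared prerequisite ("depends_on") made no change, recording the skip as "no change" so later passes see a consistent picture. A small self-contained sketch of that control flow (Pass, PassDef and RunPasses are hypothetical stand-ins for OptimizationPass, OptimizationDef and the method above):

    #include <bitset>
    #include <cstddef>
    #include <functional>
    #include <vector>

    enum class Pass : size_t { kNone, kInliner, kConstantFolding, kLast = kConstantFolding };

    struct PassDef {
      Pass pass;
      Pass depends_on;            // kNone means "always run"
      std::function<bool()> run;  // returns true if the pass changed the graph
    };

    bool RunPasses(const std::vector<PassDef>& defs) {
      std::bitset<static_cast<size_t>(Pass::kLast) + 1u> changes;
      changes[static_cast<size_t>(Pass::kNone)] = true;  // unconditional passes always run
      bool any_change = false;
      for (const PassDef& def : defs) {
        bool changed = false;
        if (changes[static_cast<size_t>(def.depends_on)]) {
          changed = def.run();  // execute only when the prerequisite reported a change
        }
        changes[static_cast<size_t>(def.pass)] = changed;  // a skipped pass counts as "no change"
        any_change |= changed;
      }
      return any_change;
    }

As in the real loop, "depends_on" refers back to the most recent occurrence of the prerequisite pass, whether it was executed or skipped.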
@@ -360,6 +381,7 @@ class OptimizingCompiler FINAL : public Compiler { CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, + bool baseline, bool osr, VariableSizedHandleScope* handles) const; @@ -370,19 +392,20 @@ class OptimizingCompiler FINAL : public Compiler { ArtMethod* method, VariableSizedHandleScope* handles) const; - void MaybeRunInliner(HGraph* graph, - CodeGenerator* codegen, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - VariableSizedHandleScope* handles) const; - - void RunArchOptimizations(HGraph* graph, + bool RunArchOptimizations(HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const; - void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) + bool RunBaselineOptimizations(HGraph* graph, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const; + + void GenerateJitDebugInfo(ArtMethod* method, + const debug::MethodDebugInfo& method_debug_info) REQUIRES_SHARED(Locks::mutator_lock_); std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -396,28 +419,22 @@ class OptimizingCompiler FINAL : public Compiler { static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */ -OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver) - : Compiler(driver, kMaximumCompilationTimeBeforeWarning), - dump_mutex_("Visualizer dump lock") {} - -void OptimizingCompiler::Init() { - // Enable C1visualizer output. Must be done in Init() because the compiler - // driver is not fully initialized when passed to the compiler's constructor. - CompilerDriver* driver = GetCompilerDriver(); - const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName(); +OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage) + : Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning), + dump_mutex_("Visualizer dump lock") { + // Enable C1visualizer output. + const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName(); if (!cfg_file_name.empty()) { std::ios_base::openmode cfg_file_mode = - driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; + compiler_options.GetDumpCfgAppend() ? 
std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); } - if (driver->GetCompilerOptions().GetDumpStats()) { + if (compiler_options.GetDumpStats()) { compilation_stats_.reset(new OptimizingCompilerStats()); } } -void OptimizingCompiler::UnInit() const { -} - OptimizingCompiler::~OptimizingCompiler() { if (compilation_stats_.get() != nullptr) { compilation_stats_->Log(); @@ -439,33 +456,54 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { || instruction_set == InstructionSet::kX86_64; } -void OptimizingCompiler::MaybeRunInliner(HGraph* graph, - CodeGenerator* codegen, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - VariableSizedHandleScope* handles) const { - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); - bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0); - if (!should_inline) { - return; +bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const { + switch (codegen->GetCompilerOptions().GetInstructionSet()) { +#ifdef ART_ENABLE_CODEGEN_mips + case InstructionSet::kMips: { + OptimizationDef mips_optimizations[] = { + OptDef(OptimizationPass::kPcRelativeFixupsMips) + }; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips_optimizations); + } +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + case InstructionSet::kX86: { + OptimizationDef x86_optimizations[] = { + OptDef(OptimizationPass::kPcRelativeFixupsX86), + }; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_optimizations); + } +#endif + default: + UNUSED(graph); + UNUSED(codegen); + UNUSED(dex_compilation_unit); + UNUSED(pass_observer); + UNUSED(handles); + return false; } - OptimizationDef optimizations[] = { - OptDef(OptimizationPass::kInliner) - }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - optimizations); } -void OptimizingCompiler::RunArchOptimizations(HGraph* graph, +bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, CodeGenerator* codegen, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - switch (GetCompilerDriver()->GetInstructionSet()) { + switch (codegen->GetCompilerOptions().GetInstructionSet()) { #if defined(ART_ENABLE_CODEGEN_arm) case InstructionSet::kThumb2: case InstructionSet::kArm: { @@ -475,13 +513,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kScheduling) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - arm_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + arm_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 @@ -492,13 +529,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kScheduling) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - arm64_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + arm64_optimizations); 
} #endif #ifdef ART_ENABLE_CODEGEN_mips @@ -509,13 +545,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsMips) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - mips_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 @@ -524,50 +559,49 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph, OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch") }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - mips64_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips64_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierX86), OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kPcRelativeFixupsX86), OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - x86_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_optimizations); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { OptimizationDef x86_64_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierX86_64), OptDef(OptimizationPass::kSideEffectsAnalysis), OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - x86_64_optimizations); - break; + return RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_64_optimizations); } #endif default: - break; + return false; } } @@ -580,7 +614,7 @@ static void AllocateRegisters(HGraph* graph, { PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName, pass_observer); - PrepareForRegisterAllocation(graph, stats).Run(); + PrepareForRegisterAllocation(graph, codegen->GetCompilerOptions(), stats).Run(); } // Use local allocator shared by SSA liveness analysis and register allocator. // (Register allocator creates new objects in the liveness data.) @@ -609,16 +643,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - const std::vector<std::string>* pass_names = - GetCompilerDriver()->GetCompilerOptions().GetPassesToRun(); + const std::vector<std::string>* pass_names = GetCompilerOptions().GetPassesToRun(); if (pass_names != nullptr) { // If passes were defined on command-line, build the optimization // passes and run these instead of the built-in optimizations. + // TODO: a way to define depends_on via command-line? 
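The command-line path below builds OptimizationDef entries from --run-passes names via ConvertPassNameToOptimizationName. Given kPassNameSeparator = "$" earlier in this file and instance names such as "dead_code_elimination$initial", the conversion presumably just drops the "$tag" suffix; a sketch of that assumption (ToOptimizationName is an illustrative name, not the real helper):

    #include <string>

    // Illustrative only: recover the optimization name from an instance name
    // like "instruction_simplifier$after_inlining" by stripping the '$' suffix.
    std::string ToOptimizationName(const std::string& pass_name) {
      const size_t pos = pass_name.find('$');
      return pos == std::string::npos ? pass_name : pass_name.substr(0, pos);
    }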
const size_t length = pass_names->size(); std::vector<OptimizationDef> optimizations; for (const std::string& pass_name : *pass_names) { std::string opt_name = ConvertPassNameToOptimizationName(pass_name); - optimizations.push_back(OptDef(OptimizationPassByName(opt_name.c_str()), pass_name.c_str())); + optimizations.push_back(OptDef(OptimizationPassByName(opt_name), pass_name.c_str())); } RunOptimizations(graph, codegen, @@ -630,49 +664,62 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, return; } - OptimizationDef optimizations1[] = { - OptDef(OptimizationPass::kIntrinsicsRecognizer), - OptDef(OptimizationPass::kSharpening), + OptimizationDef optimizations[] = { + // Initial optimizations. OptDef(OptimizationPass::kConstantFolding), OptDef(OptimizationPass::kInstructionSimplifier), - OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$initial") - }; - RunOptimizations(graph, - codegen, - dex_compilation_unit, - pass_observer, - handles, - optimizations1); - - MaybeRunInliner(graph, codegen, dex_compilation_unit, pass_observer, handles); - - OptimizationDef optimizations2[] = { - // SelectGenerator depends on the InstructionSimplifier removing - // redundant suspend checks to recognize empty blocks. - OptDef(OptimizationPass::kSelectGenerator), - // TODO: if we don't inline we can also skip fold2. - OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"), - OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"), - OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"), - OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$initial"), + // Inlining. + OptDef(OptimizationPass::kInliner), + // Simplification (only if inlining occurred). + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_inlining", + OptimizationPass::kInliner), + // GVN. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_gvn"), OptDef(OptimizationPass::kGlobalValueNumbering), + // Simplification (TODO: only if GVN occurred). + OptDef(OptimizationPass::kSelectGenerator), + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_gvn"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_gvn"), + // High-level optimizations. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_licm"), OptDef(OptimizationPass::kInvariantCodeMotion), OptDef(OptimizationPass::kInductionVarAnalysis), OptDef(OptimizationPass::kBoundsCheckElimination), OptDef(OptimizationPass::kLoopOptimization), - // Evaluates code generated by dynamic bce. - OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_bce"), - OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_bce"), - OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_lse"), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_bce"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_bce"), + // Other high-level optimizations. 
+ OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_lse"), OptDef(OptimizationPass::kLoadStoreAnalysis), OptDef(OptimizationPass::kLoadStoreElimination), OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$final"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$final"), OptDef(OptimizationPass::kCodeSinking), // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$before_codegen"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$before_codegen"), // Eliminate constructor fences after code sinking to avoid // complicated sinking logic to split a fence with many inputs. OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) @@ -682,7 +729,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, dex_compilation_unit, pass_observer, handles, - optimizations2); + optimizations); RunArchOptimizations(graph, codegen, dex_compilation_unit, pass_observer, handles); } @@ -703,34 +750,28 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - const DexFile::CodeItem* code_item_for_osr_check) const { + const dex::CodeItem* code_item_for_osr_check) const { ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - ArenaVector<uint8_t> stack_map(allocator->Adapter(kArenaAllocStackMaps)); - ArenaVector<uint8_t> method_info(allocator->Adapter(kArenaAllocStackMaps)); - size_t stack_map_size = 0; - size_t method_info_size = 0; - codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size); - stack_map.resize(stack_map_size); - method_info.resize(method_info_size); - codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), - MemoryRegion(method_info.data(), method_info.size()), - code_item_for_osr_check); + ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check); + CompiledMethodStorage* storage = GetCompiledMethodStorage(); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - GetCompilerDriver(), + storage, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(code_allocator->GetMemory()), - // Follow Quick's behavior and set the frame size to zero if it is - // considered "empty" (see the definition of - // art::CodeGenerator::HasEmptyFrame). - codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), - ArrayRef<const uint8_t>(method_info), + code_allocator->GetMemory(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches)); + for (const linker::LinkerPatch& patch : linker_patches) { + if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) { + ArenaVector<uint8_t> code(allocator->Adapter()); + std::string debug_name; + codegen->EmitThunkCode(patch, &code, &debug_name); + storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name); + } + } + return compiled_method; } @@ -739,14 +780,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, + bool baseline, bool osr, VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation); - CompilerDriver* compiler_driver = GetCompilerDriver(); - InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const CompilerOptions& compiler_options = GetCompilerOptions(); + InstructionSet instruction_set = compiler_options.GetInstructionSet(); const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); - const DexFile::CodeItem* code_item = dex_compilation_unit.GetCodeItem(); + const dex::CodeItem* code_item = dex_compilation_unit.GetCodeItem(); // Always use the Thumb-2 assembler: some runtime functionality // (like implicit stack overflow checks) assume Thumb-2. @@ -767,7 +809,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, // Implementation of the space filter: do not compile a code item whose size in // code units is bigger than 128. static constexpr size_t kSpaceFilterOptimizingThreshold = 128; - const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace) && (CodeItemInstructionAccessor(dex_file, code_item).InsnsSizeInCodeUnits() > kSpaceFilterOptimizingThreshold)) { @@ -776,43 +817,58 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, } CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx); + + bool dead_reference_safe; + ArrayRef<const uint8_t> interpreter_metadata; + // For AOT compilation, we may not get a method, for example if its class is erroneous, + // possibly due to an unavailable superclass. JIT should always have a method. + DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); + if (method != nullptr) { + const dex::ClassDef* containing_class; + { + ScopedObjectAccess soa(Thread::Current()); + containing_class = &method->GetClassDef(); + interpreter_metadata = method->GetQuickenedInfo(); + } + // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation() + // is currently rarely true. + dead_reference_safe = + annotations::HasDeadReferenceSafeAnnotation(dex_file, *containing_class) + && !annotations::MethodContainsRSensitiveAccess(dex_file, *containing_class, method_idx); + } else { + // If we could not resolve the class, conservatively assume it's dead-reference unsafe. 
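Emit() now walks the linker patches after building the CompiledMethod and, for any patch that needs a thunk whose code is not yet in CompiledMethodStorage, emits the thunk once and stores it for reuse. The generic emit-once caching shape, with hypothetical PatchKey/ThunkCache types (the real key is a linker::LinkerPatch and the cache is CompiledMethodStorage):

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <tuple>
    #include <vector>

    struct PatchKey {
      int type;
      uint32_t custom_value;
      bool operator<(const PatchKey& other) const {
        return std::tie(type, custom_value) < std::tie(other.type, other.custom_value);
      }
    };

    class ThunkCache {
     public:
      // Returns the cached thunk for 'key', invoking 'emit' at most once per key.
      const std::vector<uint8_t>& GetOrEmit(const PatchKey& key,
                                            const std::function<std::vector<uint8_t>()>& emit) {
        auto it = thunks_.find(key);
        if (it == thunks_.end()) {
          it = thunks_.emplace(key, emit()).first;
        }
        return it->second;
      }
     private:
      std::map<PatchKey, std::vector<uint8_t>> thunks_;
    };

Deduplicating by key means methods that need the same thunk share a single copy of its code instead of each compiled method carrying its own.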
+ dead_reference_safe = false; + } + HGraph* graph = new (allocator) HGraph( allocator, arena_stack, dex_file, method_idx, - compiler_driver->GetInstructionSet(), + compiler_options.GetInstructionSet(), kInvalidInvokeType, - compiler_driver->GetCompilerOptions().GetDebuggable(), - osr); + dead_reference_safe, + compiler_options.GetDebuggable(), + /* osr= */ osr); - ArrayRef<const uint8_t> interpreter_metadata; - // For AOT compilation, we may not get a method, for example if its class is erroneous. - // JIT should always have a method. - DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); if (method != nullptr) { graph->SetArtMethod(method); - ScopedObjectAccess soa(Thread::Current()); - interpreter_metadata = method->GetQuickenedInfo(); } std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, - instruction_set, - *compiler_driver->GetInstructionSetFeatures(), - compiler_driver->GetCompilerOptions(), + compiler_options, compilation_stats_.get())); if (codegen.get() == nullptr) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } - codegen->GetAssembler()->cfi().SetEnabled( - compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo()); + codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); PassObserver pass_observer(graph, codegen.get(), visualizer_output_.get(), - compiler_driver, + compiler_options, dump_mutex_); { @@ -822,7 +878,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, code_item_accessor, &dex_compilation_unit, &dex_compilation_unit, - compiler_driver, codegen.get(), compilation_stats_.get(), interpreter_metadata, @@ -833,23 +888,28 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, case kAnalysisSkipped: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSkipped); - } break; + } case kAnalysisInvalidBytecode: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledInvalidBytecode); - } break; + } case kAnalysisFailThrowCatchLoop: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledThrowCatchLoop); - } break; + } case kAnalysisFailAmbiguousArrayOp: { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + break; } + case kAnalysisFailIrreducibleLoopAndStringInit: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledIrreducibleLoopAndStringInit); break; + } case kAnalysisSuccess: UNREACHABLE(); } @@ -858,11 +918,11 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, } } - RunOptimizations(graph, - codegen.get(), - dex_compilation_unit, - &pass_observer, - handles); + if (baseline) { + RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles); + } else { + RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles); + } RegisterAllocator::Strategy regalloc_strategy = compiler_options.GetRegisterAllocationStrategy(); @@ -887,8 +947,8 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( ArtMethod* method, VariableSizedHandleScope* handles) const { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptIntrinsicCompilation); - CompilerDriver* compiler_driver = GetCompilerDriver(); - InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const CompilerOptions& compiler_options = GetCompilerOptions(); + InstructionSet instruction_set = 
compiler_options.GetInstructionSet(); const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); @@ -906,10 +966,11 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( arena_stack, dex_file, method_idx, - compiler_driver->GetInstructionSet(), + compiler_options.GetInstructionSet(), kInvalidInvokeType, - compiler_driver->GetCompilerOptions().GetDebuggable(), - /* osr */ false); + /* dead_reference_safe= */ true, // Intrinsics don't affect dead reference safety. + compiler_options.GetDebuggable(), + /* osr= */ false); DCHECK(Runtime::Current()->IsAotCompiler()); DCHECK(method != nullptr); @@ -917,20 +978,17 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, - instruction_set, - *compiler_driver->GetInstructionSetFeatures(), - compiler_driver->GetCompilerOptions(), + compiler_options, compilation_stats_.get())); if (codegen.get() == nullptr) { return nullptr; } - codegen->GetAssembler()->cfi().SetEnabled( - compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo()); + codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo()); PassObserver pass_observer(graph, codegen.get(), visualizer_output_.get(), - compiler_driver, + compiler_options, dump_mutex_); { @@ -940,18 +998,16 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( CodeItemDebugInfoAccessor(), // Null code item. &dex_compilation_unit, &dex_compilation_unit, - compiler_driver, codegen.get(), compilation_stats_.get(), - /* interpreter_metadata */ ArrayRef<const uint8_t>(), + /* interpreter_metadata= */ ArrayRef<const uint8_t>(), handles); builder.BuildIntrinsicGraph(method); } OptimizationDef optimizations[] = { - OptDef(OptimizationPass::kIntrinsicsRecognizer), - // Some intrinsics are converted to HIR by the simplifier and the codegen also - // has a few assumptions that only the instruction simplifier can satisfy. + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. 
OptDef(OptimizationPass::kInstructionSimplifier), }; RunOptimizations(graph, @@ -966,7 +1022,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( AllocateRegisters(graph, codegen.get(), &pass_observer, - compiler_driver->GetCompilerOptions().GetRegisterAllocationStrategy(), + compiler_options.GetRegisterAllocationStrategy(), compilation_stats_.get()); if (!codegen->IsLeafMethod()) { VLOG(compiler) << "Intrinsic method is not leaf: " << method->GetIntrinsic() @@ -983,7 +1039,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( return codegen.release(); } -CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, +CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, uint32_t access_flags, InvokeType invoke_type, uint16_t class_def_idx, @@ -991,13 +1047,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, Handle<mirror::ClassLoader> jclass_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - CompilerDriver* compiler_driver = GetCompilerDriver(); + const CompilerOptions& compiler_options = GetCompilerOptions(); CompiledMethod* compiled_method = nullptr; Runtime* runtime = Runtime::Current(); DCHECK(runtime->IsAotCompiler()); - const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); + const VerifiedMethod* verified_method = compiler_options.GetVerifiedMethod(&dex_file, method_idx); DCHECK(!verified_method->HasRuntimeThrow()); - if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) || + if (compiler_options.IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) || verifier::CanCompilerHandleVerificationFailure( verified_method->GetEncounteredVerificationFailures())) { ArenaAllocator allocator(runtime->GetArenaPool()); @@ -1006,6 +1062,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, std::unique_ptr<CodeGenerator> codegen; bool compiled_intrinsic = false; { + ScopedObjectAccess soa(Thread::Current()); + ArtMethod* method = + runtime->GetClassLinker()->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( + method_idx, dex_cache, jclass_loader, /*referrer=*/ nullptr, invoke_type); + DCHECK_EQ(method == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Suppress exception if any. + VariableSizedHandleScope handles(soa.Self()); + Handle<mirror::Class> compiling_class = + handles.NewHandle(method != nullptr ? method->GetDeclaringClass() : nullptr); DexCompilationUnit dex_compilation_unit( jclass_loader, runtime->GetClassLinker(), @@ -1014,16 +1079,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, class_def_idx, method_idx, access_flags, - /* verified_method */ nullptr, // Not needed by the Optimizing compiler. - dex_cache); - ScopedObjectAccess soa(Thread::Current()); - ArtMethod* method = compiler_driver->ResolveMethod( - soa, dex_cache, jclass_loader, &dex_compilation_unit, method_idx, invoke_type); - VariableSizedHandleScope handles(soa.Self()); + /*verified_method=*/ nullptr, // Not needed by the Optimizing compiler. + dex_cache, + compiling_class); // Go to native so that we don't block GC during compilation. 
ScopedThreadSuspension sts(soa.Self(), kNative); if (method != nullptr && UNLIKELY(method->IsIntrinsic())) { - DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); + DCHECK(compiler_options.IsBootImage()); codegen.reset( TryCompileIntrinsic(&allocator, &arena_stack, @@ -1042,7 +1104,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, &code_allocator, dex_compilation_unit, method, - /* osr */ false, + compiler_options.IsBaseline(), + /* osr= */ false, &handles)); } } @@ -1070,7 +1133,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } } else { MethodCompilationStat method_stat; - if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { + if (compiler_options.VerifyAtRuntime()) { method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime; } else { method_stat = MethodCompilationStat::kNotCompiledVerificationError; @@ -1079,8 +1142,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, } if (kIsDebugBuild && - IsCompilingWithCoreImage() && - IsInstructionSetSupported(compiler_driver->GetInstructionSet())) { + compiler_options.CompilingWithCoreImage() && + IsInstructionSetSupported(compiler_options.GetInstructionSet())) { // For testing purposes, we put a special marker on method names // that should be compiled with this compiler (when the // instruction set is supported). This makes sure we're not @@ -1093,31 +1156,50 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return compiled_method; } +static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator, + const JniCompiledMethod& jni_compiled_method) { + // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator + // to stay clear of the frame size limit. + std::unique_ptr<StackMapStream> stack_map_stream( + new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet())); + stack_map_stream->BeginMethod( + jni_compiled_method.GetFrameSize(), + jni_compiled_method.GetCoreSpillMask(), + jni_compiled_method.GetFpSpillMask(), + /* num_dex_registers= */ 0); + stack_map_stream->EndMethod(); + return stack_map_stream->Encode(); +} + CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { - if (GetCompilerDriver()->GetCompilerOptions().IsBootImage()) { + Runtime* runtime = Runtime::Current(); + ArenaAllocator allocator(runtime->GetArenaPool()); + ArenaStack arena_stack(runtime->GetArenaPool()); + + const CompilerOptions& compiler_options = GetCompilerOptions(); + if (compiler_options.IsBootImage()) { ScopedObjectAccess soa(Thread::Current()); - Runtime* runtime = Runtime::Current(); ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod( - method_idx, dex_cache.Get(), /* class_loader */ nullptr); + method_idx, dex_cache.Get(), /*class_loader=*/ nullptr); if (method != nullptr && UNLIKELY(method->IsIntrinsic())) { + VariableSizedHandleScope handles(soa.Self()); ScopedNullHandle<mirror::ClassLoader> class_loader; // null means boot class path loader. 
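CreateJniStackMap above places the StackMapStream in a ScopedArenaAllocator precisely because, as its comment notes, the stream is too large for the caller's stack frame. A sketch of that pattern with illustrative types (ToyArena and ToyStackMapStream are not ART classes):

    #include <cstddef>
    #include <cstdint>
    #include <memory>
    #include <new>
    #include <vector>

    struct ToyArena {
      void* Alloc(size_t bytes) {
        chunks_.emplace_back(new uint8_t[bytes]);
        return chunks_.back().get();
      }
      std::vector<std::unique_ptr<uint8_t[]>> chunks_;
    };

    struct ToyStackMapStream {
      uint8_t big_scratch[8192] = {};  // the reason this should not be a local variable
      void BeginMethod(size_t frame_size) { big_scratch[0] = static_cast<uint8_t>(frame_size); }
      void EndMethod() {}
      std::vector<uint8_t> Encode() const { return {big_scratch[0]}; }
    };

    // Build the (small) encoded map while keeping the (large) builder off the stack.
    std::vector<uint8_t> BuildToyStackMap(ToyArena* arena, size_t frame_size) {
      auto* stream = new (arena->Alloc(sizeof(ToyStackMapStream))) ToyStackMapStream();
      stream->BeginMethod(frame_size);
      stream->EndMethod();
      std::vector<uint8_t> encoded = stream->Encode();
      stream->~ToyStackMapStream();  // run the destructor; the arena reclaims the memory later
      return encoded;
    }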
+ Handle<mirror::Class> compiling_class = handles.NewHandle(method->GetDeclaringClass()); DexCompilationUnit dex_compilation_unit( class_loader, runtime->GetClassLinker(), dex_file, - /* code_item */ nullptr, - /* class_def_idx */ DexFile::kDexNoIndex16, + /*code_item=*/ nullptr, + /*class_def_idx=*/ DexFile::kDexNoIndex16, method_idx, access_flags, - /* verified_method */ nullptr, - dex_cache); - ArenaAllocator allocator(runtime->GetArenaPool()); - ArenaStack arena_stack(runtime->GetArenaPool()); + /*verified_method=*/ nullptr, + dex_cache, + compiling_class); CodeVectorAllocator code_allocator(&allocator); - VariableSizedHandleScope handles(soa.Self()); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), kNative); std::unique_ptr<CodeGenerator> codegen( @@ -1131,7 +1213,7 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, CompiledMethod* compiled_method = Emit(&allocator, &code_allocator, codegen.get(), - /* code_item_for_osr_check */ nullptr); + /* item= */ nullptr); compiled_method->MarkAsIntrinsic(); return compiled_method; } @@ -1139,28 +1221,24 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, } JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( - GetCompilerDriver(), access_flags, method_idx, dex_file); + compiler_options, access_flags, method_idx, dex_file); MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub); + + ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. + ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator, + jni_compiled_method); return CompiledMethod::SwapAllocCompiledMethod( - GetCompilerDriver(), + GetCompiledMethodStorage(), jni_compiled_method.GetInstructionSet(), jni_compiled_method.GetCode(), - jni_compiled_method.GetFrameSize(), - jni_compiled_method.GetCoreSpillMask(), - jni_compiled_method.GetFpSpillMask(), - /* method_info */ ArrayRef<const uint8_t>(), - /* vmap_table */ ArrayRef<const uint8_t>(), + ArrayRef<const uint8_t>(stack_map), jni_compiled_method.GetCfi(), - /* patches */ ArrayRef<const linker::LinkerPatch>()); + /* patches= */ ArrayRef<const linker::LinkerPatch>()); } -Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { - return new OptimizingCompiler(driver); -} - -bool IsCompilingWithCoreImage() { - const std::string& image = Runtime::Current()->GetImageLocation(); - return CompilerDriver::IsCoreImageFilename(image); +Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage) { + return new OptimizingCompiler(compiler_options, storage); } bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { @@ -1168,23 +1246,10 @@ bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler(); } -bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) { - if (!Runtime::Current()->IsAotCompiler()) { - // JIT can always encode methods in stack maps. - return true; - } - if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) { - return true; - } - // TODO(ngeoffray): Support more AOT cases for inlining: - // - methods in multidex - // - methods in boot image for on-device non-PIC compilation. 
- return false; -} - bool OptimizingCompiler::JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, + bool baseline, bool osr, jit::JitLogger* jit_logger) { StackHandleScope<3> hs(self); @@ -1195,7 +1260,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, const DexFile* dex_file = method->GetDexFile(); const uint16_t class_def_idx = method->GetClassDefIndex(); - const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); + const dex::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); const uint32_t method_idx = method->GetDexMethodIndex(); const uint32_t access_flags = method->GetAccessFlags(); @@ -1203,37 +1268,52 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArenaAllocator allocator(runtime->GetJitArenaPool()); if (UNLIKELY(method->IsNative())) { + const CompilerOptions& compiler_options = GetCompilerOptions(); JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( - GetCompilerDriver(), access_flags, method_idx, *dex_file); - ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots; + compiler_options, access_flags, method_idx, *dex_file); + std::vector<Handle<mirror::Object>> roots; ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list( allocator.Adapter(kArenaAllocCHA)); + ArenaStack arena_stack(runtime->GetJitArenaPool()); + // StackMapStream is large and it does not fit into this frame, so we need helper method. + ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map. + ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator, + jni_compiled_method); + uint8_t* stack_map_data = nullptr; + uint8_t* roots_data = nullptr; + uint32_t data_size = code_cache->ReserveData(self, + stack_map.size(), + /* number_of_roots= */ 0, + method, + &stack_map_data, + &roots_data); + if (stack_map_data == nullptr || roots_data == nullptr) { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); + return false; + } + memcpy(stack_map_data, stack_map.data(), stack_map.size()); + const void* code = code_cache->CommitCode( self, method, - /* stack_map_data */ nullptr, - /* method_info_data */ nullptr, - /* roots_data */ nullptr, - jni_compiled_method.GetFrameSize(), - jni_compiled_method.GetCoreSpillMask(), - jni_compiled_method.GetFpSpillMask(), + stack_map_data, + roots_data, jni_compiled_method.GetCode().data(), jni_compiled_method.GetCode().size(), - /* data_size */ 0u, + data_size, osr, roots, - /* has_should_deoptimize_flag */ false, + /* has_should_deoptimize_flag= */ false, cha_single_implementation_list); if (code == nullptr) { return false; } - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); debug::MethodDebugInfo info = {}; - DCHECK(info.custom_name.empty()); + info.custom_name = "art_jni_trampoline"; info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1265,6 +1345,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, std::unique_ptr<CodeGenerator> codegen; { + Handle<mirror::Class> compiling_class = handles.NewHandle(method->GetDeclaringClass()); DexCompilationUnit dex_compilation_unit( class_loader, runtime->GetClassLinker(), @@ -1273,8 +1354,9 @@ bool OptimizingCompiler::JitCompile(Thread* 
self, class_def_idx, method_idx, access_flags, - /* verified_method */ nullptr, - dex_cache); + /*verified_method=*/ nullptr, + dex_cache, + compiling_class); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(self, kNative); @@ -1284,6 +1366,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, &code_allocator, dex_compilation_unit, method, + baseline, osr, &handles)); if (codegen.get() == nullptr) { @@ -1291,55 +1374,37 @@ bool OptimizingCompiler::JitCompile(Thread* self, } } - size_t stack_map_size = 0; - size_t method_info_size = 0; - codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size); + ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item); size_t number_of_roots = codegen->GetNumberOfJitRoots(); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots - // will be visible by the GC between EmitLiterals and CommitCode. Once CommitCode is - // executed, this array is not needed. - Handle<mirror::ObjectArray<mirror::Object>> roots( - hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc( - self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots))); - if (roots == nullptr) { - // Out of memory, just clear the exception to avoid any Java exception uncaught problems. - MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); - DCHECK(self->IsExceptionPending()); - self->ClearException(); - return false; - } uint8_t* stack_map_data = nullptr; - uint8_t* method_info_data = nullptr; uint8_t* roots_data = nullptr; uint32_t data_size = code_cache->ReserveData(self, - stack_map_size, - method_info_size, + stack_map.size(), number_of_roots, method, &stack_map_data, - &method_info_data, &roots_data); if (stack_map_data == nullptr || roots_data == nullptr) { MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); return false; } - codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), - MemoryRegion(method_info_data, method_info_size), - code_item); - codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data); + memcpy(stack_map_data, stack_map.data(), stack_map.size()); + std::vector<Handle<mirror::Object>> roots; + codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots); + // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope. + DCHECK(std::all_of(roots.begin(), + roots.end(), + [&handles](Handle<mirror::Object> root){ + return handles.Contains(root.GetReference()); + })); const void* code = code_cache->CommitCode( self, method, stack_map_data, - method_info_data, roots_data, - codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), - codegen->GetCoreSpillMask(), - codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), - code_allocator.GetSize(), + code_allocator.GetMemory().size(), data_size, osr, roots, @@ -1352,7 +1417,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + const CompilerOptions& compiler_options = GetCompilerOptions(); if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); @@ -1369,16 +1434,16 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = code_address; - info.code_size = code_allocator.GetSize(); + info.code_size = code_allocator.GetMemory().size(); info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); - info.code_info = stack_map_size == 0 ? nullptr : stack_map_data; + info.code_info = stack_map.size() == 0 ? nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); GenerateJitDebugInfo(method, info); } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetSize(), method); + jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); } if (kArenaAllocatorCountAllocations) { @@ -1397,26 +1462,31 @@ bool OptimizingCompiler::JitCompile(Thread* self, return true; } -void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { - const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); +void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method ATTRIBUTE_UNUSED, + const debug::MethodDebugInfo& info) { + const CompilerOptions& compiler_options = GetCompilerOptions(); DCHECK(compiler_options.GenerateAnyDebugInfo()); - - // If both flags are passed, generate full debug info. - const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); - - // Create entry for the single method that we just compiled. - std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - mini_debug_info, - ArrayRef<const debug::MethodDebugInfo>(&info, 1)); - MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); - AddNativeDebugInfoForJit(reinterpret_cast<const void*>(info.code_address), elf_file); - - VLOG(jit) - << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method) - << " size=" << PrettySize(elf_file.size()) - << " total_size=" << PrettySize(GetJitNativeDebugInfoMemUsage()); + TimingLogger logger("Generate JIT debug info logger", true, VLOG_IS_ON(jit)); + { + TimingLogger::ScopedTiming st("Generate JIT debug info", &logger); + + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + + // Create entry for the single method that we just compiled. 
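The JitCompile changes earlier in this hunk encode the stack map up front, reserve space in the code cache, copy the encoded bytes in, and only then commit the generated code, bailing out if the reservation fails. A reduced sketch of that reserve/copy/commit flow (ToyCodeCache and its two-method API are illustrative; the real jit::JitCodeCache calls take many more parameters):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct Reservation {
      uint8_t* stack_map_data = nullptr;
    };

    class ToyCodeCache {
     public:
      bool Reserve(size_t stack_map_size, Reservation* out) {
        stack_map_.resize(stack_map_size);
        out->stack_map_data = stack_map_.data();
        return true;  // a real cache can fail here (out of memory)
      }
      const void* Commit(const uint8_t* code, size_t code_size) {
        code_.assign(code, code + code_size);
        return code_.data();
      }
     private:
      std::vector<uint8_t> stack_map_;
      std::vector<uint8_t> code_;
    };

    const void* PublishMethod(ToyCodeCache* cache,
                              const std::vector<uint8_t>& stack_map,
                              const std::vector<uint8_t>& code) {
      Reservation r;
      if (!cache->Reserve(stack_map.size(), &r)) {
        return nullptr;  // mirrors the kJitOutOfMemoryForCommit early return above
      }
      if (!stack_map.empty()) {
        std::memcpy(r.stack_map_data, stack_map.data(), stack_map.size());
      }
      return cache->Commit(code.data(), code.size());
    }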
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( + compiler_options.GetInstructionSet(), + compiler_options.GetInstructionSetFeatures(), + mini_debug_info, + info); + AddNativeDebugInfoForJit(Thread::Current(), + reinterpret_cast<const void*>(info.code_address), + elf_file, + debug::PackElfFileForJIT, + compiler_options.GetInstructionSet(), + compiler_options.GetInstructionSetFeatures()); + } + Runtime::Current()->GetJit()->AddTimingLogger(logger); } } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h index d8cea30a6b..cd6d684590 100644 --- a/compiler/optimizing/optimizing_compiler.h +++ b/compiler/optimizing/optimizing_compiler.h @@ -17,26 +17,21 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_ +#include "base/globals.h" #include "base/mutex.h" -#include "globals.h" namespace art { class ArtMethod; class Compiler; -class CompilerDriver; +class CompiledMethodStorage; +class CompilerOptions; class DexFile; -Compiler* CreateOptimizingCompiler(CompilerDriver* driver); - -// Returns whether we are compiling against a "core" image, which -// is an indicative we are running tests. The compiler will use that -// information for checking invariants. -bool IsCompilingWithCoreImage(); +Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, + CompiledMethodStorage* storage); bool EncodeArtMethodInInlineInfo(ArtMethod* method); -bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) - REQUIRES_SHARED(Locks::mutator_lock_); } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 00194ff1fe..ddd57f5f1a 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -22,9 +22,10 @@ #include <string> #include <type_traits> +#include <android-base/logging.h> + #include "base/atomic.h" #include "base/globals.h" -#include "base/logging.h" // For VLOG_IS_ON. namespace art { @@ -59,6 +60,7 @@ enum class MethodCompilationStat { kNotCompiledUnsupportedIsa, kNotCompiledVerificationError, kNotCompiledVerifyAtRuntime, + kNotCompiledIrreducibleLoopAndStringInit, kInlinedMonomorphicCall, kInlinedPolymorphicCall, kMonomorphicCall, @@ -99,6 +101,7 @@ enum class MethodCompilationStat { kConstructorFenceRemovedLSE, kConstructorFenceRemovedPFRA, kConstructorFenceRemovedCFRE, + kBitstringTypeCheck, kJitOutOfMemoryForCommit, kLastStat }; @@ -124,11 +127,6 @@ class OptimizingCompilerStats { } void Log() const { - if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) { - // Log only in debug builds or if the compiler is verbose. 
- return; - } - uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic); uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub); uint32_t bytecode_attempts = diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 6dcbadba6e..e5f694109a 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -20,6 +20,7 @@ #include <memory> #include <vector> +#include "base/malloc_arena_pool.h" #include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" @@ -28,6 +29,7 @@ #include "dex/dex_instruction.h" #include "dex/standard_dex_file.h" #include "driver/dex_compilation_unit.h" +#include "graph_checker.h" #include "handle_scope-inl.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" @@ -97,7 +99,7 @@ class ArenaPoolAndAllocator { ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; } private: - ArenaPool pool_; + MallocArenaPool pool_; ArenaAllocator allocator_; ArenaStack arena_stack_; ScopedArenaAllocator scoped_allocator_; @@ -153,7 +155,7 @@ class OptimizingUnitTestHelper { void* aligned_data = GetAllocator()->Alloc(code_item_size); memcpy(aligned_data, &data[0], code_item_size); CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment); - const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data); + const dex::CodeItem* code_item = reinterpret_cast<const dex::CodeItem*>(aligned_data); { ScopedObjectAccess soa(Thread::Current()); @@ -163,13 +165,13 @@ class OptimizingUnitTestHelper { const DexCompilationUnit* dex_compilation_unit = new (graph->GetAllocator()) DexCompilationUnit( handles_->NewHandle<mirror::ClassLoader>(nullptr), - /* class_linker */ nullptr, + /* class_linker= */ nullptr, graph->GetDexFile(), code_item, - /* class_def_index */ DexFile::kDexNoIndex16, - /* method_idx */ dex::kDexNoIndex, - /* access_flags */ 0u, - /* verified_method */ nullptr, + /* class_def_index= */ DexFile::kDexNoIndex16, + /* method_idx= */ dex::kDexNoIndex, + /* access_flags= */ 0u, + /* verified_method= */ nullptr, handles_->NewHandle<mirror::DexCache>(nullptr)); CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u); HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type); @@ -186,6 +188,77 @@ class OptimizingUnitTestHelper { class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; +// OptimizingUnitTest with some handy functions to ease the graph creation. 
+class ImprovedOptimizingUnitTest : public OptimizingUnitTest { + public: + ImprovedOptimizingUnitTest() : graph_(CreateGraph()), + entry_block_(nullptr), + return_block_(nullptr), + exit_block_(nullptr), + parameter_(nullptr) {} + + virtual ~ImprovedOptimizingUnitTest() {} + + void InitGraph() { + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry_block_); + graph_->SetEntryBlock(entry_block_); + + return_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(return_block_); + + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(exit_block_); + graph_->SetExitBlock(exit_block_); + + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); + } + + bool CheckGraph() { + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + return false; + } + return true; + } + + HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, + ArenaVector<HInstruction*>* current_locals) { + HEnvironment* environment = new (GetAllocator()) HEnvironment( + (GetAllocator()), + current_locals->size(), + graph_->GetArtMethod(), + instruction->GetDexPc(), + instruction); + + environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals)); + instruction->SetRawEnvironment(environment); + return environment; + } + + protected: + HGraph* graph_; + + HBasicBlock* entry_block_; + HBasicBlock* return_block_; + HBasicBlock* exit_block_; + + HInstruction* parameter_; +}; + // Naive string diff data type. typedef std::list<std::pair<std::string, std::string>> diff_t; diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index e6e069f96e..5fadcab402 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -58,7 +58,7 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver { virtual ~ParallelMoveResolverWithSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + void EmitNativeCode(HParallelMove* parallel_move) override; protected: class ScratchRegisterScope : public ValueObject { @@ -133,7 +133,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { virtual ~ParallelMoveResolverNoSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. - void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE; + void EmitNativeCode(HParallelMove* parallel_move) override; protected: // Called at the beginning of EmitNativeCode(). 
A subclass may put some architecture dependent diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index cb87cabe1c..a8ab6cdd0c 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -15,6 +15,7 @@ */ #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -55,7 +56,7 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator) : ParallelMoveResolverWithSwap(allocator) {} - void EmitMove(size_t index) OVERRIDE { + void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; @@ -67,7 +68,7 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void EmitSwap(size_t index) OVERRIDE { + void EmitSwap(size_t index) override { MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; @@ -79,8 +80,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void SpillScratch(int reg ATTRIBUTE_UNUSED) OVERRIDE {} - void RestoreScratch(int reg ATTRIBUTE_UNUSED) OVERRIDE {} + void SpillScratch(int reg ATTRIBUTE_UNUSED) override {} + void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {} std::string GetMessage() const { return message_.str(); @@ -98,13 +99,13 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator) : ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {} - void PrepareForEmitNativeCode() OVERRIDE { + void PrepareForEmitNativeCode() override { scratch_index_ = kScratchRegisterStartIndexForTest; } - void FinishEmitNativeCode() OVERRIDE {} + void FinishEmitNativeCode() override {} - Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE { + Location AllocateScratchLocationFor(Location::Kind kind) override { if (kind == Location::kStackSlot || kind == Location::kFpuRegister || kind == Location::kRegister) { kind = Location::kRegister; @@ -124,9 +125,9 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { return scratch; } - void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} + void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {} - void EmitMove(size_t index) OVERRIDE { + void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; @@ -173,14 +174,14 @@ class ParallelMoveTest : public ::testing::Test { template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true; template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false; -typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap> - ParallelMoveResolverTestTypes; +using ParallelMoveResolverTestTypes = + ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>; TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes); TYPED_TEST(ParallelMoveTest, Dependency) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) { } TYPED_TEST(ParallelMoveTest, Cycle) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -257,7 +258,7 @@ 
TYPED_TEST(ParallelMoveTest, Cycle) { } TYPED_TEST(ParallelMoveTest, ConstantLast) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); TypeParam resolver(&allocator); HParallelMove* moves = new (&allocator) HParallelMove(&allocator); @@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) { } TYPED_TEST(ParallelMoveTest, Pairs) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) { } TYPED_TEST(ParallelMoveTest, MultiCycles) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { // Test that we do 64bits moves before 32bits moves. TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { @@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { } TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); { diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 9d5358514e..05208ff65c 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -58,7 +58,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { // If this is an invoke with PC-relative load kind, // we need to add the base as the special input. if (invoke->HasPcRelativeMethodLoadKind() && @@ -70,13 +70,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + void VisitLoadClass(HLoadClass* load_class) override { HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); switch (load_kind) { case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: - case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kBootImageClassTable: + case HLoadClass::LoadKind::kBootImageRelRo: case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kJitBootImageAddress: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); load_class->AddSpecialInput(base_); @@ -86,13 +86,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { + void VisitLoadString(HLoadString* load_string) override { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: - case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kBootImageInternTable: + case HLoadString::LoadKind::kBootImageRelRo: case HLoadString::LoadKind::kBssEntry: + case HLoadString::LoadKind::kJitBootImageAddress: // Add a base register for PC-relative literals on R2. 
InitializePCRelativeBasePointer(); load_string->AddSpecialInput(base_); @@ -102,7 +102,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + void VisitPackedSwitch(HPackedSwitch* switch_insn) override { if (switch_insn->GetNumEntries() <= InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { return; @@ -128,20 +128,21 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HMipsComputeBaseMethodAddress* base_; }; -void PcRelativeFixups::Run() { +bool PcRelativeFixups::Run() { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); if (mips_codegen->GetInstructionSetFeatures().IsR6()) { // Do nothing for R6 because it has PC-relative addressing. - return; + return false; } if (graph_->HasIrreducibleLoops()) { // Do not run this optimization, as irreducible loops do not work with an instruction // that can be live-in at the irreducible loop header. - return; + return false; } PCRelativeHandlerVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBaseIfNeeded(); + return true; } } // namespace mips diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h index ec2c711f8d..872370bcb7 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.h +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -34,7 +34,7 @@ class PcRelativeFixups : public HOptimization { static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips"; - void Run() OVERRIDE; + bool Run() override; private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index f92f4b274a..1d8d1a6e90 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -41,60 +41,54 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } private: - void VisitAdd(HAdd* add) OVERRIDE { + void VisitAdd(HAdd* add) override { BinaryFP(add); } - void VisitSub(HSub* sub) OVERRIDE { + void VisitSub(HSub* sub) override { BinaryFP(sub); } - void VisitMul(HMul* mul) OVERRIDE { + void VisitMul(HMul* mul) override { BinaryFP(mul); } - void VisitDiv(HDiv* div) OVERRIDE { + void VisitDiv(HDiv* div) override { BinaryFP(div); } - void VisitCompare(HCompare* compare) OVERRIDE { + void VisitCompare(HCompare* compare) override { BinaryFP(compare); } - void VisitReturn(HReturn* ret) OVERRIDE { + void VisitReturn(HReturn* ret) override { HConstant* value = ret->InputAt(0)->AsConstant(); if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) { ReplaceInput(ret, value, 0, true); } } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override { HandleInvoke(invoke); } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + void VisitInvokeVirtual(HInvokeVirtual* invoke) override { HandleInvoke(invoke); } - void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + void VisitInvokeInterface(HInvokeInterface* invoke) override { HandleInvoke(invoke); } - void VisitLoadClass(HLoadClass* load_class) OVERRIDE { - HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadClass::LoadKind::kBootImageClassTable || - load_kind == HLoadClass::LoadKind::kBssEntry) { + void VisitLoadClass(HLoadClass* load_class) override { + if 
(load_class->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class); load_class->AddSpecialInput(method_address); } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { - HLoadString::LoadKind load_kind = load_string->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kBootImageInternTable || - load_kind == HLoadString::LoadKind::kBssEntry) { + void VisitLoadString(HLoadString* load_string) override { + if (load_string->HasPcRelativeLoadKind()) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string); load_string->AddSpecialInput(method_address); } @@ -107,31 +101,31 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitEqual(HEqual* cond) OVERRIDE { + void VisitEqual(HEqual* cond) override { BinaryFP(cond); } - void VisitNotEqual(HNotEqual* cond) OVERRIDE { + void VisitNotEqual(HNotEqual* cond) override { BinaryFP(cond); } - void VisitLessThan(HLessThan* cond) OVERRIDE { + void VisitLessThan(HLessThan* cond) override { BinaryFP(cond); } - void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE { + void VisitLessThanOrEqual(HLessThanOrEqual* cond) override { BinaryFP(cond); } - void VisitGreaterThan(HGreaterThan* cond) OVERRIDE { + void VisitGreaterThan(HGreaterThan* cond) override { BinaryFP(cond); } - void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE { + void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) override { BinaryFP(cond); } - void VisitNeg(HNeg* neg) OVERRIDE { + void VisitNeg(HNeg* neg) override { if (DataType::IsFloatingPointType(neg->GetType())) { // We need to replace the HNeg with a HX86FPNeg in order to address the constant area. HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(neg); @@ -146,7 +140,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE { + void VisitPackedSwitch(HPackedSwitch* switch_insn) override { if (switch_insn->GetNumEntries() <= InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) { return; @@ -199,18 +193,19 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke) { - // If this is an invoke-static/-direct with PC-relative dex cache array - // addressing, we need the PC-relative address base. HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect(); - // We can't add a pointer to the constant area if we already have a current - // method pointer. This may arise when sharpening doesn't remove the current - // method pointer from the invoke. - if (invoke_static_or_direct != nullptr && - invoke_static_or_direct->HasCurrentMethodInput()) { + + // We can't add the method address if we already have a current method pointer. + // This may arise when sharpening doesn't remove the current method pointer from the invoke. + if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) { + // Note: This happens only for recursive calls (including compiling an intrinsic + // by faking a call to itself; we use kRuntimeCall for this case). DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind()); return; } + // If this is an invoke-static/-direct with PC-relative addressing (within boot image + // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base. 
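The comment above on needing the "PC-relative address base" exists because 32-bit x86 has no RIP-relative addressing; the sketch below illustrates the usual 32-bit x86 technique and is not taken from this patch:

    // HX86ComputeBaseMethodAddress materializes a base register once per method, roughly:
    //   call  next_insn      ; pushes the address of next_insn
    // next_insn:
    //   pop   ebx            ; ebx now holds a known PC value
    // PC-relative HLoadClass/HLoadString/invoke nodes then receive that base as an extra
    // input (AddSpecialInput) so codegen can reach boot-image, .bss and .data.bimg.rel.ro
    // entries relative to it.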
bool base_added = false; if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeMethodLoadKind() && @@ -230,15 +225,16 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } } - // These intrinsics need the constant area. switch (invoke->GetIntrinsic()) { - case Intrinsics::kMathAbsDouble: - case Intrinsics::kMathAbsFloat: - case Intrinsics::kMathMaxDoubleDouble: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMinFloatFloat: + case Intrinsics::kIntegerValueOf: + // This intrinsic can be call free if it loads the address of the boot image object. + // If we're compiling PIC, we need the address base for loading from .data.bimg.rel.ro. + if (!codegen_->GetCompilerOptions().GetCompilePic()) { + break; + } + FALLTHROUGH_INTENDED; case Intrinsics::kMathRoundFloat: + // This intrinsic needs the constant area. if (!base_added) { DCHECK(invoke_static_or_direct != nullptr); DCHECK(!invoke_static_or_direct->HasCurrentMethodInput()); @@ -259,10 +255,11 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HX86ComputeBaseMethodAddress* base_; }; -void PcRelativeFixups::Run() { +bool PcRelativeFixups::Run() { PCRelativeHandlerVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBaseIfNeeded(); + return true; } } // namespace x86 diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h index 72fa71ea94..3b470a6502 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.h +++ b/compiler/optimizing/pc_relative_fixups_x86.h @@ -34,7 +34,7 @@ class PcRelativeFixups : public HOptimization { static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86"; - void Run() OVERRIDE; + bool Run() override; private: CodeGenerator* codegen_; diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index f843c008d8..fbdbf9d086 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -17,7 +17,8 @@ #include "prepare_for_register_allocation.h" #include "dex/dex_file_types.h" -#include "jni_internal.h" +#include "driver/compiler_options.h" +#include "jni/jni_internal.h" #include "optimizing_compiler_stats.h" #include "well_known_classes.h" @@ -27,15 +28,42 @@ void PrepareForRegisterAllocation::Run() { // Order does not matter. for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) { // No need to visit the phis. - for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done(); + for (HInstructionIteratorHandleChanges inst_it(block->GetInstructions()); !inst_it.Done(); inst_it.Advance()) { inst_it.Current()->Accept(this); } } } +void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) { + // Record only those bitstring type checks that make it to the codegen stage. + if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + +void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) { + // Record only those bitstring type checks that make it to the codegen stage. 
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) { + MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck); + } +} + void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) { check->ReplaceWith(check->InputAt(0)); + if (compiler_options_.GetImplicitNullChecks()) { + HInstruction* next = check->GetNext(); + + // The `PrepareForRegisterAllocation` pass removes `HBoundType` from the graph, + // so do it ourselves now to not prevent optimizations. + while (next->IsBoundType()) { + next = next->GetNext(); + VisitBoundType(next->GetPrevious()->AsBoundType()); + } + if (next->CanDoImplicitNullCheckOn(check->InputAt(0))) { + check->MarkEmittedAtUseSite(); + } + } } void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) { @@ -59,9 +87,9 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { if (GetGraph()->GetArtMethod() != char_at_method) { ArenaAllocator* allocator = GetGraph()->GetAllocator(); HEnvironment* environment = new (allocator) HEnvironment(allocator, - /* number_of_vregs */ 0u, + /* number_of_vregs= */ 0u, char_at_method, - /* dex_pc */ dex::kDexNoIndex, + /* dex_pc= */ dex::kDexNoIndex, check); check->InsertRawEnvironment(environment); } @@ -136,7 +164,9 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { + } else if (can_merge_with_load_class && + load_class->GetLoadKind() != HLoadClass::LoadKind::kRuntimeCall) { + DCHECK(!load_class->NeedsAccessCheck()); // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. DCHECK(load_class->HasEnvironment()); @@ -274,4 +304,13 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, return true; } +void PrepareForRegisterAllocation::VisitTypeConversion(HTypeConversion* instruction) { + // For simplicity, our code generators don't handle implicit type conversion, so ensure + // there are none before hitting codegen. 
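For readers unfamiliar with the term used in the comment above, an "implicit" conversion is an HTypeConversion that is a no-op at the register level (the exact predicate lives in DataType and is not shown in this patch); a minimal illustration of what the new VisitTypeConversion does with one:

    // Sketch only: a conversion to a type the value already has carries no work,
    // so the pass forwards the input and drops the node before codegen sees it.
    //   before:  v1 = HParameterValue(kInt32)   v2 = HTypeConversion(kInt32 <- v1)   use(v2)
    //   after:   v1 = HParameterValue(kInt32)                                        use(v1)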
+ if (instruction->IsImplicitConversion()) { + instruction->ReplaceWith(instruction->GetInput()); + instruction->GetBlock()->RemoveInstruction(instruction); + } +} + } // namespace art diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 2c64f016c1..e0bb76eb22 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -21,6 +21,7 @@ namespace art { +class CompilerOptions; class OptimizingCompilerStats; /** @@ -30,9 +31,11 @@ class OptimizingCompilerStats; */ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { public: - explicit PrepareForRegisterAllocation(HGraph* graph, - OptimizingCompilerStats* stats = nullptr) - : HGraphDelegateVisitor(graph, stats) {} + PrepareForRegisterAllocation(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats = nullptr) + : HGraphDelegateVisitor(graph, stats), + compiler_options_(compiler_options) {} void Run(); @@ -40,20 +43,25 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { "prepare_for_register_allocation"; private: - void VisitNullCheck(HNullCheck* check) OVERRIDE; - void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE; - void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE; - void VisitBoundType(HBoundType* bound_type) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitClinitCheck(HClinitCheck* check) OVERRIDE; - void VisitCondition(HCondition* condition) OVERRIDE; - void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE; - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; - void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; + void VisitCheckCast(HCheckCast* check_cast) override; + void VisitInstanceOf(HInstanceOf* instance_of) override; + void VisitNullCheck(HNullCheck* check) override; + void VisitDivZeroCheck(HDivZeroCheck* check) override; + void VisitBoundsCheck(HBoundsCheck* check) override; + void VisitBoundType(HBoundType* bound_type) override; + void VisitArraySet(HArraySet* instruction) override; + void VisitClinitCheck(HClinitCheck* check) override; + void VisitCondition(HCondition* condition) override; + void VisitConstructorFence(HConstructorFence* constructor_fence) override; + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override; + void VisitDeoptimize(HDeoptimize* deoptimize) override; + void VisitTypeConversion(HTypeConversion* instruction) override; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; + const CompilerOptions& compiler_options_; + DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h index c6579dc5e0..8ef9ce4e8b 100644 --- a/compiler/optimizing/pretty_printer.h +++ b/compiler/optimizing/pretty_printer.h @@ -33,7 +33,7 @@ class HPrettyPrinter : public HGraphVisitor { PrintString(": "); } - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { PrintPreInstruction(instruction); PrintString(instruction->DebugName()); PrintPostInstruction(instruction); @@ -70,7 +70,7 @@ class HPrettyPrinter : public HGraphVisitor { PrintNewLine(); } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { PrintString("BasicBlock "); 
PrintInt(block->GetBlockId()); const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); @@ -108,15 +108,15 @@ class StringPrettyPrinter : public HPrettyPrinter { explicit StringPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_(""), current_block_(nullptr) { } - void PrintInt(int value) OVERRIDE { + void PrintInt(int value) override { str_ += android::base::StringPrintf("%d", value); } - void PrintString(const char* value) OVERRIDE { + void PrintString(const char* value) override { str_ += value; } - void PrintNewLine() OVERRIDE { + void PrintNewLine() override { str_ += '\n'; } @@ -124,12 +124,12 @@ class StringPrettyPrinter : public HPrettyPrinter { std::string str() const { return str_; } - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + void VisitBasicBlock(HBasicBlock* block) override { current_block_ = block; HPrettyPrinter::VisitBasicBlock(block); } - void VisitGoto(HGoto* gota) OVERRIDE { + void VisitGoto(HGoto* gota) override { PrintString(" "); PrintInt(gota->GetId()); PrintString(": Goto "); diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 67a61fc01d..4929e0a3a1 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -22,6 +22,7 @@ #include "base/scoped_arena_containers.h" #include "base/enums.h" #include "class_linker-inl.h" +#include "class_root.h" #include "handle_scope-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" @@ -40,31 +41,40 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint( } static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles, - ClassLinker::ClassRoot class_root, + ClassRoot class_root, ReferenceTypeInfo::TypeHandle* cache) { if (!ReferenceTypeInfo::IsValidHandle(*cache)) { // Mutator lock is required for NewHandle. 
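The reference_type_propagation.cc hunk that continues below swaps the class-root lookups from ClassLinker::ClassRoot constants to the standalone ClassRoot enum; the two lookup forms it ends up using are sketched here (the surrounding variables are illustrative, the calls themselves appear in the patch):

    // Enum-based lookup, as used in GetRootHandle():
    ObjPtr<mirror::Class> object_class = GetClassRoot(ClassRoot::kJavaLangObject);
    // Template-based lookup, as used in BoundTypeForClassCheck():
    ObjPtr<mirror::Class> also_object = GetClassRoot<mirror::Object>();
    // Both forms need the mutator lock, hence the ScopedObjectAccess in the callers.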
- ClassLinker* linker = Runtime::Current()->GetClassLinker(); ScopedObjectAccess soa(Thread::Current()); - *cache = handles->NewHandle(linker->GetClassRoot(class_root)); + *cache = handles->NewHandle(GetClassRoot(class_root)); } return *cache; } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangObject, &object_class_handle_); } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangClass, &class_class_handle_); +} + +ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodHandleClassHandle() { + return GetRootHandle(handles_, + ClassRoot::kJavaLangInvokeMethodHandleImpl, + &method_handle_class_handle_); +} + +ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodTypeClassHandle() { + return GetRootHandle(handles_, ClassRoot::kJavaLangInvokeMethodType, &method_type_class_handle_); } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangString, &string_class_handle_); } ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() { - return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_); + return GetRootHandle(handles_, ClassRoot::kJavaLangThrowable, &throwable_class_handle_); } class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { @@ -84,26 +94,29 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { worklist_.reserve(kDefaultWorklistSize); } - void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; - void VisitLoadClass(HLoadClass* load_class) OVERRIDE; - void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; - void VisitLoadString(HLoadString* instr) OVERRIDE; - void VisitLoadException(HLoadException* instr) OVERRIDE; - void VisitNewArray(HNewArray* instr) OVERRIDE; - void VisitParameterValue(HParameterValue* instr) OVERRIDE; - void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; - void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; - void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE; - void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) OVERRIDE; - void VisitInvoke(HInvoke* instr) OVERRIDE; - void VisitArrayGet(HArrayGet* instr) OVERRIDE; - void VisitCheckCast(HCheckCast* instr) OVERRIDE; - void VisitBoundType(HBoundType* instr) OVERRIDE; - void VisitNullCheck(HNullCheck* instr) OVERRIDE; - void VisitPhi(HPhi* phi); - - void VisitBasicBlock(HBasicBlock* block); + void VisitDeoptimize(HDeoptimize* deopt) override; + void VisitNewInstance(HNewInstance* new_instance) override; + void VisitLoadClass(HLoadClass* load_class) override; + void VisitInstanceOf(HInstanceOf* load_class) override; + void VisitClinitCheck(HClinitCheck* clinit_check) override; + void VisitLoadMethodHandle(HLoadMethodHandle* instr) override; + void VisitLoadMethodType(HLoadMethodType* instr) override; + void VisitLoadString(HLoadString* instr) override; + void VisitLoadException(HLoadException* instr) 
override; + void VisitNewArray(HNewArray* instr) override; + void VisitParameterValue(HParameterValue* instr) override; + void VisitInstanceFieldGet(HInstanceFieldGet* instr) override; + void VisitStaticFieldGet(HStaticFieldGet* instr) override; + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) override; + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) override; + void VisitInvoke(HInvoke* instr) override; + void VisitArrayGet(HArrayGet* instr) override; + void VisitCheckCast(HCheckCast* instr) override; + void VisitBoundType(HBoundType* instr) override; + void VisitNullCheck(HNullCheck* instr) override; + void VisitPhi(HPhi* phi) override; + + void VisitBasicBlock(HBasicBlock* block) override; void ProcessWorklist(); private: @@ -171,6 +184,12 @@ void ReferenceTypePropagation::ValidateTypes() { << "NullCheck " << instr->GetReferenceTypeInfo() << "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo(); } + } else if (instr->IsInstanceOf()) { + HInstanceOf* iof = instr->AsInstanceOf(); + DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact()); + } else if (instr->IsCheckCast()) { + HCheckCast* check = instr->AsCheckCast(); + DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact()); } } } @@ -259,7 +278,7 @@ static void BoundTypeIn(HInstruction* receiver, if (ShouldCreateBoundType( insert_point, receiver, class_rti, start_instruction, start_block)) { bound_type = new (receiver->GetBlock()->GetGraph()->GetAllocator()) HBoundType(receiver); - bound_type->SetUpperBound(class_rti, /* bound_can_be_null */ false); + bound_type->SetUpperBound(class_rti, /* can_be_null= */ false); start_block->InsertInstructionBefore(bound_type, insert_point); // To comply with the RTP algorithm, don't type the bound type just yet, it will // be handled in RTPVisitor::VisitBoundType. @@ -320,8 +339,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { { ScopedObjectAccess soa(Thread::Current()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); if (field_get->GetFieldInfo().GetField() != field) { return; @@ -332,7 +350,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { HBasicBlock* trueBlock = compare->IsEqual() ? check->AsIf()->IfTrueSuccessor() : check->AsIf()->IfFalseSuccessor(); - BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti); + BoundTypeIn(receiver, trueBlock, /* start_instruction= */ nullptr, class_rti); } else { DCHECK(check->IsDeoptimize()); if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) { @@ -341,7 +359,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { } } -void ReferenceTypePropagation::Run() { +bool ReferenceTypePropagation::Run() { RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, &handle_cache_, is_first_run_); // To properly propagate type info we need to visit in the dominator-based order. 
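ReferenceTypePropagation::Run() now returns bool, the same signature change applied to the MIPS and x86 fixup passes earlier in this patch, returning false when the pass does nothing (as the MIPS fixups now do on R6 or for irreducible loops). A minimal sketch of a caller consuming the new contract; the driver function below is an assumption, only bool HOptimization::Run() comes from the patch:

    // Hypothetical pass driver loop.
    bool RunPasses(const std::vector<HOptimization*>& passes) {
      bool any_pass_did_work = false;
      for (HOptimization* pass : passes) {
        // Each pass now reports whether it actually ran and touched the graph.
        any_pass_did_work |= pass->Run();
      }
      return any_pass_did_work;
    }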
@@ -353,6 +371,7 @@ void ReferenceTypePropagation::Run() { visitor.ProcessWorklist(); ValidateTypes(); + return true; } void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) { @@ -408,9 +427,9 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* bl : ifInstruction->IfFalseSuccessor(); ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( - handle_cache_->GetObjectClassHandle(), /* is_exact */ false); + handle_cache_->GetObjectClassHandle(), /* is_exact= */ false); - BoundTypeIn(obj, notNullBlock, /* start_instruction */ nullptr, object_rti); + BoundTypeIn(obj, notNullBlock, /* start_instruction= */ nullptr, object_rti); } // Returns true if one of the patterns below has been recognized. If so, the @@ -499,8 +518,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* return; } - HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); + ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI(); if (!class_rti.IsValid()) { // He have loaded an unresolved class. Don't bother bounding the type. return; @@ -520,10 +538,10 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* { ScopedObjectAccess soa(Thread::Current()); if (!class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { - class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false); + class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact= */ false); } } - BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction */ nullptr, class_rti); + BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction= */ nullptr, class_rti); } void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr, @@ -543,9 +561,9 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst // Use a null loader, the target method is in a boot classpath dex file. 
Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr)); ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( - dex_method_index, dex_cache, loader, /* referrer */ nullptr, kDirect); + dex_method_index, dex_cache, loader, /* referrer= */ nullptr, kDirect); DCHECK(method != nullptr); - mirror::Class* declaring_class = method->GetDeclaringClass(); + ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass(); DCHECK(declaring_class != nullptr); DCHECK(declaring_class->IsStringClass()) << "Expected String class: " << declaring_class->PrettyDescriptor(); @@ -553,8 +571,8 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst << "Expected String.<init>: " << method->PrettyMethod(); } instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true)); - } else if (IsAdmissible(klass.Ptr())) { + ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact= */ true)); + } else if (IsAdmissible(klass)) { ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass); is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact)); @@ -582,12 +600,12 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) { ScopedObjectAccess soa(Thread::Current()); - SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true); + SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact= */ true); } void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) { ScopedObjectAccess soa(Thread::Current()); - SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true); + SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact= */ true); } void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) { @@ -596,7 +614,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), - /* is_exact */ false); + /* is_exact= */ false); } } @@ -614,7 +632,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio klass = info.GetField()->LookupResolvedType(); } - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + SetClassAsTypeInfo(instr, klass, /* is_exact= */ false); } void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) { @@ -643,36 +661,52 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) { ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::Class> resolved_class = instr->GetClass(); - if (IsAdmissible(resolved_class.Get())) { - instr->SetLoadedClassRTI(ReferenceTypeInfo::Create( - resolved_class, /* is_exact */ true)); + if (IsAdmissible(instr->GetClass().Get())) { + instr->SetValidLoadedClassRTI(); } instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true)); + ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact= */ true)); +} + +void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) { + ScopedObjectAccess soa(Thread::Current()); + if 
(IsAdmissible(instr->GetClass().Get())) { + instr->SetValidTargetClassRTI(); + } } void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) { instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo()); } +void ReferenceTypePropagation::RTPVisitor::VisitLoadMethodHandle(HLoadMethodHandle* instr) { + instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create( + handle_cache_->GetMethodHandleClassHandle(), + /* is_exact= */ true)); +} + +void ReferenceTypePropagation::RTPVisitor::VisitLoadMethodType(HLoadMethodType* instr) { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(handle_cache_->GetMethodTypeClassHandle(), /* is_exact= */ true)); +} + void ReferenceTypePropagation::RTPVisitor::VisitLoadString(HLoadString* instr) { instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true)); + ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact= */ true)); } void ReferenceTypePropagation::RTPVisitor::VisitLoadException(HLoadException* instr) { DCHECK(instr->GetBlock()->IsCatchBlock()); TryCatchInformation* catch_info = instr->GetBlock()->GetTryCatchInformation(); - if (catch_info->IsCatchAllTypeIndex()) { - instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact */ false)); - } else { + if (catch_info->IsValidTypeIndex()) { UpdateReferenceTypeInfo(instr, catch_info->GetCatchTypeIndex(), catch_info->GetCatchDexFile(), - /* is_exact */ false); + /* is_exact= */ false); + } else { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact= */ false)); } } @@ -702,7 +736,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { // bound type is dead. To not confuse potential other optimizations, we mark // the bound as non-exact. instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact= */ false)); } } else { // Object not typed yet. Leave BoundType untyped for now rather than @@ -719,8 +753,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be @@ -729,12 +761,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); - if (class_rti.IsValid()) { + ScopedObjectAccess soa(Thread::Current()); + Handle<mirror::Class> klass = check_cast->GetClass(); + if (IsAdmissible(klass.Get())) { DCHECK(is_first_run_); - ScopedObjectAccess soa(Thread::Current()); + check_cast->SetValidTargetClassRTI(); // This is the first run of RTP and class is resolved. - bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes(); - bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact), + bool is_exact = klass->CannotBeAssignedFromOtherTypes(); + bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact), /* CheckCast succeeds for nulls. 
*/ true); } else { // This is the first run of RTP and class is unresolved. Remove the binding. @@ -880,7 +914,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) { ScopedObjectAccess soa(Thread::Current()); ArtMethod* method = instr->GetResolvedMethod(); ObjPtr<mirror::Class> klass = (method == nullptr) ? nullptr : method->LookupResolvedReturnType(); - SetClassAsTypeInfo(instr, klass, /* is_exact */ false); + SetClassAsTypeInfo(instr, klass, /* is_exact= */ false); } void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) { @@ -913,7 +947,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateBoundType(HBoundType* instr) { // bound type is dead. To not confuse potential other optimizations, we mark // the bound as non-exact. instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact */ false)); + ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact= */ false)); } } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index fd4dad2b45..7c6a048444 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -40,7 +40,7 @@ class ReferenceTypePropagation : public HOptimization { // Visit a single instruction. void Visit(HInstruction* instruction); - void Run() OVERRIDE; + bool Run() override; // Returns true if klass is admissible to the propagation: non-null and resolved. // For an array type, we also check if the component type is admissible. @@ -75,6 +75,8 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo::TypeHandle GetObjectClassHandle(); ReferenceTypeInfo::TypeHandle GetClassClassHandle(); + ReferenceTypeInfo::TypeHandle GetMethodHandleClassHandle(); + ReferenceTypeInfo::TypeHandle GetMethodTypeClassHandle(); ReferenceTypeInfo::TypeHandle GetStringClassHandle(); ReferenceTypeInfo::TypeHandle GetThrowableClassHandle(); @@ -83,6 +85,8 @@ class ReferenceTypePropagation : public HOptimization { ReferenceTypeInfo::TypeHandle object_class_handle_; ReferenceTypeInfo::TypeHandle class_class_handle_; + ReferenceTypeInfo::TypeHandle method_handle_class_handle_; + ReferenceTypeInfo::TypeHandle method_type_class_handle_; ReferenceTypeInfo::TypeHandle string_class_handle_; ReferenceTypeInfo::TypeHandle throwable_class_handle_; }; diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 27f9ac3990..b1f0a1add9 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -280,16 +280,16 @@ size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize( LocationSummary* locations = instruction->GetLocations(); if (locations->OnlyCallsOnSlowPath()) { size_t core_spills = - codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true); + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ true); size_t fp_spills = - codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false); + codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ false); size_t spill_size = core_register_spill_size * core_spills + fp_register_spill_size * fp_spills; maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size); } else if (locations->CallsOnMainAndSlowPath()) { // Nothing to spill on the slow path if the main path already clobbers caller-saves. 
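A pattern worth calling out: across the patch, comments on literal arguments move from "/* name */" to "/* name= */", the form that argument-comment checkers such as clang-tidy's bugprone-argument-comment can match against the callee's parameter names. A one-line illustration with a made-up callee:

    // Hypothetical call; the comment names the parameter the literal is bound to.
    codegen->EmitSpillSlots(/* core_registers= */ true);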
- DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true)); - DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false)); + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ true)); + DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ false)); } } return maximum_safepoint_spill_size; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index fa7ad82316..42e6498148 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1183,7 +1183,7 @@ static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNo void ColoringIteration::BuildInterferenceGraph( const ScopedArenaVector<LiveInterval*>& intervals, const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); + DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. (That is, we // jump from endpoint to endpoint, maintaining a set of intervals live at each @@ -1208,7 +1208,7 @@ void ColoringIteration::BuildInterferenceGraph( if (range != nullptr) { InterferenceNode* node = new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.Insert(std::make_pair(sibling, node)); + interval_node_map_.insert(std::make_pair(sibling, node)); if (sibling->HasRegister()) { // Fixed nodes should alias the canonical node for the corresponding register. @@ -1303,7 +1303,7 @@ void ColoringIteration::FindCoalesceOpportunities() { // Coalesce siblings. 
LiveInterval* next_sibling = interval->GetNextSibling(); if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.Find(next_sibling); + auto it = interval_node_map_.find(next_sibling); if (it != interval_node_map_.end()) { InterferenceNode* sibling_node = it->second; CreateCoalesceOpportunity(node, @@ -1318,7 +1318,7 @@ void ColoringIteration::FindCoalesceOpportunities() { if (parent->HasRegister() && parent->GetNextSibling() == interval && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.Find(parent); + auto it = interval_node_map_.find(parent); if (it != interval_node_map_.end()) { InterferenceNode* parent_node = it->second; CreateCoalesceOpportunity(node, @@ -1341,7 +1341,7 @@ void ColoringIteration::FindCoalesceOpportunities() { size_t position = predecessor->GetLifetimeEnd() - 1; LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); if (existing != nullptr) { - auto it = interval_node_map_.Find(existing); + auto it = interval_node_map_.find(existing); if (it != interval_node_map_.end()) { InterferenceNode* existing_node = it->second; CreateCoalesceOpportunity(node, @@ -1364,7 +1364,7 @@ void ColoringIteration::FindCoalesceOpportunities() { size_t position = predecessors[i]->GetLifetimeEnd() - 1; LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); @@ -1380,7 +1380,7 @@ void ColoringIteration::FindCoalesceOpportunities() { = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); // TODO: Could we consider lifetime holes here? if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, @@ -1407,7 +1407,7 @@ void ColoringIteration::FindCoalesceOpportunities() { LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); if (input_interval != nullptr && input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.Find(input_interval); + auto it = interval_node_map_.find(input_interval); if (it != interval_node_map_.end()) { InterferenceNode* input_node = it->second; CreateCoalesceOpportunity(node, diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index 3072c92e0f..f0e7e55863 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -90,11 +90,11 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { CodeGenerator* codegen, const SsaLivenessAnalysis& analysis, bool iterative_move_coalescing = true); - ~RegisterAllocatorGraphColor() OVERRIDE; + ~RegisterAllocatorGraphColor() override; - void AllocateRegisters() OVERRIDE; + void AllocateRegisters() override; - bool Validate(bool log_fatal_on_failure); + bool Validate(bool log_fatal_on_failure) override; private: // Collect all intervals and prepare for register allocation. 
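The graph-coloring hunks above switch interval_node_map_ from Empty()/Insert()/Find() to lowercase empty()/insert()/find(), i.e. the arena hash map now follows the std::unordered_map naming; the resulting lookup idiom, with types as they appear in the patch:

    // std-style lookup on the interval-to-node map.
    auto it = interval_node_map_.find(input_interval);
    if (it != interval_node_map_.end()) {
      InterferenceNode* input_node = it->second;
      // ... record a coalesce opportunity between the current node and input_node ...
    }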
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 216fb57a96..0d6c5a3eff 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -252,7 +252,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { - interval->AddHighInterval(/* is_temp */ true); + interval->AddHighInterval(/* is_temp= */ true); LiveInterval* high = interval->GetHighInterval(); temp_intervals_.push_back(high); unhandled_fp_intervals_.push_back(high); @@ -284,7 +284,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) } if (locations->WillCall()) { - BlockRegisters(position, position + 1, /* caller_save_only */ true); + BlockRegisters(position, position + 1, /* caller_save_only= */ true); } for (size_t i = 0; i < locations->GetInputCount(); ++i) { @@ -312,7 +312,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); + size_t safepoint_position = SafepointPosition::ComputePosition(safepoint); // Test that safepoints are ordered in the optimal way. DCHECK(safepoint_index == safepoints_.size() || diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index 36788b7c3c..4d445c7ff7 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -42,11 +42,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { RegisterAllocatorLinearScan(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& analysis); - ~RegisterAllocatorLinearScan() OVERRIDE; + ~RegisterAllocatorLinearScan() override; - void AllocateRegisters() OVERRIDE; + void AllocateRegisters() override; - bool Validate(bool log_fatal_on_failure) OVERRIDE { + bool Validate(bool log_fatal_on_failure) override { processing_core_registers_ = true; if (!ValidateInternal(log_fatal_on_failure)) { return false; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index a70b0664dc..79eb082cd7 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -40,6 +40,12 @@ using Strategy = RegisterAllocator::Strategy; class RegisterAllocatorTest : public OptimizingUnitTest { protected: + void SetUp() override { + // This test is using the x86 ISA. + OverrideInstructionSetFeatures(InstructionSet::kX86, "default"); + OptimizingUnitTest::SetUp(); + } + // These functions need to access private variables of LocationSummary, so we declare it // as a member of RegisterAllocatorTest, which we make a friend class. 
void SameAsFirstInputHint(Strategy strategy); @@ -62,11 +68,11 @@ class RegisterAllocatorTest : public OptimizingUnitTest { bool ValidateIntervals(const ScopedArenaVector<LiveInterval*>& intervals, const CodeGenerator& codegen) { return RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), - /* number_of_spill_slots */ 0u, - /* number_of_out_slots */ 0u, + /* number_of_spill_slots= */ 0u, + /* number_of_out_slots= */ 0u, codegen, - /* processing_core_registers */ true, - /* log_fatal_on_failure */ false); + /* processing_core_registers= */ true, + /* log_fatal_on_failure= */ false); } }; @@ -81,9 +87,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) { HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -98,9 +102,7 @@ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy st */ TEST_F(RegisterAllocatorTest, ValidateIntervals) { HGraph* graph = CreateGraph(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); ScopedArenaVector<LiveInterval*> intervals(GetScopedAllocator()->Adapter()); // Test with two intervals of the same range. @@ -324,9 +326,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) { Instruction::GOTO | 0xF900); HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -359,9 +359,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) { Instruction::RETURN_VOID); HGraph* graph = CreateCFG(data); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -412,9 +410,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) { HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::unique_ptr<RegisterAllocator> register_allocator = @@ -438,9 +434,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - 
X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); RegisterAllocatorLinearScan register_allocator(GetScopedAllocator(), &codegen, liveness); @@ -566,9 +560,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -584,9 +576,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -604,9 +594,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -624,9 +612,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) { { HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -689,9 +675,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { { HGraph* graph = BuildFieldReturn(&field, &ret); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -705,9 +689,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { { HGraph* graph = BuildFieldReturn(&field, &ret); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -761,9 +743,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { { HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 
codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -778,9 +758,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { { HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -834,9 +812,7 @@ HGraph* RegisterAllocatorTest::BuildDiv(HInstruction** div) { void RegisterAllocatorTest::ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) { HInstruction *div; HGraph* graph = BuildDiv(&div); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); @@ -896,9 +872,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // Create an interval with lifetime holes. static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), -1, one); - first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8)); - first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 7)); - first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 6)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 8)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 7)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 6)); locations = new (GetAllocator()) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); @@ -919,9 +895,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // before lifetime position 6 yet. 
static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), GetScopedAllocator(), -1, three); - third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8)); - third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 4)); - third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 3)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 8)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 4)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 3)); locations = new (GetAllocator()) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); third = third->SplitAt(3); @@ -934,9 +910,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { new (GetAllocator()) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + x86::CodeGeneratorX86 codegen(graph, *compiler_options_); SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 32; ++i) { diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index bb28d50b56..fdef45ec8b 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -70,19 +70,19 @@ static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) { return false; } -size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const { +size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const { DCHECK(heap_location_collector_ != nullptr); - size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(array, index); + size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction); // This array access should be analyzed and added to HeapLocationCollector before. 
DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); return heap_loc; } -bool SchedulingGraph::ArrayAccessMayAlias(const HInstruction* node, - const HInstruction* other) const { +bool SchedulingGraph::ArrayAccessMayAlias(HInstruction* node, + HInstruction* other) const { DCHECK(heap_location_collector_ != nullptr); - size_t node_heap_loc = ArrayAccessHeapLocation(node->InputAt(0), node->InputAt(1)); - size_t other_heap_loc = ArrayAccessHeapLocation(other->InputAt(0), other->InputAt(1)); + size_t node_heap_loc = ArrayAccessHeapLocation(node); + size_t other_heap_loc = ArrayAccessHeapLocation(other); // For example: arr[0] and arr[0] if (node_heap_loc == other_heap_loc) { @@ -194,8 +194,8 @@ bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node, return true; } -bool SchedulingGraph::HasMemoryDependency(const HInstruction* node, - const HInstruction* other) const { +bool SchedulingGraph::HasMemoryDependency(HInstruction* node, + HInstruction* other) const { if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) { return false; } @@ -264,8 +264,8 @@ bool SchedulingGraph::HasExceptionDependency(const HInstruction* node, // Check whether `node` depends on `other`, taking into account `SideEffect` // information and `CanThrow` information. -bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node, - const HInstruction* other) const { +bool SchedulingGraph::HasSideEffectDependency(HInstruction* node, + HInstruction* other) const { if (HasMemoryDependency(node, other)) { return true; } @@ -280,6 +280,23 @@ bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node, return false; } +// Check if the specified instruction is a better candidate which more likely will +// have other instructions depending on it. +static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate, + HInstruction* old_candidate) { + if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) { + // Weaker side effects. + return false; + } + if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) { + // Same side effects, check if `new_candidate` has stronger `CanThrow()`. + return new_candidate->CanThrow() && !old_candidate->CanThrow(); + } else { + // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`. + return new_candidate->CanThrow() || !old_candidate->CanThrow(); + } +} + void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) { SchedulingNode* instruction_node = GetNode(instruction); @@ -331,6 +348,7 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul // Side effect dependencies. if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) { + HInstruction* dep_chain_candidate = nullptr; for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) { SchedulingNode* other_node = GetNode(other); if (other_node->IsSchedulingBarrier()) { @@ -340,7 +358,18 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul break; } if (HasSideEffectDependency(other, instruction)) { - AddOtherDependency(other_node, instruction_node); + if (dep_chain_candidate != nullptr && + HasSideEffectDependency(other, dep_chain_candidate)) { + // Skip an explicit dependency to reduce memory usage, rely on the transitive dependency. 
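A self-contained sketch (toy ToySideEffects/ToyInstruction types, not ART code) of the candidate-update rule above, so the three cases of IsBetterCandidateWithMoreLikelyDependencies can be exercised in isolation: a new candidate must cover at least the old candidate's side effects, and ties on side effects are broken by CanThrow().

#include <cassert>
#include <cstdint>

// Stand-in for ART's SideEffects: a bitmask where Includes() means "covers at
// least everything the other mask covers".
struct ToySideEffects {
  uint64_t bits;
  bool Includes(ToySideEffects other) const { return (bits & other.bits) == other.bits; }
};

struct ToyInstruction {
  ToySideEffects side_effects;
  bool can_throw;
};

// Mirrors the rule in the patch: weaker side effects never win; equal side
// effects win only with a strictly stronger CanThrow(); strictly stronger side
// effects win as long as CanThrow() is not weaker.
bool IsBetterCandidate(const ToyInstruction& new_c, const ToyInstruction& old_c) {
  if (!new_c.side_effects.Includes(old_c.side_effects)) {
    return false;
  }
  if (old_c.side_effects.Includes(new_c.side_effects)) {
    return new_c.can_throw && !old_c.can_throw;
  }
  return new_c.can_throw || !old_c.can_throw;
}

int main() {
  ToyInstruction array_set{{0b11}, /*can_throw=*/false};  // writes two "locations"
  ToyInstruction field_set{{0b01}, /*can_throw=*/false};  // writes one of them
  ToyInstruction null_check{{0b01}, /*can_throw=*/true};  // same effects, can throw
  assert(IsBetterCandidate(array_set, field_set));   // superset of effects wins
  assert(IsBetterCandidate(null_check, field_set));  // equal effects, throwing wins
  assert(!IsBetterCandidate(field_set, array_set));  // weaker effects never win
  return 0;
}

The pruning stays correct because an explicit edge is only skipped when `other` already depends on the candidate, which in turn depends on `instruction` either directly or through an earlier candidate; preferring the candidate with the strongest effects simply makes that situation more frequent.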
+ } else { + AddOtherDependency(other_node, instruction_node); + } + // Check if `other` is a better candidate which more likely will have other instructions + // depending on it. + if (dep_chain_candidate == nullptr || + IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) { + dep_chain_candidate = other; + } } } } @@ -545,60 +574,67 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN void HScheduler::Schedule(HGraph* graph) { // We run lsa here instead of in a separate pass to better control whether we // should run the analysis or not. + const HeapLocationCollector* heap_location_collector = nullptr; LoadStoreAnalysis lsa(graph); if (!only_optimize_loop_blocks_ || graph->HasLoops()) { lsa.Run(); - scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector()); + heap_location_collector = &lsa.GetHeapLocationCollector(); } for (HBasicBlock* block : graph->GetReversePostOrder()) { if (IsSchedulable(block)) { - Schedule(block); + Schedule(block, heap_location_collector); } } } -void HScheduler::Schedule(HBasicBlock* block) { - ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator_->Adapter(kArenaAllocScheduler)); +void HScheduler::Schedule(HBasicBlock* block, + const HeapLocationCollector* heap_location_collector) { + ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack()); + ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler)); // Build the scheduling graph. - scheduling_graph_.Clear(); + SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector); for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) << instruction->DebugName() << " is in block " << instruction->GetBlock()->GetBlockId() << ", and expected in block " << block->GetBlockId(); - SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction)); + SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction)); CalculateLatency(node); scheduling_nodes.push_back(node); } - if (scheduling_graph_.Size() <= 1) { - scheduling_graph_.Clear(); + if (scheduling_graph.Size() <= 1) { return; } cursor_ = block->GetLastInstruction(); + // The list of candidates for scheduling. A node becomes a candidate when all + // its predecessors have been scheduled. + ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler)); + // Find the initial candidates for scheduling. - candidates_.clear(); for (SchedulingNode* node : scheduling_nodes) { if (!node->HasUnscheduledSuccessors()) { node->MaybeUpdateCriticalPath(node->GetLatency()); - candidates_.push_back(node); + candidates.push_back(node); } } - ScopedArenaVector<SchedulingNode*> initial_candidates(allocator_->Adapter(kArenaAllocScheduler)); + ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler)); if (kDumpDotSchedulingGraphs) { // Remember the list of initial candidates for debug output purposes. - initial_candidates.assign(candidates_.begin(), candidates_.end()); + initial_candidates.assign(candidates.begin(), candidates.end()); } // Schedule all nodes. 
- while (!candidates_.empty()) { - Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_)); + selector_->Reset(); + while (!candidates.empty()) { + SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph); + Schedule(node, &candidates); } if (kDumpDotSchedulingGraphs) { @@ -607,11 +643,12 @@ void HScheduler::Schedule(HBasicBlock* block) { std::stringstream description; description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx()) << " B" << block->GetBlockId(); - scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates); + scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates); } } -void HScheduler::Schedule(SchedulingNode* scheduling_node) { +void HScheduler::Schedule(SchedulingNode* scheduling_node, + /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) { // Check whether any of the node's predecessors will be valid candidates after // this node is scheduled. uint32_t path_to_node = scheduling_node->GetCriticalPath(); @@ -620,7 +657,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) { path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency()); predecessor->DecrementNumberOfUnscheduledSuccessors(); if (!predecessor->HasUnscheduledSuccessors()) { - candidates_.push_back(predecessor); + candidates->push_back(predecessor); } } for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) { @@ -630,7 +667,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) { // correctness. So we do not use them to compute the critical path. predecessor->DecrementNumberOfUnscheduledSuccessors(); if (!predecessor->HasUnscheduledSuccessors()) { - candidates_.push_back(predecessor); + candidates->push_back(predecessor); } } @@ -643,7 +680,7 @@ static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) { DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction()); DCHECK(!instruction->IsControlFlow()); DCHECK(!cursor->IsControlFlow()); - instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false); + instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false); } void HScheduler::Schedule(HInstruction* instruction) { @@ -667,7 +704,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { // HUnaryOperation (or HBinaryOperation), check in debug mode that we have // the exhaustive lists here. if (instruction->IsUnaryOperation()) { - DCHECK(instruction->IsBooleanNot() || + DCHECK(instruction->IsAbs() || + instruction->IsBooleanNot() || instruction->IsNot() || instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName(); return true; @@ -678,6 +716,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsCompare() || instruction->IsCondition() || instruction->IsDiv() || + instruction->IsMin() || + instruction->IsMax() || instruction->IsMul() || instruction->IsOr() || instruction->IsRem() || @@ -771,12 +811,11 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const { instr->IsSuspendCheck(); } -void HInstructionScheduling::Run(bool only_optimize_loop_blocks, +bool HInstructionScheduling::Run(bool only_optimize_loop_blocks, bool schedule_randomly) { #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm) // Phase-local allocator that allocates scheduler internal data structures like // scheduling nodes, internel nodes map, dependencies, etc. 
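The phase-local allocator mentioned in the comment above is removed from Run() because each basic block now gets its own arena inside HScheduler::Schedule(), so the scheduling graph and candidate list no longer outlive the block they were built for. A self-contained sketch of that scoping pattern, using std::pmr as a stand-in for ART's ScopedArenaAllocator:

#include <memory_resource>
#include <vector>

struct ToyBlock { int id; };
struct ToyNode  { int instruction; };

// The per-block data structures are allocated from an arena created inside the
// loop body, so their memory is reclaimed before the next block is scheduled
// instead of accumulating for the whole pass.
void SchedulePass(const std::vector<ToyBlock>& blocks) {
  for (const ToyBlock& block : blocks) {
    std::pmr::monotonic_buffer_resource arena;            // block-local "arena"
    std::pmr::vector<ToyNode> scheduling_nodes(&arena);   // like scheduling_nodes
    std::pmr::vector<ToyNode*> candidates(&arena);        // like candidates
    scheduling_nodes.push_back(ToyNode{block.id});
    candidates.push_back(&scheduling_nodes.back());
    // ... build the per-block scheduling graph and schedule its nodes ...
  }  // the arena and everything allocated from it are released here
}

int main() {
  SchedulePass({{0}, {1}, {2}});
  return 0;
}

Because the graph is now rebuilt per block, the node selector is also Reset() before each block, so state such as prev_select_ cannot leak from one block to the next.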
- ScopedArenaAllocator allocator(graph_->GetArenaStack()); CriticalPathSchedulingNodeSelector critical_path_selector; RandomSchedulingNodeSelector random_selector; SchedulingNodeSelector* selector = schedule_randomly @@ -792,7 +831,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { - arm64::HSchedulerARM64 scheduler(&allocator, selector); + arm64::HSchedulerARM64 scheduler(selector); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; @@ -802,7 +841,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, case InstructionSet::kThumb2: case InstructionSet::kArm: { arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_); - arm::HSchedulerARM scheduler(&allocator, selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(selector, &arm_latency_visitor); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; @@ -811,6 +850,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, default: break; } + return true; } } // namespace art diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index dfa077f7de..d2dbeca924 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -23,7 +23,6 @@ #include "base/scoped_arena_containers.h" #include "base/time_utils.h" #include "code_generator.h" -#include "driver/compiler_driver.h" #include "load_store_analysis.h" #include "nodes.h" #include "optimization.h" @@ -251,34 +250,27 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { */ class SchedulingGraph : public ValueObject { public: - SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator) + SchedulingGraph(const HScheduler* scheduler, + ScopedArenaAllocator* allocator, + const HeapLocationCollector* heap_location_collector) : scheduler_(scheduler), allocator_(allocator), contains_scheduling_barrier_(false), nodes_map_(allocator_->Adapter(kArenaAllocScheduler)), - heap_location_collector_(nullptr) {} + heap_location_collector_(heap_location_collector) {} SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) { std::unique_ptr<SchedulingNode> node( new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier)); SchedulingNode* result = node.get(); - nodes_map_.Insert(std::make_pair(instr, std::move(node))); + nodes_map_.insert(std::make_pair(instr, std::move(node))); contains_scheduling_barrier_ |= is_scheduling_barrier; AddDependencies(instr, is_scheduling_barrier); return result; } - void Clear() { - nodes_map_.Clear(); - contains_scheduling_barrier_ = false; - } - - void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) { - heap_location_collector_ = &heap_location_collector; - } - SchedulingNode* GetNode(const HInstruction* instr) const { - auto it = nodes_map_.Find(instr); + auto it = nodes_map_.find(instr); if (it == nodes_map_.end()) { return nullptr; } else { @@ -294,7 +286,7 @@ class SchedulingGraph : public ValueObject { bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const; size_t Size() const { - return nodes_map_.Size(); + return nodes_map_.size(); } // Dump the scheduling graph, in dot file format, appending it to the file @@ -310,12 +302,12 @@ class SchedulingGraph : public ValueObject { void AddOtherDependency(SchedulingNode* node, SchedulingNode* 
dependency) { AddDependency(node, dependency, /*is_data_dependency*/false); } - bool HasMemoryDependency(const HInstruction* node, const HInstruction* other) const; + bool HasMemoryDependency(HInstruction* node, HInstruction* other) const; bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const; - bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) const; - bool ArrayAccessMayAlias(const HInstruction* node, const HInstruction* other) const; + bool HasSideEffectDependency(HInstruction* node, HInstruction* other) const; + bool ArrayAccessMayAlias(HInstruction* node, HInstruction* other) const; bool FieldAccessMayAlias(const HInstruction* node, const HInstruction* other) const; - size_t ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const; + size_t ArrayAccessHeapLocation(HInstruction* instruction) const; size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const; // Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects. @@ -329,7 +321,7 @@ class SchedulingGraph : public ValueObject { ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_; - const HeapLocationCollector* heap_location_collector_; + const HeapLocationCollector* const heap_location_collector_; }; /* @@ -346,7 +338,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { last_visited_latency_(0), last_visited_internal_latency_(0) {} - void VisitInstruction(HInstruction* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) override { LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". " "Architecture-specific scheduling latency visitors must handle all instructions" " (potentially by overriding the generic `VisitInstruction()`."; @@ -377,6 +369,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> { public: + virtual void Reset() {} virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) = 0; virtual ~SchedulingNodeSelector() {} @@ -398,7 +391,7 @@ class RandomSchedulingNodeSelector : public SchedulingNodeSelector { } SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, - const SchedulingGraph& graph) OVERRIDE { + const SchedulingGraph& graph) override { UNUSED(graph); DCHECK(!nodes->empty()); size_t select = rand_r(&seed_) % nodes->size(); @@ -418,8 +411,9 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { public: CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {} + void Reset() override { prev_select_ = nullptr; } SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, - const SchedulingGraph& graph) OVERRIDE; + const SchedulingGraph& graph) override; protected: SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate, @@ -434,16 +428,11 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { class HScheduler { public: - HScheduler(ScopedArenaAllocator* allocator, - SchedulingLatencyVisitor* latency_visitor, - SchedulingNodeSelector* selector) - : allocator_(allocator), - latency_visitor_(latency_visitor), + HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector) + : latency_visitor_(latency_visitor), selector_(selector), only_optimize_loop_blocks_(true), - scheduling_graph_(this, allocator), - 
cursor_(nullptr), - candidates_(allocator_->Adapter(kArenaAllocScheduler)) {} + cursor_(nullptr) {} virtual ~HScheduler() {} void Schedule(HGraph* graph); @@ -454,8 +443,9 @@ class HScheduler { virtual bool IsSchedulingBarrier(const HInstruction* instruction) const; protected: - void Schedule(HBasicBlock* block); - void Schedule(SchedulingNode* scheduling_node); + void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector); + void Schedule(SchedulingNode* scheduling_node, + /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates); void Schedule(HInstruction* instruction); // Any instruction returning `false` via this method will prevent its @@ -476,19 +466,12 @@ class HScheduler { node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency()); } - ScopedArenaAllocator* const allocator_; SchedulingLatencyVisitor* const latency_visitor_; SchedulingNodeSelector* const selector_; bool only_optimize_loop_blocks_; - // We instantiate the members below as part of this class to avoid - // instantiating them locally for every chunk scheduled. - SchedulingGraph scheduling_graph_; // A pointer indicating where the next instruction to be scheduled will be inserted. HInstruction* cursor_; - // The list of candidates for scheduling. A node becomes a candidate when all - // its predecessors have been scheduled. - ScopedArenaVector<SchedulingNode*> candidates_; private: DISALLOW_COPY_AND_ASSIGN(HScheduler); @@ -508,10 +491,11 @@ class HInstructionScheduling : public HOptimization { codegen_(cg), instruction_set_(instruction_set) {} - void Run() { - Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false); + bool Run() override { + return Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false); } - void Run(bool only_optimize_loop_blocks, bool schedule_randomly); + + bool Run(bool only_optimize_loop_blocks, bool schedule_randomly); static constexpr const char* kInstructionSchedulingPassName = "scheduler"; diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 8dcadaad2e..858a555e97 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -563,7 +563,7 @@ void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* last_visited_internal_latency_ = kArmIntegerOpLatency; last_visited_latency_ = kArmIntegerOpLatency; } else { - HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(/* internal_latency= */ true); HandleGenerateDataProcInstruction(); } } @@ -585,8 +585,8 @@ void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifte DCHECK_LT(shift_value, 32U); if (kind == HInstruction::kOr || kind == HInstruction::kXor) { - HandleGenerateDataProcInstruction(/* internal_latency */ true); - HandleGenerateDataProcInstruction(/* internal_latency */ true); + HandleGenerateDataProcInstruction(/* internal_latency= */ true); + HandleGenerateDataProcInstruction(/* internal_latency= */ true); HandleGenerateDataProcInstruction(); } else { last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; @@ -679,7 +679,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { } else { last_visited_internal_latency_ += kArmIntegerOpLatency; } - last_visited_internal_latency_ = kArmMemoryLoadLatency; + last_visited_latency_ = kArmMemoryLoadLatency; } } break; diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 0cb8684376..4c7a3bb4d6 100644 
--- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -55,7 +55,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} // Default visitor for instructions not handled specifically below. - void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { + void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { last_visited_latency_ = kArmIntegerOpLatency; } @@ -100,7 +100,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ - void Visit##type(H##type* instruction) OVERRIDE; + void Visit##type(H##type* instruction) override; FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) @@ -137,13 +137,12 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { class HSchedulerARM : public HScheduler { public: - HSchedulerARM(ScopedArenaAllocator* allocator, - SchedulingNodeSelector* selector, + HSchedulerARM(SchedulingNodeSelector* selector, SchedulingLatencyVisitorARM* arm_latency_visitor) - : HScheduler(allocator, arm_latency_visitor, selector) {} - ~HSchedulerARM() OVERRIDE {} + : HScheduler(arm_latency_visitor, selector) {} + ~HSchedulerARM() override {} - bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { + bool IsSchedulable(const HInstruction* instruction) const override { #define CASE_INSTRUCTION_KIND(type, unused) case \ HInstruction::InstructionKind::k##type: switch (instruction->GetKind()) { diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index f71cb5b784..ba5a743545 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -58,7 +58,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. 
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { + void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { last_visited_latency_ = kArm64IntegerOpLatency; } @@ -68,12 +68,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(ArrayGet , unused) \ M(ArrayLength , unused) \ M(ArraySet , unused) \ - M(BinaryOperation , unused) \ M(BoundsCheck , unused) \ M(Div , unused) \ M(InstanceFieldGet , unused) \ M(InstanceOf , unused) \ - M(Invoke , unused) \ M(LoadString , unused) \ M(Mul , unused) \ M(NewArray , unused) \ @@ -108,6 +106,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(VecLoad , unused) \ M(VecStore , unused) +#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \ + M(BinaryOperation , unused) \ + M(Invoke , unused) + #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ @@ -116,9 +118,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ - void Visit##type(H##type* instruction) OVERRIDE; + void Visit##type(H##type* instruction) override; FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) @@ -131,11 +134,11 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { class HSchedulerARM64 : public HScheduler { public: - HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector) - : HScheduler(allocator, &arm64_latency_visitor_, selector) {} - ~HSchedulerARM64() OVERRIDE {} + explicit HSchedulerARM64(SchedulingNodeSelector* selector) + : HScheduler(&arm64_latency_visitor_, selector) {} + ~HSchedulerARM64() override {} - bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { + bool IsSchedulable(const HInstruction* instruction) const override { #define CASE_INSTRUCTION_KIND(type, unused) case \ HInstruction::InstructionKind::k##type: switch (instruction->GetKind()) { @@ -157,7 +160,7 @@ class HSchedulerARM64 : public HScheduler { // SIMD&FP registers are callee saved) so don't reorder such vector instructions. // // TODO: remove this when a proper support of SIMD registers is introduced to the compiler. - bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE { + bool IsSchedulingBarrier(const HInstruction* instr) const override { return HScheduler::IsSchedulingBarrier(instr) || instr->IsVecReduce() || instr->IsVecExtractScalar() || diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index fb15fc8975..e0e265a04c 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -146,7 +146,9 @@ class SchedulerTest : public OptimizingUnitTest { environment->SetRawEnvAt(1, mul); mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); + SchedulingGraph scheduling_graph(scheduler, + GetScopedAllocator(), + /* heap_location_collector= */ nullptr); // Instructions must be inserted in reverse order into the scheduling graph. 
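The loop that follows inserts the block's instructions back to front. A toy model (plain std::map standing in for the scheduling graph's node map) of why the reverse order matters: AddNode() immediately wires side-effect dependencies toward nodes of later instructions, so those nodes must already exist when an earlier instruction is added.

#include <cassert>
#include <map>
#include <vector>

int main() {
  std::vector<int> block = {0, 1, 2, 3};  // instruction ids in block order
  std::map<int, bool> nodes;              // stand-in for nodes_map_
  for (auto it = block.rbegin(); it != block.rend(); ++it) {
    // Every instruction later in the block already has a node at this point,
    // so dependency edges toward it can be created right away.
    for (int later = *it + 1; later < static_cast<int>(block.size()); ++later) {
      assert(nodes.count(later) == 1);
    }
    nodes[*it] = true;  // "AddNode" for instruction *it
  }
  return 0;
}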
for (HInstruction* instr : ReverseRange(block_instructions)) { scheduling_graph.AddNode(instr); @@ -169,7 +171,9 @@ class SchedulerTest : public OptimizingUnitTest { ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); + // Unnecessary dependency is not stored, we rely on transitive dependencies. + // The array_set2 -> array_get2 -> array_set1 dependencies are tested above. + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1)); // Env dependency. ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul)); @@ -192,7 +196,9 @@ class SchedulerTest : public OptimizingUnitTest { HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true); + OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default"); RunCode(target_config, + *compiler_options_, graph, [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); }, has_result, expected); @@ -274,11 +280,10 @@ class SchedulerTest : public OptimizingUnitTest { entry->AddInstruction(instr); } - SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); HeapLocationCollector heap_location_collector(graph_); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); - scheduling_graph.SetHeapLocationCollector(heap_location_collector); + SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector); for (HInstruction* instr : ReverseRange(block_instructions)) { // Build scheduling graph with memory access aliasing information @@ -296,47 +301,53 @@ class SchedulerTest : public OptimizingUnitTest { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test side effect dependency: array[0] and array[1] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, c0); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, c1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_1, arr_set_0)); // Test side effect dependency based on LSA analysis: array[i] and array[j] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, j); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_j); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); + // Unnecessary dependency is not stored, we rely on transitive dependencies. + // The arr_set_j -> arr_set_sub0 -> arr_set_add0 -> arr_set_i dependencies are tested below. 
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i+0] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, add0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_add0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_add0, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i-0] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_sub0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i)); + // Unnecessary dependency is not stored, we rely on transitive dependencies. + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i)); + // Instead, we rely on arr_set_sub0 -> arr_set_add0 -> arr_set_i, the latter is tested above. + ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_add0)); // Test side effect dependency based on LSA analysis: array[i] and array[i+1] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, add1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_add1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_add1, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i+1] and array[i-1] - loc1 = heap_location_collector.GetArrayHeapLocation(arr, add1); - loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_add1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_sub1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub1, arr_set_add1)); // Test side effect dependency based on LSA analysis: array[j] and all others array accesses - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); - ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub0)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add1)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub1)); + // Unnecessary dependencies are not stored, we rely on transitive dependencies. 
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); + ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0)); // Test that ArraySet and FieldSet should not have side effect dependency ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_i, set_field10)); @@ -352,13 +363,13 @@ class SchedulerTest : public OptimizingUnitTest { #if defined(ART_ENABLE_CODEGEN_arm64) TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); + arm64::HSchedulerARM64 scheduler(&critical_path_selector); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); + arm64::HSchedulerARM64 scheduler(&critical_path_selector); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif @@ -367,14 +378,14 @@ TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 66e51421ca..dcc7f77fc2 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,6 +16,7 @@ #include "select_generator.h" +#include "base/scoped_arena_containers.h" #include "reference_type_propagation.h" namespace art { @@ -43,12 +44,18 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - if (num_instructions > kMaxInstructionsInBranch) { - return false; - } return instruction->IsGoto() || instruction->IsReturn(); - } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { - num_instructions++; + } else if (instruction->CanBeMoved() && + !instruction->HasSideEffects() && + !instruction->CanThrow()) { + if (instruction->IsSelect() && + instruction->AsSelect()->GetCondition()->GetBlock() == block) { + // Count one HCondition and HSelect in the same block as a single instruction. + // This enables finding nested selects. 
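An illustrative source-level shape (an assumed example, not taken from the patch) that this relaxed counting is aimed at: once the inner diamond for `b ? 2 : 3` has been rewritten, the outer branch block holds an HCondition plus its HSelect, and counting that pair as a single instruction keeps the block within kMaxInstructionsInBranch so the outer diamond can be rewritten into a nested HSelect as well.

int NestedSelect(bool a, bool b) {
  return a ? 1 : (b ? 2 : 3);  // the inner select feeds the outer one after two rewrites
}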
+ continue; + } else if (++num_instructions > kMaxInstructionsInBranch) { + return false; // bail as soon as we exceed number of allowed instructions + } } else { return false; } @@ -85,10 +92,15 @@ static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index return select_phi; } -void HSelectGenerator::Run() { +bool HSelectGenerator::Run() { + bool didSelect = false; + // Select cache with local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + ScopedArenaSafeMap<HInstruction*, HSelect*> cache( + std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator)); + // Iterate in post order in the unlikely case that removing one occurrence of // the selection pattern empties a branch block of another occurrence. - // Otherwise the order does not matter. for (HBasicBlock* block : graph_->GetPostOrder()) { if (!block->EndsWithIf()) continue; @@ -97,6 +109,7 @@ void HSelectGenerator::Run() { HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); DCHECK_NE(true_block, false_block); + if (!IsSimpleBlock(true_block) || !IsSimpleBlock(false_block) || !BlocksMergeTogether(true_block, false_block)) { @@ -107,11 +120,15 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. - if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { - true_block->GetFirstInstruction()->MoveBefore(if_instruction); + while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { + HInstruction* instr = true_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { - false_block->GetFirstInstruction()->MoveBefore(if_instruction); + while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { + HInstruction* instr = false_block->GetFirstInstruction(); + DCHECK(!instr->CanThrow()); + instr->MoveBefore(if_instruction); } DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); @@ -138,7 +155,8 @@ void HSelectGenerator::Run() { DCHECK(both_successors_return || phi != nullptr); // Create the Select instruction and insert it in front of the If. - HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0), + HInstruction* condition = if_instruction->InputAt(0); + HSelect* select = new (graph_->GetAllocator()) HSelect(condition, true_value, false_value, if_instruction->GetDexPc()); @@ -175,12 +193,34 @@ void HSelectGenerator::Run() { MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); + // Very simple way of finding common subexpressions in the generated HSelect statements + // (since this runs after GVN). Lookup by condition, and reuse latest one if possible + // (due to post order, latest select is most likely replacement). If needed, we could + // improve this by e.g. using the operands in the map as well. + auto it = cache.find(condition); + if (it == cache.end()) { + cache.Put(condition, select); + } else { + // Found cached value. See if latest can replace cached in the HIR. 
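A highly simplified, self-contained stand-in for the caching scheme described above (std::string keys instead of HInstruction pointers, and the strict-dominance requirement reduced to a comment): selects are keyed by their condition, an older identical select is folded into the newer one on a hit, and the newest select always replaces the cache entry.

#include <map>
#include <string>

struct ToySelect {
  std::string condition;
  std::string true_value;
  std::string false_value;
  bool folded = false;
};

void CacheAndFold(std::map<std::string, ToySelect*>& cache, ToySelect* select) {
  auto it = cache.find(select->condition);
  if (it != cache.end()) {
    ToySelect* cached = it->second;
    if (cached->true_value == select->true_value &&
        cached->false_value == select->false_value) {
      // ART additionally checks select->StrictlyDominates(cached) before folding.
      cached->folded = true;  // stands in for ReplaceWith() + RemoveInstruction()
    }
  }
  cache[select->condition] = select;  // always cache the latest select
}

int main() {
  std::map<std::string, ToySelect*> cache;
  ToySelect s1{"a == 0", "x", "y"};
  ToySelect s2{"a == 0", "x", "y"};
  CacheAndFold(cache, &s1);
  CacheAndFold(cache, &s2);
  return s1.folded ? 0 : 1;  // the earlier duplicate is folded into the later one
}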
+ HSelect* cached = it->second; + DCHECK_EQ(cached->GetCondition(), select->GetCondition()); + if (cached->GetTrueValue() == select->GetTrueValue() && + cached->GetFalseValue() == select->GetFalseValue() && + select->StrictlyDominates(cached)) { + cached->ReplaceWith(select); + cached->GetBlock()->RemoveInstruction(cached); + } + it->second = select; // always cache latest + } + // No need to update dominance information, as we are simplifying // a simple diamond shape, where the join block is merged with the // entry block. Any following blocks would have had the join block // as a dominator, and `MergeWith` handles changing that to the // entry block. + didSelect = true; } + return didSelect; } } // namespace art diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h index bda57fd5c8..2889166f60 100644 --- a/compiler/optimizing/select_generator.h +++ b/compiler/optimizing/select_generator.h @@ -68,7 +68,7 @@ class HSelectGenerator : public HOptimization { OptimizingCompilerStats* stats, const char* name = kSelectGeneratorPassName); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kSelectGeneratorPassName = "select_generator"; diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc new file mode 100644 index 0000000000..6e6549737c --- /dev/null +++ b/compiler/optimizing/select_generator_test.cc @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "select_generator.h" + +#include "base/arena_allocator.h" +#include "builder.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "side_effects_analysis.h" + +namespace art { + +class SelectGeneratorTest : public ImprovedOptimizingUnitTest { + public: + void ConstructBasicGraphForSelect(HInstruction* instr) { + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* then_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* else_block = new (GetAllocator()) HBasicBlock(graph_); + + graph_->AddBlock(if_block); + graph_->AddBlock(then_block); + graph_->AddBlock(else_block); + + entry_block_->ReplaceSuccessor(return_block_, if_block); + + if_block->AddSuccessor(then_block); + if_block->AddSuccessor(else_block); + then_block->AddSuccessor(return_block_); + else_block->AddSuccessor(return_block_); + + HParameterValue* bool_param = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 1, + DataType::Type::kBool); + entry_block_->AddInstruction(bool_param); + HIntConstant* const1 = graph_->GetIntConstant(1); + + if_block->AddInstruction(new (GetAllocator()) HIf(bool_param)); + + then_block->AddInstruction(instr); + then_block->AddInstruction(new (GetAllocator()) HGoto()); + + else_block->AddInstruction(new (GetAllocator()) HGoto()); + + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + return_block_->AddPhi(phi); + phi->AddInput(instr); + phi->AddInput(const1); + } + + bool CheckGraphAndTrySelectGenerator() { + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + SideEffectsAnalysis side_effects(graph_); + side_effects.Run(); + return HSelectGenerator(graph_, /*handles*/ nullptr, /*stats*/ nullptr).Run(); + } +}; + +// HDivZeroCheck might throw and should not be hoisted from the conditional to an unconditional. +TEST_F(SelectGeneratorTest, testZeroCheck) { + InitGraph(); + HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameter_, 0); + ConstructBasicGraphForSelect(instr); + + ArenaVector<HInstruction*> current_locals({parameter_, graph_->GetIntConstant(1)}, + GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(instr, ¤t_locals); + + EXPECT_FALSE(CheckGraphAndTrySelectGenerator()); +} + +// Test that SelectGenerator succeeds with HAdd. +TEST_F(SelectGeneratorTest, testAdd) { + InitGraph(); + HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, parameter_, 0); + ConstructBasicGraphForSelect(instr); + EXPECT_TRUE(CheckGraphAndTrySelectGenerator()); +} + +} // namespace art diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 1e49411c72..8637db13ad 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -21,7 +21,6 @@ #include "base/enums.h" #include "class_linker.h" #include "code_generator.h" -#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" #include "gc/heap.h" @@ -36,23 +35,6 @@ namespace art { -void HSharpening::Run() { - // We don't care about the order of the blocks here. 
- for (HBasicBlock* block : graph_->GetReversePostOrder()) { - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (instruction->IsInvokeStaticOrDirect()) { - SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), - codegen_, - compiler_driver_); - } - // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder - // here. Rewrite it to avoid the CompilerDriver's reliance on verifier data - // because we know the type better when inlining. - } - } -} - static bool IsInBootImage(ArtMethod* method) { const std::vector<gc::space::ImageSpace*>& image_spaces = Runtime::Current()->GetHeap()->GetBootImageSpaces(); @@ -65,34 +47,23 @@ static bool IsInBootImage(ArtMethod* method) { return false; } -static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) { - return IsInBootImage(method) && !options.GetCompilePic(); -} - -static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) { - DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); - if (!compiler_driver->GetSupportBootImageFixup()) { - return false; - } +static bool BootImageAOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& compiler_options) { + DCHECK(compiler_options.IsBootImage()); ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::Class> klass = method->GetDeclaringClass(); DCHECK(klass != nullptr); const DexFile& dex_file = klass->GetDexFile(); - return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); + return compiler_options.IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex())); } -void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver) { - if (invoke->IsStringInit()) { - // Not using the dex cache arrays. But we could still try to use a better dispatch... - // TODO: Use direct_method and direct_code for the appropriate StringFactory method. - return; +HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenInvokeStaticOrDirect( + ArtMethod* callee, CodeGenerator* codegen) { + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); // Required for GetDeclaringClass below. + DCHECK(callee != nullptr); + DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass())); } - ArtMethod* callee = invoke->GetResolvedMethod(); - DCHECK(callee != nullptr); - HInvokeStaticOrDirect::MethodLoadKind method_load_kind; HInvokeStaticOrDirect::CodePtrLocation code_ptr_location; uint64_t method_load_data = 0u; @@ -110,23 +81,34 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, // We don't optimize for debuggable as it would prevent us from obsoleting the method in some // situations. + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) { // Recursive call. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf; - } else if (Runtime::Current()->UseJitCompilation() || - AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) { + } else if (compiler_options.IsBootImage()) { + if (!compiler_options.GetCompilePic()) { + // Test configuration, do not sharpen. 
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; + } else if (BootImageAOTCanEmbedMethod(callee, compiler_options)) { + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + } else { + // Use PC-relative access to the .bss methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; + } + code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + } else if (Runtime::Current()->UseJitCompilation()) { // JIT or on-device AOT compilation referencing a boot image method. // Use the method address directly. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress; + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress; method_load_data = reinterpret_cast<uintptr_t>(callee); code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; - } else if (codegen->GetCompilerOptions().IsBootImage() && - BootImageAOTCanEmbedMethod(callee, compiler_driver)) { - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; + } else if (IsInBootImage(callee)) { + // Use PC-relative access to the .data.bimg.rel.ro methods array. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the .bss methods arrays. + // Use PC-relative access to the .bss methods array. method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } @@ -140,15 +122,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, HInvokeStaticOrDirect::DispatchInfo desired_dispatch_info = { method_load_kind, code_ptr_location, method_load_data }; - HInvokeStaticOrDirect::DispatchInfo dispatch_info = - codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke); - invoke->SetDispatchInfo(dispatch_info); + return codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, callee); } HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( HLoadClass* load_class, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || @@ -172,29 +151,29 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( bool is_in_boot_image = false; HLoadClass::LoadKind desired_load_kind = HLoadClass::LoadKind::kInvalid; Runtime* runtime = Runtime::Current(); - if (codegen->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { // Compiling boot image. Check if the class is a boot image class. DCHECK(!runtime->UseJitCompilation()); - if (!compiler_driver->GetSupportBootImageFixup()) { - // compiler_driver_test. Do not sharpen. + if (!compiler_options.GetCompilePic()) { + // Test configuration, do not sharpen. desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } else if ((klass != nullptr) && - compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { + compiler_options.IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative; } else { // Not a boot image class. 
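A self-contained sketch summarizing the method-load-kind choice implemented above (plain bools standing in for CompilerOptions and Runtime state, the recursive-call case omitted, and the two "is the callee an image method" tests collapsed into one flag):

enum class MethodLoadKind {
  kRuntimeCall,                  // non-PIC boot image, i.e. test configuration
  kBootImageLinkTimePcRelative,  // boot image method, patched at link time
  kBssEntry,                     // PC-relative load from the .bss methods array
  kJitDirectAddress,             // JIT: embed the ArtMethod pointer directly
  kBootImageRelRo,               // AOT app code calling a boot image method
};

MethodLoadKind ChooseMethodLoadKind(bool compiling_boot_image,
                                    bool compile_pic,
                                    bool callee_in_boot_image,
                                    bool use_jit) {
  if (compiling_boot_image) {
    if (!compile_pic) return MethodLoadKind::kRuntimeCall;
    return callee_in_boot_image ? MethodLoadKind::kBootImageLinkTimePcRelative
                                : MethodLoadKind::kBssEntry;
  }
  if (use_jit) return MethodLoadKind::kJitDirectAddress;
  return callee_in_boot_image ? MethodLoadKind::kBootImageRelRo
                              : MethodLoadKind::kBssEntry;
}

int main() {
  bool ok = ChooseMethodLoadKind(/*compiling_boot_image=*/false,
                                 /*compile_pic=*/true,
                                 /*callee_in_boot_image=*/true,
                                 /*use_jit=*/false) == MethodLoadKind::kBootImageRelRo;
  return ok ? 0 : 1;
}

The class and string load kinds chosen later in this file follow the same shape, with kBootImageRelRo replacing the previous split between the kBootImageClassTable/kBootImageInternTable and kBootImageAddress kinds for AOT app code.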
- DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); + DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); desired_load_kind = HLoadClass::LoadKind::kBssEntry; } } else { is_in_boot_image = (klass != nullptr) && runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get()); if (runtime->UseJitCompilation()) { - DCHECK(!codegen->GetCompilerOptions().GetCompilePic()); + DCHECK(!compiler_options.GetCompilePic()); if (is_in_boot_image) { - // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787 - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + desired_load_kind = HLoadClass::LoadKind::kJitBootImageAddress; } else if (klass != nullptr) { desired_load_kind = HLoadClass::LoadKind::kJitTableAddress; } else { @@ -206,11 +185,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( } } else if (is_in_boot_image) { // AOT app compilation, boot image class. - if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable; - } else { - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; - } + desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo; } else { // Not JIT and the klass is not in boot image. desired_load_kind = HLoadClass::LoadKind::kBssEntry; @@ -236,10 +211,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( return load_kind; } +static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, CodeGenerator* codegen) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(!klass->IsProxyClass()); + DCHECK(!klass->IsArrayClass()); + + if (Runtime::Current()->UseJitCompilation()) { + // If we're JITting, try to assign a type check bitstring (fall through). + } else if (codegen->GetCompilerOptions().IsBootImage()) { + const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()); + if (!codegen->GetCompilerOptions().IsImageClass(descriptor)) { + return false; + } + // If the target is a boot image class, try to assign a type check bitstring (fall through). + // (If --force-determinism, this was already done; repeating is OK and yields the same result.) + } else { + // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring + // already assigned in the boot image. + return false; + } + + // Try to assign a type check bitstring. + MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_); + if ((false) && // FIXME: Inliner does not respect CompilerDriver::ShouldCompileMethod() + // and we're hitting an unassigned bitstring in dex2oat_image_test. 
b/26687569 + kIsDebugBuild && + codegen->GetCompilerOptions().IsBootImage() && + codegen->GetCompilerOptions().IsForceDeterminism()) { + SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass); + CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed) + << klass->PrettyDescriptor() << "/" << old_state + << " in " << codegen->GetGraph()->PrettyMethod(); + } + SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass); + return state == SubtypeCheckInfo::kAssigned; +} + +TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + bool needs_access_check) { + if (klass == nullptr) { + return TypeCheckKind::kUnresolvedCheck; + } else if (klass->IsInterface()) { + return TypeCheckKind::kInterfaceCheck; + } else if (klass->IsArrayClass()) { + if (klass->GetComponentType()->IsObjectClass()) { + return TypeCheckKind::kArrayObjectCheck; + } else if (klass->CannotBeAssignedFromOtherTypes()) { + return TypeCheckKind::kExactCheck; + } else { + return TypeCheckKind::kArrayCheck; + } + } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes. + return TypeCheckKind::kExactCheck; + } else if (kBitstringSubtypeCheckEnabled && + !needs_access_check && + CanUseTypeCheckBitstring(klass, codegen)) { + // TODO: We should not need the `!needs_access_check` check but getting rid of that + // requires rewriting some optimizations in instruction simplifier. + return TypeCheckKind::kBitstringCheck; + } else if (klass->IsAbstract()) { + return TypeCheckKind::kAbstractClassCheck; + } else { + return TypeCheckKind::kClassHierarchyCheck; + } +} + void HSharpening::ProcessLoadString( HLoadString* load_string, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit, VariableSizedHandleScope* handles) { DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); @@ -258,17 +298,33 @@ void HSharpening::ProcessLoadString( : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); ObjPtr<mirror::String> string = nullptr; - if (codegen->GetCompilerOptions().IsBootImage()) { + const CompilerOptions& compiler_options = codegen->GetCompilerOptions(); + if (compiler_options.IsBootImage()) { // Compiling boot image. Resolve the string and allocate it if needed, to ensure // the string will be added to the boot image. DCHECK(!runtime->UseJitCompilation()); - string = class_linker->ResolveString(string_index, dex_cache); - CHECK(string != nullptr); - if (compiler_driver->GetSupportBootImageFixup()) { - DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); - desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; + if (compiler_options.GetCompilePic()) { + DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file)); + if (compiler_options.IsForceDeterminism()) { + // Strings for methods we're compiling should be pre-resolved but Strings in inlined + // methods may not be if these inlined methods are not in the boot image profile. + // Multiple threads allocating new Strings can cause non-deterministic boot image + // because of the image relying on the order of GC roots we walk. (We could fix that + // by ordering the roots we walk in ImageWriter.) Therefore we avoid allocating these + // strings even if that results in omitting them from the boot image and using the + // sub-optimal load kind kBssEntry. 
+ string = class_linker->LookupString(string_index, dex_cache.Get()); + } else { + string = class_linker->ResolveString(string_index, dex_cache); + CHECK(string != nullptr); + } + if (string != nullptr) { + desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; + } else { + desired_load_kind = HLoadString::LoadKind::kBssEntry; + } } else { - // compiler_driver_test. Do not sharpen. + // Test configuration, do not sharpen. desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { @@ -276,7 +332,7 @@ void HSharpening::ProcessLoadString( string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr) { if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + desired_load_kind = HLoadString::LoadKind::kJitBootImageAddress; } else { desired_load_kind = HLoadString::LoadKind::kJitTableAddress; } @@ -287,11 +343,7 @@ void HSharpening::ProcessLoadString( // AOT app compilation. Try to lookup the string without allocating if not found. string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { - if (codegen->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadString::LoadKind::kBootImageInternTable; - } else { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; - } + desired_load_kind = HLoadString::LoadKind::kBootImageRelRo; } else { desired_load_kind = HLoadString::LoadKind::kBssEntry; } diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index 6df7d6d91e..b81867201f 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -23,49 +23,33 @@ namespace art { class CodeGenerator; -class CompilerDriver; class DexCompilationUnit; -// Optimization that tries to improve the way we dispatch methods and access types, -// fields, etc. Besides actual method sharpening based on receiver type (for example -// virtual->direct), this includes selecting the best available dispatch for -// invoke-static/-direct based on code generator support. -class HSharpening : public HOptimization { +// Utility methods that try to improve the way we dispatch methods, and access +// types and strings. +class HSharpening { public: - HSharpening(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const char* name = kSharpeningPassName) - : HOptimization(graph, name), - codegen_(codegen), - compiler_driver_(compiler_driver) { } - - void Run() OVERRIDE; - - static constexpr const char* kSharpeningPassName = "sharpening"; - - // Used by the builder. - static void ProcessLoadString(HLoadString* load_string, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles); + // Used by the builder and InstructionSimplifier. + static HInvokeStaticOrDirect::DispatchInfo SharpenInvokeStaticOrDirect( + ArtMethod* callee, CodeGenerator* codegen); // Used by the builder and the inliner. static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, CodeGenerator* codegen, - CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_); - // Used by Sharpening and InstructionSimplifier. 
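The sharpening.h hunk here turns HSharpening from an HOptimization pass into a set of static helpers, so the builder, inliner and InstructionSimplifier call them directly instead of scheduling a pass. A minimal sketch of what a call site looks like against the new declarations; invoke, resolved_method, codegen, klass and needs_access_check are placeholder names for illustration, not identifiers taken from this patch:

    // Choose a dispatch for an invoke-static/-direct and attach it to the HIR node.
    HInvokeStaticOrDirect::DispatchInfo dispatch_info =
        HSharpening::SharpenInvokeStaticOrDirect(resolved_method, codegen);
    invoke->SetDispatchInfo(dispatch_info);

    // Classify an instanceof/checkcast target (requires the mutator lock, per the header).
    TypeCheckKind check_kind =
        HSharpening::ComputeTypeCheckKind(klass, codegen, needs_access_check);

SetDispatchInfo() is the same setter the removed pass code used above; only the place it is called from changes.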
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, - CodeGenerator* codegen, - CompilerDriver* compiler_driver); + // Used by the builder. + static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass, + CodeGenerator* codegen, + bool needs_access_check) + REQUIRES_SHARED(Locks::mutator_lock_); - private: - CodeGenerator* codegen_; - CompilerDriver* compiler_driver_; + // Used by the builder. + static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); }; } // namespace art diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc index 6d82e8e06d..ba97b43de9 100644 --- a/compiler/optimizing/side_effects_analysis.cc +++ b/compiler/optimizing/side_effects_analysis.cc @@ -18,7 +18,7 @@ namespace art { -void SideEffectsAnalysis::Run() { +bool SideEffectsAnalysis::Run() { // Inlining might have created more blocks, so we need to increase the size // if needed. block_effects_.resize(graph_->GetBlocks().size()); @@ -69,6 +69,7 @@ void SideEffectsAnalysis::Run() { } } has_run_ = true; + return true; } SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const { diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index c0f81a9c54..56a01e63f1 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -37,7 +37,7 @@ class SideEffectsAnalysis : public HOptimization { SideEffects GetBlockEffects(HBasicBlock* block) const; // Compute side effects of individual blocks and loops. - void Run(); + bool Run(); bool HasRun() const { return has_run_; } diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index 97317124ef..cf26e79c69 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -141,13 +141,13 @@ TEST(SideEffectsTest, NoDependences) { TEST(SideEffectsTest, VolatileDependences) { SideEffects volatile_write = - SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ true); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ true); SideEffects any_write = - SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ false); SideEffects volatile_read = - SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ true); + SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile= */ true); SideEffects any_read = - SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ false); + SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile= */ false); EXPECT_FALSE(volatile_write.MayDependOn(any_read)); EXPECT_TRUE(any_read.MayDependOn(volatile_write)); @@ -163,15 +163,15 @@ TEST(SideEffectsTest, VolatileDependences) { TEST(SideEffectsTest, SameWidthTypesNoAlias) { // Type I/F. 
testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false), - SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile */ false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ false), + SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile= */ false)); testNoWriteAndReadDependence( SideEffects::ArrayWriteOfType(DataType::Type::kInt32), SideEffects::ArrayReadOfType(DataType::Type::kFloat32)); // Type L/D. testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false), - SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile */ false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile= */ false), + SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile= */ false)); testNoWriteAndReadDependence( SideEffects::ArrayWriteOfType(DataType::Type::kInt64), SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); @@ -181,9 +181,9 @@ TEST(SideEffectsTest, AllWritesAndReads) { SideEffects s = SideEffects::None(); // Keep taking the union of different writes and reads. for (DataType::Type type : kTestTypes) { - s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayWriteOfType(type)); - s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayReadOfType(type)); } EXPECT_TRUE(s.DoesAllReadWrite()); @@ -202,6 +202,7 @@ TEST(SideEffectsTest, GC) { EXPECT_TRUE(depends_on_gc.MayDependOn(all_changes)); EXPECT_TRUE(depends_on_gc.Union(can_trigger_gc).MayDependOn(all_changes)); EXPECT_FALSE(can_trigger_gc.MayDependOn(all_changes)); + EXPECT_FALSE(can_trigger_gc.MayDependOn(can_trigger_gc)); EXPECT_TRUE(all_changes.Includes(can_trigger_gc)); EXPECT_FALSE(all_changes.Includes(depends_on_gc)); @@ -253,10 +254,10 @@ TEST(SideEffectsTest, BitStrings) { "||I|||||", SideEffects::ArrayReadOfType(DataType::Type::kInt32).ToString().c_str()); SideEffects s = SideEffects::None(); - s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile */ false)); - s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile= */ false)); + s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayWriteOfType(DataType::Type::kInt16)); - s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile= */ false)); s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat32)); s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str()); diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index dd54468217..0d0e1ecf1f 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -16,6 +16,9 @@ #include "ssa_builder.h" +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" +#include "base/logging.h" #include "data_type-inl.h" #include "dex/bytecode_utils.h" #include "mirror/class-inl.h" @@ -388,7 +391,7 @@ bool 
SsaBuilder::FixAmbiguousArrayOps() { // succeed in code validated by the verifier. HInstruction* equivalent = GetFloatOrDoubleEquivalent(value, array_type); DCHECK(equivalent != nullptr); - aset->ReplaceInput(equivalent, /* input_index */ 2); + aset->ReplaceInput(equivalent, /* index= */ 2); if (equivalent->IsPhi()) { // Returned equivalent is a phi which may not have had its inputs // replaced yet. We need to run primitive type propagation on it. @@ -415,29 +418,36 @@ bool SsaBuilder::FixAmbiguousArrayOps() { return true; } -static bool HasAliasInEnvironments(HInstruction* instruction) { - HEnvironment* last_user = nullptr; +bool SsaBuilder::HasAliasInEnvironments(HInstruction* instruction) { + ScopedArenaHashSet<size_t> seen_users( + local_allocator_->Adapter(kArenaAllocGraphBuilder)); for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { DCHECK(use.GetUser() != nullptr); - // Note: The first comparison (== null) always fails. - if (use.GetUser() == last_user) { + size_t id = use.GetUser()->GetHolder()->GetId(); + if (seen_users.find(id) != seen_users.end()) { return true; } - last_user = use.GetUser(); + seen_users.insert(id); } + return false; +} - if (kIsDebugBuild) { - // Do a quadratic search to ensure same environment uses are next - // to each other. - const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); - for (auto current = env_uses.begin(), end = env_uses.end(); current != end; ++current) { - auto next = current; - for (++next; next != end; ++next) { - DCHECK(next->GetUser() != current->GetUser()); - } +bool SsaBuilder::ReplaceUninitializedStringPhis() { + for (HInvoke* invoke : uninitialized_string_phis_) { + HInstruction* str = invoke->InputAt(invoke->InputCount() - 1); + if (str->IsPhi()) { + // If after redundant phi and dead phi elimination, it's still a phi that feeds + // the invoke, then we must be compiling a method with irreducible loops. Just bail. + DCHECK(graph_->HasIrreducibleLoops()); + return false; } + DCHECK(str->IsNewInstance()); + AddUninitializedString(str->AsNewInstance()); + str->ReplaceUsesDominatedBy(invoke, invoke); + str->ReplaceEnvUsesDominatedBy(invoke, invoke); + invoke->RemoveInputAt(invoke->InputCount() - 1); } - return false; + return true; } void SsaBuilder::RemoveRedundantUninitializedStrings() { @@ -452,8 +462,9 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { DCHECK(new_instance->IsStringAlloc()); // Replace NewInstance of String with NullConstant if not used prior to - // calling StringFactory. In case of deoptimization, the interpreter is - // expected to skip null check on the `this` argument of the StringFactory call. + // calling StringFactory. We check for alias environments in case of deoptimization. + // The interpreter is expected to skip null check on the `this` argument of the + // StringFactory call. if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) { new_instance->ReplaceWith(graph_->GetNullConstant()); new_instance->GetBlock()->RemoveInstruction(new_instance); @@ -488,35 +499,35 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() { GraphAnalysisResult SsaBuilder::BuildSsa() { DCHECK(!graph_->IsInSsaForm()); - // 1) Propagate types of phis. At this point, phis are typed void in the general + // Propagate types of phis. At this point, phis are typed void in the general // case, or float/double/reference if we created an equivalent phi. So we need // to propagate the types across phis to give them a correct type. 
If a type // conflict is detected in this stage, the phi is marked dead. RunPrimitiveTypePropagation(); - // 2) Now that the correct primitive types have been assigned, we can get rid + // Now that the correct primitive types have been assigned, we can get rid // of redundant phis. Note that we cannot do this phase before type propagation, // otherwise we could get rid of phi equivalents, whose presence is a requirement // for the type propagation phase. Note that this is to satisfy statement (a) // of the SsaBuilder (see ssa_builder.h). SsaRedundantPhiElimination(graph_).Run(); - // 3) Fix the type for null constants which are part of an equality comparison. + // Fix the type for null constants which are part of an equality comparison. // We need to do this after redundant phi elimination, to ensure the only cases // that we can see are reference comparison against 0. The redundant phi // elimination ensures we do not see a phi taking two 0 constants in a HEqual // or HNotEqual. FixNullConstantType(); - // 4) Compute type of reference type instructions. The pass assumes that + // Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. ReferenceTypePropagation(graph_, class_loader_, dex_cache_, handles_, - /* is_first_run */ true).Run(); + /* is_first_run= */ true).Run(); - // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type + // HInstructionBuilder duplicated ArrayGet instructions with ambiguous type // (int/float or long/double) and marked ArraySets with ambiguous input type. // Now that RTP computed the type of the array input, the ambiguity can be // resolved and the correct equivalents kept. @@ -524,13 +535,13 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { return kAnalysisFailAmbiguousArrayOp; } - // 6) Mark dead phis. This will mark phis which are not used by instructions + // Mark dead phis. This will mark phis which are not used by instructions // or other live phis. If compiling as debuggable code, phis will also be kept // live if they have an environment use. SsaDeadPhiElimination dead_phi_elimimation(graph_); dead_phi_elimimation.MarkDeadPhis(); - // 7) Make sure environments use the right phi equivalent: a phi marked dead + // Make sure environments use the right phi equivalent: a phi marked dead // can have a phi equivalent that is not dead. In that case we have to replace // it with the live equivalent because deoptimization and try/catch rely on // environments containing values of all live vregs at that point. Note that @@ -539,14 +550,22 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // environments to just reference one. FixEnvironmentPhis(); - // 8) Now that the right phis are used for the environments, we can eliminate + // Now that the right phis are used for the environments, we can eliminate // phis we do not need. Regardless of the debuggable status, this phase is /// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well // as for the code generation, which does not deal with phis of conflicting // input types. dead_phi_elimimation.EliminateDeadPhis(); - // 9) HInstructionBuidler replaced uses of NewInstances of String with the + // Replace Phis that feed in a String.<init> during instruction building. We + // run this after redundant and dead phi elimination to make sure the phi will have + // been replaced by the actual allocation. Only with an irreducible loop + // a phi can still be the input, in which case we bail. 
+ if (!ReplaceUninitializedStringPhis()) { + return kAnalysisFailIrreducibleLoopAndStringInit; + } + + // HInstructionBuidler replaced uses of NewInstances of String with the // results of their corresponding StringFactory calls. Unless the String // objects are used before they are initialized, they can be replaced with // NullConstant. Note that this optimization is valid only if unsimplified diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 60831a9e6a..bb892c9304 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -61,7 +61,8 @@ class SsaBuilder : public ValueObject { local_allocator_(local_allocator), ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), - uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + uninitialized_string_phis_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { graph_->InitializeInexactObjectRTI(handles); } @@ -96,6 +97,10 @@ class SsaBuilder : public ValueObject { } } + void AddUninitializedStringPhi(HInvoke* invoke) { + uninitialized_string_phis_.push_back(invoke); + } + private: void SetLoopHeaderPhiInputs(); void FixEnvironmentPhis(); @@ -118,6 +123,8 @@ class SsaBuilder : public ValueObject { HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); void RemoveRedundantUninitializedStrings(); + bool ReplaceUninitializedStringPhis(); + bool HasAliasInEnvironments(HInstruction* instruction); HGraph* const graph_; Handle<mirror::ClassLoader> class_loader_; @@ -131,6 +138,7 @@ class SsaBuilder : public ValueObject { ScopedArenaVector<HArrayGet*> ambiguous_agets_; ScopedArenaVector<HArraySet*> ambiguous_asets_; ScopedArenaVector<HNewInstance*> uninitialized_strings_; + ScopedArenaVector<HInvoke*> uninitialized_string_phis_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index f6bd05269e..7b2c3a939c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -103,9 +103,9 @@ void SsaLivenessAnalysis::ComputeLiveness() { ComputeLiveInAndLiveOutSets(); } -static void RecursivelyProcessInputs(HInstruction* current, - HInstruction* actual_user, - BitVector* live_in) { +void SsaLivenessAnalysis::RecursivelyProcessInputs(HInstruction* current, + HInstruction* actual_user, + BitVector* live_in) { HInputsRef inputs = current->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; @@ -120,7 +120,7 @@ static void RecursivelyProcessInputs(HInstruction* current, DCHECK(input->HasSsaIndex()); // `input` generates a result used by `current`. Add use and update // the live-in set. - input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i, actual_user); + input->GetLiveInterval()->AddUse(current, /* environment= */ nullptr, i, actual_user); live_in->SetBit(input->GetSsaIndex()); } else if (has_out_location) { // `input` generates a result but it is not used by `current`. @@ -131,11 +131,40 @@ static void RecursivelyProcessInputs(HInstruction* current, // Check that the inlined input is not a phi. Recursing on loop phis could // lead to an infinite loop. 
DCHECK(!input->IsPhi()); + DCHECK(!input->HasEnvironment()); RecursivelyProcessInputs(input, actual_user, live_in); } } } +void SsaLivenessAnalysis::ProcessEnvironment(HInstruction* current, + HInstruction* actual_user, + BitVector* live_in) { + for (HEnvironment* environment = current->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + // Handle environment uses. See statements (b) and (c) of the + // SsaLivenessAnalysis. + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* instruction = environment->GetInstructionAt(i); + if (instruction == nullptr) { + continue; + } + bool should_be_live = ShouldBeLiveForEnvironment(current, instruction); + // If this environment use does not keep the instruction live, it does not + // affect the live range of that instruction. + if (should_be_live) { + CHECK(instruction->HasSsaIndex()) << instruction->DebugName(); + live_in->SetBit(instruction->GetSsaIndex()); + instruction->GetLiveInterval()->AddUse(current, + environment, + i, + actual_user); + } + } + } +} + void SsaLivenessAnalysis::ComputeLiveRanges() { // Do a post order visit, adding inputs of instructions live in the block where // that instruction is defined, and killing instructions that are being visited. @@ -186,27 +215,6 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { current->GetLiveInterval()->SetFrom(current->GetLifetimePosition()); } - // Process the environment first, because we know their uses come after - // or at the same liveness position of inputs. - for (HEnvironment* environment = current->GetEnvironment(); - environment != nullptr; - environment = environment->GetParent()) { - // Handle environment uses. See statements (b) and (c) of the - // SsaLivenessAnalysis. - for (size_t i = 0, e = environment->Size(); i < e; ++i) { - HInstruction* instruction = environment->GetInstructionAt(i); - bool should_be_live = ShouldBeLiveForEnvironment(current, instruction); - if (should_be_live) { - CHECK(instruction->HasSsaIndex()) << instruction->DebugName(); - live_in->SetBit(instruction->GetSsaIndex()); - } - if (instruction != nullptr) { - instruction->GetLiveInterval()->AddUse( - current, environment, i, /* actual_user */ nullptr, should_be_live); - } - } - } - // Process inputs of instructions. if (current->IsEmittedAtUseSite()) { if (kIsDebugBuild) { @@ -219,6 +227,16 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { DCHECK(!current->HasEnvironmentUses()); } } else { + // Process the environment first, because we know their uses come after + // or at the same liveness position of inputs. + ProcessEnvironment(current, current, live_in); + + // Special case implicit null checks. We want their environment uses to be + // emitted at the instruction doing the actual null check. + HNullCheck* check = current->GetImplicitNullCheck(); + if (check != nullptr) { + ProcessEnvironment(check, current, live_in); + } RecursivelyProcessInputs(current, current, live_in); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index f83bb52b69..c88390775c 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -60,7 +60,7 @@ class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> { * A live range contains the start and end of a range where an instruction or a temporary * is live. 
*/ -class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { +class LiveRange final : public ArenaObject<kArenaAllocSsaLiveness> { public: LiveRange(size_t start, size_t end, LiveRange* next) : start_(start), end_(end), next_(next) { DCHECK_LT(start, end); @@ -230,12 +230,25 @@ class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { : instruction_(instruction), next_(nullptr) {} + static size_t ComputePosition(HInstruction* instruction) { + // We special case instructions emitted at use site, as their + // safepoint position needs to be at their use. + if (instruction->IsEmittedAtUseSite()) { + // Currently only applies to implicit null checks, which are emitted + // at the next instruction. + DCHECK(instruction->IsNullCheck()) << instruction->DebugName(); + return instruction->GetLifetimePosition() + 2; + } else { + return instruction->GetLifetimePosition(); + } + } + void SetNext(SafepointPosition* next) { next_ = next; } size_t GetPosition() const { - return instruction_->GetLifetimePosition(); + return ComputePosition(instruction_); } SafepointPosition* GetNext() const { @@ -300,8 +313,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { void AddUse(HInstruction* instruction, HEnvironment* environment, size_t input_index, - HInstruction* actual_user = nullptr, - bool keep_alive = false) { + HInstruction* actual_user = nullptr) { bool is_environment = (environment != nullptr); LocationSummary* locations = instruction->GetLocations(); if (actual_user == nullptr) { @@ -359,12 +371,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { uses_.push_front(*new_use); } - if (is_environment && !keep_alive) { - // If this environment use does not keep the instruction live, it does not - // affect the live range of that instruction. - return; - } - size_t start_block_position = instruction->GetBlock()->GetLifetimeStart(); if (first_range_ == nullptr) { // First time we see a use of that interval. @@ -929,7 +935,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { if (first_safepoint_ == nullptr) { first_safepoint_ = last_safepoint_ = safepoint; } else { - DCHECK_LT(last_safepoint_->GetPosition(), safepoint->GetPosition()); + DCHECK_LE(last_safepoint_->GetPosition(), safepoint->GetPosition()); last_safepoint_->SetNext(safepoint); last_safepoint_ = safepoint; } @@ -1149,16 +1155,20 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { * * (a) Non-environment uses of an instruction always make * the instruction live. - * (b) Environment uses of an instruction whose type is - * object (that is, non-primitive), make the instruction live. - * This is due to having to keep alive objects that have - * finalizers deleting native objects. + * (b) Environment uses of an instruction whose type is object (that is, non-primitive), make the + * instruction live, unless the class has an @DeadReferenceSafe annotation. + * This avoids unexpected premature reference enqueuing or finalization, which could + * result in premature deletion of native objects. In the presence of @DeadReferenceSafe, + * object references are treated like primitive types. * (c) When the graph has the debuggable property, environment uses * of an instruction that has a primitive type make the instruction live. * If the graph does not have the debuggable property, the environment * use has no effect, and may get a 'none' value after register allocation. 
+ * (d) When compiling in OSR mode, all loops in the compiled method may be entered + * from the interpreter via SuspendCheck; such use in SuspendCheck makes the instruction + * live. * - * (b) and (c) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. + * (b), (c) and (d) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment. */ class SsaLivenessAnalysis : public ValueObject { public: @@ -1256,17 +1266,29 @@ class SsaLivenessAnalysis : public ValueObject { // Update the live_out set of the block and returns whether it has changed. bool UpdateLiveOut(const HBasicBlock& block); + static void ProcessEnvironment(HInstruction* instruction, + HInstruction* actual_user, + BitVector* live_in); + static void RecursivelyProcessInputs(HInstruction* instruction, + HInstruction* actual_user, + BitVector* live_in); + // Returns whether `instruction` in an HEnvironment held by `env_holder` // should be kept live by the HEnvironment. static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) { - if (instruction == nullptr) return false; + DCHECK(instruction != nullptr); // A value that's not live in compiled code may still be needed in interpreter, // due to code motion, etc. if (env_holder->IsDeoptimize()) return true; // A value live at a throwing instruction in a try block may be copied by // the exception handler to its location at the top of the catch block. if (env_holder->CanThrowIntoCatchBlock()) return true; - if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true; + HGraph* graph = instruction->GetBlock()->GetGraph(); + if (graph->IsDebuggable()) return true; + // When compiling in OSR mode, all loops in the compiled method may be entered + // from the interpreter via SuspendCheck; thus we need to preserve the environment. + if (env_holder->IsSuspendCheck() && graph->IsCompilingOsr()) return true; + if (graph -> IsDeadReferenceSafe()) return false; return instruction->GetType() == DataType::Type::kReference; } diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index b9bfbaa173..352c44f63a 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -28,18 +28,11 @@ namespace art { class SsaLivenessAnalysisTest : public OptimizingUnitTest { - public: - SsaLivenessAnalysisTest() - : graph_(CreateGraph()), - compiler_options_(), - instruction_set_(kRuntimeISA) { - std::string error_msg; - instruction_set_features_ = - InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg); - codegen_ = CodeGenerator::Create(graph_, - instruction_set_, - *instruction_set_features_, - compiler_options_); + protected: + void SetUp() override { + OptimizingUnitTest::SetUp(); + graph_ = CreateGraph(); + codegen_ = CodeGenerator::Create(graph_, *compiler_options_); CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture."; // Create entry block. 
entry_ = new (GetAllocator()) HBasicBlock(graph_); @@ -57,9 +50,6 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { } HGraph* graph_; - CompilerOptions compiler_options_; - InstructionSet instruction_set_; - std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; std::unique_ptr<CodeGenerator> codegen_; HBasicBlock* entry_; }; @@ -104,25 +94,25 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0); block->AddInstruction(null_check); HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, null_check); null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); null_check->SetRawEnvironment(null_check_env); HInstruction* length = new (GetAllocator()) HArrayLength(array, 0); block->AddInstruction(length); - HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc */ 0u); + HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc= */ 0u); block->AddInstruction(bounds_check); HEnvironment* bounds_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, bounds_check); bounds_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); bounds_check->SetRawEnvironment(bounds_check_env); HInstruction* array_set = - new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0); + new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc= */ 0); block->AddInstruction(array_set); graph_->BuildDominatorTree(); @@ -134,12 +124,12 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { static const char* const expected[] = { "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [4,21) }, uses: { 19 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [6,21) }, uses: { 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", // Environment uses do not keep the non-reference argument alive. - "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [8,10) }, uses: { }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment uses keep the reference argument alive. 
"ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; @@ -173,9 +163,9 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0); block->AddInstruction(null_check); HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, null_check); null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); null_check->SetRawEnvironment(null_check_env); @@ -185,17 +175,17 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { HInstruction* ae = new (GetAllocator()) HAboveOrEqual(index, length); block->AddInstruction(ae); HInstruction* deoptimize = new(GetAllocator()) HDeoptimize( - GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u); + GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc= */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (GetAllocator()) HEnvironment(GetAllocator(), - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, + /* number_of_vregs= */ 5, + /* method= */ nullptr, + /* dex_pc= */ 0u, deoptimize); deoptimize_env->CopyFrom(ArrayRef<HInstruction* const>(args)); deoptimize->SetRawEnvironment(deoptimize_env); HInstruction* array_set = - new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0); + new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc= */ 0); block->AddInstruction(array_set); graph_->BuildDominatorTree(); @@ -207,11 +197,11 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { static const char* const expected[] = { "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 " + "ranges: { [4,23) }, uses: { 19 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 " "is_high: 0", - "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [6,23) }, uses: { 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment use in HDeoptimize keeps even the non-reference argument alive. - "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", + "ranges: { [8,21) }, uses: { }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", // Environment uses keep the reference argument alive. "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index cb27ded17a..3fcb72e4fb 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -23,9 +23,10 @@ namespace art { -void SsaDeadPhiElimination::Run() { +bool SsaDeadPhiElimination::Run() { MarkDeadPhis(); EliminateDeadPhis(); + return true; } void SsaDeadPhiElimination::MarkDeadPhis() { @@ -122,7 +123,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { } } -void SsaRedundantPhiElimination::Run() { +bool SsaRedundantPhiElimination::Run() { // Use local allocator for allocating memory used by this optimization. 
ScopedArenaAllocator allocator(graph_->GetArenaStack()); @@ -140,7 +141,7 @@ void SsaRedundantPhiElimination::Run() { ArenaBitVector visited_phis_in_cycle(&allocator, graph_->GetCurrentInstructionId(), - /* expandable */ false, + /* expandable= */ false, kArenaAllocSsaPhiElimination); visited_phis_in_cycle.ClearAllBits(); ScopedArenaVector<HPhi*> cycle_worklist(allocator.Adapter(kArenaAllocSsaPhiElimination)); @@ -255,6 +256,7 @@ void SsaRedundantPhiElimination::Run() { current->GetBlock()->RemovePhi(current); } } + return true; } } // namespace art diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index 11d5837eb5..c5cc752ffc 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -31,7 +31,7 @@ class SsaDeadPhiElimination : public HOptimization { explicit SsaDeadPhiElimination(HGraph* graph) : HOptimization(graph, kSsaDeadPhiEliminationPassName) {} - void Run() OVERRIDE; + bool Run() override; void MarkDeadPhis(); void EliminateDeadPhis(); @@ -53,7 +53,7 @@ class SsaRedundantPhiElimination : public HOptimization { explicit SsaRedundantPhiElimination(HGraph* graph) : HOptimization(graph, kSsaRedundantPhiEliminationPassName) {} - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination"; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 85ed06eb9b..e679893af2 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -38,15 +38,15 @@ class SsaPrettyPrinter : public HPrettyPrinter { public: explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {} - void PrintInt(int value) OVERRIDE { + void PrintInt(int value) override { str_ += android::base::StringPrintf("%d", value); } - void PrintString(const char* value) OVERRIDE { + void PrintString(const char* value) override { str_ += value; } - void PrintNewLine() OVERRIDE { + void PrintNewLine() override { str_ += '\n'; } @@ -54,7 +54,7 @@ class SsaPrettyPrinter : public HPrettyPrinter { std::string str() const { return str_; } - void VisitIntConstant(HIntConstant* constant) OVERRIDE { + void VisitIntConstant(HIntConstant* constant) override { PrintPreInstruction(constant); str_ += constant->DebugName(); str_ += " "; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 7010e3f380..60ca61c133 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -16,682 +16,312 @@ #include "stack_map_stream.h" +#include <memory> + #include "art_method-inl.h" #include "base/stl_util.h" #include "dex/dex_file_types.h" #include "optimizing/optimizing_compiler.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" +#include "stack_map.h" namespace art { -void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, - uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth) { - DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; - current_entry_.dex_pc = dex_pc; - current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_); - current_entry_.register_mask = register_mask; - current_entry_.sp_mask = sp_mask; - current_entry_.inlining_depth = inlining_depth; - current_entry_.inline_infos_start_index = inline_infos_.size(); - current_entry_.stack_mask_index = 
0; - current_entry_.dex_method_index = dex::kDexNoIndex; - current_entry_.dex_register_entry.num_dex_registers = num_dex_registers; - current_entry_.dex_register_entry.locations_start_index = dex_register_locations_.size(); - current_entry_.dex_register_entry.live_dex_registers_mask = nullptr; - if (num_dex_registers != 0u) { - current_entry_.dex_register_entry.live_dex_registers_mask = - ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); - current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits(); - } - if (sp_mask != nullptr) { - stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); - } - if (inlining_depth > 0) { - number_of_stack_maps_with_inline_info_++; - } +constexpr static bool kVerifyStackMaps = kIsDebugBuild; - // Note: dex_pc can be kNoDexPc for native method intrinsics. - if (dex_pc != dex::kDexNoIndex && (dex_pc_max_ == dex::kDexNoIndex || dex_pc_max_ < dex_pc)) { - dex_pc_max_ = dex_pc; - } - register_mask_max_ = std::max(register_mask_max_, register_mask); - current_dex_register_ = 0; +uint32_t StackMapStream::GetStackMapNativePcOffset(size_t i) { + return StackMap::UnpackNativePc(stack_maps_[i][StackMap::kPackedNativePc], instruction_set_); } -void StackMapStream::EndStackMapEntry() { - current_entry_.dex_register_map_index = AddDexRegisterMapEntry(current_entry_.dex_register_entry); - stack_maps_.push_back(current_entry_); - current_entry_ = StackMapEntry(); +void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { + stack_maps_[i][StackMap::kPackedNativePc] = + StackMap::PackNativePc(native_pc_offset, instruction_set_); } -void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { - if (kind != DexRegisterLocation::Kind::kNone) { - // Ensure we only use non-compressed location kind at this stage. - DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << kind; - DexRegisterLocation location(kind, value); - - // Look for Dex register `location` in the location catalog (using the - // companion hash map of locations to indices). Use its index if it - // is already in the location catalog. If not, insert it (in the - // location catalog and the hash map) and use the newly created index. - auto it = location_catalog_entries_indices_.Find(location); - if (it != location_catalog_entries_indices_.end()) { - // Retrieve the index from the hash map. - dex_register_locations_.push_back(it->second); - } else { - // Create a new entry in the location catalog and the hash map. - size_t index = location_catalog_entries_.size(); - location_catalog_entries_.push_back(location); - dex_register_locations_.push_back(index); - location_catalog_entries_indices_.Insert(std::make_pair(location, index)); - } - DexRegisterMapEntry* const entry = in_inline_frame_ - ? 
¤t_inline_info_.dex_register_entry - : ¤t_entry_.dex_register_entry; - DCHECK_LT(current_dex_register_, entry->num_dex_registers); - entry->live_dex_registers_mask->SetBit(current_dex_register_); - entry->hash += (1 << - (current_dex_register_ % (sizeof(DexRegisterMapEntry::hash) * kBitsPerByte))); - entry->hash += static_cast<uint32_t>(value); - entry->hash += static_cast<uint32_t>(kind); - } - current_dex_register_++; +void StackMapStream::BeginMethod(size_t frame_size_in_bytes, + size_t core_spill_mask, + size_t fp_spill_mask, + uint32_t num_dex_registers) { + DCHECK(!in_method_) << "Mismatched Begin/End calls"; + in_method_ = true; + DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; + + DCHECK_ALIGNED(frame_size_in_bytes, kStackAlignment); + packed_frame_size_ = frame_size_in_bytes / kStackAlignment; + core_spill_mask_ = core_spill_mask; + fp_spill_mask_ = fp_spill_mask; + num_dex_registers_ = num_dex_registers; } -void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index) { - current_entry_.invoke_type = invoke_type; - current_entry_.dex_method_index = dex_method_index; -} +void StackMapStream::EndMethod() { + DCHECK(in_method_) << "Mismatched Begin/End calls"; + in_method_ = false; -void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, - uint32_t dex_pc, - uint32_t num_dex_registers, - const DexFile* outer_dex_file) { - DCHECK(!in_inline_frame_); - in_inline_frame_ = true; - if (EncodeArtMethodInInlineInfo(method)) { - current_inline_info_.method = method; - } else { - if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) { - ScopedObjectAccess soa(Thread::Current()); - DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile())); + // Read the stack masks now. The compiler might have updated them. 
+ for (size_t i = 0; i < lazy_stack_masks_.size(); i++) { + BitVector* stack_mask = lazy_stack_masks_[i]; + if (stack_mask != nullptr && stack_mask->GetNumberOfBits() != 0) { + stack_maps_[i][StackMap::kStackMaskIndex] = + stack_masks_.Dedup(stack_mask->GetRawStorage(), stack_mask->GetNumberOfBits()); } - current_inline_info_.method_index = method->GetDexMethodIndexUnchecked(); - } - current_inline_info_.dex_pc = dex_pc; - current_inline_info_.dex_register_entry.num_dex_registers = num_dex_registers; - current_inline_info_.dex_register_entry.locations_start_index = dex_register_locations_.size(); - current_inline_info_.dex_register_entry.live_dex_registers_mask = nullptr; - if (num_dex_registers != 0) { - current_inline_info_.dex_register_entry.live_dex_registers_mask = - ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); - current_inline_info_.dex_register_entry.live_dex_registers_mask->ClearAllBits(); - } - current_dex_register_ = 0; -} - -void StackMapStream::EndInlineInfoEntry() { - current_inline_info_.dex_register_map_index = - AddDexRegisterMapEntry(current_inline_info_.dex_register_entry); - DCHECK(in_inline_frame_); - DCHECK_EQ(current_dex_register_, current_inline_info_.dex_register_entry.num_dex_registers) - << "Inline information contains less registers than expected"; - in_inline_frame_ = false; - inline_infos_.push_back(current_inline_info_); - current_inline_info_ = InlineInfoEntry(); -} - -CodeOffset StackMapStream::ComputeMaxNativePcCodeOffset() const { - CodeOffset max_native_pc_offset; - for (const StackMapEntry& entry : stack_maps_) { - max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_code_offset); - } - return max_native_pc_offset; -} - -size_t StackMapStream::PrepareForFillIn() { - CodeInfoEncoding encoding; - encoding.dex_register_map.num_entries = 0; // TODO: Remove this field. - encoding.dex_register_map.num_bytes = ComputeDexRegisterMapsSize(); - encoding.location_catalog.num_entries = location_catalog_entries_.size(); - encoding.location_catalog.num_bytes = ComputeDexRegisterLocationCatalogSize(); - encoding.inline_info.num_entries = inline_infos_.size(); - // Must be done before calling ComputeInlineInfoEncoding since ComputeInlineInfoEncoding requires - // dex_method_index_idx to be filled in. - PrepareMethodIndices(); - ComputeInlineInfoEncoding(&encoding.inline_info.encoding, - encoding.dex_register_map.num_bytes); - CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset(); - // Prepare the CodeInfo variable-sized encoding. - encoding.stack_mask.encoding.num_bits = stack_mask_max_ + 1; // Need room for max element too. - encoding.stack_mask.num_entries = PrepareStackMasks(encoding.stack_mask.encoding.num_bits); - encoding.register_mask.encoding.num_bits = MinimumBitsToStore(register_mask_max_); - encoding.register_mask.num_entries = PrepareRegisterMasks(); - encoding.stack_map.num_entries = stack_maps_.size(); - encoding.stack_map.encoding.SetFromSizes( - // The stack map contains compressed native PC offsets. - max_native_pc_offset.CompressedValue(), - dex_pc_max_, - encoding.dex_register_map.num_bytes, - encoding.inline_info.num_entries, - encoding.register_mask.num_entries, - encoding.stack_mask.num_entries); - ComputeInvokeInfoEncoding(&encoding); - DCHECK_EQ(code_info_encoding_.size(), 0u); - encoding.Compress(&code_info_encoding_); - encoding.ComputeTableOffsets(); - // Compute table offsets so we can get the non header size. 
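Both the stack masks read back above and the register masks in the following hunk are stored through Dedup() calls on table builders, so identical masks across many stack maps collapse into a single row and each StackMap keeps only a small row index (kStackMaskIndex / kRegisterMaskIndex). The ART builder types are only partly visible in this diff; the underlying idea is plain index-returning deduplication, as in this self-contained toy:

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    // Toy index-returning deduplicator: equal masks share one table row.
    class MaskTable {
     public:
      uint32_t Dedup(uint32_t mask) {
        auto it = index_of_.find(mask);
        if (it != index_of_.end()) {
          return it->second;                      // Reuse the existing row.
        }
        uint32_t index = static_cast<uint32_t>(rows_.size());
        rows_.push_back(mask);                    // Append a new row.
        index_of_.emplace(mask, index);
        return index;
      }
      uint32_t RowAt(uint32_t index) const { return rows_[index]; }

     private:
      std::vector<uint32_t> rows_;
      std::unordered_map<uint32_t, uint32_t> index_of_;
    };

Storing indices instead of the masks themselves is what keeps the encoded CodeInfo compact when many safepoints share the same spill layout.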
- DCHECK_EQ(encoding.HeaderSize(), code_info_encoding_.size()); - needed_size_ = code_info_encoding_.size() + encoding.NonHeaderSize(); - return needed_size_; -} - -size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { - size_t size = DexRegisterLocationCatalog::kFixedSize; - for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) { - size += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - return size; -} - -size_t StackMapStream::DexRegisterMapEntry::ComputeSize(size_t catalog_size) const { - // For num_dex_registers == 0u live_dex_registers_mask may be null. - if (num_dex_registers == 0u) { - return 0u; // No register map will be emitted. } - DCHECK(live_dex_registers_mask != nullptr); - - // Size of the map in bytes. - size_t size = DexRegisterMap::kFixedSize; - // Add the live bit mask for the Dex register liveness. - size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); - // Compute the size of the set of live Dex register entries. - size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits(); - size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(catalog_size) * number_of_live_dex_registers; - size_t map_entries_size_in_bytes = - RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; - size += map_entries_size_in_bytes; - return size; } -size_t StackMapStream::ComputeDexRegisterMapsSize() const { - size_t size = 0; - for (const DexRegisterMapEntry& entry : dex_register_entries_) { - size += entry.ComputeSize(location_catalog_entries_.size()); - } - return size; -} - -void StackMapStream::ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding) { - DCHECK(encoding != nullptr); - uint32_t native_pc_max = 0; - uint16_t method_index_max = 0; - size_t invoke_infos_count = 0; - size_t invoke_type_max = 0; - for (const StackMapEntry& entry : stack_maps_) { - if (entry.dex_method_index != dex::kDexNoIndex) { - native_pc_max = std::max(native_pc_max, entry.native_pc_code_offset.CompressedValue()); - method_index_max = std::max(method_index_max, static_cast<uint16_t>(entry.dex_method_index)); - invoke_type_max = std::max(invoke_type_max, static_cast<size_t>(entry.invoke_type)); - ++invoke_infos_count; +void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, + uint32_t native_pc_offset, + uint32_t register_mask, + BitVector* stack_mask, + StackMap::Kind kind) { + DCHECK(in_method_) << "Call BeginMethod first"; + DCHECK(!in_stack_map_) << "Mismatched Begin/End calls"; + in_stack_map_ = true; + + current_stack_map_ = BitTableBuilder<StackMap>::Entry(); + current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind); + current_stack_map_[StackMap::kPackedNativePc] = + StackMap::PackNativePc(native_pc_offset, instruction_set_); + current_stack_map_[StackMap::kDexPc] = dex_pc; + if (stack_maps_.size() > 0) { + // Check that non-catch stack maps are sorted by pc. + // Catch stack maps are at the end and may be unordered. 
+ if (stack_maps_.back()[StackMap::kKind] == StackMap::Kind::Catch) { + DCHECK(current_stack_map_[StackMap::kKind] == StackMap::Kind::Catch); + } else if (current_stack_map_[StackMap::kKind] != StackMap::Kind::Catch) { + DCHECK_LE(stack_maps_.back()[StackMap::kPackedNativePc], + current_stack_map_[StackMap::kPackedNativePc]); } } - encoding->invoke_info.num_entries = invoke_infos_count; - encoding->invoke_info.encoding.SetFromSizes(native_pc_max, invoke_type_max, method_index_max); -} - -void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding, - size_t dex_register_maps_bytes) { - uint32_t method_index_max = 0; - uint32_t dex_pc_max = dex::kDexNoIndex; - uint32_t extra_data_max = 0; - - uint32_t inline_info_index = 0; - for (const StackMapEntry& entry : stack_maps_) { - for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_[inline_info_index++]; - if (inline_entry.method == nullptr) { - method_index_max = std::max(method_index_max, inline_entry.dex_method_index_idx); - extra_data_max = std::max(extra_data_max, 1u); - } else { - method_index_max = std::max( - method_index_max, High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); - extra_data_max = std::max( - extra_data_max, Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); + if (register_mask != 0) { + uint32_t shift = LeastSignificantBit(register_mask); + BitTableBuilder<RegisterMask>::Entry entry; + entry[RegisterMask::kValue] = register_mask >> shift; + entry[RegisterMask::kShift] = shift; + current_stack_map_[StackMap::kRegisterMaskIndex] = register_masks_.Dedup(&entry); + } + // The compiler assumes the bit vector will be read during PrepareForFillIn(), + // and it might modify the data before that. Therefore, just store the pointer. + // See ClearSpillSlotsFromLoopPhisInStackMap in code_generator.h. + lazy_stack_masks_.push_back(stack_mask); + current_inline_infos_.clear(); + current_dex_registers_.clear(); + expected_num_dex_registers_ = num_dex_registers_; + + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size(); + // Create lambda method, which will be executed at the very end to verify data. + // Parameters and local variables will be captured(stored) by the lambda "[=]". + dchecks_.emplace_back([=](const CodeInfo& code_info) { + if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) { + StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, + instruction_set_); + CHECK_EQ(stack_map.Row(), stack_map_index); + } else if (kind == StackMap::Kind::Catch) { + StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc); + CHECK_EQ(stack_map.Row(), stack_map_index); } - if (inline_entry.dex_pc != dex::kDexNoIndex && - (dex_pc_max == dex::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) { - dex_pc_max = inline_entry.dex_pc; + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset); + CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind)); + CHECK_EQ(stack_map.GetDexPc(), dex_pc); + CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask); + BitMemoryRegion seen_stack_mask = code_info.GetStackMaskOf(stack_map); + CHECK_GE(seen_stack_mask.size_in_bits(), stack_mask ? 
stack_mask->GetNumberOfBits() : 0); + for (size_t b = 0; b < seen_stack_mask.size_in_bits(); b++) { + CHECK_EQ(seen_stack_mask.LoadBit(b), stack_mask != nullptr && stack_mask->IsBitSet(b)); } - } + }); } - DCHECK_EQ(inline_info_index, inline_infos_.size()); - - encoding->SetFromSizes(method_index_max, dex_pc_max, extra_data_max, dex_register_maps_bytes); } -size_t StackMapStream::MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry, - size_t* current_offset, - MemoryRegion dex_register_locations_region) { - DCHECK(current_offset != nullptr); - if ((entry.num_dex_registers == 0) || (entry.live_dex_registers_mask->NumSetBits() == 0)) { - // No dex register map needed. - return StackMap::kNoDexRegisterMap; - } - if (entry.offset == DexRegisterMapEntry::kOffsetUnassigned) { - // Not already copied, need to copy and and assign an offset. - entry.offset = *current_offset; - const size_t entry_size = entry.ComputeSize(location_catalog_entries_.size()); - DexRegisterMap dex_register_map( - dex_register_locations_region.Subregion(entry.offset, entry_size)); - *current_offset += entry_size; - // Fill in the map since it was just added. - FillInDexRegisterMap(dex_register_map, - entry.num_dex_registers, - *entry.live_dex_registers_mask, - entry.locations_start_index); - } - return entry.offset; -} - -void StackMapStream::FillInMethodInfo(MemoryRegion region) { - { - MethodInfo info(region.begin(), method_indices_.size()); - for (size_t i = 0; i < method_indices_.size(); ++i) { - info.SetMethodIndex(i, method_indices_[i]); - } - } - if (kIsDebugBuild) { - // Check the data matches. - MethodInfo info(region.begin()); - const size_t count = info.NumMethodIndices(); - DCHECK_EQ(count, method_indices_.size()); - for (size_t i = 0; i < count; ++i) { - DCHECK_EQ(info.GetMethodIndex(i), method_indices_[i]); - } - } -} - -void StackMapStream::FillInCodeInfo(MemoryRegion region) { - DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; - DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn"; - - DCHECK_EQ(region.size(), needed_size_); - - // Note that the memory region does not have to be zeroed when we JIT code - // because we do not use the arena allocator there. - - // Write the CodeInfo header. - region.CopyFrom(0, MemoryRegion(code_info_encoding_.data(), code_info_encoding_.size())); - - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - DCHECK_EQ(encoding.stack_map.num_entries, stack_maps_.size()); - - MemoryRegion dex_register_locations_region = region.Subregion( - encoding.dex_register_map.byte_offset, - encoding.dex_register_map.num_bytes); - - // Set the Dex register location catalog. - MemoryRegion dex_register_location_catalog_region = region.Subregion( - encoding.location_catalog.byte_offset, - encoding.location_catalog.num_bytes); - DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); - // Offset in `dex_register_location_catalog` where to store the next - // register location. - size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (DexRegisterLocation dex_register_location : location_catalog_entries_) { - dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); - location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); - } - // Ensure we reached the end of the Dex registers location_catalog. 
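BeginStackMapEntry above packs each non-zero register mask as a (value, shift) pair: the shift drops the trailing zero bits and only the significant bits are deduplicated into the RegisterMask table. A minimal standalone sketch of that packing (PackRegisterMask, UnpackRegisterMask and the __builtin_ctz stand-in for LeastSignificantBit are illustrative, not from the patch):

#include <cassert>
#include <cstdint>
#include <utility>

// Pack a register mask by dropping trailing zero bits, mirroring
// BeginStackMapEntry: value = mask >> shift, shift = lowest set bit.
static std::pair<uint32_t, uint32_t> PackRegisterMask(uint32_t mask) {
  assert(mask != 0u);
  uint32_t shift = static_cast<uint32_t>(__builtin_ctz(mask));
  return {mask >> shift, shift};
}

// Reverse of the packing; CodeInfo::GetRegisterMaskOf is expected to
// reconstruct the original mask the same way when reading the table.
static uint32_t UnpackRegisterMask(uint32_t value, uint32_t shift) {
  return value << shift;
}

int main() {
  uint32_t mask = 0xF0u;                 // Say, callee-saved core registers r4-r7.
  auto [value, shift] = PackRegisterMask(mask);
  assert(value == 0xFu && shift == 4u);  // Only four significant bits are stored.
  assert(UnpackRegisterMask(value, shift) == mask);
  return 0;
}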
- DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size()); - - ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false, kArenaAllocStackMapStream); - uintptr_t next_dex_register_map_offset = 0; - uintptr_t next_inline_info_index = 0; - size_t invoke_info_idx = 0; - for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { - StackMap stack_map = code_info.GetStackMapAt(i, encoding); - StackMapEntry entry = stack_maps_[i]; - - stack_map.SetDexPc(encoding.stack_map.encoding, entry.dex_pc); - stack_map.SetNativePcCodeOffset(encoding.stack_map.encoding, entry.native_pc_code_offset); - stack_map.SetRegisterMaskIndex(encoding.stack_map.encoding, entry.register_mask_index); - stack_map.SetStackMaskIndex(encoding.stack_map.encoding, entry.stack_mask_index); - - size_t offset = MaybeCopyDexRegisterMap(dex_register_entries_[entry.dex_register_map_index], - &next_dex_register_map_offset, - dex_register_locations_region); - stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, offset); - - if (entry.dex_method_index != dex::kDexNoIndex) { - InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx)); - invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset); - invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type); - invoke_info.SetMethodIndexIdx(encoding.invoke_info.encoding, entry.dex_method_index_idx); - ++invoke_info_idx; - } +void StackMapStream::EndStackMapEntry() { + DCHECK(in_stack_map_) << "Mismatched Begin/End calls"; + in_stack_map_ = false; - // Set the inlining info. - if (entry.inlining_depth != 0) { - InlineInfo inline_info = code_info.GetInlineInfo(next_inline_info_index, encoding); - - // Fill in the index. - stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, next_inline_info_index); - DCHECK_EQ(next_inline_info_index, entry.inline_infos_start_index); - next_inline_info_index += entry.inlining_depth; - - inline_info.SetDepth(encoding.inline_info.encoding, entry.inlining_depth); - DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); - - for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { - InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index]; - if (inline_entry.method != nullptr) { - inline_info.SetMethodIndexIdxAtDepth( - encoding.inline_info.encoding, - depth, - High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); - inline_info.SetExtraDataAtDepth( - encoding.inline_info.encoding, - depth, - Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); - } else { - inline_info.SetMethodIndexIdxAtDepth(encoding.inline_info.encoding, - depth, - inline_entry.dex_method_index_idx); - inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1); - } - inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc); - size_t dex_register_map_offset = MaybeCopyDexRegisterMap( - dex_register_entries_[inline_entry.dex_register_map_index], - &next_dex_register_map_offset, - dex_register_locations_region); - inline_info.SetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding, - depth, - dex_register_map_offset); - } - } else if (encoding.stack_map.encoding.GetInlineInfoEncoding().BitSize() > 0) { - stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, StackMap::kNoInlineInfo); - } + // Generate index into the InlineInfo table. 
+ size_t inlining_depth = current_inline_infos_.size(); + if (!current_inline_infos_.empty()) { + current_inline_infos_.back()[InlineInfo::kIsLast] = InlineInfo::kLast; + current_stack_map_[StackMap::kInlineInfoIndex] = + inline_infos_.Dedup(current_inline_infos_.data(), current_inline_infos_.size()); } - // Write stack masks table. - const size_t stack_mask_bits = encoding.stack_mask.encoding.BitSize(); - if (stack_mask_bits > 0) { - size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte; - for (size_t i = 0; i < encoding.stack_mask.num_entries; ++i) { - MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes); - BitMemoryRegion stack_mask = code_info.GetStackMask(i, encoding); - for (size_t bit_index = 0; bit_index < stack_mask_bits; ++bit_index) { - stack_mask.StoreBit(bit_index, source.LoadBit(bit_index)); - } - } + // Generate delta-compressed dex register map. + size_t num_dex_registers = current_dex_registers_.size(); + if (!current_dex_registers_.empty()) { + DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); + CreateDexRegisterMap(); } - // Write register masks table. - for (size_t i = 0; i < encoding.register_mask.num_entries; ++i) { - BitMemoryRegion register_mask = code_info.GetRegisterMask(i, encoding); - register_mask.StoreBits(0, register_masks_[i], encoding.register_mask.encoding.BitSize()); - } + stack_maps_.Add(current_stack_map_); - // Verify all written data in debug build. - if (kIsDebugBuild) { - CheckCodeInfo(region); + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size() - 1; + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0)); + CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0)); + CHECK_EQ(code_info.GetInlineInfosOf(stack_map).size(), inlining_depth); + }); } } -void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, +void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, + uint32_t dex_pc, uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask, - uint32_t start_index_in_dex_register_locations) const { - dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); - // Set the dex register location mapping data. 
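The kVerifyStackMaps blocks in BeginStackMapEntry and EndStackMapEntry above queue value-capturing lambdas in dchecks_ and only run them once the final CodeInfo exists (see the loop at the end of Encode() further below). A small self-contained sketch of that deferred-check pattern, with a stand-in Encoded struct instead of CodeInfo:

#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

struct Encoded { std::vector<int> values; };  // Stand-in for the encoded CodeInfo.

int main() {
  std::vector<int> inputs = {1, 2, 3};
  std::vector<std::function<void(const Encoded&)>> checks;

  // While "emitting", record one check per input. Capturing by value ("[=]"
  // in the patch) snapshots the expected data at emission time.
  for (size_t i = 0; i < inputs.size(); ++i) {
    int expected = inputs[i];
    checks.emplace_back([=](const Encoded& enc) { assert(enc.values[i] == expected); });
  }

  // "Encode", then replay every recorded check against the final artifact.
  Encoded enc{inputs};
  for (const auto& check : checks) {
    check(enc);
  }
  return 0;
}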
- size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits(); - DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); - DCHECK_LE(start_index_in_dex_register_locations, - dex_register_locations_.size() - number_of_live_dex_registers); - for (size_t index_in_dex_register_locations = 0; - index_in_dex_register_locations != number_of_live_dex_registers; - ++index_in_dex_register_locations) { - size_t location_catalog_entry_index = dex_register_locations_[ - start_index_in_dex_register_locations + index_in_dex_register_locations]; - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - num_dex_registers, - location_catalog_entries_.size()); - } -} + const DexFile* outer_dex_file) { + DCHECK(in_stack_map_) << "Call BeginStackMapEntry first"; + DCHECK(!in_inline_info_) << "Mismatched Begin/End calls"; + in_inline_info_ = true; + DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); + + expected_num_dex_registers_ += num_dex_registers; -size_t StackMapStream::AddDexRegisterMapEntry(const DexRegisterMapEntry& entry) { - const size_t current_entry_index = dex_register_entries_.size(); - auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.hash); - if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { - // We don't have a perfect hash functions so we need a list to collect all stack maps - // which might have the same dex register map. - ScopedArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); - stack_map_indices.push_back(current_entry_index); - dex_map_hash_to_stack_map_indices_.Put(entry.hash, std::move(stack_map_indices)); + BitTableBuilder<InlineInfo>::Entry entry; + entry[InlineInfo::kIsLast] = InlineInfo::kMore; + entry[InlineInfo::kDexPc] = dex_pc; + entry[InlineInfo::kNumberOfDexRegisters] = static_cast<uint32_t>(expected_num_dex_registers_); + if (EncodeArtMethodInInlineInfo(method)) { + entry[InlineInfo::kArtMethodHi] = High32Bits(reinterpret_cast<uintptr_t>(method)); + entry[InlineInfo::kArtMethodLo] = Low32Bits(reinterpret_cast<uintptr_t>(method)); } else { - // We might have collisions, so we need to check whether or not we really have a match. 
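When an inline frame can encode its ArtMethod directly, BeginInlineInfoEntry above splits the pointer into 32-bit halves so it fits the kArtMethodHi/kArtMethodLo columns; otherwise only a deduplicated method index goes into the MethodInfo table. A hedged sketch of the pointer round trip (High32/Low32 below model what High32Bits/Low32Bits are assumed to do, and the 64-bit width and pointer value are purely illustrative):

#include <cassert>
#include <cstdint>

// Assumed behaviour of High32Bits()/Low32Bits() from ART's bit utilities.
static uint32_t High32(uint64_t v) { return static_cast<uint32_t>(v >> 32); }
static uint32_t Low32(uint64_t v)  { return static_cast<uint32_t>(v); }

int main() {
  uint64_t method_ptr = 0x0000007fcafef00dULL;  // Hypothetical ArtMethod* value.

  // Store: the two halves land in the kArtMethodHi / kArtMethodLo columns.
  uint32_t hi = High32(method_ptr);
  uint32_t lo = Low32(method_ptr);

  // Load: InlineInfo::GetArtMethod() is expected to rebuild the pointer.
  uint64_t rebuilt = (static_cast<uint64_t>(hi) << 32) | lo;
  assert(rebuilt == method_ptr);
  return 0;
}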
- for (uint32_t test_entry_index : entries_it->second) { - if (DexRegisterMapEntryEquals(dex_register_entries_[test_entry_index], entry)) { - return test_entry_index; - } + if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile())); } - entries_it->second.push_back(current_entry_index); + uint32_t dex_method_index = method->GetDexMethodIndex(); + entry[InlineInfo::kMethodInfoIndex] = method_infos_.Dedup({dex_method_index}); + } + current_inline_infos_.push_back(entry); + + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size(); + size_t depth = current_inline_infos_.size() - 1; + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + InlineInfo inline_info = code_info.GetInlineInfosOf(stack_map)[depth]; + CHECK_EQ(inline_info.GetDexPc(), dex_pc); + bool encode_art_method = EncodeArtMethodInInlineInfo(method); + CHECK_EQ(inline_info.EncodesArtMethod(), encode_art_method); + if (encode_art_method) { + CHECK_EQ(inline_info.GetArtMethod(), method); + } else { + CHECK_EQ(code_info.GetMethodIndexOf(inline_info), method->GetDexMethodIndex()); + } + }); } - dex_register_entries_.push_back(entry); - return current_entry_index; } -bool StackMapStream::DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, - const DexRegisterMapEntry& b) const { - if ((a.live_dex_registers_mask == nullptr) != (b.live_dex_registers_mask == nullptr)) { - return false; - } - if (a.num_dex_registers != b.num_dex_registers) { - return false; - } - if (a.num_dex_registers != 0u) { - DCHECK(a.live_dex_registers_mask != nullptr); - DCHECK(b.live_dex_registers_mask != nullptr); - if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) { - return false; - } - size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits(); - DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); - DCHECK_LE(a.locations_start_index, - dex_register_locations_.size() - number_of_live_dex_registers); - DCHECK_LE(b.locations_start_index, - dex_register_locations_.size() - number_of_live_dex_registers); - auto a_begin = dex_register_locations_.begin() + a.locations_start_index; - auto b_begin = dex_register_locations_.begin() + b.locations_start_index; - if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) { - return false; - } - } - return true; +void StackMapStream::EndInlineInfoEntry() { + DCHECK(in_inline_info_) << "Mismatched Begin/End calls"; + in_inline_info_ = false; + DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size()); } -// Helper for CheckCodeInfo - check that register map has the expected content. -void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info, - const DexRegisterMap& dex_register_map, - size_t num_dex_registers, - BitVector* live_dex_registers_mask, - size_t dex_register_locations_index) const { - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - for (size_t reg = 0; reg < num_dex_registers; reg++) { - // Find the location we tried to encode. - DexRegisterLocation expected = DexRegisterLocation::None(); - if (live_dex_registers_mask->IsBitSet(reg)) { - size_t catalog_index = dex_register_locations_[dex_register_locations_index++]; - expected = location_catalog_entries_[catalog_index]; +// Create delta-compressed dex register map based on the current list of DexRegisterLocations. 
+// All dex registers for a stack map are concatenated - inlined registers are just appended. +void StackMapStream::CreateDexRegisterMap() { + // These are fields rather than local variables so that we can reuse the reserved memory. + temp_dex_register_mask_.ClearAllBits(); + temp_dex_register_map_.clear(); + + // Ensure that the arrays that hold previous state are big enough to be safely indexed below. + if (previous_dex_registers_.size() < current_dex_registers_.size()) { + previous_dex_registers_.resize(current_dex_registers_.size(), DexRegisterLocation::None()); + dex_register_timestamp_.resize(current_dex_registers_.size(), 0u); + } + + // Set bit in the mask for each register that has been changed since the previous stack map. + // Modified registers are stored in the catalogue and the catalogue index added to the list. + for (size_t i = 0; i < current_dex_registers_.size(); i++) { + DexRegisterLocation reg = current_dex_registers_[i]; + // Distance is difference between this index and the index of last modification. + uint32_t distance = stack_maps_.size() - dex_register_timestamp_[i]; + if (previous_dex_registers_[i] != reg || distance > kMaxDexRegisterMapSearchDistance) { + BitTableBuilder<DexRegisterInfo>::Entry entry; + entry[DexRegisterInfo::kKind] = static_cast<uint32_t>(reg.GetKind()); + entry[DexRegisterInfo::kPackedValue] = + DexRegisterInfo::PackValue(reg.GetKind(), reg.GetValue()); + uint32_t index = reg.IsLive() ? dex_register_catalog_.Dedup(&entry) : kNoValue; + temp_dex_register_mask_.SetBit(i); + temp_dex_register_map_.push_back({index}); + previous_dex_registers_[i] = reg; + dex_register_timestamp_[i] = stack_maps_.size(); } - // Compare to the seen location. - if (expected.GetKind() == DexRegisterLocation::Kind::kNone) { - DCHECK(!dex_register_map.IsValid() || !dex_register_map.IsDexRegisterLive(reg)) - << dex_register_map.IsValid() << " " << dex_register_map.IsDexRegisterLive(reg); - } else { - DCHECK(dex_register_map.IsDexRegisterLive(reg)); - DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation( - reg, num_dex_registers, code_info, encoding); - DCHECK_EQ(expected.GetKind(), seen.GetKind()); - DCHECK_EQ(expected.GetValue(), seen.GetValue()); - } - } - if (num_dex_registers == 0) { - DCHECK(!dex_register_map.IsValid()); } -} -size_t StackMapStream::PrepareRegisterMasks() { - register_masks_.resize(stack_maps_.size(), 0u); - ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); - for (StackMapEntry& stack_map : stack_maps_) { - const size_t index = dedupe.size(); - stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second; - register_masks_[index] = stack_map.register_mask; - } - return dedupe.size(); -} - -void StackMapStream::PrepareMethodIndices() { - CHECK(method_indices_.empty()); - method_indices_.resize(stack_maps_.size() + inline_infos_.size()); - ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); - for (StackMapEntry& stack_map : stack_maps_) { - const size_t index = dedupe.size(); - const uint32_t method_index = stack_map.dex_method_index; - if (method_index != dex::kDexNoIndex) { - stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second; - method_indices_[index] = method_index; - } - } - for (InlineInfoEntry& inline_info : inline_infos_) { - const size_t index = dedupe.size(); - const uint32_t method_index = inline_info.method_index; - CHECK_NE(method_index, dex::kDexNoIndex); - 
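CreateDexRegisterMap above re-emits a register only when its location differs from the previous stack map, or when the last emission is more than kMaxDexRegisterMapSearchDistance maps back, so a reader never has to scan arbitrarily far for the current value. A standalone sketch of the same delta idea, using plain ints as stand-ins for DexRegisterLocation and the catalog index (names and values are illustrative):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy delta encoder: per "stack map", store only registers whose location
// changed, plus a periodic refresh bounded by a maximum search distance.
struct DeltaEntry { size_t reg; int location; };

int main() {
  const uint32_t kMaxSearchDistance = 32;   // Mirrors kMaxDexRegisterMapSearchDistance.
  std::vector<int> previous(4, -1);         // -1 plays the role of DexRegisterLocation::None().
  std::vector<uint32_t> timestamp(4, 0);    // Stack map index of the last emission.

  std::vector<std::vector<int>> per_map_locations = {
    {10, 20, 30, 40},   // Map 0: everything is new, all four registers emitted.
    {10, 21, 30, 40},   // Map 1: only register 1 changed, one entry emitted.
    {10, 21, 30, 40},   // Map 2: nothing changed, nothing emitted.
  };

  for (uint32_t map = 0; map < per_map_locations.size(); ++map) {
    std::vector<DeltaEntry> emitted;
    for (size_t reg = 0; reg < previous.size(); ++reg) {
      int loc = per_map_locations[map][reg];
      uint32_t distance = map - timestamp[reg];
      if (previous[reg] != loc || distance > kMaxSearchDistance) {
        emitted.push_back({reg, loc});
        previous[reg] = loc;
        timestamp[reg] = map;
      }
    }
    std::printf("map %u: %zu register(s) re-emitted\n", map, emitted.size());
  }
  return 0;
}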
inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second; - method_indices_[index] = method_index; + // Set the mask and map for the current StackMap (which includes inlined registers). + if (temp_dex_register_mask_.GetNumberOfBits() != 0) { + current_stack_map_[StackMap::kDexRegisterMaskIndex] = + dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(), + temp_dex_register_mask_.GetNumberOfBits()); + } + if (!current_dex_registers_.empty()) { + current_stack_map_[StackMap::kDexRegisterMapIndex] = + dex_register_maps_.Dedup(temp_dex_register_map_.data(), + temp_dex_register_map_.size()); + } + + if (kVerifyStackMaps) { + size_t stack_map_index = stack_maps_.size(); + // We need to make copy of the current registers for later (when the check is run). + auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>( + current_dex_registers_.begin(), current_dex_registers_.end()); + dchecks_.emplace_back([=](const CodeInfo& code_info) { + StackMap stack_map = code_info.GetStackMapAt(stack_map_index); + uint32_t expected_reg = 0; + for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) { + CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); + } + for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) { + DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info); + for (DexRegisterLocation reg : map) { + CHECK_EQ((*expected_dex_registers)[expected_reg++], reg); + } + } + CHECK_EQ(expected_reg, expected_dex_registers->size()); + }); } - method_indices_.resize(dedupe.size()); } - -size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) { - // Preallocate memory since we do not want it to move (the dedup map will point into it). - const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte; - stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u); - // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later - // when copying out from stack_masks_. - ScopedArenaUnorderedMap<MemoryRegion, - size_t, - FNVHash<MemoryRegion>, - MemoryRegion::ContentEquals> dedup( - stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream)); - for (StackMapEntry& stack_map : stack_maps_) { - size_t index = dedup.size(); - MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size); - for (size_t i = 0; i < entry_size_in_bits; i++) { - stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i)); - } - stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second; - } - return dedup.size(); +template<typename Writer, typename Builder> +ALWAYS_INLINE static void EncodeTable(Writer& out, const Builder& bit_table) { + out.WriteBit(false); // Is not deduped. + bit_table.Encode(out); } -// Check that all StackMapStream inputs are correctly encoded by trying to read them back. -void StackMapStream::CheckCodeInfo(MemoryRegion region) const { - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - DCHECK_EQ(code_info.GetNumberOfStackMaps(encoding), stack_maps_.size()); - size_t invoke_info_index = 0; - for (size_t s = 0; s < stack_maps_.size(); ++s) { - const StackMap stack_map = code_info.GetStackMapAt(s, encoding); - const StackMapEncoding& stack_map_encoding = encoding.stack_map.encoding; - StackMapEntry entry = stack_maps_[s]; - - // Check main stack map fields. 
- DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding, instruction_set_), - entry.native_pc_code_offset.Uint32Value(instruction_set_)); - DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc); - DCHECK_EQ(stack_map.GetRegisterMaskIndex(stack_map_encoding), entry.register_mask_index); - DCHECK_EQ(code_info.GetRegisterMaskOf(encoding, stack_map), entry.register_mask); - const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding); - DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index); - BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); - if (entry.sp_mask != nullptr) { - DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits()); - for (size_t b = 0; b < num_stack_mask_bits; b++) { - DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b)); - } - } else { - for (size_t b = 0; b < num_stack_mask_bits; b++) { - DCHECK_EQ(stack_mask.LoadBit(b), 0u); - } - } - if (entry.dex_method_index != dex::kDexNoIndex) { - InvokeInfo invoke_info = code_info.GetInvokeInfo(encoding, invoke_info_index); - DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_), - entry.native_pc_code_offset.Uint32Value(instruction_set_)); - DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type); - DCHECK_EQ(invoke_info.GetMethodIndexIdx(encoding.invoke_info.encoding), - entry.dex_method_index_idx); - invoke_info_index++; - } - CheckDexRegisterMap(code_info, - code_info.GetDexRegisterMapOf( - stack_map, encoding, entry.dex_register_entry.num_dex_registers), - entry.dex_register_entry.num_dex_registers, - entry.dex_register_entry.live_dex_registers_mask, - entry.dex_register_entry.locations_start_index); - - // Check inline info. 
- DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0)); - if (entry.inlining_depth != 0) { - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - DCHECK_EQ(inline_info.GetDepth(encoding.inline_info.encoding), entry.inlining_depth); - for (size_t d = 0; d < entry.inlining_depth; ++d) { - size_t inline_info_index = entry.inline_infos_start_index + d; - DCHECK_LT(inline_info_index, inline_infos_.size()); - InlineInfoEntry inline_entry = inline_infos_[inline_info_index]; - DCHECK_EQ(inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, d), - inline_entry.dex_pc); - if (inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, d)) { - DCHECK_EQ(inline_info.GetArtMethodAtDepth(encoding.inline_info.encoding, d), - inline_entry.method); - } else { - const size_t method_index_idx = - inline_info.GetMethodIndexIdxAtDepth(encoding.inline_info.encoding, d); - DCHECK_EQ(method_index_idx, inline_entry.dex_method_index_idx); - DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index); - } - - CheckDexRegisterMap(code_info, - code_info.GetDexRegisterMapAtDepth( - d, - inline_info, - encoding, - inline_entry.dex_register_entry.num_dex_registers), - inline_entry.dex_register_entry.num_dex_registers, - inline_entry.dex_register_entry.live_dex_registers_mask, - inline_entry.dex_register_entry.locations_start_index); - } +ScopedArenaVector<uint8_t> StackMapStream::Encode() { + DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; + DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; + + ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream)); + BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer); + out.WriteVarint(packed_frame_size_); + out.WriteVarint(core_spill_mask_); + out.WriteVarint(fp_spill_mask_); + out.WriteVarint(num_dex_registers_); + EncodeTable(out, stack_maps_); + EncodeTable(out, register_masks_); + EncodeTable(out, stack_masks_); + EncodeTable(out, inline_infos_); + EncodeTable(out, method_infos_); + EncodeTable(out, dex_register_masks_); + EncodeTable(out, dex_register_maps_); + EncodeTable(out, dex_register_catalog_); + + // Verify that we can load the CodeInfo and check some essentials. + CodeInfo code_info(buffer.data()); + CHECK_EQ(code_info.Size(), buffer.size()); + CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); + + // Verify all written data (usually only in debug builds). 
+ if (kVerifyStackMaps) { + for (const auto& dcheck : dchecks_) { + dcheck(code_info); } } -} -size_t StackMapStream::ComputeMethodInfoSize() const { - DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before " << __FUNCTION__; - return MethodInfo::ComputeSize(method_indices_.size()); + return buffer; } } // namespace art diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 579aabdb5f..01c6bf9e0e 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -17,133 +17,63 @@ #ifndef ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ #define ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ +#include "base/allocator.h" +#include "base/arena_bit_vector.h" +#include "base/bit_table.h" #include "base/bit_vector-inl.h" -#include "base/hash_map.h" +#include "base/memory_region.h" #include "base/scoped_arena_containers.h" #include "base/value_object.h" -#include "memory_region.h" -#include "method_info.h" +#include "dex_register_location.h" #include "nodes.h" #include "stack_map.h" namespace art { -// Helper to build art::StackMapStream::LocationCatalogEntriesIndices. -class LocationCatalogEntriesIndicesEmptyFn { - public: - void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const { - item.first = DexRegisterLocation::None(); - } - bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const { - return item.first == DexRegisterLocation::None(); - } -}; - -// Hash function for art::StackMapStream::LocationCatalogEntriesIndices. -// This hash function does not create collisions. -class DexRegisterLocationHashFn { - public: - size_t operator()(DexRegisterLocation key) const { - // Concatenate `key`s fields to create a 64-bit value to be hashed. - int64_t kind_and_value = - (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_); - return inner_hash_fn_(kind_and_value); - } - private: - std::hash<int64_t> inner_hash_fn_; -}; - - /** * Collects and builds stack maps for a method. All the stack maps * for a method are placed in a CodeInfo object. 
*/ -class StackMapStream : public ValueObject { +class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { public: explicit StackMapStream(ScopedArenaAllocator* allocator, InstructionSet instruction_set) : allocator_(allocator), instruction_set_(instruction_set), - stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), - location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)), - location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)), - dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), - inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), - stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)), - register_masks_(allocator->Adapter(kArenaAllocStackMapStream)), - method_indices_(allocator->Adapter(kArenaAllocStackMapStream)), - dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)), - stack_mask_max_(-1), - dex_pc_max_(kNoDexPc), - register_mask_max_(0), - number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), - allocator->Adapter(kArenaAllocStackMapStream)), - current_entry_(), - current_inline_info_(), - code_info_encoding_(allocator->Adapter(kArenaAllocStackMapStream)), - needed_size_(0), - current_dex_register_(0), - in_inline_frame_(false) { - stack_maps_.reserve(10); - location_catalog_entries_.reserve(4); - dex_register_locations_.reserve(10 * 4); - inline_infos_.reserve(2); - code_info_encoding_.reserve(16); + stack_maps_(allocator), + inline_infos_(allocator), + method_infos_(allocator), + register_masks_(allocator), + stack_masks_(allocator), + dex_register_masks_(allocator), + dex_register_maps_(allocator), + dex_register_catalog_(allocator), + lazy_stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)), + current_stack_map_(), + current_inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), + current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), + previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)), + expected_num_dex_registers_(0u), + temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream), + temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) { } - // A dex register map entry for a single stack map entry, contains what registers are live as - // well as indices into the location catalog. - class DexRegisterMapEntry { - public: - static const size_t kOffsetUnassigned = -1; - - BitVector* live_dex_registers_mask; - uint32_t num_dex_registers; - size_t locations_start_index; - // Computed fields - size_t hash = 0; - size_t offset = kOffsetUnassigned; - - size_t ComputeSize(size_t catalog_size) const; - }; - - // See runtime/stack_map.h to know what these fields contain. - struct StackMapEntry { - uint32_t dex_pc; - CodeOffset native_pc_code_offset; - uint32_t register_mask; - BitVector* sp_mask; - uint8_t inlining_depth; - size_t inline_infos_start_index; - uint32_t stack_mask_index; - uint32_t register_mask_index; - DexRegisterMapEntry dex_register_entry; - size_t dex_register_map_index; - InvokeType invoke_type; - uint32_t dex_method_index; - uint32_t dex_method_index_idx; // Index into dex method index table. - }; - - struct InlineInfoEntry { - uint32_t dex_pc; // dex::kDexNoIndex for intrinsified native methods. 
- ArtMethod* method; - uint32_t method_index; - DexRegisterMapEntry dex_register_entry; - size_t dex_register_map_index; - uint32_t dex_method_index_idx; // Index into the dex method index table. - }; + void BeginMethod(size_t frame_size_in_bytes, + size_t core_spill_mask, + size_t fp_spill_mask, + uint32_t num_dex_registers); + void EndMethod(); void BeginStackMapEntry(uint32_t dex_pc, uint32_t native_pc_offset, - uint32_t register_mask, - BitVector* sp_mask, - uint32_t num_dex_registers, - uint8_t inlining_depth); + uint32_t register_mask = 0, + BitVector* sp_mask = nullptr, + StackMap::Kind kind = StackMap::Kind::Default); void EndStackMapEntry(); - void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value); - - void AddInvoke(InvokeType type, uint32_t dex_method_index); + void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) { + current_dex_registers_.push_back(DexRegisterLocation(kind, value)); + } void BeginInlineInfoEntry(ArtMethod* method, uint32_t dex_pc, @@ -155,109 +85,54 @@ class StackMapStream : public ValueObject { return stack_maps_.size(); } - const StackMapEntry& GetStackMap(size_t i) const { - return stack_maps_[i]; - } - - void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - stack_maps_[i].native_pc_code_offset = - CodeOffset::FromOffset(native_pc_offset, instruction_set_); - } - - // Prepares the stream to fill in a memory region. Must be called before FillIn. - // Returns the size (in bytes) needed to store this stream. - size_t PrepareForFillIn(); - void FillInCodeInfo(MemoryRegion region); - void FillInMethodInfo(MemoryRegion region); + uint32_t GetStackMapNativePcOffset(size_t i); + void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset); - size_t ComputeMethodInfoSize() const; + // Encode all stack map data. + // The returned vector is allocated using the allocator passed to the StackMapStream. + ScopedArenaVector<uint8_t> Encode(); private: - size_t ComputeDexRegisterLocationCatalogSize() const; - size_t ComputeDexRegisterMapsSize() const; - void ComputeInlineInfoEncoding(InlineInfoEncoding* encoding, - size_t dex_register_maps_bytes); - - CodeOffset ComputeMaxNativePcCodeOffset() const; - - // Returns the number of unique stack masks. - size_t PrepareStackMasks(size_t entry_size_in_bits); - - // Returns the number of unique register masks. - size_t PrepareRegisterMasks(); - - // Prepare and deduplicate method indices. - void PrepareMethodIndices(); - - // Deduplicate entry if possible and return the corresponding index into dex_register_entries_ - // array. If entry is not a duplicate, a new entry is added to dex_register_entries_. - size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry); - - // Return true if the two dex register map entries are equal. - bool DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, const DexRegisterMapEntry& b) const; + static constexpr uint32_t kNoValue = -1; - // Fill in the corresponding entries of a register map. - void ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding); + void CreateDexRegisterMap(); - // Returns the index of an entry with the same dex register map as the current_entry, - // or kNoSameDexMapFound if no such entry exists. - size_t FindEntryWithTheSameDexMap(); - bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const; - - // Fill in the corresponding entries of a register map. 
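The rewritten interface above drops the old PrepareForFillIn()/FillInCodeInfo() two-phase flow: a stream is driven with Begin/End calls and finished with a single Encode(). A usage sketch that mirrors the updated stack_map_test.cc further below (argument values are the test's; kPcAlign is the test helper constant defined there):

MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);

stream.BeginMethod(/* frame_size_in_bytes */ 32,
                   /* core_spill_mask */ 0,
                   /* fp_spill_mask */ 0,
                   /* num_dex_registers */ 2);
ArenaBitVector sp_mask(&allocator, 0, false);
stream.BeginStackMapEntry(/* dex_pc */ 0, /* native_pc_offset */ 64 * kPcAlign, 0x3, &sp_mask);
stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, 0);
stream.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, -2);
stream.EndStackMapEntry();
stream.EndMethod();

// Encode() hands back the serialized CodeInfo, which can be read straight back.
ScopedArenaVector<uint8_t> memory = stream.Encode();
CodeInfo code_info(memory.data());
CHECK_EQ(code_info.GetNumberOfStackMaps(), 1u);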
- void FillInDexRegisterMap(DexRegisterMap dex_register_map, - uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask, - uint32_t start_index_in_dex_register_locations) const; - - // Returns the offset for the dex register inside of the dex register location region. See FillIn. - // Only copies the dex register map if the offset for the entry is not already assigned. - size_t MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry, - size_t* current_offset, - MemoryRegion dex_register_locations_region); - void CheckDexRegisterMap(const CodeInfo& code_info, - const DexRegisterMap& dex_register_map, - size_t num_dex_registers, - BitVector* live_dex_registers_mask, - size_t dex_register_locations_index) const; - void CheckCodeInfo(MemoryRegion region) const; - - ScopedArenaAllocator* const allocator_; + ScopedArenaAllocator* allocator_; const InstructionSet instruction_set_; - ScopedArenaVector<StackMapEntry> stack_maps_; - - // A catalog of unique [location_kind, register_value] pairs (per method). - ScopedArenaVector<DexRegisterLocation> location_catalog_entries_; - // Map from Dex register location catalog entries to their indices in the - // location catalog. - using LocationCatalogEntriesIndices = ScopedArenaHashMap<DexRegisterLocation, - size_t, - LocationCatalogEntriesIndicesEmptyFn, - DexRegisterLocationHashFn>; - LocationCatalogEntriesIndices location_catalog_entries_indices_; - - // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. - ScopedArenaVector<size_t> dex_register_locations_; - ScopedArenaVector<InlineInfoEntry> inline_infos_; - ScopedArenaVector<uint8_t> stack_masks_; - ScopedArenaVector<uint32_t> register_masks_; - ScopedArenaVector<uint32_t> method_indices_; - ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_; - int stack_mask_max_; - uint32_t dex_pc_max_; - uint32_t register_mask_max_; - size_t number_of_stack_maps_with_inline_info_; - - ScopedArenaSafeMap<uint32_t, ScopedArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; - - StackMapEntry current_entry_; - InlineInfoEntry current_inline_info_; - ScopedArenaVector<uint8_t> code_info_encoding_; - size_t needed_size_; - uint32_t current_dex_register_; - bool in_inline_frame_; - - static constexpr uint32_t kNoSameDexMapFound = -1; + uint32_t packed_frame_size_ = 0; + uint32_t core_spill_mask_ = 0; + uint32_t fp_spill_mask_ = 0; + uint32_t num_dex_registers_ = 0; + BitTableBuilder<StackMap> stack_maps_; + BitTableBuilder<InlineInfo> inline_infos_; + BitTableBuilder<MethodInfo> method_infos_; + BitTableBuilder<RegisterMask> register_masks_; + BitmapTableBuilder stack_masks_; + BitmapTableBuilder dex_register_masks_; + BitTableBuilder<DexRegisterMapInfo> dex_register_maps_; + BitTableBuilder<DexRegisterInfo> dex_register_catalog_; + + ScopedArenaVector<BitVector*> lazy_stack_masks_; + + // Variables which track the current state between Begin/End calls; + bool in_method_ = false; + bool in_stack_map_ = false; + bool in_inline_info_ = false; + BitTableBuilder<StackMap>::Entry current_stack_map_; + ScopedArenaVector<BitTableBuilder<InlineInfo>::Entry> current_inline_infos_; + ScopedArenaVector<DexRegisterLocation> current_dex_registers_; + ScopedArenaVector<DexRegisterLocation> previous_dex_registers_; + ScopedArenaVector<uint32_t> dex_register_timestamp_; // Stack map index of last change. + size_t expected_num_dex_registers_; + + // Temporary variables used in CreateDexRegisterMap. + // They are here so that we can reuse the reserved memory. 
+ ArenaBitVector temp_dex_register_mask_; + ScopedArenaVector<BitTableBuilder<DexRegisterMapInfo>::Entry> temp_dex_register_map_; + + // A set of lambda functions to be executed at the end to verify + // the encoded data. It is generally only used in debug builds. + std::vector<std::function<void(CodeInfo&)>> dchecks_; DISALLOW_COPY_AND_ASSIGN(StackMapStream); }; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 7e517f3485..d28f09fbba 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -18,6 +18,7 @@ #include "art_method.h" #include "base/arena_bit_vector.h" +#include "base/malloc_arena_pool.h" #include "stack_map_stream.h" #include "gtest/gtest.h" @@ -28,14 +29,13 @@ namespace art { // to the given bit vector. Returns true if they are same. static bool CheckStackMask( const CodeInfo& code_info, - const CodeInfoEncoding& encoding, const StackMap& stack_map, const BitVector& bit_vector) { - BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map); - if (bit_vector.GetNumberOfBits() > encoding.stack_mask.encoding.BitSize()) { + BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map); + if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) { return false; } - for (size_t i = 0; i < encoding.stack_mask.encoding.BitSize(); ++i) { + for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) { if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) { return false; } @@ -45,93 +45,68 @@ static bool CheckStackMask( using Kind = DexRegisterLocation::Kind; +constexpr static uint32_t kPcAlign = GetInstructionSetInstructionAlignment(kRuntimeISA); + TEST(StackMapTest, Test1) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); size_t number_of_dex_registers = 2; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Short location. stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - one 1-byte short Dex register location, and - // - one 5-byte large Dex register location. 
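The tests now scale every native pc offset by kPcAlign because StackMap::PackNativePc, used in BeginStackMapEntry above, stores offsets relative to the instruction-set alignment, so only aligned offsets survive the round trip. A small arithmetic sketch of that assumption (the Pack/Unpack pair below is a guess at the divide/multiply behaviour, not code from the patch):

#include <cassert>
#include <cstdint>

// Assumed behaviour of StackMap::PackNativePc()/UnpackNativePc(): offsets are
// stored divided by the instruction alignment of the target ISA.
constexpr uint32_t kInstructionAlignment = 2;  // e.g. Thumb-2; 4 on ARM64.

constexpr uint32_t Pack(uint32_t native_pc)   { return native_pc / kInstructionAlignment; }
constexpr uint32_t Unpack(uint32_t packed_pc) { return packed_pc * kInstructionAlignment; }

int main() {
  // Test1 requests offset 64 * kPcAlign, i.e. an aligned offset of 128 here.
  uint32_t native_pc = 64 * kInstructionAlignment;
  assert(Unpack(Pack(native_pc)) == native_pc);  // Aligned offsets round-trip.
  assert(Unpack(Pack(65)) != 65);                // A misaligned offset would not.
  return 0;
}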
- size_t expected_location_catalog_size = 1u + 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); - - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(0u, index0); - ASSERT_EQ(1u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + + DexRegisterLocation location0 = 
code_info.GetDexRegisterCatalogEntry(0); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); ASSERT_EQ(Kind::kInStack, location0.GetKind()); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } TEST(StackMapTest, Test2) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -139,7 +114,7 @@ TEST(StackMapTest, Test2) { sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; size_t number_of_dex_registers_in_inline_info = 0; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info); @@ -151,7 +126,7 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask2(&allocator, 0, true); sp_mask2.SetBit(3); sp_mask2.SetBit(8); - stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 128 * kPcAlign, 0xFF, &sp_mask2); stream.AddDexRegisterEntry(Kind::kInRegister, 18); // Short location. stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location. stream.EndStackMapEntry(); @@ -159,7 +134,7 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask3(&allocator, 0, true); sp_mask3.SetBit(1); sp_mask3.SetBit(5); - stream.BeginStackMapEntry(2, 192, 0xAB, &sp_mask3, number_of_dex_registers, 0); + stream.BeginStackMapEntry(2, 192 * kPcAlign, 0xAB, &sp_mask3); stream.AddDexRegisterEntry(Kind::kInRegister, 6); // Short location. stream.AddDexRegisterEntry(Kind::kInRegisterHigh, 8); // Short location. stream.EndStackMapEntry(); @@ -167,256 +142,165 @@ TEST(StackMapTest, Test2) { ArenaBitVector sp_mask4(&allocator, 0, true); sp_mask4.SetBit(6); sp_mask4.SetBit(7); - stream.BeginStackMapEntry(3, 256, 0xCD, &sp_mask4, number_of_dex_registers, 0); + stream.BeginStackMapEntry(3, 256 * kPcAlign, 0xCD, &sp_mask4); stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location, same in stack map 2. stream.AddDexRegisterEntry(Kind::kInFpuRegisterHigh, 1); // Short location. 
stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(4u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(4u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(7u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - six 1-byte short Dex register locations, and - // - one 5-byte large Dex register location. - size_t expected_location_catalog_size = 6u * 1u + 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); // First stack map. { - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. 
- size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(0u, index0); - ASSERT_EQ(1u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); ASSERT_EQ(Kind::kInStack, location0.GetKind()); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - ASSERT_EQ(2u, inline_info.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); + ASSERT_TRUE(stack_map.HasInlineInfo()); + auto inline_infos = code_info.GetInlineInfosOf(stack_map); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(3u, inline_infos[0].GetDexPc()); + ASSERT_EQ(2u, inline_infos[1].GetDexPc()); + 
ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); } // Second stack map. { - StackMap stack_map = code_info.GetStackMapAt(1, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding))); - ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(18, dex_register_map.GetMachineRegister( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(3, dex_register_map.GetMachineRegister( - 1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(2u, index0); - ASSERT_EQ(3u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u * kPcAlign))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + ASSERT_EQ(128u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask2)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[1].GetKind()); + ASSERT_EQ(18, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(3, dex_register_map[1].GetMachineRegister()); + + DexRegisterLocation 
location0 = code_info.GetDexRegisterCatalogEntry(2); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(3); ASSERT_EQ(Kind::kInRegister, location0.GetKind()); ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind()); - ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); - ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind()); ASSERT_EQ(18, location0.GetValue()); ASSERT_EQ(3, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } // Third stack map. { - StackMap stack_map = code_info.GetStackMapAt(2, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding))); - ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(6, dex_register_map.GetMachineRegister( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(8, dex_register_map.GetMachineRegister( - 1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(4u, index0); - ASSERT_EQ(5u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(2); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u * kPcAlign))); + ASSERT_EQ(2u, stack_map.GetDexPc()); + ASSERT_EQ(192u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask3)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, 
dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map[1].GetKind()); + ASSERT_EQ(6, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(8, dex_register_map[1].GetMachineRegister()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(4); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(5); ASSERT_EQ(Kind::kInRegister, location0.GetKind()); ASSERT_EQ(Kind::kInRegisterHigh, location1.GetKind()); - ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind()); - ASSERT_EQ(Kind::kInRegisterHigh, location1.GetInternalKind()); ASSERT_EQ(6, location0.GetValue()); ASSERT_EQ(8, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } // Fourth stack map. { - StackMap stack_map = code_info.GetStackMapAt(3, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding))); - ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. - size_t expected_dex_register_map_size = 1u + 1u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(3, dex_register_map.GetMachineRegister( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(1, dex_register_map.GetMachineRegister( - 1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(3u, index0); // Shared with second stack map. 
- ASSERT_EQ(6u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(3); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u * kPcAlign))); + ASSERT_EQ(3u, stack_map.GetDexPc()); + ASSERT_EQ(256u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask4)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_TRUE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map[1].GetKind()); + ASSERT_EQ(3, dex_register_map[0].GetMachineRegister()); + ASSERT_EQ(1, dex_register_map[1].GetMachineRegister()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(3); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(6); ASSERT_EQ(Kind::kInFpuRegister, location0.GetKind()); ASSERT_EQ(Kind::kInFpuRegisterHigh, location1.GetKind()); - ASSERT_EQ(Kind::kInFpuRegister, location0.GetInternalKind()); - ASSERT_EQ(Kind::kInFpuRegisterHigh, location1.GetInternalKind()); ASSERT_EQ(3, location0.GetValue()); ASSERT_EQ(1, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo()); } } TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -424,7 +308,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { sp_mask1.SetBit(4); const size_t number_of_dex_registers = 2; const size_t number_of_dex_registers_in_inline_info = 2; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 1); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
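The checks above depend on the shared Dex register location catalog: each distinct (kind, value) location is stored once and the stack maps only record indices into it, which is why the fourth stack map reads catalog entry 3, the same kInFpuRegister entry the second stack map uses. A minimal sketch of such a catalog, assuming a plain std::map rather than ART's bit-packed tables:

#include <cstddef>
#include <cstdint>
#include <map>
#include <tuple>
#include <vector>

// Illustrative only: a location is a (kind, value) pair such as kInStack/0 or kConstant/-2.
struct Location {
  int kind;
  int32_t value;
  bool operator<(const Location& other) const {
    return std::tie(kind, value) < std::tie(other.kind, other.value);
  }
};

// Stores each distinct location once and hands out stable indices.
class LocationCatalogSketch {
 public:
  size_t FindOrAdd(const Location& loc) {
    auto it = index_of_.find(loc);
    if (it != index_of_.end()) {
      return it->second;  // Identical locations share one catalog entry.
    }
    size_t index = entries_.size();
    entries_.push_back(loc);
    index_of_.emplace(loc, index);
    return index;
  }
  const Location& Get(size_t index) const { return entries_[index]; }

 private:
  std::vector<Location> entries_;
  std::map<Location, size_t> index_of_;
};

Each stack map then stores only the small indices returned by FindOrAdd, so two maps describing the same register location never duplicate the entry itself.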
stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info); @@ -433,338 +317,204 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { stream.EndInlineInfoEntry(); stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(2u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - one 1-byte short Dex register locations, and - // - one 5-byte large Dex register location. - const size_t expected_location_catalog_size = 1u + 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); // First stack map. { - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers)); - ASSERT_TRUE(map.IsDexRegisterLive(0)); - ASSERT_TRUE(map.IsDexRegisterLive(1)); - ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask, and - // - one 1-byte set of location catalog entry indices composed of two 2-bit values. 
- size_t expected_map_size = 1u + 1u; - ASSERT_EQ(expected_map_size, map.Size()); - - ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, - map.GetLocationKind(1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kInStack, - map.GetLocationInternalKind(0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, - map.GetLocationInternalKind(1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(0, map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - const size_t index0 = - map.GetLocationCatalogEntryIndex(0, number_of_dex_registers, number_of_catalog_entries); - const size_t index1 = - map.GetLocationCatalogEntryIndex(1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(0u, index0); - ASSERT_EQ(1u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map)); + ASSERT_EQ(number_of_dex_registers, map.size()); + ASSERT_TRUE(map[0].IsLive()); + ASSERT_TRUE(map[1].IsLive()); + ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kInStack, map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, map[1].GetKind()); + ASSERT_EQ(0, map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, map[1].GetConstant()); + + DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0); + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1); ASSERT_EQ(Kind::kInStack, location0.GetKind()); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kInStack, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - - // Test that the inline info dex register map deduplicated to the same offset as the stack map - // one. - ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - EXPECT_EQ(inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding, 0), - stack_map.GetDexRegisterMapOffset(encoding.stack_map.encoding)); } } TEST(StackMapTest, TestNonLiveDexRegisters) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kNone, 0); // No location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(1u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - // The Dex register location catalog contains: - // - one 5-byte large Dex register location. - size_t expected_location_catalog_size = 5u; - ASSERT_EQ(expected_location_catalog_size, location_catalog.Size()); - - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - DexRegisterMap dex_register_map = - code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers); - ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0)); - ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1)); - ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers)); - // The Dex register map contains: - // - one 1-byte live bit mask. - // No space is allocated for the sole location catalog entry index, as it is useless. 
- size_t expected_dex_register_map_size = 1u + 0u; - ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size()); - - ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind( - 0, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind( - 1, number_of_dex_registers, code_info, encoding)); - ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding)); - - size_t index0 = dex_register_map.GetLocationCatalogEntryIndex( - 0, number_of_dex_registers, number_of_catalog_entries); - size_t index1 = dex_register_map.GetLocationCatalogEntryIndex( - 1, number_of_dex_registers, number_of_catalog_entries); - ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0); - ASSERT_EQ(0u, index1); - DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0); - DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1); - ASSERT_EQ(Kind::kNone, location0.GetKind()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + ASSERT_EQ(number_of_dex_registers, dex_register_map.size()); + ASSERT_FALSE(dex_register_map[0].IsLive()); + ASSERT_TRUE(dex_register_map[1].IsLive()); + ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters()); + + ASSERT_EQ(Kind::kNone, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + + DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(0); ASSERT_EQ(Kind::kConstant, location1.GetKind()); - ASSERT_EQ(Kind::kNone, location0.GetInternalKind()); - ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind()); - ASSERT_EQ(0, location0.GetValue()); ASSERT_EQ(-2, location1.GetValue()); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); -} - -// Generate a stack map whose dex register offset is -// StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do -// not treat it as kNoDexRegisterMap. -TEST(StackMapTest, DexRegisterMapOffsetOverflow) { - ArenaPool pool; - ArenaStack arena_stack(&pool); - ScopedArenaAllocator allocator(&arena_stack); - StackMapStream stream(&allocator, kRuntimeISA); - - ArenaBitVector sp_mask(&allocator, 0, false); - uint32_t number_of_dex_registers = 1024; - // Create the first stack map (and its Dex register map). 
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8; - for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) { - // Use two different Dex register locations to populate this map, - // as using a single value (in the whole CodeInfo object) would - // make this Dex register mapping data empty (see - // art::DexRegisterMap::SingleEntrySizeInBits). - stream.AddDexRegisterEntry(Kind::kConstant, i % 2); // Short location. - } - stream.EndStackMapEntry(); - // Create the second stack map (and its Dex register map). - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); - for (uint32_t i = 0; i < number_of_dex_registers; ++i) { - stream.AddDexRegisterEntry(Kind::kConstant, 0); // Short location. - } - stream.EndStackMapEntry(); - - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); - - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - // The location catalog contains two entries (DexRegisterLocation(kConstant, 0) - // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index - // has a size of 1 bit. - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); - ASSERT_EQ(2u, number_of_catalog_entries); - ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_catalog_entries)); - - // The first Dex register map contains: - // - a live register bit mask for 1024 registers (that is, 128 bytes of - // data); and - // - Dex register mapping information for 1016 1-bit Dex (live) register - // locations (that is, 127 bytes of data). - // Hence it has a size of 255 bytes, and therefore... - ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers)); - StackMap stack_map0 = code_info.GetStackMapAt(0, encoding); - DexRegisterMap dex_register_map0 = - code_info.GetDexRegisterMapOf(stack_map0, encoding, number_of_dex_registers); - ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers, - number_of_catalog_entries)); - ASSERT_EQ(255u, dex_register_map0.Size()); - - StackMap stack_map1 = code_info.GetStackMapAt(1, encoding); - ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding.stack_map.encoding)); - // ...the offset of the second Dex register map (relative to the - // beginning of the Dex register maps region) is 255 (i.e., - // kNoDexRegisterMapSmallEncoding). - ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding.stack_map.encoding), - StackMap::kNoDexRegisterMap); - ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding.stack_map.encoding), 0xFFu); + ASSERT_FALSE(stack_map.HasInlineInfo()); } TEST(StackMapTest, TestShareDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. 
stream.EndStackMapEntry(); // Second stack map, which should share the same dex register map. - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 65 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); // Third stack map (doesn't share the dex register map). - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 66 * kPcAlign, 0x3, &sp_mask); stream.AddDexRegisterEntry(Kind::kInRegister, 2); // Short location. stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location. stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo ci(region); - CodeInfoEncoding encoding = ci.ExtractEncoding(); + CodeInfo ci(memory.data()); // Verify first stack map. - StackMap sm0 = ci.GetStackMapAt(0, encoding); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, encoding, number_of_dex_registers); - ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci, encoding)); - ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci, encoding)); + StackMap sm0 = ci.GetStackMapAt(0); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0); + ASSERT_EQ(number_of_dex_registers, dex_registers0.size()); + ASSERT_EQ(0, dex_registers0[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers0[1].GetConstant()); // Verify second stack map. - StackMap sm1 = ci.GetStackMapAt(1, encoding); - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, encoding, number_of_dex_registers); - ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci, encoding)); - ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci, encoding)); + StackMap sm1 = ci.GetStackMapAt(1); + DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1); + ASSERT_EQ(number_of_dex_registers, dex_registers1.size()); + ASSERT_EQ(0, dex_registers1[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers1[1].GetConstant()); // Verify third stack map. - StackMap sm2 = ci.GetStackMapAt(2, encoding); - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, encoding, number_of_dex_registers); - ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci, encoding)); - ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci, encoding)); - - // Verify dex register map offsets. - ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding.stack_map.encoding), - sm1.GetDexRegisterMapOffset(encoding.stack_map.encoding)); - ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding.stack_map.encoding), - sm2.GetDexRegisterMapOffset(encoding.stack_map.encoding)); - ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding.stack_map.encoding), - sm2.GetDexRegisterMapOffset(encoding.stack_map.encoding)); + StackMap sm2 = ci.GetStackMapAt(2); + DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2); + ASSERT_EQ(number_of_dex_registers, dex_registers2.size()); + ASSERT_EQ(2, dex_registers2[0].GetMachineRegister()); + ASSERT_EQ(-2, dex_registers2[1].GetConstant()); + + // Verify dex register mask offsets. + ASSERT_FALSE(sm1.HasDexRegisterMaskIndex()); // No delta. 
+ ASSERT_TRUE(sm2.HasDexRegisterMaskIndex()); // Has delta. } TEST(StackMapTest, TestNoDexRegisterMap) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 1); ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 0; - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); number_of_dex_registers = 1; - stream.BeginStackMapEntry(1, 68, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.BeginStackMapEntry(1, 68 * kPcAlign, 0x4, &sp_mask); + stream.AddDexRegisterEntry(Kind::kNone, 0); stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding)); + CodeInfo code_info(memory.data()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); - uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding); + uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_catalog_entries); - DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding); - ASSERT_EQ(0u, location_catalog.Size()); - - StackMap stack_map = code_info.GetStackMapAt(0, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding))); - ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); - - stack_map = code_info.GetStackMapAt(1, encoding); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); - ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68, encoding))); - ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map.encoding)); - ASSERT_EQ(68u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA)); - ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(encoding, stack_map)); - - ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding)); - ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding)); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap()); + ASSERT_FALSE(stack_map.HasInlineInfo()); + + stack_map = code_info.GetStackMapAt(1); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68 * kPcAlign))); + ASSERT_EQ(1u, stack_map.GetDexPc()); + 
ASSERT_EQ(68u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(stack_map)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + ASSERT_FALSE(stack_map.HasInlineInfo()); } TEST(StackMapTest, InlineTest) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); ArtMethod art_method; ArenaBitVector sp_mask1(&allocator, 0, true); @@ -772,7 +522,7 @@ TEST(StackMapTest, InlineTest) { sp_mask1.SetBit(4); // First stack map. - stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, 2, 2); + stream.BeginStackMapEntry(0, 10 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 0); stream.AddDexRegisterEntry(Kind::kConstant, 4); @@ -788,7 +538,7 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); // Second stack map. - stream.BeginStackMapEntry(2, 22, 0x3, &sp_mask1, 2, 3); + stream.BeginStackMapEntry(2, 22 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 56); stream.AddDexRegisterEntry(Kind::kConstant, 0); @@ -806,13 +556,13 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); // Third stack map. - stream.BeginStackMapEntry(4, 56, 0x3, &sp_mask1, 2, 0); + stream.BeginStackMapEntry(4, 56 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kNone, 0); stream.AddDexRegisterEntry(Kind::kConstant, 4); stream.EndStackMapEntry(); // Fourth stack map. - stream.BeginStackMapEntry(6, 78, 0x3, &sp_mask1, 2, 3); + stream.BeginStackMapEntry(6, 78 * kPcAlign, 0x3, &sp_mask1); stream.AddDexRegisterEntry(Kind::kInStack, 56); stream.AddDexRegisterEntry(Kind::kConstant, 0); @@ -828,204 +578,202 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo ci(region); - CodeInfoEncoding encoding = ci.ExtractEncoding(); + CodeInfo ci(memory.data()); { // Verify first stack map. 
- StackMap sm0 = ci.GetStackMapAt(0, encoding); - - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, encoding, 2); - ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding)); - ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding)); - - InlineInfo if0 = ci.GetInlineInfoOf(sm0, encoding); - ASSERT_EQ(2u, if0.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(2u, if0.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(if0.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(3u, if0.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(if0.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, encoding, 1); - ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, encoding, 3); - ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci, encoding)); - ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci, encoding)); - ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci, encoding)); + StackMap sm0 = ci.GetStackMapAt(0); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(0, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(4, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm0); + ASSERT_EQ(2u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[0]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(8, dex_registers1[0].GetStackOffsetInBytes()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[1]); + ASSERT_EQ(3u, dex_registers2.size()); + ASSERT_EQ(16, dex_registers2[0].GetStackOffsetInBytes()); + ASSERT_EQ(20, dex_registers2[1].GetConstant()); + ASSERT_EQ(15, dex_registers2[2].GetMachineRegister()); } { // Verify second stack map. 
- StackMap sm1 = ci.GetStackMapAt(1, encoding); - - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, encoding, 2); - ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding)); - ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding)); - - InlineInfo if1 = ci.GetInlineInfoOf(sm1, encoding); - ASSERT_EQ(3u, if1.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(2u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(3u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_EQ(5u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 2)); - ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 2)); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, encoding, 1); - ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, encoding, 3); - ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci, encoding)); - ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci, encoding)); - ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci, encoding)); - - ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(encoding.inline_info.encoding, 2)); + StackMap sm1 = ci.GetStackMapAt(1); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(0, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm1); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(3u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[0]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(12, dex_registers1[0].GetStackOffsetInBytes()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[1]); + ASSERT_EQ(3u, dex_registers2.size()); + ASSERT_EQ(80, dex_registers2[0].GetStackOffsetInBytes()); + ASSERT_EQ(10, dex_registers2[1].GetConstant()); + ASSERT_EQ(5, dex_registers2[2].GetMachineRegister()); } { // Verify third stack map. - StackMap sm2 = ci.GetStackMapAt(2, encoding); + StackMap sm2 = ci.GetStackMapAt(2); - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, encoding, 2); - ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0)); - ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding)); - ASSERT_FALSE(sm2.HasInlineInfo(encoding.stack_map.encoding)); + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_FALSE(dex_registers0[0].IsLive()); + ASSERT_EQ(4, dex_registers0[1].GetConstant()); + ASSERT_FALSE(sm2.HasInlineInfo()); } { // Verify fourth stack map. 
- StackMap sm3 = ci.GetStackMapAt(3, encoding); - - DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, encoding, 2); - ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding)); - ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding)); - - InlineInfo if2 = ci.GetInlineInfoOf(sm3, encoding); - ASSERT_EQ(3u, if2.GetDepth(encoding.inline_info.encoding)); - ASSERT_EQ(2u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0)); - ASSERT_EQ(5u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1)); - ASSERT_EQ(10u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 2)); - ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 2)); - - ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(encoding.inline_info.encoding, 0)); - - DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, encoding, 1); - ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci, encoding)); - - DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, encoding, 2); - ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0)); - ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci, encoding)); + StackMap sm3 = ci.GetStackMapAt(3); + + DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3); + ASSERT_EQ(2u, dex_registers0.size()); + ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes()); + ASSERT_EQ(0, dex_registers0[1].GetConstant()); + + auto inline_infos = ci.GetInlineInfosOf(sm3); + ASSERT_EQ(3u, inline_infos.size()); + ASSERT_EQ(2u, inline_infos[0].GetDexPc()); + ASSERT_TRUE(inline_infos[0].EncodesArtMethod()); + ASSERT_EQ(5u, inline_infos[1].GetDexPc()); + ASSERT_TRUE(inline_infos[1].EncodesArtMethod()); + ASSERT_EQ(10u, inline_infos[2].GetDexPc()); + ASSERT_TRUE(inline_infos[2].EncodesArtMethod()); + + DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[1]); + ASSERT_EQ(1u, dex_registers1.size()); + ASSERT_EQ(2, dex_registers1[0].GetMachineRegister()); + + DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[2]); + ASSERT_EQ(2u, dex_registers2.size()); + ASSERT_FALSE(dex_registers2[0].IsLive()); + ASSERT_EQ(3, dex_registers2[1].GetMachineRegister()); } } -TEST(StackMapTest, CodeOffsetTest) { - // Test minimum alignments, encoding, and decoding. 
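The kPcAlign factor applied to every native PC in these tests, and the PackedNativePcTest that replaces CodeOffsetTest below, reflect that native PC offsets are stored with the instruction-set alignment divided out so they need fewer bits. A hedged sketch of that round trip, assuming offsets are always multiples of the alignment and ignoring ISA quirks such as the Thumb bit:

#include <cassert>
#include <cstdint>

// Illustrative only: drop the alignment bits when storing, restore them when reading.
uint32_t PackNativePcSketch(uint32_t native_pc_offset, uint32_t instruction_alignment) {
  assert(native_pc_offset % instruction_alignment == 0u);  // Assumed invariant.
  return native_pc_offset / instruction_alignment;
}

uint32_t UnpackNativePcSketch(uint32_t packed, uint32_t instruction_alignment) {
  return packed * instruction_alignment;
}

// With a 4-byte alignment an offset of 64 packs to 16 and unpacks back to 64, which is
// the kind of round trip PackedNativePcTest asserts for each instruction set.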
- CodeOffset offset_thumb2 = - CodeOffset::FromOffset(kThumb2InstructionAlignment, InstructionSet::kThumb2); - CodeOffset offset_arm64 = - CodeOffset::FromOffset(kArm64InstructionAlignment, InstructionSet::kArm64); - CodeOffset offset_x86 = - CodeOffset::FromOffset(kX86InstructionAlignment, InstructionSet::kX86); - CodeOffset offset_x86_64 = - CodeOffset::FromOffset(kX86_64InstructionAlignment, InstructionSet::kX86_64); - CodeOffset offset_mips = - CodeOffset::FromOffset(kMipsInstructionAlignment, InstructionSet::kMips); - CodeOffset offset_mips64 = - CodeOffset::FromOffset(kMips64InstructionAlignment, InstructionSet::kMips64); - EXPECT_EQ(offset_thumb2.Uint32Value(InstructionSet::kThumb2), kThumb2InstructionAlignment); - EXPECT_EQ(offset_arm64.Uint32Value(InstructionSet::kArm64), kArm64InstructionAlignment); - EXPECT_EQ(offset_x86.Uint32Value(InstructionSet::kX86), kX86InstructionAlignment); - EXPECT_EQ(offset_x86_64.Uint32Value(InstructionSet::kX86_64), kX86_64InstructionAlignment); - EXPECT_EQ(offset_mips.Uint32Value(InstructionSet::kMips), kMipsInstructionAlignment); - EXPECT_EQ(offset_mips64.Uint32Value(InstructionSet::kMips64), kMips64InstructionAlignment); +TEST(StackMapTest, PackedNativePcTest) { + // Test minimum alignments, and decoding. + uint32_t packed_thumb2 = + StackMap::PackNativePc(kThumb2InstructionAlignment, InstructionSet::kThumb2); + uint32_t packed_arm64 = + StackMap::PackNativePc(kArm64InstructionAlignment, InstructionSet::kArm64); + uint32_t packed_x86 = + StackMap::PackNativePc(kX86InstructionAlignment, InstructionSet::kX86); + uint32_t packed_x86_64 = + StackMap::PackNativePc(kX86_64InstructionAlignment, InstructionSet::kX86_64); + uint32_t packed_mips = + StackMap::PackNativePc(kMipsInstructionAlignment, InstructionSet::kMips); + uint32_t packed_mips64 = + StackMap::PackNativePc(kMips64InstructionAlignment, InstructionSet::kMips64); + EXPECT_EQ(StackMap::UnpackNativePc(packed_thumb2, InstructionSet::kThumb2), + kThumb2InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_arm64, InstructionSet::kArm64), + kArm64InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_x86, InstructionSet::kX86), + kX86InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_x86_64, InstructionSet::kX86_64), + kX86_64InstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_mips, InstructionSet::kMips), + kMipsInstructionAlignment); + EXPECT_EQ(StackMap::UnpackNativePc(packed_mips64, InstructionSet::kMips64), + kMips64InstructionAlignment); } TEST(StackMapTest, TestDeduplicateStackMask) { - ArenaPool pool; + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 0); ArenaBitVector sp_mask(&allocator, 0, true); sp_mask.SetBit(1); sp_mask.SetBit(4); - stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); + stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); - stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0); + stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask); stream.EndStackMapEntry(); - size_t size = stream.PrepareForFillIn(); - void* memory = allocator.Alloc(size, kArenaAllocMisc); - MemoryRegion region(memory, size); - stream.FillInCodeInfo(region); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); - CodeInfo code_info(region); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding)); + 
CodeInfo code_info(memory.data()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); - StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding); - StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding); - EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map.encoding), - stack_map2.GetStackMaskIndex(encoding.stack_map.encoding)); + StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4 * kPcAlign); + StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8 * kPcAlign); + EXPECT_EQ(stack_map1.GetStackMaskIndex(), + stack_map2.GetStackMaskIndex()); } -TEST(StackMapTest, TestInvokeInfo) { - ArenaPool pool; +TEST(StackMapTest, TestDedupeBitTables) { + MallocArenaPool pool; ArenaStack arena_stack(&pool); ScopedArenaAllocator allocator(&arena_stack); StackMapStream stream(&allocator, kRuntimeISA); + stream.BeginMethod(32, 0, 0, 2); - ArenaBitVector sp_mask(&allocator, 0, true); - sp_mask.SetBit(1); - stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); - stream.AddInvoke(kSuper, 1); - stream.EndStackMapEntry(); - stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0); - stream.AddInvoke(kStatic, 3); - stream.EndStackMapEntry(); - stream.BeginStackMapEntry(0, 16, 0x3, &sp_mask, 0, 0); - stream.AddInvoke(kDirect, 65535); + stream.BeginStackMapEntry(0, 64 * kPcAlign); + stream.AddDexRegisterEntry(Kind::kInStack, 0); + stream.AddDexRegisterEntry(Kind::kConstant, -2); stream.EndStackMapEntry(); - const size_t code_info_size = stream.PrepareForFillIn(); - MemoryRegion code_info_region(allocator.Alloc(code_info_size, kArenaAllocMisc), code_info_size); - stream.FillInCodeInfo(code_info_region); - - const size_t method_info_size = stream.ComputeMethodInfoSize(); - MemoryRegion method_info_region(allocator.Alloc(method_info_size, kArenaAllocMisc), - method_info_size); - stream.FillInMethodInfo(method_info_region); - - CodeInfo code_info(code_info_region); - MethodInfo method_info(method_info_region.begin()); - CodeInfoEncoding encoding = code_info.ExtractEncoding(); - ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding)); - - InvokeInfo invoke1(code_info.GetInvokeInfoForNativePcOffset(4, encoding)); - InvokeInfo invoke2(code_info.GetInvokeInfoForNativePcOffset(8, encoding)); - InvokeInfo invoke3(code_info.GetInvokeInfoForNativePcOffset(16, encoding)); - InvokeInfo invoke_invalid(code_info.GetInvokeInfoForNativePcOffset(12, encoding)); - EXPECT_FALSE(invoke_invalid.IsValid()); // No entry for that index. 
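The TestDedupeBitTables body that continues below encodes one CodeInfo, feeds the same bytes to a CodeInfo::Deduper twice, and checks that the combined output stays well under twice the single encoding. ART presumably dedupes at the granularity of its encoded bit tables; the sketch below simplifies to whole-blob granularity to show the principle:

#include <cstddef>
#include <cstdint>
#include <map>
#include <vector>

// Illustrative only: append a blob unless an identical one is already stored,
// in which case return the offset of the earlier copy.
class BlobDeduperSketch {
 public:
  explicit BlobDeduperSketch(std::vector<uint8_t>* storage) : storage_(storage) {}

  size_t Dedupe(const std::vector<uint8_t>& blob) {
    auto it = offset_of_.find(blob);
    if (it != offset_of_.end()) {
      return it->second;  // The second identical blob reuses the first one's bytes.
    }
    size_t offset = storage_->size();
    storage_->insert(storage_->end(), blob.begin(), blob.end());
    offset_of_.emplace(blob, offset);
    return offset;
  }

 private:
  std::vector<uint8_t>* storage_;
  std::map<std::vector<uint8_t>, size_t> offset_of_;
};

With byte-identical input the second Dedupe call adds nothing new, which is the effect the size check at the end of the test relies on.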
- EXPECT_TRUE(invoke1.IsValid()); - EXPECT_TRUE(invoke2.IsValid()); - EXPECT_TRUE(invoke3.IsValid()); - EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper); - EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding, method_info), 1u); - EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u); - EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic); - EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding, method_info), 3u); - EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u); - EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect); - EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding, method_info), 65535u); - EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u); + stream.EndMethod(); + ScopedArenaVector<uint8_t> memory = stream.Encode(); + + std::vector<uint8_t> out; + CodeInfo::Deduper deduper(&out); + size_t deduped1 = deduper.Dedupe(memory.data()); + size_t deduped2 = deduper.Dedupe(memory.data()); + + for (size_t deduped : { deduped1, deduped2 }) { + CodeInfo code_info(out.data() + deduped); + ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + + StackMap stack_map = code_info.GetStackMapAt(0); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign))); + ASSERT_EQ(0u, stack_map.GetDexPc()); + ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA)); + + ASSERT_TRUE(stack_map.HasDexRegisterMap()); + DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map); + + ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind()); + ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind()); + ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes()); + ASSERT_EQ(-2, dex_register_map[1].GetConstant()); + } + + ASSERT_GT(memory.size() * 2, out.size()); } } // namespace art diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc index a7c23bef7e..dc433feb51 100644 --- a/compiler/optimizing/superblock_cloner.cc +++ b/compiler/optimizing/superblock_cloner.cc @@ -17,6 +17,7 @@ #include "superblock_cloner.h" #include "common_dominator.h" +#include "induction_var_range.h" #include "graph_checker.h" #include <iostream> @@ -70,20 +71,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) { return true; } -// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole -// graph. -static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { - if (loop1 != nullptr || loop2 != nullptr) { - return nullptr; +// Returns whether two Edge sets are equal (ArenaHashSet doesn't have "Equal" method). +static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) { + if (set1->size() != set2->size()) { + return false; } - if (loop1->IsIn(*loop2)) { - return loop2; - } else if (loop2->IsIn(*loop1)) { - return loop1; + for (auto e : *set1) { + if (set2->find(e) == set2->end()) { + return false; + } } - HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader()); - return block->GetLoopInformation(); + return true; } // Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph. 
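The next hunk adds TraverseSubgraphForConnectivity, which verifies connectivity by clearing bits as the DFS visits blocks, so any bit still set afterwards marks a block unreachable from the start. The same idea over a plain adjacency list, using hypothetical standalone types rather than HBasicBlock/HBasicBlockSet:

#include <cstddef>
#include <vector>

// Illustrative only: successors[b] lists the successor ids of block b; in_set marks the
// subgraph and doubles as the "not yet visited" set.
void TraverseForConnectivitySketch(size_t block,
                                   const std::vector<std::vector<size_t>>& successors,
                                   std::vector<bool>* in_set) {
  (*in_set)[block] = false;  // Clear instead of marking visited.
  for (size_t succ : successors[block]) {
    if ((*in_set)[succ]) {
      TraverseForConnectivitySketch(succ, successors, in_set);
    }
  }
}

// After calling this on the entry block, any index still set in *in_set belongs to a
// subgraph block the entry cannot reach, the condition the debug-build connectivity
// check in Run() guards against.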
@@ -95,6 +94,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) { } } +// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while +// traversing the function removes basic blocks from the bb_set (instead of traditional DFS +// 'marking'). So what is left in the 'bb_set' after the traversal is not reachable from the start +// block. +static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) { + DCHECK(bb_set->IsBitSet(block->GetBlockId())); + bb_set->ClearBit(block->GetBlockId()); + + for (HBasicBlock* succ : block->GetSuccessors()) { + if (bb_set->IsBitSet(succ->GetBlockId())) { + TraverseSubgraphForConnectivity(succ, bb_set); + } + } +} + // // Helpers for CloneBasicBlock. // @@ -268,7 +282,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect } void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { - // TODO: DCHECK that after the transformation the graph is connected. HBasicBlock* block_entry = nullptr; if (outer_loop_ == nullptr) { @@ -397,7 +410,7 @@ void SuperblockCloner::ResolvePhi(HPhi* phi) { // Main algorithm methods. // -void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) { +void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const { DCHECK(exits->empty()); for (uint32_t block_id : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(block_id); @@ -424,6 +437,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { outer_loop_ = nullptr; break; } + if (outer_loop_ == nullptr) { + // We should not use the initial outer_loop_ value 'nullptr' when finding the most outer + // common loop. + outer_loop_ = loop_exit_loop_info; + } outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); } @@ -455,8 +473,8 @@ void SuperblockCloner::RemapEdgesSuccessors() { continue; } - auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id)); - auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + auto orig_redir = remap_orig_internal_->find(HEdge(orig_block_id, orig_succ_id)); + auto copy_redir = remap_copy_internal_->find(HEdge(orig_block_id, orig_succ_id)); // Due to construction all successors of copied block were set to original. if (copy_redir != remap_copy_internal_->end()) { @@ -504,9 +522,152 @@ void SuperblockCloner::ResolveDataFlow() { } // +// Helpers for live-outs processing and Subgraph-closed SSA. +// + +bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const { + DCHECK(live_outs->empty()); + for (uint32_t idx : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(idx); + + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + DCHECK(instr->IsClonable()); + + if (IsUsedOutsideRegion(instr, orig_bb_set_)) { + live_outs->FindOrAdd(instr, instr); + } + } + + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instr = it.Current(); + if (!instr->IsClonable()) { + return false; + } + + if (IsUsedOutsideRegion(instr, orig_bb_set_)) { + // TODO: Investigate why HNewInstance, HCheckCast has a requirement for the input. 
+ if (instr->IsLoadClass()) { + return false; + } + live_outs->FindOrAdd(instr, instr); + } + } + } + return true; +} + +void SuperblockCloner::UpdateInductionRangeInfoOf( + HInstruction* user, HInstruction* old_instruction, HInstruction* replacement) { + if (induction_range_ != nullptr) { + induction_range_->Replace(user, old_instruction, replacement); + } +} + +void SuperblockCloner::ConstructSubgraphClosedSSA() { + if (live_outs_.empty()) { + return; + } + + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + if (exits.empty()) { + DCHECK(live_outs_.empty()); + return; + } + + DCHECK_EQ(exits.size(), 1u); + HBasicBlock* exit_block = exits[0]; + // There should be no critical edges. + DCHECK_EQ(exit_block->GetPredecessors().size(), 1u); + DCHECK(exit_block->GetPhis().IsEmpty()); + + // For each live-out value insert a phi into the loop exit and replace all the value's uses + // external to the loop with this phi. The phi will have the original value as its only input; + // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the + // original value as the second input thus merging data flow from the original and copy parts of + // the subgraph. Also update the record in the live_outs_ map from (value, value) to + // (value, new_phi). + for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) { + HInstruction* value = live_out_it->first; + HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType()); + + if (value->GetType() == DataType::Type::kReference) { + phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo()); + } + + exit_block->AddPhi(phi); + live_out_it->second = phi; + + const HUseList<HInstruction*>& uses = value->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput(). + ++it; + if (!IsInOrigBBSet(user->GetBlock())) { + user->ReplaceInput(phi, index); + UpdateInductionRangeInfoOf(user, value, phi); + } + } + + const HUseList<HEnvironment*>& env_uses = value->GetEnvUses(); + for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) { + HEnvironment* env = it->GetUser(); + size_t index = it->GetIndex(); + ++it; + if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) { + env->ReplaceInput(phi, index); + } + } + + phi->AddInput(value); + } +} + +void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() { + for (auto it : live_outs_) { + DCHECK(it.first != it.second); + HInstruction* orig_value = it.first; + HPhi* phi = it.second->AsPhi(); + HInstruction* copy_value = GetInstrCopy(orig_value); + // Copy edges are inserted after the original so we can just add new input to the phi. + phi->AddInput(copy_value); + } +} + +// // Debug and logging methods. // +// Debug function to dump graph' BasicBlocks info. 
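ConstructSubgraphClosedSSA and FixSubgraphClosedSSAAfterCloning above keep the cloned region SSA-closed: every live-out gets a phi in the single exit block, uses outside the region are redirected to that phi, and the cloned definition is appended as the phi's second input once the copy exists. A deliberately simplified sketch of that bookkeeping, with hypothetical value and use-slot types standing in for HInstruction and its use lists:

#include <vector>

// Illustrative only: a value has inputs (for a phi, the merged definitions); its uses
// are represented as pointers to the slots that read it.
struct ValueSketch {
  std::vector<ValueSketch*> inputs;
};

// Seed the exit phi with the original definition and retarget every use slot that sits
// outside the cloned region to the phi (mirrors ConstructSubgraphClosedSSA).
void MakeExitPhiSketch(ValueSketch* original,
                       ValueSketch* exit_phi,
                       const std::vector<ValueSketch**>& use_slots_outside_region) {
  exit_phi->inputs.push_back(original);  // Only input until cloning is done.
  for (ValueSketch** use_slot : use_slots_outside_region) {
    *use_slot = exit_phi;                // External users now read the phi.
  }
}

// After cloning, the fix-up step simply appends the copied definition, i.e.
// exit_phi->inputs.push_back(copy_of_original), merging original and copy data flow.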
+void DumpBB(HGraph* graph) { + for (HBasicBlock* bb : graph->GetBlocks()) { + if (bb == nullptr) { + continue; + } + std::cout << bb->GetBlockId(); + std::cout << " <- "; + for (HBasicBlock* pred : bb->GetPredecessors()) { + std::cout << pred->GetBlockId() << " "; + } + std::cout << " -> "; + for (HBasicBlock* succ : bb->GetSuccessors()) { + std::cout << succ->GetBlockId() << " "; + } + + if (bb->GetDominator()) { + std::cout << " dom " << bb->GetDominator()->GetBlockId(); + } + + if (bb->GetLoopInformation()) { + std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId(); + } + + std::cout << std::endl; + } +} + void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { DCHECK(!orig_instr->IsPhi()); HInstruction* copy_instr = GetInstrCopy(orig_instr); @@ -542,6 +703,81 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) } } +bool SuperblockCloner::CheckRemappingInfoIsValid() { + for (HEdge edge : *remap_orig_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_copy_internal_) { + if (!IsEdgeValid(edge, graph_) || + !IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + for (auto edge : *remap_incoming_) { + if (!IsEdgeValid(edge, graph_) || + IsInOrigBBSet(edge.GetFrom()) || + !IsInOrigBBSet(edge.GetTo())) { + return false; + } + } + + return true; +} + +void SuperblockCloner::VerifyGraph() { + for (auto it : *hir_map_) { + HInstruction* orig_instr = it.first; + HInstruction* copy_instr = it.second; + if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) { + DCHECK(it.first->GetBlock() != nullptr); + } + if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) { + DCHECK(it.second->GetBlock() != nullptr); + } + } + + GraphChecker checker(graph_); + checker.Run(); + if (!checker.IsValid()) { + for (const std::string& error : checker.GetErrors()) { + std::cout << error << std::endl; + } + LOG(FATAL) << "GraphChecker failed: superblock cloner\n"; + } +} + +void DumpBBSet(const ArenaBitVector* set) { + for (uint32_t idx : set->Indexes()) { + std::cout << idx << "\n"; + } +} + +void SuperblockCloner::DumpInputSets() { + std::cout << "orig_bb_set:\n"; + for (uint32_t idx : orig_bb_set_.Indexes()) { + std::cout << idx << "\n"; + } + std::cout << "remap_orig_internal:\n"; + for (HEdge e : *remap_orig_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_copy_internal:\n"; + for (auto e : *remap_copy_internal_) { + std::cout << e << "\n"; + } + std::cout << "remap_incoming:\n"; + for (auto e : *remap_incoming_) { + std::cout << e << "\n"; + } +} + // // Public methods. 
// @@ -549,7 +785,8 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) SuperblockCloner::SuperblockCloner(HGraph* graph, const HBasicBlockSet* orig_bb_set, HBasicBlockMap* bb_map, - HInstructionMap* hir_map) + HInstructionMap* hir_map, + InductionVarRange* induction_range) : graph_(graph), arena_(graph->GetAllocator()), orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), @@ -558,8 +795,11 @@ SuperblockCloner::SuperblockCloner(HGraph* graph, remap_incoming_(nullptr), bb_map_(bb_map), hir_map_(hir_map), + induction_range_(induction_range), outer_loop_(nullptr), - outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) { + outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner), + live_outs_(std::less<HInstruction*>(), + graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) { orig_bb_set_.Copy(orig_bb_set); } @@ -569,6 +809,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte remap_orig_internal_ = remap_orig_internal; remap_copy_internal_ = remap_copy_internal; remap_incoming_ = remap_incoming; + DCHECK(CheckRemappingInfoIsValid()); } bool SuperblockCloner::IsSubgraphClonable() const { @@ -577,29 +818,79 @@ bool SuperblockCloner::IsSubgraphClonable() const { return false; } - // Check that there are no instructions defined in the subgraph and used outside. - // TODO: Improve this by accepting graph with such uses but only one exit. + HInstructionMap live_outs( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + if (!CollectLiveOutsAndCheckClonable(&live_outs)) { + return false; + } + + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + + // The only loops with live-outs which are currently supported are loops with a single exit. + if (!live_outs.empty() && exits.size() != 1) { + return false; + } + + return true; +} + +bool SuperblockCloner::IsFastCase() const { + // Check that loop unrolling/loop peeling is being conducted. + // Check that all the basic blocks belong to the same loop. + bool flag = false; + HLoopInformation* common_loop_info = nullptr; for (uint32_t idx : orig_bb_set_.Indexes()) { HBasicBlock* block = GetBlockById(idx); - - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (!instr->IsClonable() || - IsUsedOutsideRegion(instr, orig_bb_set_)) { + HLoopInformation* block_loop_info = block->GetLoopInformation(); + if (!flag) { + common_loop_info = block_loop_info; + } else { + if (block_loop_info != common_loop_info) { return false; } } + } - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (!instr->IsClonable() || - IsUsedOutsideRegion(instr, orig_bb_set_)) { - return false; - } - } + // Check that orig_bb_set_ corresponds to loop peeling/unrolling. + if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) { + return false; } - return true; + bool peeling_or_unrolling = false; + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + + // Check whether remapping info corresponds to loop unrolling. 
+ CollectRemappingInfoForPeelUnroll(/* to_unroll*/ true, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + remap_orig_internal.clear(); + remap_copy_internal.clear(); + remap_incoming.clear(); + + // Check whether remapping info corresponds to loop peeling. + CollectRemappingInfoForPeelUnroll(/* to_unroll*/ false, + common_loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) && + EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) && + EdgeHashSetsEqual(&remap_incoming, remap_incoming_); + + return peeling_or_unrolling; } void SuperblockCloner::Run() { @@ -609,19 +900,40 @@ void SuperblockCloner::Run() { remap_copy_internal_ != nullptr && remap_incoming_ != nullptr); DCHECK(IsSubgraphClonable()); + DCHECK(IsFastCase()); + if (kSuperblockClonerLogging) { + DumpInputSets(); + } + + CollectLiveOutsAndCheckClonable(&live_outs_); // Find an area in the graph for which control flow information should be adjusted. FindAndSetLocalAreaForAdjustments(); + ConstructSubgraphClosedSSA(); // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be // adjusted. CloneBasicBlocks(); // Connect the blocks together/remap successors and fix phis which are directly affected my the // remapping. RemapEdgesSuccessors(); + + // Check that the subgraph is connected. + if (kIsDebugBuild) { + HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner); + + // Add original and copy blocks of the subgraph to the work set. + for (auto iter : *bb_map_) { + work_set.SetBit(iter.first->GetBlockId()); // Original block. + work_set.SetBit(iter.second->GetBlockId()); // Copy block. + } + CHECK(IsSubgraphConnected(&work_set, graph_)); + } + // Recalculate dominance and backedge information which is required by the next stage. AdjustControlFlowInfo(); // Fix data flow of the graph. ResolveDataFlow(); + FixSubgraphClosedSSAAfterCloning(); } void SuperblockCloner::CleanUp() { @@ -650,6 +962,10 @@ void SuperblockCloner::CleanUp() { } } } + + if (kIsDebugBuild) { + VerifyGraph(); + } } HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { @@ -701,4 +1017,135 @@ void SuperblockCloner::CloneBasicBlocks() { } } +// +// Stand-alone methods. +// + +void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + HEdgeSet* remap_orig_internal, + HEdgeSet* remap_copy_internal, + HEdgeSet* remap_incoming) { + DCHECK(loop_info != nullptr); + HBasicBlock* loop_header = loop_info->GetHeader(); + // Set up remap_orig_internal edges set - set is empty. + // Set up remap_copy_internal edges set. + for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) { + HEdge e = HEdge(back_edge_block, loop_header); + if (to_unroll) { + remap_orig_internal->insert(e); + remap_copy_internal->insert(e); + } else { + remap_copy_internal->insert(e); + } + } + + // Set up remap_incoming edges set. 
+ if (!to_unroll) { + remap_incoming->insert(HEdge(loop_info->GetPreHeader(), loop_header)); + } +} + +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) { + ArenaVector<HBasicBlock*> entry_blocks( + graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + // Find subgraph entry blocks. + for (uint32_t orig_block_id : work_set->Indexes()) { + HBasicBlock* block = graph->GetBlocks()[orig_block_id]; + for (HBasicBlock* pred : block->GetPredecessors()) { + if (!work_set->IsBitSet(pred->GetBlockId())) { + entry_blocks.push_back(block); + break; + } + } + } + + for (HBasicBlock* entry_block : entry_blocks) { + if (work_set->IsBitSet(entry_block->GetBlockId())) { + TraverseSubgraphForConnectivity(entry_block, work_set); + } + } + + // Return whether there are unvisited - unreachable - blocks. + return work_set->NumSetBits() == 0; +} + +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) { + if (loop1 == nullptr || loop2 == nullptr) { + return nullptr; + } + + if (loop1->IsIn(*loop2)) { + return loop2; + } + + HLoopInformation* current = loop1; + while (current != nullptr && !loop2->IsIn(*current)) { + current = current->GetPreHeader()->GetLoopInformation(); + } + + return current; +} + +bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) { + PeelUnrollHelper helper( + loop_info, /* bb_map= */ nullptr, /* hir_map= */ nullptr, /* induction_range= */ nullptr); + return helper.IsLoopClonable(); +} + +HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) { + // For now do peeling only for natural loops. + DCHECK(!loop_info_->IsIrreducible()); + + HBasicBlock* loop_header = loop_info_->GetHeader(); + // Check that loop info is up-to-date. + DCHECK(loop_info_ == loop_header->GetLoopInformation()); + HGraph* graph = loop_header->GetGraph(); + + if (kSuperblockClonerLogging) { + std::cout << "Method: " << graph->PrettyMethod() << std::endl; + std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") << + " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl; + } + + ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool()); + + HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(to_unroll, + loop_info_, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + cloner_.Run(); + cloner_.CleanUp(); + + // Check that loop info is preserved. 
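The CollectRemappingInfoForPeelUnroll call above is where peeling and unrolling diverge: both clone the whole loop, but unrolling remaps the back edges in both the original and the copy while leaving the incoming edge untouched, whereas peeling remaps back edges only in the copy and redirects the pre-header edge so the copied iteration runs before the remaining loop. A minimal sketch of just that edge bookkeeping, with string block names standing in for HBasicBlocks (illustrative, not ART code):

#include <cassert>
#include <set>
#include <string>
#include <utility>

using Edge = std::pair<std::string, std::string>;  // (from, to)
using EdgeSet = std::set<Edge>;

// Mirrors the remapping rule: unrolling remaps the back edge in both the
// original and the copy; peeling remaps it only in the copy and also remaps
// the incoming pre-header edge.
void Collect(bool to_unroll,
             const Edge& back_edge,
             const Edge& incoming_edge,
             EdgeSet* remap_orig_internal,
             EdgeSet* remap_copy_internal,
             EdgeSet* remap_incoming) {
  if (to_unroll) {
    remap_orig_internal->insert(back_edge);
    remap_copy_internal->insert(back_edge);
  } else {
    remap_copy_internal->insert(back_edge);
    remap_incoming->insert(incoming_edge);
  }
}

int main() {
  const Edge kBackEdge{"body", "header"};
  const Edge kIncoming{"preheader", "header"};

  EdgeSet orig, copy, incoming;
  Collect(/* to_unroll= */ true, kBackEdge, kIncoming, &orig, &copy, &incoming);
  assert(orig.count(kBackEdge) == 1 && copy.count(kBackEdge) == 1);
  assert(incoming.empty());

  orig.clear();
  copy.clear();
  incoming.clear();
  Collect(/* to_unroll= */ false, kBackEdge, kIncoming, &orig, &copy, &incoming);
  assert(orig.empty() && copy.count(kBackEdge) == 1);
  assert(incoming.count(kIncoming) == 1);
  return 0;
}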
+ DCHECK(loop_info_ == loop_header->GetLoopInformation()); + + return loop_header; +} + +PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info, + InductionVarRange* induction_range) + : bb_map_(std::less<HBasicBlock*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + hir_map_(std::less<HInstruction*>(), + info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)), + helper_(info, &bb_map_, &hir_map_, induction_range) {} + } // namespace art + +namespace std { + +ostream& operator<<(ostream& os, const art::HEdge& e) { + e.Dump(os); + return os; +} + +} // namespace std diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h index 23de692673..ece0914ddb 100644 --- a/compiler/optimizing/superblock_cloner.h +++ b/compiler/optimizing/superblock_cloner.h @@ -24,8 +24,9 @@ namespace art { +class InductionVarRange; + static const bool kSuperblockClonerLogging = false; -static const bool kSuperblockClonerVerify = false; // Represents an edge between two HBasicBlocks. // @@ -141,7 +142,8 @@ class SuperblockCloner : public ValueObject { SuperblockCloner(HGraph* graph, const HBasicBlockSet* orig_bb_set, HBasicBlockMap* bb_map, - HInstructionMap* hir_map); + HInstructionMap* hir_map, + InductionVarRange* induction_range); // Sets edge successor remapping info specified by corresponding edge sets. void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, @@ -152,6 +154,15 @@ class SuperblockCloner : public ValueObject { // TODO: Start from small range of graph patterns then extend it. bool IsSubgraphClonable() const; + // Returns whether selected subgraph satisfies the criteria for fast data flow resolution + // when iterative DF algorithm is not required and dominators/instructions inputs can be + // trivially adjusted. + // + // TODO: formally describe the criteria. + // + // Loop peeling and unrolling satisfy the criteria. + bool IsFastCase() const; + // Runs the copy algorithm according to the description. void Run(); @@ -202,11 +213,17 @@ class SuperblockCloner : public ValueObject { return IsInOrigBBSet(block->GetBlockId()); } + // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, + // dominators) needs to be adjusted. + HLoopInformation* GetRegionToBeAdjusted() const { + return outer_loop_; + } + private: // Fills the 'exits' vector with the subgraph exits. - void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); + void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const; - // Finds and records information about the area in the graph for which control-flow (back edges, + // Finds and records information about the area in the graph for which control flow (back edges, // loops, dominators) needs to be adjusted. void FindAndSetLocalAreaForAdjustments(); @@ -217,7 +234,7 @@ class SuperblockCloner : public ValueObject { // phis' nor instructions' inputs values are resolved. void RemapEdgesSuccessors(); - // Adjusts control-flow (back edges, loops, dominators) for the local area defined by + // Adjusts control flow (back edges, loops, dominators) for the local area defined by // FindAndSetLocalAreaForAdjustments. void AdjustControlFlowInfo(); @@ -226,6 +243,33 @@ class SuperblockCloner : public ValueObject { void ResolveDataFlow(); // + // Helpers for live-outs processing and Subgraph-closed SSA. + // + // - live-outs - values which are defined inside the subgraph and have uses outside. 
+ // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph + // have no outside uses except for the phi-nodes in the subgraph exits. + // + // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this + // makes the subgraph-closed SSA form construction much easier. + // + // TODO: Support subgraphs with live-outs and multiple exits. + // + + // For each live-out value 'val' in the region puts a record <val, val> into the map. + // Returns whether all of the instructions in the subgraph are clonable. + bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const; + + // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit. + // + // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates + // the record in the map to <val, phi> and replaces all outside uses with this phi. + void ConstructSubgraphClosedSSA(); + + // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding + // (<val, phi>) phi after the cloning is done. + void FixSubgraphClosedSSAAfterCloning(); + + // // Helpers for CloneBasicBlock. // @@ -268,10 +312,17 @@ class SuperblockCloner : public ValueObject { // Resolves the inputs of the phi. void ResolvePhi(HPhi* phi); + // Update induction range after when fixing SSA. + void UpdateInductionRangeInfoOf( + HInstruction* user, HInstruction* old_instruction, HInstruction* replacement); + // // Debug and logging methods. // void CheckInstructionInputsRemapping(HInstruction* orig_instr); + bool CheckRemappingInfoIsValid(); + void VerifyGraph(); + void DumpInputSets(); HBasicBlock* GetBlockById(uint32_t block_id) const { DCHECK(block_id < graph_->GetBlocks().size()); @@ -295,15 +346,103 @@ class SuperblockCloner : public ValueObject { HBasicBlockMap* bb_map_; // Correspondence map for instructions: (original HInstruction, copy HInstruction). HInstructionMap* hir_map_; - // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. + // As a result of cloning, the induction range analysis information can be invalidated + // and must be updated. If not null, the cloner updates it for changed instructions. + InductionVarRange* induction_range_; + // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. HLoopInformation* outer_loop_; HBasicBlockSet outer_loop_bb_set_; + HInstructionMap live_outs_; + ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); + ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); }; +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when correspondence map between original and copied +// basic blocks/instructions are demanded. +class PeelUnrollHelper : public ValueObject { + public: + PeelUnrollHelper(HLoopInformation* info, + SuperblockCloner::HBasicBlockMap* bb_map, + SuperblockCloner::HInstructionMap* hir_map, + InductionVarRange* induction_range) : + loop_info_(info), + cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) { + // For now do peeling/unrolling only for natural loops. + DCHECK(!info->IsIrreducible()); + } + + // Returns whether the loop can be peeled/unrolled (static function). + static bool IsLoopClonable(HLoopInformation* loop_info); + + // Returns whether the loop can be peeled/unrolled. 
+ bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } + + HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); } + HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); } + HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } + + protected: + // Applies loop peeling/unrolling for the loop specified by 'loop_info'. + // + // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it. + HBasicBlock* DoPeelUnrollImpl(bool to_unroll); + + private: + HLoopInformation* loop_info_; + SuperblockCloner cloner_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper); +}; + +// Helper class to perform loop peeling/unrolling. +// +// This helper should be used when there is no need to get correspondence information between +// original and copied basic blocks/instructions. +class PeelUnrollSimpleHelper : public ValueObject { + public: + PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range); + bool IsLoopClonable() const { return helper_.IsLoopClonable(); } + HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } + HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } + HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } + + const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } + const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } + + private: + SuperblockCloner::HBasicBlockMap bb_map_; + SuperblockCloner::HInstructionMap hir_map_; + PeelUnrollHelper helper_; + + DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper); +}; + +// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. +void CollectRemappingInfoForPeelUnroll(bool to_unroll, + HLoopInformation* loop_info, + SuperblockCloner::HEdgeSet* remap_orig_internal, + SuperblockCloner::HEdgeSet* remap_copy_internal, + SuperblockCloner::HEdgeSet* remap_incoming); + +// Returns whether blocks from 'work_set' are reachable from the rest of the graph. +// +// Returns whether such a set 'outer_entries' of basic blocks exists that: +// - each block from 'outer_entries' is not from 'work_set'. +// - each block from 'work_set' is reachable from at least one block from 'outer_entries'. +// +// After the function returns work_set contains only blocks from the original 'work_set' +// which are unreachable from the rest of the graph. +bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph. +HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); } // namespace art namespace std { @@ -312,11 +451,12 @@ template <> struct hash<art::HEdge> { size_t operator()(art::HEdge const& x) const noexcept { // Use Cantor pairing function as the hash function. 
- uint32_t a = x.GetFrom(); - uint32_t b = x.GetTo(); + size_t a = x.GetFrom(); + size_t b = x.GetTo(); return (a + b) * (a + b + 1) / 2 + b; } }; +ostream& operator<<(ostream& os, const art::HEdge& e); } // namespace std diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc index f1b7bffdf5..aa19de683f 100644 --- a/compiler/optimizing/superblock_cloner_test.cc +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -25,52 +25,35 @@ namespace art { using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; // This class provides methods and helpers for testing various cloning and copying routines: // individual instruction cloning and cloning of the more coarse-grain structures. -class SuperblockClonerTest : public OptimizingUnitTest { +class SuperblockClonerTest : public ImprovedOptimizingUnitTest { public: - SuperblockClonerTest() - : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {} - - void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p, + void CreateBasicLoopControlFlow(HBasicBlock* position, + HBasicBlock* successor, + /* out */ HBasicBlock** header_p, /* out */ HBasicBlock** body_p) { - entry_block_ = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(entry_block_); - graph_->SetEntryBlock(entry_block_); - HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_); - HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_preheader); graph_->AddBlock(loop_header); graph_->AddBlock(loop_body); - graph_->AddBlock(loop_exit); - exit_block_ = new (GetAllocator()) HBasicBlock(graph_); - graph_->AddBlock(exit_block_); - graph_->SetExitBlock(exit_block_); + position->ReplaceSuccessor(successor, loop_preheader); - entry_block_->AddSuccessor(loop_preheader); loop_preheader->AddSuccessor(loop_header); // Loop exit first to have a proper exit condition/target for HIf. - loop_header->AddSuccessor(loop_exit); + loop_header->AddSuccessor(successor); loop_header->AddSuccessor(loop_body); loop_body->AddSuccessor(loop_header); - loop_exit->AddSuccessor(exit_block_); *header_p = loop_header; *body_p = loop_body; - - parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - DataType::Type::kInt32); - entry_block_->AddInstruction(parameter_); - loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid()); - exit_block_->AddInstruction(new (GetAllocator()) HExit()); } void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) { @@ -84,11 +67,12 @@ class SuperblockClonerTest : public OptimizingUnitTest { // Header block. 
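The std::hash<art::HEdge> specialization in the superblock_cloner.h hunk above hashes an edge with the Cantor pairing function (a + b)(a + b + 1)/2 + b, a bijection on pairs of non-negative integers, so distinct edges never collide by construction; the patch also widens a and b from uint32_t to size_t, presumably to keep the intermediate product from overflowing 32 bits. A quick stand-alone check of the pairing's injectivity (illustrative, not ART code):

#include <cassert>
#include <cstddef>
#include <set>

// Cantor pairing: maps each ordered pair of non-negative integers to a
// distinct non-negative integer.
size_t CantorPair(size_t a, size_t b) {
  return (a + b) * (a + b + 1) / 2 + b;
}

int main() {
  std::set<size_t> seen;
  for (size_t a = 0; a < 64; ++a) {
    for (size_t b = 0; b < 64; ++b) {
      // Every pair in this range must produce a fresh value.
      assert(seen.insert(CantorPair(a, b)).second);
    }
  }
  return 0;
}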
HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck(); + HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128); loop_header->AddPhi(phi); loop_header->AddInstruction(suspend_check); - loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128)); - loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_)); + loop_header->AddInstruction(loop_check); + loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check)); // Loop body block. HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc); @@ -97,8 +81,8 @@ class SuperblockClonerTest : public OptimizingUnitTest { HInstruction* array_get = new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc); HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1); - HInstruction* array_set = - new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); + HInstruction* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check, add, DataType::Type::kInt32, dex_pc); HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1); loop_body->AddInstruction(null_check); @@ -123,49 +107,17 @@ class SuperblockClonerTest : public OptimizingUnitTest { null_check->CopyEnvironmentFrom(env); bounds_check->CopyEnvironmentFrom(env); } - - HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction, - ArenaVector<HInstruction*>* current_locals) { - HEnvironment* environment = new (GetAllocator()) HEnvironment( - (GetAllocator()), - current_locals->size(), - graph_->GetArtMethod(), - instruction->GetDexPc(), - instruction); - - environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals)); - instruction->SetRawEnvironment(environment); - return environment; - } - - bool CheckGraph() { - GraphChecker checker(graph_); - checker.Run(); - if (!checker.IsValid()) { - for (const std::string& error : checker.GetErrors()) { - std::cout << error << std::endl; - } - return false; - } - return true; - } - - HGraph* graph_; - - HBasicBlock* entry_block_; - HBasicBlock* exit_block_; - - HInstruction* parameter_; }; TEST_F(SuperblockClonerTest, IndividualInstrCloner) { HBasicBlock* header = nullptr; HBasicBlock* loop_body = nullptr; - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); - ASSERT_TRUE(CheckGraph()); + EXPECT_TRUE(CheckGraph()); HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck(); CloneAndReplaceInstructionVisitor visitor(graph_); @@ -193,7 +145,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) { HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -209,7 +162,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) { SuperblockCloner cloner(graph_, &orig_bb_set, &bb_map, - &hir_map); + &hir_map, + /* induction_range= */ nullptr); EXPECT_TRUE(cloner.IsSubgraphClonable()); cloner.CloneBasicBlocks(); @@ -272,7 +226,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { 
HBasicBlock* loop_body = nullptr; ArenaAllocator* arena = graph_->GetAllocator(); - CreateBasicLoopControlFlow(&header, &loop_body); + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); CreateBasicLoopDataFlow(header, loop_body); graph_->BuildDominatorTree(); ASSERT_TRUE(CheckGraph()); @@ -285,8 +240,9 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { SuperblockCloner cloner(graph_, &orig_bb_set, - nullptr, - nullptr); + /* bb_map= */ nullptr, + /* hir_map= */ nullptr, + /* induction_range= */ nullptr); EXPECT_TRUE(cloner.IsSubgraphClonable()); cloner.FindAndSetLocalAreaForAdjustments(); @@ -303,4 +259,488 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { EXPECT_TRUE(loop_info->IsBackEdge(*loop_body)); } +// Tests IsSubgraphConnected function for negative case. +TEST_F(SuperblockClonerTest, IsGraphConnected) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(unreachable_block); + + HBasicBlockSet bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + bb_set.SetBit(header->GetBlockId()); + bb_set.SetBit(loop_body->GetBlockId()); + bb_set.SetBit(unreachable_block->GetBlockId()); + + EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_)); + EXPECT_EQ(bb_set.NumSetBits(), 1u); + EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId())); +} + +// Tests SuperblockCloner for loop peeling case. +// +// Control Flow of the example (ignoring critical edges splitting). +// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A| +// / \ / / \ +// v v/ / v +// |4| |3| / |3A| (7) +// | / / +// v | v +// |E| \ |2|<-\ +// \ / \ / +// v v / +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopPeeling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoPeeling(); + HLoopInformation* new_loop_info = new_header->GetLoopInformation(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), header); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(new_loop_info->GetHeader(), header); + EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body); +} + +// Tests SuperblockCloner for loop unrolling case. +// +// Control Flow of the example (ignoring critical edges splitting). 
+// +// Before After +// +// |B| |B| +// | | +// v v +// |1| |1| +// | | +// v v +// |2|<-\ (6) |2A|<-\ +// / \ / / \ \ +// v v/ / v \ +// |4| |3| /(7)|3A| | +// | / / / +// v | v / +// |E| \ |2| / +// \ / \ / +// v v/ +// |4| |3| +// | +// v +// |E| +TEST_F(SuperblockClonerTest, LoopUnrolling) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlockMap bb_map( + std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map( + std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr); + EXPECT_TRUE(helper.IsLoopClonable()); + HBasicBlock* new_header = helper.DoUnrolling(); + + EXPECT_TRUE(CheckGraph()); + + // Check loop body successors. + EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header)); + EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header); + + // Check loop structure. + EXPECT_EQ(header, new_header); + EXPECT_EQ(loop_info, new_header->GetLoopInformation()); + EXPECT_EQ(loop_info->GetHeader(), new_header); + EXPECT_EQ(loop_info->GetBackEdges().size(), 1u); + EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body)); +} + +// Checks that loop unrolling works fine for a loop with multiple back edges. Tests that after +// the transformation the loop has a single preheader. +TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + // Transform a basic loop to have multiple back edges. 
+ HBasicBlock* latch = header->GetSuccessors()[1]; + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(if_block); + graph_->AddBlock(temp1); + header->ReplaceSuccessor(latch, if_block); + if_block->AddSuccessor(latch); + if_block->AddSuccessor(temp1); + temp1->AddSuccessor(header); + + if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + HInstructionIterator it(header->GetPhis()); + DCHECK(!it.Done()); + HPhi* loop_phi = it.Current()->AsPhi(); + HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, + loop_phi, + graph_->GetIntConstant(2)); + temp1->AddInstruction(temp_add); + temp1->AddInstruction(new (GetAllocator()) HGoto()); + loop_phi->AddInput(temp_add); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop_info = header->GetLoopInformation(); + PeelUnrollSimpleHelper helper(loop_info, /* induction_range= */ nullptr); + HBasicBlock* new_header = helper.DoPeeling(); + EXPECT_EQ(header, new_header); + + EXPECT_TRUE(CheckGraph()); + EXPECT_EQ(header->GetPredecessors().size(), 3u); +} + +static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header, + HBasicBlock* loop2_header, + HBasicBlock* loop3_header) { + EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header); + EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(), + loop2_header); +} + +TEST_F(SuperblockClonerTest, LoopPeelingNested) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 + // [ ], [ [ ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation(); + HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation(); + + // Check nested loops structure. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation(), /* induction_range= */ nullptr); + helper.DoPeeling(); + // Check that nested loops structure has not changed after the transformation. + CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header); + + // Test that the loop info is preserved. 
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation()); + EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation()); + + EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before); + EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks that the loop population is correctly propagated after an inner loop is peeled. +TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 + // [ [ [ ] ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation(), /* induction_range= */ nullptr); + helper.DoPeeling(); + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->Contains(*loop2_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + // Check that loop4 info has not been touched after local run of AnalyzeLoops. + EXPECT_EQ(loop4, loop4_header->GetLoopInformation()); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(!loop4->IsIn(*loop1)); + + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2); + + EXPECT_TRUE(CheckGraph()); +} + +// Checks the case when inner loop have an exit not to its immediate outer_loop but some other loop +// in the hierarchy. Loop population information must be valid after loop peeling. +TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops then peel loop3. 
+ // Headers: 1 2 3 + // [ [ [ ] ] ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + HBasicBlock* loop_body1 = loop_body; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + HBasicBlock* loop_body3 = loop_body; + + // Change the loop3 - insert an exit which leads to loop1. + HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(loop3_extra_if_block); + loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block); + loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit. + loop3_extra_if_block->AddSuccessor(loop_body3); + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit)); + + PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation(), /* induction_range= */ nullptr); + helper.DoPeeling(); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + // Check that after the transformation the local area for CF adjustments has been chosen + // correctly and loop population has been updated. + loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0]; + EXPECT_TRUE(loop1->Contains(*loop3_long_exit)); + + EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1); + + EXPECT_TRUE(loop1->Contains(*loop3_header)); + EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader())); + + EXPECT_TRUE(CheckGraph()); +} + +TEST_F(SuperblockClonerTest, FastCaseCheck) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + InitGraph(); + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + + HLoopInformation* loop_info = header->GetLoopInformation(); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + orig_bb_set.Union(&loop_info->GetBlocks()); + + HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)); + + CollectRemappingInfoForPeelUnroll(true, + loop_info, + &remap_orig_internal, + &remap_copy_internal, + &remap_incoming); + + // Insert some extra nodes and edges. + HBasicBlock* preheader = loop_info->GetPreHeader(); + orig_bb_set.SetBit(preheader->GetBlockId()); + + // Adjust incoming edges. 
+ remap_incoming.clear(); + remap_incoming.insert(HEdge(preheader->GetSinglePredecessor(), preheader)); + + HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + &bb_map, + &hir_map, + /* induction_range= */ nullptr); + cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming); + + EXPECT_FALSE(cloner.IsFastCase()); +} + +// Helper for FindCommonLoop which also check that FindCommonLoop is symmetric. +static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) { + HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2); + HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1); + EXPECT_EQ(common_loop21, common_loop12); + return common_loop12; +} + +// Tests FindCommonLoop function on a loop nest. +TEST_F(SuperblockClonerTest, FindCommonLoop) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + + InitGraph(); + + // Create the following nested structure of loops + // Headers: 1 2 3 4 5 + // [ [ [ ] ], [ ] ], [ ] + CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop1_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop2_header = header; + + CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop3_header = header; + + CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop4_header = header; + + CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + HBasicBlock* loop5_header = header; + + graph_->BuildDominatorTree(); + EXPECT_TRUE(CheckGraph()); + + HLoopInformation* loop1 = loop1_header->GetLoopInformation(); + HLoopInformation* loop2 = loop2_header->GetLoopInformation(); + HLoopInformation* loop3 = loop3_header->GetLoopInformation(); + HLoopInformation* loop4 = loop4_header->GetLoopInformation(); + HLoopInformation* loop5 = loop5_header->GetLoopInformation(); + + EXPECT_TRUE(loop1->IsIn(*loop1)); + EXPECT_TRUE(loop2->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop1)); + EXPECT_TRUE(loop3->IsIn(*loop2)); + EXPECT_TRUE(loop4->IsIn(*loop1)); + + EXPECT_FALSE(loop5->IsIn(*loop1)); + EXPECT_FALSE(loop4->IsIn(*loop2)); + EXPECT_FALSE(loop4->IsIn(*loop3)); + + EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr); + EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1); + + EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr); + EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1); + EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1); + 
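The FindCommonLoopCheck assertions here pin down FindCommonLoop's contract: it returns the innermost loop containing both arguments, or nullptr when the only common region is the whole graph, and it is symmetric in its arguments. A minimal model of the same walk up the loop tree, using explicit parent pointers instead of pre-header loop information (ToyLoop and the helpers are invented for illustration):

#include <cassert>
#include <cstddef>

// Toy loop-tree node: parent == nullptr means the loop is outermost.
struct ToyLoop {
  const ToyLoop* parent;
};

// True if 'loop' is 'other' itself or is nested (directly or not) inside it.
bool IsIn(const ToyLoop* loop, const ToyLoop* other) {
  for (const ToyLoop* l = loop; l != nullptr; l = l->parent) {
    if (l == other) return true;
  }
  return false;
}

// Mirrors FindCommonLoop: walk outward from loop1 until the current loop
// contains loop2; nullptr means the only common region is the whole graph.
const ToyLoop* FindCommon(const ToyLoop* loop1, const ToyLoop* loop2) {
  if (loop1 == nullptr || loop2 == nullptr) return nullptr;
  const ToyLoop* current = loop1;
  while (current != nullptr && !IsIn(loop2, current)) {
    current = current->parent;
  }
  return current;
}

int main() {
  // Nest: outer contains inner_a and inner_b; separate is an unrelated loop.
  ToyLoop outer{nullptr};
  ToyLoop inner_a{&outer};
  ToyLoop inner_b{&outer};
  ToyLoop separate{nullptr};

  assert(FindCommon(&inner_a, &inner_a) == &inner_a);
  assert(FindCommon(&inner_a, &inner_b) == &outer);
  assert(FindCommon(&inner_a, &separate) == nullptr);
  assert(FindCommon(&inner_a, &outer) == &outer);
  return 0;
}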
EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr); + + EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5); +} + } // namespace art diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index 0271850f29..b1abcf6747 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -31,7 +31,7 @@ class MemoryOperandVisitor : public HGraphVisitor { do_implicit_null_checks_(do_implicit_null_checks) {} private: - void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE { + void VisitBoundsCheck(HBoundsCheck* check) override { // Replace the length by the array itself, so that we can do compares to memory. HArrayLength* array_len = check->InputAt(1)->AsArrayLength(); @@ -76,9 +76,10 @@ X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph, do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) { } -void X86MemoryOperandGeneration::Run() { +bool X86MemoryOperandGeneration::Run() { MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_); visitor.VisitInsertionOrder(); + return true; } } // namespace x86 diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h index 5f15d9f1e6..3f4178d58a 100644 --- a/compiler/optimizing/x86_memory_gen.h +++ b/compiler/optimizing/x86_memory_gen.h @@ -31,7 +31,7 @@ class X86MemoryOperandGeneration : public HOptimization { CodeGenerator* codegen, OptimizingCompilerStats* stats); - void Run() OVERRIDE; + bool Run() override; static constexpr const char* kX86MemoryOperandGenerationPassName = "x86_memory_operand_generation"; diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index 921d401849..26aa434c0d 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -17,7 +17,8 @@ #include "trampoline_compiler.h" #include "base/arena_allocator.h" -#include "jni_env_ext.h" +#include "base/malloc_arena_pool.h" +#include "jni/jni_env_ext.h" #ifdef ART_ENABLE_CODEGEN_arm #include "utils/arm/assembler_arm_vixl.h" @@ -243,7 +244,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset64 offset) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); switch (isa) { #ifdef ART_ENABLE_CODEGEN_arm64 @@ -269,7 +270,7 @@ std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet is std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi, ThreadOffset32 offset) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); switch (isa) { #ifdef ART_ENABLE_CODEGEN_arm diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h index 64c1eb5022..f0086b58d5 100644 --- a/compiler/trampolines/trampoline_compiler.h +++ b/compiler/trampolines/trampoline_compiler.h @@ -18,13 +18,23 @@ #define ART_COMPILER_TRAMPOLINES_TRAMPOLINE_COMPILER_H_ #include <stdint.h> +#include <memory> #include <vector> -#include "driver/compiler_driver.h" +#include "arch/instruction_set.h" #include "offsets.h" namespace art { +enum EntryPointCallingConvention { + // ABI of invocations to a method's interpreter entry point. 
+ kInterpreterAbi, + // ABI of calls to a method's native code, only used for native methods. + kJniAbi, + // ABI of calls to a method's quick code entry point. + kQuickAbi +}; + // Create code that will invoke the function held in thread local storage. std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline32(InstructionSet isa, EntryPointCallingConvention abi, diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index 05250a4157..77f5d7081a 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -26,9 +26,6 @@ using namespace vixl::aarch32; // NOLINT(build/namespaces) -using vixl::ExactAssemblyScope; -using vixl::CodeBufferCheckScope; - namespace art { namespace arm { @@ -94,7 +91,7 @@ void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int c ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value())); // Check that mr == self.tls32_.is.gc_marking. ___ Cmp(mr, temp); - ___ B(eq, &mr_is_ok, /* far_target */ false); + ___ B(eq, &mr_is_ok, /* is_far_target= */ false); ___ Bkpt(code); ___ Bind(&mr_is_ok); } diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index b0310f2fb6..98c0191679 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -39,7 +39,7 @@ namespace vixl32 = vixl::aarch32; namespace art { namespace arm { -class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { +class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler { public: // Most methods fit in a 1KB code buffer, which results in more optimal alloc/realloc and // fewer system calls than a larger default capacity. @@ -149,7 +149,7 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { using MacroAssembler::Vmov; }; -class ArmVIXLAssembler FINAL : public Assembler { +class ArmVIXLAssembler final : public Assembler { private: class ArmException; public: @@ -161,19 +161,19 @@ class ArmVIXLAssembler FINAL : public Assembler { virtual ~ArmVIXLAssembler() {} ArmVIXLMacroAssembler* GetVIXLAssembler() { return &vixl_masm_; } - void FinalizeCode() OVERRIDE; + void FinalizeCode() override; // Size of generated code. - size_t CodeSize() const OVERRIDE; - const uint8_t* CodeBufferBaseAddress() const OVERRIDE; + size_t CodeSize() const override; + const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. 
- void FinalizeInstructions(const MemoryRegion& region) OVERRIDE; + void FinalizeInstructions(const MemoryRegion& region) override; - void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + void Bind(Label* label ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM"; } - void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + void Jump(Label* label ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM"; } diff --git a/compiler/utils/arm/constants_arm.h b/compiler/utils/arm/constants_arm.h index 66252bed86..3e316c8e84 100644 --- a/compiler/utils/arm/constants_arm.h +++ b/compiler/utils/arm/constants_arm.h @@ -25,7 +25,7 @@ #include "arch/arm/registers_arm.h" #include "base/casts.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace arm { diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 065c3de23c..c6c764e3a9 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -37,6 +37,29 @@ namespace arm { #define ___ asm_.GetVIXLAssembler()-> #endif +vixl::aarch32::Register AsVIXLRegister(ArmManagedRegister reg) { + CHECK(reg.IsCoreRegister()); + return vixl::aarch32::Register(reg.RegId()); +} + +static inline vixl::aarch32::SRegister AsVIXLSRegister(ArmManagedRegister reg) { + CHECK(reg.IsSRegister()); + return vixl::aarch32::SRegister(reg.RegId() - kNumberOfCoreRegIds); +} + +static inline vixl::aarch32::DRegister AsVIXLDRegister(ArmManagedRegister reg) { + CHECK(reg.IsDRegister()); + return vixl::aarch32::DRegister(reg.RegId() - kNumberOfCoreRegIds - kNumberOfSRegIds); +} + +static inline vixl::aarch32::Register AsVIXLRegisterPairLow(ArmManagedRegister reg) { + return vixl::aarch32::Register(reg.AsRegisterPairLow()); +} + +static inline vixl::aarch32::Register AsVIXLRegisterPairHigh(ArmManagedRegister reg) { + return vixl::aarch32::Register(reg.AsRegisterPairHigh()); +} + void ArmVIXLJNIMacroAssembler::FinalizeCode() { for (const std::unique_ptr< ArmVIXLJNIMacroAssembler::ArmException>& exception : exception_blocks_) { @@ -60,7 +83,7 @@ void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, const ManagedRegisterEntrySpills& entry_spills) { CHECK_ALIGNED(frame_size, kStackAlignment); - CHECK(r0.Is(method_reg.AsArm().AsVIXLRegister())); + CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm()))); // Push callee saves and link register. RegList core_spill_mask = 1 << LR; @@ -97,20 +120,19 @@ void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size, // Write out entry spills. int32_t offset = frame_size + kFramePointerSize; - for (size_t i = 0; i < entry_spills.size(); ++i) { - ArmManagedRegister reg = entry_spills.at(i).AsArm(); + for (const ManagedRegisterSpill& spill : entry_spills) { + ArmManagedRegister reg = spill.AsArm(); if (reg.IsNoRegister()) { // only increment stack offset. 
- ManagedRegisterSpill spill = entry_spills.at(i); offset += spill.getSize(); } else if (reg.IsCoreRegister()) { - asm_.StoreToOffset(kStoreWord, reg.AsVIXLRegister(), sp, offset); + asm_.StoreToOffset(kStoreWord, AsVIXLRegister(reg), sp, offset); offset += 4; } else if (reg.IsSRegister()) { - asm_.StoreSToOffset(reg.AsVIXLSRegister(), sp, offset); + asm_.StoreSToOffset(AsVIXLSRegister(reg), sp, offset); offset += 4; } else if (reg.IsDRegister()) { - asm_.StoreDToOffset(reg.AsVIXLDRegister(), sp, offset); + asm_.StoreDToOffset(AsVIXLDRegister(reg), sp, offset); offset += 8; } } @@ -208,76 +230,71 @@ void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, si } else if (src.IsCoreRegister()) { CHECK_EQ(4u, size); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(src.AsVIXLRegister()); - asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); + temps.Exclude(AsVIXLRegister(src)); + asm_.StoreToOffset(kStoreWord, AsVIXLRegister(src), sp, dest.Int32Value()); } else if (src.IsRegisterPair()) { CHECK_EQ(8u, size); - asm_.StoreToOffset(kStoreWord, src.AsVIXLRegisterPairLow(), sp, dest.Int32Value()); - asm_.StoreToOffset(kStoreWord, src.AsVIXLRegisterPairHigh(), sp, dest.Int32Value() + 4); + asm_.StoreToOffset(kStoreWord, AsVIXLRegisterPairLow(src), sp, dest.Int32Value()); + asm_.StoreToOffset(kStoreWord, AsVIXLRegisterPairHigh(src), sp, dest.Int32Value() + 4); } else if (src.IsSRegister()) { CHECK_EQ(4u, size); - asm_.StoreSToOffset(src.AsVIXLSRegister(), sp, dest.Int32Value()); + asm_.StoreSToOffset(AsVIXLSRegister(src), sp, dest.Int32Value()); } else { CHECK_EQ(8u, size); CHECK(src.IsDRegister()) << src; - asm_.StoreDToOffset(src.AsVIXLDRegister(), sp, dest.Int32Value()); + asm_.StoreDToOffset(AsVIXLDRegister(src), sp, dest.Int32Value()); } } void ArmVIXLJNIMacroAssembler::StoreRef(FrameOffset dest, ManagedRegister msrc) { - ArmManagedRegister src = msrc.AsArm(); - CHECK(src.IsCoreRegister()) << src; + vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(src.AsVIXLRegister()); - asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); + temps.Exclude(src); + asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value()); } void ArmVIXLJNIMacroAssembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) { - ArmManagedRegister src = msrc.AsArm(); - CHECK(src.IsCoreRegister()) << src; + vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(src.AsVIXLRegister()); - asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); + temps.Exclude(src); + asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value()); } void ArmVIXLJNIMacroAssembler::StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, ManagedRegister mscratch) { - ArmManagedRegister src = msrc.AsArm(); - ArmManagedRegister scratch = mscratch.AsArm(); - asm_.StoreToOffset(kStoreWord, src.AsVIXLRegister(), sp, dest.Int32Value()); + vixl::aarch32::Register src = AsVIXLRegister(msrc.AsArm()); + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); + asm_.StoreToOffset(kStoreWord, src, sp, dest.Int32Value()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); - asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, in_off.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, 
dest.Int32Value() + 4); + temps.Exclude(scratch); + asm_.LoadFromOffset(kLoadWord, scratch, sp, in_off.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4); } void ArmVIXLJNIMacroAssembler::CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); - asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, src.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, dest.Int32Value()); + temps.Exclude(scratch); + asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); } -void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister dest, - ManagedRegister base, +void ArmVIXLJNIMacroAssembler::LoadRef(ManagedRegister mdest, + ManagedRegister mbase, MemberOffset offs, bool unpoison_reference) { - ArmManagedRegister dst = dest.AsArm(); - CHECK(dst.IsCoreRegister() && dst.IsCoreRegister()) << dst; + vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm()); + vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(dst.AsVIXLRegister(), base.AsArm().AsVIXLRegister()); - asm_.LoadFromOffset(kLoadWord, - dst.AsVIXLRegister(), - base.AsArm().AsVIXLRegister(), - offs.Int32Value()); + temps.Exclude(dest, base); + asm_.LoadFromOffset(kLoadWord, dest, base, offs.Int32Value()); if (unpoison_reference) { - asm_.MaybeUnpoisonHeapReference(dst.AsVIXLRegister()); + asm_.MaybeUnpoisonHeapReference(dest); } } @@ -294,13 +311,12 @@ void ArmVIXLJNIMacroAssembler::LoadRawPtr(ManagedRegister dest ATTRIBUTE_UNUSED, void ArmVIXLJNIMacroAssembler::StoreImmediateToFrame(FrameOffset dest, uint32_t imm, - ManagedRegister scratch) { - ArmManagedRegister mscratch = scratch.AsArm(); - CHECK(mscratch.IsCoreRegister()) << mscratch; + ManagedRegister mscratch) { + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(mscratch.AsVIXLRegister()); - asm_.LoadImmediate(mscratch.AsVIXLRegister(), imm); - asm_.StoreToOffset(kStoreWord, mscratch.AsVIXLRegister(), sp, dest.Int32Value()); + temps.Exclude(scratch); + asm_.LoadImmediate(scratch, imm); + asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); } void ArmVIXLJNIMacroAssembler::Load(ManagedRegister m_dst, FrameOffset src, size_t size) { @@ -313,23 +329,21 @@ void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst, return Load(m_dst.AsArm(), tr, src.Int32Value(), size); } -void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) { - ArmManagedRegister dst = m_dst.AsArm(); - CHECK(dst.IsCoreRegister()) << dst; +void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) { + vixl::aarch32::Register dest = AsVIXLRegister(mdest.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(dst.AsVIXLRegister()); - asm_.LoadFromOffset(kLoadWord, dst.AsVIXLRegister(), tr, offs.Int32Value()); + temps.Exclude(dest); + asm_.LoadFromOffset(kLoadWord, dest, tr, offs.Int32Value()); } void ArmVIXLJNIMacroAssembler::CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs, ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - 
CHECK(scratch.IsCoreRegister()) << scratch; + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); - asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), tr, thr_offs.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, fr_offs.Int32Value()); + temps.Exclude(scratch); + asm_.LoadFromOffset(kLoadWord, scratch, tr, thr_offs.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, sp, fr_offs.Int32Value()); } void ArmVIXLJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs ATTRIBUTE_UNUSED, @@ -341,12 +355,11 @@ void ArmVIXLJNIMacroAssembler::CopyRawPtrToThread(ThreadOffset32 thr_offs ATTRIB void ArmVIXLJNIMacroAssembler::StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); - asm_.AddConstant(scratch.AsVIXLRegister(), sp, fr_offs.Int32Value()); - asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), tr, thr_offs.Int32Value()); + temps.Exclude(scratch); + asm_.AddConstant(scratch, sp, fr_offs.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, tr, thr_offs.Int32Value()); } void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs) { @@ -363,43 +376,43 @@ void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; } -void ArmVIXLJNIMacroAssembler::Move(ManagedRegister m_dst, - ManagedRegister m_src, +void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, + ManagedRegister msrc, size_t size ATTRIBUTE_UNUSED) { - ArmManagedRegister dst = m_dst.AsArm(); - ArmManagedRegister src = m_src.AsArm(); + ArmManagedRegister dst = mdst.AsArm(); + ArmManagedRegister src = msrc.AsArm(); if (!dst.Equals(src)) { if (dst.IsCoreRegister()) { CHECK(src.IsCoreRegister()) << src; UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(dst.AsVIXLRegister()); - ___ Mov(dst.AsVIXLRegister(), src.AsVIXLRegister()); + temps.Exclude(AsVIXLRegister(dst)); + ___ Mov(AsVIXLRegister(dst), AsVIXLRegister(src)); } else if (dst.IsDRegister()) { if (src.IsDRegister()) { - ___ Vmov(F64, dst.AsVIXLDRegister(), src.AsVIXLDRegister()); + ___ Vmov(F64, AsVIXLDRegister(dst), AsVIXLDRegister(src)); } else { // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi}) CHECK(src.IsRegisterPair()) << src; - ___ Vmov(dst.AsVIXLDRegister(), src.AsVIXLRegisterPairLow(), src.AsVIXLRegisterPairHigh()); + ___ Vmov(AsVIXLDRegister(dst), AsVIXLRegisterPairLow(src), AsVIXLRegisterPairHigh(src)); } } else if (dst.IsSRegister()) { if (src.IsSRegister()) { - ___ Vmov(F32, dst.AsVIXLSRegister(), src.AsVIXLSRegister()); + ___ Vmov(F32, AsVIXLSRegister(dst), AsVIXLSRegister(src)); } else { // VMOV Sn, Rn (Sn = Rn) CHECK(src.IsCoreRegister()) << src; - ___ Vmov(dst.AsVIXLSRegister(), src.AsVIXLRegister()); + ___ Vmov(AsVIXLSRegister(dst), AsVIXLRegister(src)); } } else { CHECK(dst.IsRegisterPair()) << dst; CHECK(src.IsRegisterPair()) << src; // Ensure that the first move doesn't clobber the input of the second. 
if (src.AsRegisterPairHigh() != dst.AsRegisterPairLow()) { - ___ Mov(dst.AsVIXLRegisterPairLow(), src.AsVIXLRegisterPairLow()); - ___ Mov(dst.AsVIXLRegisterPairHigh(), src.AsVIXLRegisterPairHigh()); + ___ Mov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairLow(src)); + ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src)); } else { - ___ Mov(dst.AsVIXLRegisterPairHigh(), src.AsVIXLRegisterPairHigh()); - ___ Mov(dst.AsVIXLRegisterPairLow(), src.AsVIXLRegisterPairLow()); + ___ Mov(AsVIXLRegisterPairHigh(dst), AsVIXLRegisterPairHigh(src)); + ___ Mov(AsVIXLRegisterPairLow(dst), AsVIXLRegisterPairLow(src)); } } } @@ -407,21 +420,20 @@ void ArmVIXLJNIMacroAssembler::Move(ManagedRegister m_dst, void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dest, FrameOffset src, - ManagedRegister scratch, + ManagedRegister mscratch, size_t size) { - ArmManagedRegister temp = scratch.AsArm(); - CHECK(temp.IsCoreRegister()) << temp; + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); CHECK(size == 4 || size == 8) << size; UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(temp.AsVIXLRegister()); + temps.Exclude(scratch); if (size == 4) { - asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value()); - asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value()); + asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); } else if (size == 8) { - asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value()); - asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value()); - asm_.LoadFromOffset(kLoadWord, temp.AsVIXLRegister(), sp, src.Int32Value() + 4); - asm_.StoreToOffset(kStoreWord, temp.AsVIXLRegister(), sp, dest.Int32Value() + 4); + asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value()); + asm_.LoadFromOffset(kLoadWord, scratch, sp, src.Int32Value() + 4); + asm_.StoreToOffset(kStoreWord, scratch, sp, dest.Int32Value() + 4); } } @@ -471,48 +483,44 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, FrameOffset handle_scope_offset, ManagedRegister min_reg, bool null_allowed) { - ArmManagedRegister out_reg = mout_reg.AsArm(); - ArmManagedRegister in_reg = min_reg.AsArm(); - CHECK(in_reg.IsNoRegister() || in_reg.IsCoreRegister()) << in_reg; - CHECK(out_reg.IsCoreRegister()) << out_reg; + vixl::aarch32::Register out_reg = AsVIXLRegister(mout_reg.AsArm()); + vixl::aarch32::Register in_reg = + min_reg.AsArm().IsNoRegister() ? vixl::aarch32::Register() : AsVIXLRegister(min_reg.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(out_reg.AsVIXLRegister()); + temps.Exclude(out_reg); if (null_allowed) { // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. out_reg = (handle == 0) ? 
0 : (SP+handle_offset) - if (in_reg.IsNoRegister()) { - asm_.LoadFromOffset(kLoadWord, - out_reg.AsVIXLRegister(), - sp, - handle_scope_offset.Int32Value()); + if (!in_reg.IsValid()) { + asm_.LoadFromOffset(kLoadWord, out_reg, sp, handle_scope_offset.Int32Value()); in_reg = out_reg; } - temps.Exclude(in_reg.AsVIXLRegister()); - ___ Cmp(in_reg.AsVIXLRegister(), 0); + temps.Exclude(in_reg); + ___ Cmp(in_reg, 0); if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value())) { - if (!out_reg.Equals(in_reg)) { + if (!out_reg.Is(in_reg)) { ExactAssemblyScope guard(asm_.GetVIXLAssembler(), 3 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); ___ it(eq, 0xc); - ___ mov(eq, out_reg.AsVIXLRegister(), 0); - asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); + ___ mov(eq, out_reg, 0); + asm_.AddConstantInIt(out_reg, sp, handle_scope_offset.Int32Value(), ne); } else { ExactAssemblyScope guard(asm_.GetVIXLAssembler(), 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); ___ it(ne, 0x8); - asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); + asm_.AddConstantInIt(out_reg, sp, handle_scope_offset.Int32Value(), ne); } } else { // TODO: Implement this (old arm assembler would have crashed here). UNIMPLEMENTED(FATAL); } } else { - asm_.AddConstant(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value()); + asm_.AddConstant(out_reg, sp, handle_scope_offset.Int32Value()); } } @@ -520,31 +528,30 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handle_scope_offset, ManagedRegister mscratch, bool null_allowed) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); + temps.Exclude(scratch); if (null_allowed) { - asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value()); + asm_.LoadFromOffset(kLoadWord, scratch, sp, handle_scope_offset.Int32Value()); // Null values get a handle scope entry value of 0. Otherwise, the handle scope entry is // the address in the handle scope holding the reference. // e.g. scratch = (scratch == 0) ? 0 : (SP+handle_scope_offset) - ___ Cmp(scratch.AsVIXLRegister(), 0); + ___ Cmp(scratch, 0); if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value())) { ExactAssemblyScope guard(asm_.GetVIXLAssembler(), 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); ___ it(ne, 0x8); - asm_.AddConstantInIt(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); + asm_.AddConstantInIt(scratch, sp, handle_scope_offset.Int32Value(), ne); } else { // TODO: Implement this (old arm assembler would have crashed here). 
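The CreateHandleScopeEntry hunks in this file implement the rule spelled out in the comment above: with null_allowed set, a null reference yields a null handle scope entry, otherwise the entry becomes the address of the handle scope slot on the stack. A minimal C++ sketch of the value the Cmp + IT-block sequence computes (illustrative only, not ART code; the names are made up):

// Models what the conditional sequence computes; 'in' is the incoming
// reference (or the value already stored in the slot), 'sp' the stack
// pointer and 'handle_scope_offset' the slot offset.
uintptr_t HandleScopeEntryValue(uintptr_t in,
                                uintptr_t sp,
                                int32_t handle_scope_offset,
                                bool null_allowed) {
  if (null_allowed && in == 0u) {
    return 0u;                          // Null stays null.
  }
  return sp + handle_scope_offset;      // Address of the handle scope slot.
}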
UNIMPLEMENTED(FATAL); } } else { - asm_.AddConstant(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value()); + asm_.AddConstant(scratch, sp, handle_scope_offset.Int32Value()); } - asm_.StoreToOffset(kStoreWord, scratch.AsVIXLRegister(), sp, out_off.Int32Value()); + asm_.StoreToOffset(kStoreWord, scratch, sp, out_off.Int32Value()); } void ArmVIXLJNIMacroAssembler::LoadReferenceFromHandleScope( @@ -566,32 +573,23 @@ void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) { - ArmManagedRegister base = mbase.AsArm(); - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(base.IsCoreRegister()) << base; - CHECK(scratch.IsCoreRegister()) << scratch; + vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm()); + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); - asm_.LoadFromOffset(kLoadWord, - scratch.AsVIXLRegister(), - base.AsVIXLRegister(), - offset.Int32Value()); - ___ Blx(scratch.AsVIXLRegister()); + temps.Exclude(scratch); + asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value()); + ___ Blx(scratch); // TODO: place reference map on call. } void ArmVIXLJNIMacroAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) { - ArmManagedRegister scratch = mscratch.AsArm(); - CHECK(scratch.IsCoreRegister()) << scratch; + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); + temps.Exclude(scratch); // Call *(*(SP + base) + offset) - asm_.LoadFromOffset(kLoadWord, scratch.AsVIXLRegister(), sp, base.Int32Value()); - asm_.LoadFromOffset(kLoadWord, - scratch.AsVIXLRegister(), - scratch.AsVIXLRegister(), - offset.Int32Value()); - ___ Blx(scratch.AsVIXLRegister()); + asm_.LoadFromOffset(kLoadWord, scratch, sp, base.Int32Value()); + asm_.LoadFromOffset(kLoadWord, scratch, scratch, offset.Int32Value()); + ___ Blx(scratch); // TODO: place reference map on call } @@ -602,8 +600,8 @@ void ArmVIXLJNIMacroAssembler::CallFromThread(ThreadOffset32 offset ATTRIBUTE_UN void ArmVIXLJNIMacroAssembler::GetCurrentThread(ManagedRegister mtr) { UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(mtr.AsArm().AsVIXLRegister()); - ___ Mov(mtr.AsArm().AsVIXLRegister(), tr); + temps.Exclude(AsVIXLRegister(mtr.AsArm())); + ___ Mov(AsVIXLRegister(mtr.AsArm()), tr); } void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset, @@ -611,19 +609,19 @@ void ArmVIXLJNIMacroAssembler::GetCurrentThread(FrameOffset dest_offset, asm_.StoreToOffset(kStoreWord, tr, sp, dest_offset.Int32Value()); } -void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t stack_adjust) { +void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) { CHECK_ALIGNED(stack_adjust, kStackAlignment); - ArmManagedRegister scratch = m_scratch.AsArm(); + vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(scratch.AsVIXLRegister()); + temps.Exclude(scratch); exception_blocks_.emplace_back( - new ArmVIXLJNIMacroAssembler::ArmException(scratch, stack_adjust)); + new ArmVIXLJNIMacroAssembler::ArmException(mscratch.AsArm(), stack_adjust)); asm_.LoadFromOffset(kLoadWord, - scratch.AsVIXLRegister(), + scratch, 
tr, Thread::ExceptionOffset<kArmPointerSize>().Int32Value()); - ___ Cmp(scratch.AsVIXLRegister(), 0); + ___ Cmp(scratch, 0); vixl32::Label* label = exception_blocks_.back()->Entry(); ___ BPreferNear(ne, label); // TODO: think about using CBNZ here. @@ -640,19 +638,18 @@ void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label) { void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label, JNIMacroUnaryCondition condition, - ManagedRegister test) { + ManagedRegister mtest) { CHECK(label != nullptr); + vixl::aarch32::Register test = AsVIXLRegister(mtest.AsArm()); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(test.AsArm().AsVIXLRegister()); + temps.Exclude(test); switch (condition) { case JNIMacroUnaryCondition::kZero: - ___ CompareAndBranchIfZero(test.AsArm().AsVIXLRegister(), - ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); + ___ CompareAndBranchIfZero(test, ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); break; case JNIMacroUnaryCondition::kNotZero: - ___ CompareAndBranchIfNonZero(test.AsArm().AsVIXLRegister(), - ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); + ___ CompareAndBranchIfNonZero(test, ArmVIXLJNIMacroLabel::Cast(label)->AsArm()); break; default: LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition); @@ -672,12 +669,13 @@ void ArmVIXLJNIMacroAssembler::EmitExceptionPoll( DecreaseFrameSize(exception->stack_adjust_); } + vixl::aarch32::Register scratch = AsVIXLRegister(exception->scratch_); UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(exception->scratch_.AsVIXLRegister()); + temps.Exclude(scratch); // Pass exception object as argument. // Don't care about preserving r0 as this won't return. - ___ Mov(r0, exception->scratch_.AsVIXLRegister()); - temps.Include(exception->scratch_.AsVIXLRegister()); + ___ Mov(r0, scratch); + temps.Include(scratch); // TODO: check that exception->scratch_ is dead by this point. 
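A pattern repeated throughout this file: registers handed in by the caller are pulled out of VIXL's scratch pool with Exclude() so the macro assembler cannot reuse them behind the code's back, and Include() hands them back once they are dead, as in the EmitExceptionPoll hunk above. A minimal sketch of the idiom, assuming the vixl::aarch32 macro assembler header already used by this file; the function and register names are illustrative:

#include "aarch32/macro-assembler-aarch32.h"

void UseCallerScratch(vixl::aarch32::MacroAssembler* masm,
                      vixl::aarch32::Register caller_scratch) {
  vixl::aarch32::UseScratchRegisterScope temps(masm);
  temps.Exclude(caller_scratch);   // VIXL must not hand this out as a temp.
  // ... emit code that clobbers caller_scratch explicitly ...
  temps.Include(caller_scratch);   // Dead again; return it to the pool.
  vixl::aarch32::Register temp = temps.Acquire();  // May now be reused as a temp.
  masm->Mov(temp, 0);              // Example use of the acquired temp.
}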
vixl32::Register temp = temps.Acquire(); ___ Ldr(temp, @@ -698,26 +696,27 @@ void ArmVIXLJNIMacroAssembler::Load(ArmManagedRegister if (dest.IsNoRegister()) { CHECK_EQ(0u, size) << dest; } else if (dest.IsCoreRegister()) { - CHECK(!dest.AsVIXLRegister().Is(sp)) << dest; + vixl::aarch32::Register dst = AsVIXLRegister(dest); + CHECK(!dst.Is(sp)) << dest; UseScratchRegisterScope temps(asm_.GetVIXLAssembler()); - temps.Exclude(dest.AsVIXLRegister()); + temps.Exclude(dst); if (size == 1u) { - ___ Ldrb(dest.AsVIXLRegister(), MemOperand(base, offset)); + ___ Ldrb(dst, MemOperand(base, offset)); } else { CHECK_EQ(4u, size) << dest; - ___ Ldr(dest.AsVIXLRegister(), MemOperand(base, offset)); + ___ Ldr(dst, MemOperand(base, offset)); } } else if (dest.IsRegisterPair()) { CHECK_EQ(8u, size) << dest; - ___ Ldr(dest.AsVIXLRegisterPairLow(), MemOperand(base, offset)); - ___ Ldr(dest.AsVIXLRegisterPairHigh(), MemOperand(base, offset + 4)); + ___ Ldr(AsVIXLRegisterPairLow(dest), MemOperand(base, offset)); + ___ Ldr(AsVIXLRegisterPairHigh(dest), MemOperand(base, offset + 4)); } else if (dest.IsSRegister()) { - ___ Vldr(dest.AsVIXLSRegister(), MemOperand(base, offset)); + ___ Vldr(AsVIXLSRegister(dest), MemOperand(base, offset)); } else { CHECK(dest.IsDRegister()) << dest; - ___ Vldr(dest.AsVIXLDRegister(), MemOperand(base, offset)); + ___ Vldr(AsVIXLDRegister(dest), MemOperand(base, offset)); } } diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h index 4bc5d69f4d..0b1b6d2ba9 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h @@ -32,7 +32,7 @@ namespace art { namespace arm { -class ArmVIXLJNIMacroAssembler FINAL +class ArmVIXLJNIMacroAssembler final : public JNIMacroAssemblerFwd<ArmVIXLAssembler, PointerSize::k32> { private: class ArmException; @@ -42,7 +42,7 @@ class ArmVIXLJNIMacroAssembler FINAL exception_blocks_(allocator->Adapter(kArenaAllocAssembler)) {} virtual ~ArmVIXLJNIMacroAssembler() {} - void FinalizeCode() OVERRIDE; + void FinalizeCode() override; // // Overridden common assembler high-level functionality @@ -52,109 +52,109 @@ class ArmVIXLJNIMacroAssembler FINAL void BuildFrame(size_t frame_size, ManagedRegister method_reg, ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + const ManagedRegisterEntrySpills& entry_spills) override; // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, - bool may_suspend) OVERRIDE; + bool may_suspend) override; - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; // Store routines. 
- void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void StoreRef(FrameOffset dest, ManagedRegister src) override; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override; void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; - void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset32 thr_offs) override; void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; // Load routines. - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + void Load(ManagedRegister dest, FrameOffset src, size_t size) override; void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, - size_t size) OVERRIDE; + size_t size) override; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) override; void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; + bool unpoison_reference) override; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override; // Copying routines. - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override; void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; // Sign extension. 
- void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void SignExtend(ManagedRegister mreg, size_t size) override; // Zero extension. - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void ZeroExtend(ManagedRegister mreg, size_t size) override; // Exploit fast access in managed code to Thread::Current(). - void GetCurrentThread(ManagedRegister mtr) OVERRIDE; + void GetCurrentThread(ManagedRegister mtr) override; void GetCurrentThread(FrameOffset dest_offset, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; // Set up out_reg to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. in_reg holds a possibly stale reference @@ -163,43 +163,43 @@ class ArmVIXLJNIMacroAssembler FINAL void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister scratch, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // src holds a handle scope entry (Object**) load this into dst. void LoadReferenceFromHandleScope(ManagedRegister dst, - ManagedRegister src) OVERRIDE; + ManagedRegister src) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; // Call to address held at [base+offset]. - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override; + void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust); + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override; // Create a new label that can be used with Jump/Bind calls. - std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + std::unique_ptr<JNIMacroLabel> CreateLabel() override; // Emit an unconditional jump to the label. - void Jump(JNIMacroLabel* label) OVERRIDE; + void Jump(JNIMacroLabel* label) override; // Emit a conditional jump to the label by applying a unary condition test to the register. - void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override; // Code at this offset will serve as the target for the Jump call. 
- void Bind(JNIMacroLabel* label) OVERRIDE; + void Bind(JNIMacroLabel* label) override; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + void MemoryBarrier(ManagedRegister scratch) override; void EmitExceptionPoll(ArmVIXLJNIMacroAssembler::ArmException *exception); void Load(ArmManagedRegister dest, vixl32::Register base, int32_t offset, size_t size); @@ -231,7 +231,7 @@ class ArmVIXLJNIMacroAssembler FINAL friend class ArmVIXLAssemblerTest_VixlStoreToOffset_Test; }; -class ArmVIXLJNIMacroLabel FINAL +class ArmVIXLJNIMacroLabel final : public JNIMacroLabelCommon<ArmVIXLJNIMacroLabel, vixl32::Label, InstructionSet::kArm> { diff --git a/compiler/utils/arm/managed_register_arm.cc b/compiler/utils/arm/managed_register_arm.cc index 1fdc110dcf..deff658b4f 100644 --- a/compiler/utils/arm/managed_register_arm.cc +++ b/compiler/utils/arm/managed_register_arm.cc @@ -16,7 +16,7 @@ #include "managed_register_arm.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace arm { diff --git a/compiler/utils/arm/managed_register_arm.h b/compiler/utils/arm/managed_register_arm.h index 26f23b2ed6..e42572dc32 100644 --- a/compiler/utils/arm/managed_register_arm.h +++ b/compiler/utils/arm/managed_register_arm.h @@ -20,15 +20,8 @@ #include <android-base/logging.h> #include "constants_arm.h" -#include "debug/dwarf/register.h" #include "utils/managed_register.h" -// TODO(VIXL): Make VIXL compile with -Wshadow. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wshadow" -#include "aarch32/macro-assembler-aarch32.h" -#pragma GCC diagnostic pop - namespace art { namespace arm { @@ -97,31 +90,16 @@ class ArmManagedRegister : public ManagedRegister { return static_cast<Register>(id_); } - vixl::aarch32::Register AsVIXLRegister() const { - CHECK(IsCoreRegister()); - return vixl::aarch32::Register(id_); - } - constexpr SRegister AsSRegister() const { CHECK(IsSRegister()); return static_cast<SRegister>(id_ - kNumberOfCoreRegIds); } - vixl::aarch32::SRegister AsVIXLSRegister() const { - CHECK(IsSRegister()); - return vixl::aarch32::SRegister(id_ - kNumberOfCoreRegIds); - } - constexpr DRegister AsDRegister() const { CHECK(IsDRegister()); return static_cast<DRegister>(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds); } - vixl::aarch32::DRegister AsVIXLDRegister() const { - CHECK(IsDRegister()); - return vixl::aarch32::DRegister(id_ - kNumberOfCoreRegIds - kNumberOfSRegIds); - } - constexpr SRegister AsOverlappingDRegisterLow() const { CHECK(IsOverlappingDRegister()); DRegister d_reg = AsDRegister(); @@ -150,20 +128,12 @@ class ArmManagedRegister : public ManagedRegister { return FromRegId(AllocIdLow()).AsCoreRegister(); } - vixl::aarch32::Register AsVIXLRegisterPairLow() const { - return vixl::aarch32::Register(AsRegisterPairLow()); - } - constexpr Register AsRegisterPairHigh() const { CHECK(IsRegisterPair()); // Appropriate mapping of register ids allows to use AllocIdHigh(). 
return FromRegId(AllocIdHigh()).AsCoreRegister(); } - vixl::aarch32::Register AsVIXLRegisterPairHigh() const { - return vixl::aarch32::Register(AsRegisterPairHigh()); - } - constexpr bool IsCoreRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfCoreRegIds); @@ -255,16 +225,16 @@ class ArmManagedRegister : public ManagedRegister { return FromDRegister(static_cast<DRegister>(r)); } - private: - constexpr bool IsValidManagedRegister() const { - return (0 <= id_) && (id_ < kNumberOfRegIds); - } - int RegId() const { CHECK(!IsNoRegister()); return id_; } + private: + constexpr bool IsValidManagedRegister() const { + return (0 <= id_) && (id_ < kNumberOfRegIds); + } + int AllocId() const { CHECK(IsValidManagedRegister() && !IsOverlappingDRegister() && !IsRegisterPair()); diff --git a/compiler/utils/arm/managed_register_arm_test.cc b/compiler/utils/arm/managed_register_arm_test.cc index 43b0b516dc..6f440a7c81 100644 --- a/compiler/utils/arm/managed_register_arm_test.cc +++ b/compiler/utils/arm/managed_register_arm_test.cc @@ -15,7 +15,7 @@ */ #include "managed_register_arm.h" -#include "globals.h" +#include "base/globals.h" #include "gtest/gtest.h" namespace art { diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index c83fd4404a..d7ade058a4 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "arch/arm64/instruction_set_features_arm64.h" #include "assembler_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" #include "heap_poisoning.h" @@ -31,6 +32,37 @@ namespace arm64 { #define ___ vixl_masm_. #endif +// Sets vixl::CPUFeatures according to ART instruction set features. +static void SetVIXLCPUFeaturesFromART(vixl::aarch64::MacroAssembler* vixl_masm_, + const Arm64InstructionSetFeatures* art_features) { + // Retrieve already initialized default features of vixl. + vixl::CPUFeatures* features = vixl_masm_->GetCPUFeatures(); + + DCHECK(features->Has(vixl::CPUFeatures::kFP)); + DCHECK(features->Has(vixl::CPUFeatures::kNEON)); + DCHECK(art_features != nullptr); + if (art_features->HasCRC()) { + features->Combine(vixl::CPUFeatures::kCRC32); + } + if (art_features->HasDotProd()) { + features->Combine(vixl::CPUFeatures::kDotProduct); + } + if (art_features->HasFP16()) { + features->Combine(vixl::CPUFeatures::kFPHalf); + } + if (art_features->HasLSE()) { + features->Combine(vixl::CPUFeatures::kAtomics); + } +} + +Arm64Assembler::Arm64Assembler(ArenaAllocator* allocator, + const Arm64InstructionSetFeatures* art_features) + : Assembler(allocator) { + if (art_features != nullptr) { + SetVIXLCPUFeaturesFromART(&vixl_masm_, art_features); + } +} + void Arm64Assembler::FinalizeCode() { ___ FinalizeCode(); } diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index 8983af2677..9e01a70ea9 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -37,6 +37,9 @@ #pragma GCC diagnostic pop namespace art { + +class Arm64InstructionSetFeatures; + namespace arm64 { #define MEM_OP(...) 
vixl::aarch64::MemOperand(__VA_ARGS__) @@ -61,23 +64,24 @@ enum StoreOperandType { kStoreDWord }; -class Arm64Assembler FINAL : public Assembler { +class Arm64Assembler final : public Assembler { public: - explicit Arm64Assembler(ArenaAllocator* allocator) : Assembler(allocator) {} + explicit Arm64Assembler( + ArenaAllocator* allocator, const Arm64InstructionSetFeatures* features = nullptr); virtual ~Arm64Assembler() {} vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return &vixl_masm_; } // Finalize the code. - void FinalizeCode() OVERRIDE; + void FinalizeCode() override; // Size of generated code. - size_t CodeSize() const OVERRIDE; - const uint8_t* CodeBufferBaseAddress() const OVERRIDE; + size_t CodeSize() const override; + const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. - void FinalizeInstructions(const MemoryRegion& region); + void FinalizeInstructions(const MemoryRegion& region) override; void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs); @@ -109,10 +113,10 @@ class Arm64Assembler FINAL : public Assembler { // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction. void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0); - void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + void Bind(Label* label ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64"; } - void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + void Jump(Label* label ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Do not use Jump for ARM64"; } diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index a5aa1c12b3..d6ce03387c 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -719,11 +719,10 @@ void Arm64JNIMacroAssembler::BuildFrame(size_t frame_size, // Write out entry spills int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize); - for (size_t i = 0; i < entry_spills.size(); ++i) { - Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); + for (const ManagedRegisterSpill& spill : entry_spills) { + Arm64ManagedRegister reg = spill.AsArm64(); if (reg.IsNoRegister()) { // only increment stack offset. - ManagedRegisterSpill spill = entry_spills.at(i); offset += spill.getSize(); } else if (reg.IsXRegister()) { StoreToOffset(reg.AsXRegister(), SP, offset); diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h index f531b2aa51..45316ed88e 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.h +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h @@ -40,7 +40,7 @@ namespace art { namespace arm64 { -class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> { +class Arm64JNIMacroAssembler final : public JNIMacroAssemblerFwd<Arm64Assembler, PointerSize::k64> { public: explicit Arm64JNIMacroAssembler(ArenaAllocator* allocator) : JNIMacroAssemblerFwd(allocator), @@ -49,94 +49,94 @@ class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, ~Arm64JNIMacroAssembler(); // Finalize the code. - void FinalizeCode() OVERRIDE; + void FinalizeCode() override; // Emit code that will create an activation on the stack. 
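The assembler_arm64 hunks above add an optional Arm64InstructionSetFeatures parameter and translate it into vixl::CPUFeatures, so the VIXL macro assembler knows whether CRC32, dot product, FP16 and LSE encodings are legal. A hedged usage sketch; how the caller obtains the allocator and features is outside this change, and passing nullptr keeps VIXL's default FP+NEON set:

// Sketch only: wire ART ISA features into the ARM64 assembler.
void BuildWithFeatures(ArenaAllocator* allocator,
                       const Arm64InstructionSetFeatures* features) {
  arm64::Arm64Assembler assembler(allocator, features);  // features may be nullptr
  // ... emit code; VIXL now accepts the instructions the feature set enables ...
  assembler.FinalizeCode();
}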
void BuildFrame(size_t frame_size, ManagedRegister method_reg, ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + const ManagedRegisterEntrySpills& entry_spills) override; // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, - bool may_suspend) OVERRIDE; + bool may_suspend) override; - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; // Store routines. - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void StoreRef(FrameOffset dest, ManagedRegister src) override; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override; void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; - void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; + ManagedRegister scratch) override; + void StoreStackPointerToThread(ThreadOffset64 thr_offs) override; void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; // Load routines. - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; - void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void Load(ManagedRegister dest, FrameOffset src, size_t size) override; + void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override; + void LoadRef(ManagedRegister dest, FrameOffset src) override; void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE; + bool unpoison_reference) override; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; // Copying routines. 
- void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + override; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override; void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; - void MemoryBarrier(ManagedRegister scratch) OVERRIDE; + size_t size) override; + void MemoryBarrier(ManagedRegister scratch) override; // Sign extension. - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void SignExtend(ManagedRegister mreg, size_t size) override; // Zero extension. - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void ZeroExtend(ManagedRegister mreg, size_t size) override; // Exploit fast access in managed code to Thread::Current(). - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + void GetCurrentThread(ManagedRegister tr) override; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override; // Set up out_reg to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. in_reg holds a possibly stale reference @@ -145,40 +145,40 @@ class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister scratch, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // src holds a handle scope entry (Object**) load this into dst. - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. 
- void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; // Call to address held at [base+offset]. - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE; + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override; + void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) override; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override; // Create a new label that can be used with Jump/Bind calls. - std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + std::unique_ptr<JNIMacroLabel> CreateLabel() override; // Emit an unconditional jump to the label. - void Jump(JNIMacroLabel* label) OVERRIDE; + void Jump(JNIMacroLabel* label) override; // Emit a conditional jump to the label by applying a unary condition test to the register. - void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override; // Code at this offset will serve as the target for the Jump call. - void Bind(JNIMacroLabel* label) OVERRIDE; + void Bind(JNIMacroLabel* label) override; private: class Arm64Exception { @@ -234,7 +234,7 @@ class Arm64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<Arm64Assembler, ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_; }; -class Arm64JNIMacroLabel FINAL +class Arm64JNIMacroLabel final : public JNIMacroLabelCommon<Arm64JNIMacroLabel, vixl::aarch64::Label, InstructionSet::kArm64> { diff --git a/compiler/utils/arm64/managed_register_arm64.cc b/compiler/utils/arm64/managed_register_arm64.cc index 47924bf99f..5632265646 100644 --- a/compiler/utils/arm64/managed_register_arm64.cc +++ b/compiler/utils/arm64/managed_register_arm64.cc @@ -15,7 +15,7 @@ */ #include "managed_register_arm64.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace arm64 { diff --git a/compiler/utils/arm64/managed_register_arm64.h b/compiler/utils/arm64/managed_register_arm64.h index 9ce7ec9a97..0513890aa8 100644 --- a/compiler/utils/arm64/managed_register_arm64.h +++ b/compiler/utils/arm64/managed_register_arm64.h @@ -20,7 +20,6 @@ #include <android-base/logging.h> #include "arch/arm64/registers_arm64.h" -#include "debug/dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/arm64/managed_register_arm64_test.cc b/compiler/utils/arm64/managed_register_arm64_test.cc index 2a79313be5..d151ac99e7 100644 --- a/compiler/utils/arm64/managed_register_arm64_test.cc +++ b/compiler/utils/arm64/managed_register_arm64_test.cc @@ -17,7 +17,7 @@ #include "managed_register_arm64.h" #include "assembler_arm64.h" -#include "globals.h" +#include "base/globals.h" #include "gtest/gtest.h" namespace art { diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index 944c64b591..d1d2a3d556 100644 --- 
a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -20,8 +20,8 @@ #include <vector> #include "base/casts.h" -#include "globals.h" -#include "memory_region.h" +#include "base/globals.h" +#include "base/memory_region.h" namespace art { diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 5b0cd6baa8..aa21f862de 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -29,10 +29,10 @@ #include "base/array_ref.h" #include "base/enums.h" #include "base/macros.h" -#include "debug/dwarf/debug_frame_opcode_writer.h" +#include "base/memory_region.h" +#include "dwarf/debug_frame_opcode_writer.h" #include "label.h" #include "managed_register.h" -#include "memory_region.h" #include "mips/constants_mips.h" #include "offsets.h" #include "x86/constants_x86.h" @@ -283,7 +283,7 @@ class AssemblerBuffer { // The purpose of this class is to ensure that we do not have to explicitly // call the AdvancePC method (which is good for convenience and correctness). -class DebugFrameOpCodeWriterForAssembler FINAL +class DebugFrameOpCodeWriterForAssembler final : public dwarf::DebugFrameOpCodeWriter<> { public: struct DelayedAdvancePC { @@ -292,10 +292,10 @@ class DebugFrameOpCodeWriterForAssembler FINAL }; // This method is called the by the opcode writers. - virtual void ImplicitlyAdvancePC() FINAL; + void ImplicitlyAdvancePC() final; explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer) - : dwarf::DebugFrameOpCodeWriter<>(false /* enabled */), + : dwarf::DebugFrameOpCodeWriter<>(/* enabled= */ false), assembler_(buffer), delay_emitting_advance_pc_(false), delayed_advance_pcs_() { diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 0cb8bbb2d5..9e23d11116 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,6 +26,7 @@ #include <fstream> #include <iterator> +#include "base/malloc_arena_pool.h" #include "assembler_test_base.h" #include "common_runtime_test.h" // For ScratchFile @@ -736,7 +737,7 @@ class AssemblerTest : public testing::Test { protected: AssemblerTest() {} - void SetUp() OVERRIDE { + void SetUp() override { allocator_.reset(new ArenaAllocator(&pool_)); assembler_.reset(CreateAssembler(allocator_.get())); test_helper_.reset( @@ -752,7 +753,7 @@ class AssemblerTest : public testing::Test { SetUpHelpers(); } - void TearDown() OVERRIDE { + void TearDown() override { test_helper_.reset(); // Clean up the helper. assembler_.reset(); allocator_.reset(); @@ -1606,7 +1607,7 @@ class AssemblerTest : public testing::Test { static constexpr size_t kWarnManyCombinationsThreshold = 500; - ArenaPool pool_; + MallocArenaPool pool_; std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<Ass> assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 778a01566c..5fa0b3cd39 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -59,12 +59,12 @@ class AssemblerTestInfrastructure { disassembler_cmd_name_(disasm), disassembler_parameters_(disasm_params) { // Fake a runtime test for ScratchFile - CommonRuntimeTest::SetUpAndroidData(android_data_); + CommonRuntimeTest::SetUpAndroidDataDir(android_data_); } virtual ~AssemblerTestInfrastructure() { // We leave temporaries in case this failed so we can debug issues. 
- CommonRuntimeTest::TearDownAndroidData(android_data_, false); + CommonRuntimeTest::TearDownAndroidDataDir(android_data_, false); tmpnam_ = ""; } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 655d17d4fb..c9ece1df69 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -27,6 +27,7 @@ #include "utils/arm/jni_macro_assembler_arm_vixl.h" #include "base/hex_dump.h" +#include "base/malloc_arena_pool.h" #include "common_runtime_test.h" namespace art { @@ -124,7 +125,7 @@ void DumpAndCheck(std::vector<uint8_t>& code, const char* testname, const char* // Assemble the .S snprintf(cmd, sizeof(cmd), "%sas %s -o %s.o", toolsdir.c_str(), filename, filename); int cmd_result = system(cmd); - ASSERT_EQ(cmd_result, 0) << strerror(errno); + ASSERT_EQ(cmd_result, 0) << cmd << strerror(errno); // Disassemble. snprintf(cmd, sizeof(cmd), "%sobjdump -D -M force-thumb --section=.text %s.o | grep '^ *[0-9a-f][0-9a-f]*:'", @@ -169,7 +170,7 @@ class ArmVIXLAssemblerTest : public ::testing::Test { public: ArmVIXLAssemblerTest() : pool(), allocator(&pool), assembler(&allocator) { } - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator; ArmVIXLJNIMacroAssembler assembler; }; @@ -238,7 +239,7 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { __ Load(scratch_register, FrameOffset(4092), 4); __ Load(scratch_register, FrameOffset(4096), 4); __ LoadRawPtrFromThread(scratch_register, ThreadOffset32(512)); - __ LoadRef(method_register, scratch_register, MemberOffset(128), /* unpoison_reference */ false); + __ LoadRef(method_register, scratch_register, MemberOffset(128), /* unpoison_reference= */ false); // Stores __ Store(FrameOffset(32), method_register, 4); @@ -283,7 +284,7 @@ TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) { __ DecreaseFrameSize(4096); __ DecreaseFrameSize(32); - __ RemoveFrame(frame_size, callee_save_regs, /* may_suspend */ true); + __ RemoveFrame(frame_size, callee_save_regs, /* may_suspend= */ true); EmitAndCheck(&assembler, "VixlJniHelpers"); } diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 674dc9a78b..842716fac3 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -76,7 +76,7 @@ const char* const VixlJniHelpersResults[] = { " f0: f1bc 0f00 cmp.w ip, #0\n", " f4: bf18 it ne\n", " f6: f20d 4c01 addwne ip, sp, #1025 ; 0x401\n", - " fa: f8d9 c08c ldr.w ip, [r9, #140] ; 0x8c\n", + " fa: f8d9 c09c ldr.w ip, [r9, #156] ; 0x9c\n", " fe: f1bc 0f00 cmp.w ip, #0\n", " 102: d171 bne.n 1e8 <VixlJniHelpers+0x1e8>\n", " 104: f8cd c7ff str.w ip, [sp, #2047] ; 0x7ff\n", @@ -153,7 +153,7 @@ const char* const VixlJniHelpersResults[] = { " 21c: f8d9 8034 ldr.w r8, [r9, #52] ; 0x34\n", " 220: 4770 bx lr\n", " 222: 4660 mov r0, ip\n", - " 224: f8d9 c2c4 ldr.w ip, [r9, #708] ; 0x2c4\n", + " 224: f8d9 c2e4 ldr.w ip, [r9, #740] ; 0x2e4\n", " 228: 47e0 blx ip\n", nullptr }; diff --git a/compiler/utils/atomic_dex_ref_map-inl.h b/compiler/utils/atomic_dex_ref_map-inl.h index ce3302bb62..377b7fe352 100644 --- a/compiler/utils/atomic_dex_ref_map-inl.h +++ b/compiler/utils/atomic_dex_ref_map-inl.h @@ -70,7 +70,7 @@ inline bool AtomicDexRefMap<DexFileReferenceType, Value>::Get(const DexFileRefer if (array == nullptr) { return false; } - *out = (*array)[ref.index].LoadRelaxed(); + *out = (*array)[ref.index].load(std::memory_order_relaxed); return true; } @@ -81,7 +81,7 @@ inline 
bool AtomicDexRefMap<DexFileReferenceType, Value>::Remove(const DexFileRe if (array == nullptr) { return false; } - *out = (*array)[ref.index].ExchangeSequentiallyConsistent(nullptr); + *out = (*array)[ref.index].exchange(nullptr, std::memory_order_seq_cst); return true; } @@ -120,7 +120,7 @@ inline void AtomicDexRefMap<DexFileReferenceType, Value>::Visit(const Visitor& v const DexFile* dex_file = pair.first; const ElementArray& elements = pair.second; for (size_t i = 0; i < elements.size(); ++i) { - visitor(DexFileReference(dex_file, i), elements[i].LoadRelaxed()); + visitor(DexFileReference(dex_file, i), elements[i].load(std::memory_order_relaxed)); } } } @@ -129,11 +129,22 @@ template <typename DexFileReferenceType, typename Value> inline void AtomicDexRefMap<DexFileReferenceType, Value>::ClearEntries() { for (auto& it : arrays_) { for (auto& element : it.second) { - element.StoreRelaxed(nullptr); + element.store(nullptr, std::memory_order_relaxed); } } } +template <typename DexFileReferenceType, typename Value> +inline std::vector<const DexFile*> AtomicDexRefMap<DexFileReferenceType, Value>::GetDexFiles() + const { + std::vector<const DexFile*> result; + result.reserve(arrays_.size()); + for (auto& it : arrays_) { + result.push_back(it.first); + } + return result; +} + } // namespace art #endif // ART_COMPILER_UTILS_ATOMIC_DEX_REF_MAP_INL_H_ diff --git a/compiler/utils/atomic_dex_ref_map.h b/compiler/utils/atomic_dex_ref_map.h index fc2437999e..a8c285f765 100644 --- a/compiler/utils/atomic_dex_ref_map.h +++ b/compiler/utils/atomic_dex_ref_map.h @@ -54,6 +54,9 @@ class AtomicDexRefMap { void AddDexFile(const DexFile* dex_file); void AddDexFiles(const std::vector<const DexFile*>& dex_files); + // Return a vector of all dex files which were added to the map. + std::vector<const DexFile*> GetDexFiles() const; + bool HaveDexFile(const DexFile* dex_file) const { return arrays_.find(dex_file) != arrays_.end(); } diff --git a/compiler/utils/atomic_dex_ref_map_test.cc b/compiler/utils/atomic_dex_ref_map_test.cc index 4e1ef1248d..864531ed91 100644 --- a/compiler/utils/atomic_dex_ref_map_test.cc +++ b/compiler/utils/atomic_dex_ref_map_test.cc @@ -41,6 +41,9 @@ TEST_F(AtomicDexRefMapTest, RunTests) { EXPECT_TRUE(map.Insert(MethodReference(dex.get(), 1), 0, 1) == Map::kInsertResultInvalidDexFile); map.AddDexFile(dex.get()); EXPECT_TRUE(map.HaveDexFile(dex.get())); + std::vector<const DexFile*> registered_dex_files = map.GetDexFiles(); + EXPECT_EQ(1u, registered_dex_files.size()); + EXPECT_TRUE(registered_dex_files[0] == dex.get()); EXPECT_GT(dex->NumMethodIds(), 10u); // After we have added the get should succeed but return the default value. 
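The AtomicDexRefMap hunks above drop ART's Atomic<> helper names (LoadRelaxed, StoreRelaxed, ExchangeSequentiallyConsistent) in favor of the standard std::atomic member functions with explicit memory orders. A self-contained illustration of the equivalent calls in plain C++ (not ART code):

#include <atomic>

std::atomic<int*> slot{nullptr};

void StdAtomicEquivalents(int* value) {
  slot.store(value, std::memory_order_relaxed);         // was StoreRelaxed()
  int* current = slot.load(std::memory_order_relaxed);  // was LoadRelaxed()
  int* previous =
      slot.exchange(nullptr, std::memory_order_seq_cst);  // was ExchangeSequentiallyConsistent()
  (void)current;
  (void)previous;
}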
EXPECT_TRUE(map.Get(MethodReference(dex.get(), 1), &value)); diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h index c866504e62..4e892f2616 100644 --- a/compiler/utils/dedupe_set-inl.h +++ b/compiler/utils/dedupe_set-inl.h @@ -71,13 +71,13 @@ class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard { const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) { MutexLock lock(self, lock_); HashedKey<InKey> hashed_in_key(hash, &in_key); - auto it = keys_.Find(hashed_in_key); + auto it = keys_.find(hashed_in_key); if (it != keys_.end()) { DCHECK(it->Key() != nullptr); return it->Key(); } const StoreKey* store_key = alloc_.Copy(in_key); - keys_.Insert(HashedKey<StoreKey> { hash, store_key }); + keys_.insert(HashedKey<StoreKey> { hash, store_key }); return store_key; } @@ -90,7 +90,7 @@ class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard { // Note: The total_probe_distance will be updated with the current state. // It may have been higher before a re-hash. global_stats->total_probe_distance += keys_.TotalProbeDistance(); - global_stats->total_size += keys_.Size(); + global_stats->total_size += keys_.size(); for (const HashedKey<StoreKey>& key : keys_) { auto it = stats.find(key.Hash()); if (it == stats.end()) { diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 3f7691b6a8..5f405f348c 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -38,8 +38,8 @@ #include "x86_64/jni_macro_assembler_x86_64.h" #endif #include "base/casts.h" -#include "globals.h" -#include "memory_region.h" +#include "base/globals.h" +#include "base/memory_region.h" namespace art { diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h index f5df926749..e6130cfc4c 100644 --- a/compiler/utils/jni_macro_assembler.h +++ b/compiler/utils/jni_macro_assembler.h @@ -259,19 +259,19 @@ inline JNIMacroLabel::~JNIMacroLabel() { template <typename T, PointerSize kPointerSize> class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> { public: - void FinalizeCode() OVERRIDE { + void FinalizeCode() override { asm_.FinalizeCode(); } - size_t CodeSize() const OVERRIDE { + size_t CodeSize() const override { return asm_.CodeSize(); } - void FinalizeInstructions(const MemoryRegion& region) OVERRIDE { + void FinalizeInstructions(const MemoryRegion& region) override { asm_.FinalizeInstructions(region); } - DebugFrameOpCodeWriterForAssembler& cfi() OVERRIDE { + DebugFrameOpCodeWriterForAssembler& cfi() override { return asm_.cfi(); } @@ -299,7 +299,7 @@ class JNIMacroLabelCommon : public JNIMacroLabel { JNIMacroLabelCommon() : JNIMacroLabel(kIsa) { } - virtual ~JNIMacroLabelCommon() OVERRIDE {} + ~JNIMacroLabelCommon() override {} private: PlatformLabel label_; diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index 1aefc84c78..067a5953b8 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -20,6 +20,7 @@ #include "jni_macro_assembler.h" #include "assembler_test_base.h" +#include "base/malloc_arena_pool.h" #include "common_runtime_test.h" // For ScratchFile #include <sys/stat.h> @@ -57,7 +58,7 @@ class JNIMacroAssemblerTest : public testing::Test { protected: JNIMacroAssemblerTest() {} - void SetUp() OVERRIDE { + void SetUp() override { allocator_.reset(new ArenaAllocator(&pool_)); 
assembler_.reset(CreateAssembler(allocator_.get())); test_helper_.reset( @@ -73,7 +74,7 @@ class JNIMacroAssemblerTest : public testing::Test { SetUpHelpers(); } - void TearDown() OVERRIDE { + void TearDown() override { test_helper_.reset(); // Clean up the helper. assembler_.reset(); allocator_.reset(); @@ -139,7 +140,7 @@ class JNIMacroAssemblerTest : public testing::Test { test_helper_->Driver(*data, assembly_text, test_name); } - ArenaPool pool_; + MallocArenaPool pool_; std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<Ass> assembler_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h index 2b7b2aa7ce..db9c36cc75 100644 --- a/compiler/utils/managed_register.h +++ b/compiler/utils/managed_register.h @@ -101,11 +101,11 @@ class ManagedRegisterSpill : public ManagedRegister { ManagedRegisterSpill(const ManagedRegister& other, int32_t size) : ManagedRegister(other), size_(size), spill_offset_(-1) { } - int32_t getSpillOffset() { + int32_t getSpillOffset() const { return spill_offset_; } - int32_t getSize() { + int32_t getSize() const { return size_; } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 2218ef9af2..a9d1a25530 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -18,9 +18,9 @@ #include "base/bit_utils.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -179,7 +179,7 @@ void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) { return; } - typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); const std::vector<uint8_t>& old_stream = data.first; const std::vector<DelayedAdvancePC>& advances = data.second; @@ -463,7 +463,7 @@ void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* p } void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) { - Addiu(rt, rs, imm16, /* patcher_label */ nullptr); + Addiu(rt, rs, imm16, /* patcher_label= */ nullptr); } void MipsAssembler::Subu(Register rd, Register rs, Register rt) { @@ -732,7 +732,7 @@ void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patc } void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) { - Lw(rt, rs, imm16, /* patcher_label */ nullptr); + Lw(rt, rs, imm16, /* patcher_label= */ nullptr); } void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) { @@ -814,7 +814,7 @@ void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patc } void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) { - Sw(rt, rs, imm16, /* patcher_label */ nullptr); + Sw(rt, rs, imm16, /* patcher_label= */ nullptr); } void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) { @@ -2793,6 +2793,26 @@ void MipsAssembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister DsFsmInstr(EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15)).FprOuts(wd).FprIns(ws, wt); } +void MipsAssembler::PcntB(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntH(VectorRegister wd, VectorRegister ws) { + 
CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + +void MipsAssembler::PcntD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + DsFsmInstr(EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e)).FprOuts(wd).FprIns(ws); +} + void MipsAssembler::ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double) { @@ -3590,7 +3610,7 @@ void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { label->LinkTo(branch_id); } // Reserve space for the branch. - while (length--) { + for (; length != 0u; --length) { Nop(); } } @@ -3735,7 +3755,7 @@ void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) { void MipsAssembler::Buncond(MipsLabel* label, bool is_r6, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ false, is_bare); + branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call= */ false, is_bare); MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } @@ -3758,7 +3778,7 @@ void MipsAssembler::Bcond(MipsLabel* label, void MipsAssembler::Call(MipsLabel* label, bool is_r6, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ true, is_bare); + branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call= */ true, is_bare); MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } @@ -4280,43 +4300,43 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) { } void MipsAssembler::B(MipsLabel* label, bool is_bare) { - Buncond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare); + Buncond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare); } void MipsAssembler::Bal(MipsLabel* label, bool is_bare) { - Call(label, /* is_r6 */ (IsR6() && !is_bare), is_bare); + Call(label, /* is_r6= */ (IsR6() && !is_bare), is_bare); } void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQ, rs, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondEQ, rs, rt); } void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNE, rs, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondNE, rs, rt); } void MipsAssembler::Beqz(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQZ, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondEQZ, rt); } void MipsAssembler::Bnez(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNEZ, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondNEZ, rt); } void MipsAssembler::Bltz(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLTZ, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondLTZ, rt); } void MipsAssembler::Bgez(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGEZ, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondGEZ, rt); } void MipsAssembler::Blez(Register rt, MipsLabel* 
label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLEZ, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondLEZ, rt); } void MipsAssembler::Bgtz(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGTZ, rt); + Bcond(label, /* is_r6= */ (IsR6() && !is_bare), is_bare, kCondGTZ, rt); } bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const { @@ -4372,7 +4392,7 @@ void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label, bool is_bare Bcond(label, IsR6(), is_bare, kCondLT, rs, rt); } else if (!Branch::IsNop(kCondLT, rs, rt)) { // Synthesize the instruction (not available on R2). - GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt= */ false, rs, rt); Bnez(AT, label, is_bare); } } @@ -4384,7 +4404,7 @@ void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label, bool is_bare B(label, is_bare); } else { // Synthesize the instruction (not available on R2). - GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt= */ false, rs, rt); Beqz(AT, label, is_bare); } } @@ -4394,7 +4414,7 @@ void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label, bool is_bar Bcond(label, IsR6(), is_bare, kCondLTU, rs, rt); } else if (!Branch::IsNop(kCondLTU, rs, rt)) { // Synthesize the instruction (not available on R2). - GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt= */ true, rs, rt); Bnez(AT, label, is_bare); } } @@ -4406,7 +4426,7 @@ void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bar B(label, is_bare); } else { // Synthesize the instruction (not available on R2). 
- GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt= */ true, rs, rt); Beqz(AT, label, is_bare); } } @@ -4417,7 +4437,7 @@ void MipsAssembler::Bc1f(MipsLabel* label, bool is_bare) { void MipsAssembler::Bc1f(int cc, MipsLabel* label, bool is_bare) { CHECK(IsUint<3>(cc)) << cc; - Bcond(label, /* is_r6 */ false, is_bare, kCondF, static_cast<Register>(cc), ZERO); + Bcond(label, /* is_r6= */ false, is_bare, kCondF, static_cast<Register>(cc), ZERO); } void MipsAssembler::Bc1t(MipsLabel* label, bool is_bare) { @@ -4426,71 +4446,71 @@ void MipsAssembler::Bc1t(MipsLabel* label, bool is_bare) { void MipsAssembler::Bc1t(int cc, MipsLabel* label, bool is_bare) { CHECK(IsUint<3>(cc)) << cc; - Bcond(label, /* is_r6 */ false, is_bare, kCondT, static_cast<Register>(cc), ZERO); + Bcond(label, /* is_r6= */ false, is_bare, kCondT, static_cast<Register>(cc), ZERO); } void MipsAssembler::Bc(MipsLabel* label, bool is_bare) { - Buncond(label, /* is_r6 */ true, is_bare); + Buncond(label, /* is_r6= */ true, is_bare); } void MipsAssembler::Balc(MipsLabel* label, bool is_bare) { - Call(label, /* is_r6 */ true, is_bare); + Call(label, /* is_r6= */ true, is_bare); } void MipsAssembler::Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondEQ, rs, rt); } void MipsAssembler::Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondNE, rs, rt); } void MipsAssembler::Beqzc(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondEQZ, rt); } void MipsAssembler::Bnezc(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondNEZ, rt); } void MipsAssembler::Bltzc(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLTZ, rt); } void MipsAssembler::Bgezc(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGEZ, rt); } void MipsAssembler::Blezc(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLEZ, rt); } void MipsAssembler::Bgtzc(Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGTZ, rt); } void MipsAssembler::Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLT, rs, rt); } void MipsAssembler::Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGE, rs, rt); } void MipsAssembler::Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLTU, rs, rt); } void MipsAssembler::Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt); + Bcond(label, 
/* is_r6= */ true, is_bare, kCondGEU, rs, rt); } void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<Register>(ft), ZERO); + Bcond(label, /* is_r6= */ true, is_bare, kCondF, static_cast<Register>(ft), ZERO); } void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<Register>(ft), ZERO); + Bcond(label, /* is_r6= */ true, is_bare, kCondT, static_cast<Register>(ft), ZERO); } void MipsAssembler::AdjustBaseAndOffset(Register& base, @@ -4781,10 +4801,9 @@ void MipsAssembler::BuildFrame(size_t frame_size, // Write out entry spills. int32_t offset = frame_size + kFramePointerSize; - for (size_t i = 0; i < entry_spills.size(); ++i) { - MipsManagedRegister reg = entry_spills.at(i).AsMips(); + for (const ManagedRegisterSpill& spill : entry_spills) { + MipsManagedRegister reg = spill.AsMips(); if (reg.IsNoRegister()) { - ManagedRegisterSpill spill = entry_spills.at(i); offset += spill.getSize(); } else if (reg.IsCoreRegister()) { StoreToOffset(kStoreWord, reg.AsCoreRegister(), SP, offset); diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 7de8e2e366..a24071d694 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -24,10 +24,10 @@ #include "arch/mips/instruction_set_features_mips.h" #include "base/arena_containers.h" #include "base/enums.h" +#include "base/globals.h" #include "base/macros.h" #include "base/stl_util_identity.h" #include "constants_mips.h" -#include "globals.h" #include "heap_poisoning.h" #include "managed_register_mips.h" #include "offsets.h" @@ -263,7 +263,7 @@ class MipsExceptionSlowPath { DISALLOW_COPY_AND_ASSIGN(MipsExceptionSlowPath); }; -class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> { +class MipsAssembler final : public Assembler, public JNIMacroAssembler<PointerSize::k32> { public: using JNIBase = JNIMacroAssembler<PointerSize::k32>; @@ -285,9 +285,9 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi cfi().DelayEmittingAdvancePCs(); } - size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } - size_t CodePosition() OVERRIDE; - DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + size_t CodeSize() const override { return Assembler::CodeSize(); } + size_t CodePosition() override; + DebugFrameOpCodeWriterForAssembler& cfi() override { return Assembler::cfi(); } virtual ~MipsAssembler() { for (auto& branch : branches_) { @@ -756,6 +756,11 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void PcntB(VectorRegister wd, VectorRegister ws); + void PcntH(VectorRegister wd, VectorRegister ws); + void PcntW(VectorRegister wd, VectorRegister ws); + void PcntD(VectorRegister wd, VectorRegister ws); + // Helper for replicating floating point value in all destination elements. void ReplicateFPToVectorRegister(VectorRegister dst, FRegister src, bool is_double); @@ -857,7 +862,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // We permit `base` and `temp` to coincide (however, we check that neither is AT), // in which case the `base` register may be overwritten in the process. 
CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kStoreDoubleword)); uint32_t low = Low32Bits(value); uint32_t high = High32Bits(value); Register reg; @@ -912,7 +917,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi Register base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kLoadDoubleword)); switch (type) { case kLoadSignedByte: Lb(reg, base, offset); @@ -955,7 +960,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi Register base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ false, /* is_float= */ true); Lwc1(reg, base, offset); null_checker(); } @@ -965,7 +970,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi Register base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ true, /* is_float= */ true); if (IsAligned<kMipsDoublewordSize>(offset)) { Ldc1(reg, base, offset); null_checker(); @@ -1011,7 +1016,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // Must not use AT as `reg`, so as not to overwrite the value being stored // with the adjusted `base`. CHECK_NE(reg, AT); - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kStoreDoubleword)); switch (type) { case kStoreByte: Sb(reg, base, offset); @@ -1042,7 +1047,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi Register base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ false, /* is_float */ true); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ false, /* is_float= */ true); Swc1(reg, base, offset); null_checker(); } @@ -1052,7 +1057,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi Register base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ true, /* is_float */ true); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ true, /* is_float= */ true); if (IsAligned<kMipsDoublewordSize>(offset)) { Sdc1(reg, base, offset); null_checker(); @@ -1138,10 +1143,10 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi } } - void Bind(Label* label) OVERRIDE { + void Bind(Label* label) override { Bind(down_cast<MipsLabel*>(label)); } - void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + void Jump(Label* label ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS"; } @@ -1150,25 +1155,25 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi using JNIBase::Jump; // Create a new label that can be used with Jump/Bind calls. 
- std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE { + std::unique_ptr<JNIMacroLabel> CreateLabel() override { LOG(FATAL) << "Not implemented on MIPS32"; UNREACHABLE(); } // Emit an unconditional jump to the label. - void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) override { LOG(FATAL) << "Not implemented on MIPS32"; UNREACHABLE(); } // Emit a conditional jump to the label by applying a unary condition test to the register. void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED, JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED, - ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE { + ManagedRegister test ATTRIBUTE_UNUSED) override { LOG(FATAL) << "Not implemented on MIPS32"; UNREACHABLE(); } // Code at this offset will serve as the target for the Jump call. - void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) override { LOG(FATAL) << "Not implemented on MIPS32"; UNREACHABLE(); } @@ -1227,108 +1232,108 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void BuildFrame(size_t frame_size, ManagedRegister method_reg, ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + const ManagedRegisterEntrySpills& entry_spills) override; // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, - bool may_suspend) OVERRIDE; + bool may_suspend) override; - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; // Store routines. - void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; + void Store(FrameOffset offs, ManagedRegister msrc, size_t size) override; + void StoreRef(FrameOffset dest, ManagedRegister msrc) override; + void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) override; void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; - void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset32 thr_offs) override; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; // Load routines. 
- void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; + void Load(ManagedRegister mdest, FrameOffset src, size_t size) override; - void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister mdest, ThreadOffset32 src, size_t size) override; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) override; void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; + bool unpoison_reference) override; - void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset32 offs) override; // Copying routines. - void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; + void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) override; void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) override; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) override; void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; - void MemoryBarrier(ManagedRegister) OVERRIDE; + void MemoryBarrier(ManagedRegister) override; // Sign extension. - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void SignExtend(ManagedRegister mreg, size_t size) override; // Zero extension. - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void ZeroExtend(ManagedRegister mreg, size_t size) override; // Exploit fast access in managed code to Thread::Current(). - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; + void GetCurrentThread(ManagedRegister tr) override; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) override; // Set up out_reg to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. 
in_reg holds a possibly stale reference @@ -1337,37 +1342,37 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister mscratch, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // src holds a handle scope entry (Object**) load this into dst. - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; // Call to address held at [base+offset]. - void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) OVERRIDE; + void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) override; + void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) override; + void CallFromThread(ThreadOffset32 offset, ManagedRegister mscratch) override; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) override; // Emit slow paths queued during assembly and promote short branches to long if needed. - void FinalizeCode() OVERRIDE; + void FinalizeCode() override; // Emit branches and finalize all instructions. - void FinalizeInstructions(const MemoryRegion& region); + void FinalizeInstructions(const MemoryRegion& region) override; // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS, // must be used instead of MipsLabel::GetPosition()). diff --git a/compiler/utils/mips/assembler_mips32r5_test.cc b/compiler/utils/mips/assembler_mips32r5_test.cc index 9a69ffd3dd..98fc44ba5d 100644 --- a/compiler/utils/mips/assembler_mips32r5_test.cc +++ b/compiler/utils/mips/assembler_mips32r5_test.cc @@ -38,12 +38,22 @@ class AssemblerMIPS32r5Test : public AssemblerTest<mips::MipsAssembler, uint32_t, mips::VectorRegister> { public: - typedef AssemblerTest<mips::MipsAssembler, - mips::MipsLabel, - mips::Register, - mips::FRegister, - uint32_t, - mips::VectorRegister> Base; + using Base = AssemblerTest<mips::MipsAssembler, + mips::MipsLabel, + mips::Register, + mips::FRegister, + uint32_t, + mips::VectorRegister>; + + // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> + // and reimplement it without the verification against `assembly_string`. 
b/73903608 + void DriverStr(const std::string& assembly_string ATTRIBUTE_UNUSED, + const std::string& test_name ATTRIBUTE_UNUSED) { + GetAssembler()->FinalizeCode(); + std::vector<uint8_t> data(GetAssembler()->CodeSize()); + MemoryRegion code(data.data(), data.size()); + GetAssembler()->FinalizeInstructions(code); + } AssemblerMIPS32r5Test() : instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r5", nullptr)) { @@ -51,15 +61,15 @@ class AssemblerMIPS32r5Test : public AssemblerTest<mips::MipsAssembler, protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "mips"; } - std::string GetAssemblerParameters() OVERRIDE { + std::string GetAssemblerParameters() override { return " --no-warn -32 -march=mips32r5 -mmsa"; } - void Pad(std::vector<uint8_t>& data) OVERRIDE { + void Pad(std::vector<uint8_t>& data) override { // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't // pad, so, in order for two assembler outputs to match, we need to match the padding as well. @@ -68,15 +78,15 @@ class AssemblerMIPS32r5Test : public AssemblerTest<mips::MipsAssembler, data.insert(data.end(), pad_size, 0); } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mmips:isa32r5"; } - mips::MipsAssembler* CreateAssembler(ArenaAllocator* allocator) OVERRIDE { + mips::MipsAssembler* CreateAssembler(ArenaAllocator* allocator) override { return new (allocator) mips::MipsAssembler(allocator, instruction_set_features_.get()); } - void SetUpHelpers() OVERRIDE { + void SetUpHelpers() override { if (registers_.size() == 0) { registers_.push_back(new mips::Register(mips::ZERO)); registers_.push_back(new mips::Register(mips::AT)); @@ -212,35 +222,35 @@ class AssemblerMIPS32r5Test : public AssemblerTest<mips::MipsAssembler, } } - void TearDown() OVERRIDE { + void TearDown() override { AssemblerTest::TearDown(); STLDeleteElements(®isters_); STLDeleteElements(&fp_registers_); STLDeleteElements(&vec_registers_); } - std::vector<mips::MipsLabel> GetAddresses() { + std::vector<mips::MipsLabel> GetAddresses() override { UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; UNREACHABLE(); } - std::vector<mips::Register*> GetRegisters() OVERRIDE { + std::vector<mips::Register*> GetRegisters() override { return registers_; } - std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE { + std::vector<mips::FRegister*> GetFPRegisters() override { return fp_registers_; } - std::vector<mips::VectorRegister*> GetVectorRegisters() OVERRIDE { + std::vector<mips::VectorRegister*> GetVectorRegisters() override { return vec_registers_; } - uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { + uint32_t CreateImmediate(int64_t imm_value) override { return imm_value; } - std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE { + std::string GetSecondaryRegisterName(const mips::Register& reg) override { CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); return secondary_register_names_[reg]; } diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 937ee25bcb..4e27bbf28d 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc 
@@ -38,12 +38,22 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, uint32_t, mips::VectorRegister> { public: - typedef AssemblerTest<mips::MipsAssembler, - mips::MipsLabel, - mips::Register, - mips::FRegister, - uint32_t, - mips::VectorRegister> Base; + using Base = AssemblerTest<mips::MipsAssembler, + mips::MipsLabel, + mips::Register, + mips::FRegister, + uint32_t, + mips::VectorRegister>; + + // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> + // and reimplement it without the verification against `assembly_string`. b/73903608 + void DriverStr(const std::string& assembly_string ATTRIBUTE_UNUSED, + const std::string& test_name ATTRIBUTE_UNUSED) { + GetAssembler()->FinalizeCode(); + std::vector<uint8_t> data(GetAssembler()->CodeSize()); + MemoryRegion code(data.data(), data.size()); + GetAssembler()->FinalizeInstructions(code); + } AssemblerMIPS32r6Test() : instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) { @@ -51,16 +61,16 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "mips"; } - std::string GetAssemblerCmdName() OVERRIDE { + std::string GetAssemblerCmdName() override { // We assemble and link for MIPS32R6. See GetAssemblerParameters() for details. return "gcc"; } - std::string GetAssemblerParameters() OVERRIDE { + std::string GetAssemblerParameters() override { // We assemble and link for MIPS32R6. The reason is that object files produced for MIPS32R6 // (and MIPS64R6) with the GNU assembler don't have correct final offsets in PC-relative // branches in the .text section and so they require a relocation pass (there's a relocation @@ -72,7 +82,7 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib"; } - void Pad(std::vector<uint8_t>& data) OVERRIDE { + void Pad(std::vector<uint8_t>& data) override { // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't // pad, so, in order for two assembler outputs to match, we need to match the padding as well. 
@@ -81,15 +91,15 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, data.insert(data.end(), pad_size, 0); } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mmips:isa32r6"; } - mips::MipsAssembler* CreateAssembler(ArenaAllocator* allocator) OVERRIDE { + mips::MipsAssembler* CreateAssembler(ArenaAllocator* allocator) override { return new (allocator) mips::MipsAssembler(allocator, instruction_set_features_.get()); } - void SetUpHelpers() OVERRIDE { + void SetUpHelpers() override { if (registers_.size() == 0) { registers_.push_back(new mips::Register(mips::ZERO)); registers_.push_back(new mips::Register(mips::AT)); @@ -225,35 +235,35 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler, } } - void TearDown() OVERRIDE { + void TearDown() override { AssemblerTest::TearDown(); STLDeleteElements(®isters_); STLDeleteElements(&fp_registers_); STLDeleteElements(&vec_registers_); } - std::vector<mips::MipsLabel> GetAddresses() { + std::vector<mips::MipsLabel> GetAddresses() override { UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; UNREACHABLE(); } - std::vector<mips::Register*> GetRegisters() OVERRIDE { + std::vector<mips::Register*> GetRegisters() override { return registers_; } - std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE { + std::vector<mips::FRegister*> GetFPRegisters() override { return fp_registers_; } - std::vector<mips::VectorRegister*> GetVectorRegisters() OVERRIDE { + std::vector<mips::VectorRegister*> GetVectorRegisters() override { return vec_registers_; } - uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { + uint32_t CreateImmediate(int64_t imm_value) override { return imm_value; } - std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE { + std::string GetSecondaryRegisterName(const mips::Register& reg) override { CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); return secondary_register_names_[reg]; } @@ -1068,11 +1078,11 @@ TEST_F(AssemblerMIPS32r6Test, StoreQToOffset) { ////////////// TEST_F(AssemblerMIPS32r6Test, Bc) { - BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false); + BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot= */ false); } TEST_F(AssemblerMIPS32r6Test, Balc) { - BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false); + BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot= */ false); } TEST_F(AssemblerMIPS32r6Test, Beqc) { @@ -1132,11 +1142,11 @@ TEST_F(AssemblerMIPS32r6Test, Bc1nez) { } TEST_F(AssemblerMIPS32r6Test, B) { - BranchHelper(&mips::MipsAssembler::B, "Bc", /* has_slot */ false); + BranchHelper(&mips::MipsAssembler::B, "Bc", /* has_slot= */ false); } TEST_F(AssemblerMIPS32r6Test, Bal) { - BranchHelper(&mips::MipsAssembler::Bal, "Balc", /* has_slot */ false); + BranchHelper(&mips::MipsAssembler::Bal, "Balc", /* has_slot= */ false); } TEST_F(AssemblerMIPS32r6Test, Beq) { @@ -1188,123 +1198,123 @@ TEST_F(AssemblerMIPS32r6Test, Bgeu) { } TEST_F(AssemblerMIPS32r6Test, BareBc) { - BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false, /* is_bare */ true); + BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot= */ false, /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBalc) { - BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false, /* is_bare */ true); + BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot= */ false, /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, 
BareBeqc) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBnec) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBeqzc) { - BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBnezc) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBltzc) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgezc) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBlezc) { - BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgtzc) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBltc) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgec) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBltuc) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgeuc) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBc1eqz) { - BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz", /* is_bare */ true); + BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBc1nez) { - BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez", /* is_bare */ true); + BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareB) { - BranchHelper(&mips::MipsAssembler::B, "B", /* has_slot */ true, /* is_bare */ true); + BranchHelper(&mips::MipsAssembler::B, "B", /* has_slot= */ true, /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBal) { - BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* has_slot */ true, /* is_bare */ true); + BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* has_slot= */ true, /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBeq) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true); + 
BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBne) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBeqz) { - BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBnez) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBltz) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgez) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBlez) { - BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgtz) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBlt) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBge) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBltu) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, BareBgeu) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare= */ true); } TEST_F(AssemblerMIPS32r6Test, LongBeqc) { @@ -2277,6 +2287,22 @@ TEST_F(AssemblerMIPS32r6Test, FillW) { DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); } +TEST_F(AssemblerMIPS32r6Test, PcntB) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntH) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntW) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w"); +} + +TEST_F(AssemblerMIPS32r6Test, PcntD) { + DriverStr(RepeatVV(&mips::MipsAssembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d"); +} + TEST_F(AssemblerMIPS32r6Test, LdiB) { DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); } diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index b027d3a549..c0894d309e 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -37,27 +37,37 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, 
mips::FRegister, uint32_t> { public: - typedef AssemblerTest<mips::MipsAssembler, - mips::MipsLabel, - mips::Register, - mips::FRegister, - uint32_t> Base; + using Base = AssemblerTest<mips::MipsAssembler, + mips::MipsLabel, + mips::Register, + mips::FRegister, + uint32_t>; + + // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> + // and reimplement it without the verification against `assembly_string`. b/73903608 + void DriverStr(const std::string& assembly_string ATTRIBUTE_UNUSED, + const std::string& test_name ATTRIBUTE_UNUSED) { + GetAssembler()->FinalizeCode(); + std::vector<uint8_t> data(GetAssembler()->CodeSize()); + MemoryRegion code(data.data(), data.size()); + GetAssembler()->FinalizeInstructions(code); + } protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "mips"; } - std::string GetAssemblerParameters() OVERRIDE { + std::string GetAssemblerParameters() override { return " --no-warn -32 -march=mips32r2"; } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mmips:isa32r2"; } - void SetUpHelpers() OVERRIDE { + void SetUpHelpers() override { if (registers_.size() == 0) { registers_.push_back(new mips::Register(mips::ZERO)); registers_.push_back(new mips::Register(mips::AT)); @@ -160,30 +170,30 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler, } } - void TearDown() OVERRIDE { + void TearDown() override { AssemblerTest::TearDown(); STLDeleteElements(®isters_); STLDeleteElements(&fp_registers_); } - std::vector<mips::MipsLabel> GetAddresses() { + std::vector<mips::MipsLabel> GetAddresses() override { UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; UNREACHABLE(); } - std::vector<mips::Register*> GetRegisters() OVERRIDE { + std::vector<mips::Register*> GetRegisters() override { return registers_; } - std::vector<mips::FRegister*> GetFPRegisters() OVERRIDE { + std::vector<mips::FRegister*> GetFPRegisters() override { return fp_registers_; } - uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { + uint32_t CreateImmediate(int64_t imm_value) override { return imm_value; } - std::string GetSecondaryRegisterName(const mips::Register& reg) OVERRIDE { + std::string GetSecondaryRegisterName(const mips::Register& reg) override { CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); return secondary_register_names_[reg]; } @@ -2231,67 +2241,67 @@ TEST_F(AssemblerMIPSTest, Bc1t) { } TEST_F(AssemblerMIPSTest, BareB) { - BranchHelper(&mips::MipsAssembler::B, "B", /* is_bare */ true); + BranchHelper(&mips::MipsAssembler::B, "B", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBal) { - BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* is_bare */ true); + BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBeq) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBne) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBeqz) { - BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, 
"Beqz", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBnez) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBltz) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBgez) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBlez) { - BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBgtz) { - BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true); + BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBlt) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBge) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBltu) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBgeu) { - BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBc1f) { - BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f", /* is_bare */ true); + BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, BareBc1t) { - BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t", /* is_bare */ true); + BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t", /* is_bare= */ true); } TEST_F(AssemblerMIPSTest, ImpossibleReordering) { diff --git a/compiler/utils/mips/constants_mips.h b/compiler/utils/mips/constants_mips.h index 016c0dbb2e..07d8b7de0e 100644 --- a/compiler/utils/mips/constants_mips.h +++ b/compiler/utils/mips/constants_mips.h @@ -22,8 +22,8 @@ #include <android-base/logging.h> #include "arch/mips/registers_mips.h" +#include "base/globals.h" #include "base/macros.h" -#include "globals.h" namespace art { namespace mips { diff --git a/compiler/utils/mips/managed_register_mips.cc b/compiler/utils/mips/managed_register_mips.cc index 5a8c0481a5..9b3ed79d2f 100644 --- a/compiler/utils/mips/managed_register_mips.cc +++ b/compiler/utils/mips/managed_register_mips.cc @@ -16,7 +16,7 @@ #include "managed_register_mips.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace mips { diff --git a/compiler/utils/mips/managed_register_mips.h b/compiler/utils/mips/managed_register_mips.h index 66204e70e3..18d5821e61 100644 --- a/compiler/utils/mips/managed_register_mips.h +++ b/compiler/utils/mips/managed_register_mips.h @@ -18,7 +18,6 @@ #define ART_COMPILER_UTILS_MIPS_MANAGED_REGISTER_MIPS_H_ #include "constants_mips.h" -#include "debug/dwarf/register.h" #include "utils/managed_register.h" namespace art { 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index e1b0e75108..70313ca093 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -18,9 +18,9 @@ #include "base/bit_utils.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -52,7 +52,7 @@ void Mips64Assembler::PatchCFI() { return; } - typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC; + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); const std::vector<uint8_t>& old_stream = data.first; const std::vector<DelayedAdvancePC>& advances = data.second; @@ -2279,6 +2279,26 @@ void Mips64Assembler::Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegist EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x15); } +void Mips64Assembler::PcntB(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntH(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x2, ws, wd, 0x1e); +} + +void Mips64Assembler::PcntD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); + EmitMsa2R(0xc1, 0x3, ws, wd, 0x1e); +} + void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double) { @@ -2435,7 +2455,7 @@ Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call condition_(kUncond) { InitializeType( (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)), - /* is_r6 */ true); + /* is_r6= */ true); } Mips64Assembler::Branch::Branch(bool is_r6, @@ -2496,7 +2516,7 @@ Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type la rhs_reg_(ZERO), condition_(kUncond) { CHECK_NE(dest_reg, ZERO); - InitializeType(label_or_literal_type, /* is_r6 */ true); + InitializeType(label_or_literal_type, /* is_r6= */ true); } Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( @@ -2869,14 +2889,14 @@ void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) { label->LinkTo(branch_id); } // Reserve space for the branch. - while (length--) { + for (; length != 0u; --length) { Nop(); } } void Mips64Assembler::Buncond(Mips64Label* label, bool is_bare) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, /* is_call */ false, is_bare); + branches_.emplace_back(buffer_.Size(), target, /* is_call= */ false, is_bare); FinalizeLabeledBranch(label); } @@ -2897,7 +2917,7 @@ void Mips64Assembler::Bcond(Mips64Label* label, void Mips64Assembler::Call(Mips64Label* label, bool is_bare) { uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; - branches_.emplace_back(buffer_.Size(), target, /* is_call */ true, is_bare); + branches_.emplace_back(buffer_.Size(), target, /* is_call= */ true, is_bare); FinalizeLabeledBranch(label); } @@ -3258,99 +3278,99 @@ void Mips64Assembler::Balc(Mips64Label* label, bool is_bare) { } void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLT, rs, rt); } void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLTZ, rt); } void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGTZ, rt); } void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGE, rs, rt); } void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGEZ, rt); } void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLEZ, rt); } void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondLTU, rs, rt); } void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondGEU, rs, rt); } void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondEQ, rs, rt); } void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt); + Bcond(label, /* is_r6= */ true, is_bare, kCondNE, rs, rt); } void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rs); + Bcond(label, /* is_r6= */ true, is_bare, kCondEQZ, rs); } void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rs); + Bcond(label, /* is_r6= */ true, is_bare, kCondNEZ, rs); } void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<GpuRegister>(ft), ZERO); + Bcond(label, /* is_r6= */ true, is_bare, kCondF, static_cast<GpuRegister>(ft), ZERO); } void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare) { - Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<GpuRegister>(ft), ZERO); + Bcond(label, /* is_r6= */ true, is_bare, kCondT, static_cast<GpuRegister>(ft), ZERO); } void Mips64Assembler::Bltz(GpuRegister rt, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondLTZ, rt); + Bcond(label, /* is_r6= */ false, is_bare, kCondLTZ, rt); } 
void Mips64Assembler::Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondGTZ, rt); + Bcond(label, /* is_r6= */ false, is_bare, kCondGTZ, rt); } void Mips64Assembler::Bgez(GpuRegister rt, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondGEZ, rt); + Bcond(label, /* is_r6= */ false, is_bare, kCondGEZ, rt); } void Mips64Assembler::Blez(GpuRegister rt, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondLEZ, rt); + Bcond(label, /* is_r6= */ false, is_bare, kCondLEZ, rt); } void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondEQ, rs, rt); + Bcond(label, /* is_r6= */ false, is_bare, kCondEQ, rs, rt); } void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondNE, rs, rt); + Bcond(label, /* is_r6= */ false, is_bare, kCondNE, rs, rt); } void Mips64Assembler::Beqz(GpuRegister rs, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondEQZ, rs); + Bcond(label, /* is_r6= */ false, is_bare, kCondEQZ, rs); } void Mips64Assembler::Bnez(GpuRegister rs, Mips64Label* label, bool is_bare) { CHECK(is_bare); - Bcond(label, /* is_r6 */ false, is_bare, kCondNEZ, rs); + Bcond(label, /* is_r6= */ false, is_bare, kCondNEZ, rs); } void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base, @@ -3613,9 +3633,8 @@ void Mips64Assembler::BuildFrame(size_t frame_size, // Write out entry spills. int32_t offset = frame_size + kFramePointerSize; - for (size_t i = 0; i < entry_spills.size(); ++i) { - Mips64ManagedRegister reg = entry_spills[i].AsMips64(); - ManagedRegisterSpill spill = entry_spills.at(i); + for (const ManagedRegisterSpill& spill : entry_spills) { + Mips64ManagedRegister reg = spill.AsMips64(); int32_t size = spill.getSize(); if (reg.IsNoRegister()) { // only increment stack offset. diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 7a61f39e64..b331cee33d 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -24,10 +24,10 @@ #include "arch/mips64/instruction_set_features_mips64.h" #include "base/arena_containers.h" #include "base/enums.h" +#include "base/globals.h" #include "base/macros.h" #include "base/stl_util_identity.h" #include "constants_mips64.h" -#include "globals.h" #include "heap_poisoning.h" #include "managed_register_mips64.h" #include "offsets.h" @@ -414,7 +414,7 @@ class Mips64ExceptionSlowPath { DISALLOW_COPY_AND_ASSIGN(Mips64ExceptionSlowPath); }; -class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> { +class Mips64Assembler final : public Assembler, public JNIMacroAssembler<PointerSize::k64> { public: using JNIBase = JNIMacroAssembler<PointerSize::k64>; @@ -439,8 +439,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer } } - size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } - DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + size_t CodeSize() const override { return Assembler::CodeSize(); } + DebugFrameOpCodeWriterForAssembler& cfi() override { return Assembler::cfi(); } // Emit Machine Instructions. 
void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt); @@ -863,6 +863,11 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Hadd_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Hadd_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void PcntB(VectorRegister wd, VectorRegister ws); + void PcntH(VectorRegister wd, VectorRegister ws); + void PcntW(VectorRegister wd, VectorRegister ws); + void PcntD(VectorRegister wd, VectorRegister ws); + // Helper for replicating floating point value in all destination elements. void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double); @@ -915,10 +920,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer } } - void Bind(Label* label) OVERRIDE { + void Bind(Label* label) override { Bind(down_cast<Mips64Label*>(label)); } - void Jump(Label* label ATTRIBUTE_UNUSED) OVERRIDE { + void Jump(Label* label ATTRIBUTE_UNUSED) override { UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS64"; } @@ -929,25 +934,25 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer using JNIBase::Jump; // Create a new label that can be used with Jump/Bind calls. - std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE { + std::unique_ptr<JNIMacroLabel> CreateLabel() override { LOG(FATAL) << "Not implemented on MIPS64"; UNREACHABLE(); } // Emit an unconditional jump to the label. - void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) override { LOG(FATAL) << "Not implemented on MIPS64"; UNREACHABLE(); } // Emit a conditional jump to the label by applying a unary condition test to the register. void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED, JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED, - ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE { + ManagedRegister test ATTRIBUTE_UNUSED) override { LOG(FATAL) << "Not implemented on MIPS64"; UNREACHABLE(); } // Code at this offset will serve as the target for the Jump call. - void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE { + void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) override { LOG(FATAL) << "Not implemented on MIPS64"; UNREACHABLE(); } @@ -1053,7 +1058,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // We permit `base` and `temp` to coincide (however, we check that neither is AT), // in which case the `base` register may be overwritten in the process. CHECK_NE(temp, AT); // Must not use AT as temp, so as not to overwrite the adjusted base. - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kStoreDoubleword)); GpuRegister reg; // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp` // to load and hold the value but we can use AT instead as AT hasn't been used yet. 
@@ -1122,7 +1127,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer GpuRegister base, int32_t offset, ImplicitNullChecker null_checker = NoImplicitNullChecker()) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kLoadDoubleword)); switch (type) { case kLoadSignedByte: @@ -1173,7 +1178,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer ImplicitNullChecker null_checker = NoImplicitNullChecker()) { int element_size_shift = -1; if (type != kLoadQuadword) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kLoadDoubleword)); } else { AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift); } @@ -1221,7 +1226,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // Must not use AT as `reg`, so as not to overwrite the value being stored // with the adjusted `base`. CHECK_NE(reg, AT); - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kStoreDoubleword)); switch (type) { case kStoreByte: @@ -1262,7 +1267,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer ImplicitNullChecker null_checker = NoImplicitNullChecker()) { int element_size_shift = -1; if (type != kStoreQuadword) { - AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword)); + AdjustBaseAndOffset(base, offset, /* is_doubleword= */ (type == kStoreDoubleword)); } else { AdjustBaseOffsetAndElementSizeShift(base, offset, element_size_shift); } @@ -1317,122 +1322,122 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void BuildFrame(size_t frame_size, ManagedRegister method_reg, ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + const ManagedRegisterEntrySpills& entry_spills) override; // Emit code that will remove an activation from the stack. void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, - bool may_suspend) OVERRIDE; + bool may_suspend) override; - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; // Store routines. 
- void Store(FrameOffset offs, ManagedRegister msrc, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister msrc) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) OVERRIDE; + void Store(FrameOffset offs, ManagedRegister msrc, size_t size) override; + void StoreRef(FrameOffset dest, ManagedRegister msrc) override; + void StoreRawPtr(FrameOffset dest, ManagedRegister msrc) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) OVERRIDE; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister mscratch) override; void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; - void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset64 thr_offs) override; void StoreSpanning(FrameOffset dest, ManagedRegister msrc, FrameOffset in_off, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; // Load routines. - void Load(ManagedRegister mdest, FrameOffset src, size_t size) OVERRIDE; + void Load(ManagedRegister mdest, FrameOffset src, size_t size) override; - void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister mdest, ThreadOffset64 src, size_t size) override; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) override; void LoadRef(ManagedRegister mdest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; + bool unpoison_reference) override; - void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtr(ManagedRegister mdest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister mdest, ThreadOffset64 offs) override; // Copying routines. 
- void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) OVERRIDE; + void Move(ManagedRegister mdest, ManagedRegister msrc, size_t size) override; void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, - ManagedRegister mscratch) OVERRIDE; + ManagedRegister mscratch) override; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister mscratch) override; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister mscratch, size_t size) override; void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, - ManagedRegister mscratch, size_t size) OVERRIDE; + ManagedRegister mscratch, size_t size) override; void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister mscratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister mscratch, size_t size) OVERRIDE; + ManagedRegister mscratch, size_t size) override; void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister mscratch, size_t size) OVERRIDE; + ManagedRegister mscratch, size_t size) override; - void MemoryBarrier(ManagedRegister) OVERRIDE; + void MemoryBarrier(ManagedRegister) override; // Sign extension. - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void SignExtend(ManagedRegister mreg, size_t size) override; // Zero extension. - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void ZeroExtend(ManagedRegister mreg, size_t size) override; // Exploit fast access in managed code to Thread::Current(). - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) OVERRIDE; + void GetCurrentThread(ManagedRegister tr) override; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister mscratch) override; // Set up out_reg to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. in_reg holds a possibly stale reference // that can be used to avoid loading the handle scope entry to see if the value is // null. void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; + ManagedRegister in_reg, bool null_allowed) override; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister - mscratch, bool null_allowed) OVERRIDE; + mscratch, bool null_allowed) override; // src holds a handle scope entry (Object**) load this into dst. - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. 
- void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; // Call to address held at [base+offset]. - void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) OVERRIDE; - void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) OVERRIDE; + void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) override; + void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) override; + void CallFromThread(ThreadOffset64 offset, ManagedRegister mscratch) override; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) OVERRIDE; + void ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) override; // Emit slow paths queued during assembly and promote short branches to long if needed. - void FinalizeCode() OVERRIDE; + void FinalizeCode() override; // Emit branches and finalize all instructions. - void FinalizeInstructions(const MemoryRegion& region); + void FinalizeInstructions(const MemoryRegion& region) override; // Returns the (always-)current location of a label (can be used in class CodeGeneratorMIPS64, // must be used instead of Mips64Label::GetPosition()). diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index b0e1d91c3f..499e8f4e15 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -41,28 +41,38 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, uint32_t, mips64::VectorRegister> { public: - typedef AssemblerTest<mips64::Mips64Assembler, - mips64::Mips64Label, - mips64::GpuRegister, - mips64::FpuRegister, - uint32_t, - mips64::VectorRegister> Base; + using Base = AssemblerTest<mips64::Mips64Assembler, + mips64::Mips64Label, + mips64::GpuRegister, + mips64::FpuRegister, + uint32_t, + mips64::VectorRegister>; + + // These tests were taking too long, so we hide the DriverStr() from AssemblerTest<> + // and reimplement it without the verification against `assembly_string`. b/73903608 + void DriverStr(const std::string& assembly_string ATTRIBUTE_UNUSED, + const std::string& test_name ATTRIBUTE_UNUSED) { + GetAssembler()->FinalizeCode(); + std::vector<uint8_t> data(GetAssembler()->CodeSize()); + MemoryRegion code(data.data(), data.size()); + GetAssembler()->FinalizeInstructions(code); + } AssemblerMIPS64Test() : instruction_set_features_(Mips64InstructionSetFeatures::FromVariant("default", nullptr)) {} protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "mips64"; } - std::string GetAssemblerCmdName() OVERRIDE { + std::string GetAssemblerCmdName() override { // We assemble and link for MIPS64R6. See GetAssemblerParameters() for details. return "gcc"; } - std::string GetAssemblerParameters() OVERRIDE { + std::string GetAssemblerParameters() override { // We assemble and link for MIPS64R6. 
The reason is that object files produced for MIPS64R6 // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative // branches in the .text section and so they require a relocation pass (there's a relocation @@ -70,7 +80,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return " -march=mips64r6 -mmsa -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; } - void Pad(std::vector<uint8_t>& data) OVERRIDE { + void Pad(std::vector<uint8_t>& data) override { // The GNU linker unconditionally pads the code segment with NOPs to a size that is a multiple // of 16 and there doesn't appear to be a way to suppress this padding. Our assembler doesn't // pad, so, in order for two assembler outputs to match, we need to match the padding as well. @@ -79,15 +89,15 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, data.insert(data.end(), pad_size, 0); } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mmips:isa64r6"; } - mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* allocator) OVERRIDE { + mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* allocator) override { return new (allocator) mips64::Mips64Assembler(allocator, instruction_set_features_.get()); } - void SetUpHelpers() OVERRIDE { + void SetUpHelpers() override { if (registers_.size() == 0) { registers_.push_back(new mips64::GpuRegister(mips64::ZERO)); registers_.push_back(new mips64::GpuRegister(mips64::AT)); @@ -223,35 +233,35 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, } } - void TearDown() OVERRIDE { + void TearDown() override { AssemblerTest::TearDown(); STLDeleteElements(&registers_); STLDeleteElements(&fp_registers_); STLDeleteElements(&vec_registers_); } - std::vector<mips64::Mips64Label> GetAddresses() { + std::vector<mips64::Mips64Label> GetAddresses() override { UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; UNREACHABLE(); } - std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE { + std::vector<mips64::GpuRegister*> GetRegisters() override { return registers_; } - std::vector<mips64::FpuRegister*> GetFPRegisters() OVERRIDE { + std::vector<mips64::FpuRegister*> GetFPRegisters() override { return fp_registers_; } - std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE { + std::vector<mips64::VectorRegister*> GetVectorRegisters() override { return vec_registers_; } - uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { + uint32_t CreateImmediate(int64_t imm_value) override { return imm_value; } - std::string GetSecondaryRegisterName(const mips64::GpuRegister& reg) OVERRIDE { + std::string GetSecondaryRegisterName(const mips64::GpuRegister& reg) override { CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); return secondary_register_names_[reg]; } @@ -842,99 +852,99 @@ TEST_F(AssemblerMIPS64Test, Bc1nez) { } TEST_F(AssemblerMIPS64Test, BareBc) { - BranchHelper(&mips64::Mips64Assembler::Bc, "Bc", /* is_bare */ true); + BranchHelper(&mips64::Mips64Assembler::Bc, "Bc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBalc) { - BranchHelper(&mips64::Mips64Assembler::Balc, "Balc", /* is_bare */ true); + BranchHelper(&mips64::Mips64Assembler::Balc, "Balc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBeqzc) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc", /* is_bare= */
true); } TEST_F(AssemblerMIPS64Test, BareBnezc) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBltzc) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBgezc) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBlezc) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBgtzc) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBeqc) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBnec) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBltc) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBgec) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBltuc) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBgeuc) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBc1eqz) { - BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz", /* is_bare */ true); + BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBc1nez) { - BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez", /* is_bare */ true); + BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBeqz) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqz, "Beqz", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqz, "Beqz", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBnez) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnez, "Bnez", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnez, "Bnez", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBltz) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltz, "Bltz", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltz, "Bltz", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBgez) { - 
BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgez, "Bgez", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgez, "Bgez", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBlez) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Blez, "Blez", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Blez, "Blez", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBgtz) { - BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtz, "Bgtz", /* is_bare */ true); + BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtz, "Bgtz", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBeq) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beq, "Beq", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beq, "Beq", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, BareBne) { - BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bne, "Bne", /* is_bare */ true); + BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bne, "Bne", /* is_bare= */ true); } TEST_F(AssemblerMIPS64Test, LongBeqc) { @@ -1242,7 +1252,7 @@ TEST_F(AssemblerMIPS64Test, Daui) { std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters(); std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters(); reg2_registers.erase(reg2_registers.begin()); // reg2 can't be ZERO, remove it. - std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true); + std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits= */ 16, /* as_uint= */ true); WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); std::ostringstream expected; for (mips64::GpuRegister* reg1 : reg1_registers) { @@ -3529,6 +3539,22 @@ TEST_F(AssemblerMIPS64Test, FillD) { DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); } +TEST_F(AssemblerMIPS64Test, PcntB) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntB, "pcnt.b ${reg1}, ${reg2}"), "pcnt.b"); +} + +TEST_F(AssemblerMIPS64Test, PcntH) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntH, "pcnt.h ${reg1}, ${reg2}"), "pcnt.h"); +} + +TEST_F(AssemblerMIPS64Test, PcntW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntW, "pcnt.w ${reg1}, ${reg2}"), "pcnt.w"); +} + +TEST_F(AssemblerMIPS64Test, PcntD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::PcntD, "pcnt.d ${reg1}, ${reg2}"), "pcnt.d"); +} + TEST_F(AssemblerMIPS64Test, LdiB) { DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); } diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index 310f23c287..41eb77c9ae 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -22,8 +22,8 @@ #include <android-base/logging.h> #include "arch/mips64/registers_mips64.h" +#include "base/globals.h" #include "base/macros.h" -#include "globals.h" namespace art { namespace mips64 { diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc index 42d061ec15..01cb6ddfe2 100644 --- a/compiler/utils/mips64/managed_register_mips64.cc +++ b/compiler/utils/mips64/managed_register_mips64.cc @@ -16,7 +16,7 @@ #include "managed_register_mips64.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace mips64 { diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index 3980199b1e..94166d32b7 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ 
b/compiler/utils/mips64/managed_register_mips64.h @@ -18,7 +18,6 @@ #define ART_COMPILER_UTILS_MIPS64_MANAGED_REGISTER_MIPS64_H_ #include "constants_mips64.h" -#include "debug/dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc index 8b72d7e61d..bbfeeee20f 100644 --- a/compiler/utils/mips64/managed_register_mips64_test.cc +++ b/compiler/utils/mips64/managed_register_mips64_test.cc @@ -15,7 +15,8 @@ */ #include "managed_register_mips64.h" -#include "globals.h" + +#include "base/globals.h" #include "gtest/gtest.h" namespace art { diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 1f9ad4242d..841ff1c58d 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -115,12 +115,11 @@ void* SwapSpace::Alloc(size_t size) { ? free_by_size_.end() : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() }); if (it != free_by_size_.end()) { - auto entry = it->free_by_start_entry; - SpaceChunk old_chunk = *entry; + SpaceChunk old_chunk = *it->free_by_start_entry; if (old_chunk.size == size) { RemoveChunk(it); } else { - // Try to avoid deallocating and allocating the std::set<> nodes. + // Avoid deallocating and allocating the std::set<> nodes. // This would be much simpler if we could use replace() from Boost.Bimap. // The free_by_start_ map contains disjoint intervals ordered by the `ptr`. @@ -128,23 +127,9 @@ void* SwapSpace::Alloc(size_t size) { it->free_by_start_entry->ptr += size; it->free_by_start_entry->size -= size; - // The free_by_size_ map is ordered by the `size` and then `free_by_start_entry->ptr`. - // Adjusting the `ptr` above does not change that ordering but decreasing `size` can - // push the node before the previous node(s). - if (it == free_by_size_.begin()) { - it->size -= size; - } else { - auto prev = it; - --prev; - FreeBySizeEntry new_value(old_chunk.size - size, entry); - if (free_by_size_.key_comp()(*prev, new_value)) { - it->size -= size; - } else { - // Changing in place would break the std::set<> ordering, we need to remove and insert. 
- free_by_size_.erase(it); - free_by_size_.insert(new_value); - } - } + auto node = free_by_size_.extract(it); + node.value().size -= size; + free_by_size_.insert(std::move(node)); } return old_chunk.ptr; } else { diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index ea160c8993..4b073bde0b 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -17,8 +17,8 @@ #include "assembler_x86.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -59,6 +59,98 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) { } } +uint8_t X86Assembler::EmitVexByteZero(bool is_two_byte) { + uint8_t vex_zero = 0xC0; + if (!is_two_byte) { + vex_zero |= 0xC4; + } else { + vex_zero |= 0xC5; + } + return vex_zero; +} + +uint8_t X86Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) { + // VEX Byte 1 + uint8_t vex_prefix = 0; + if (!r) { + vex_prefix |= 0x80; // VEX.R + } + if (!x) { + vex_prefix |= 0x40; // VEX.X + } + if (!b) { + vex_prefix |= 0x20; // VEX.B + } + + // VEX.mmmmm + switch (mmmmm) { + case 1: + // implied 0F leading opcode byte + vex_prefix |= 0x01; + break; + case 2: + // implied leading 0F 38 opcode byte + vex_prefix |= 0x02; + break; + case 3: + // implied leading 0F 3A opcode byte + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown opcode bytes"; + } + return vex_prefix; +} + +uint8_t X86Assembler::EmitVexByte2(bool w, int l, X86ManagedRegister operand, int pp) { + uint8_t vex_prefix = 0; + // VEX Byte 2 + if (w) { + vex_prefix |= 0x80; + } + // VEX.vvvv + if (operand.IsXmmRegister()) { + XmmRegister vvvv = operand.AsXmmRegister(); + int inverted_reg = 15 - static_cast<int>(vvvv); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } else if (operand.IsCpuRegister()) { + Register vvvv = operand.AsCpuRegister(); + int inverted_reg = 15 - static_cast<int>(vvvv); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } + + // VEX.L + if (l == 256) { + vex_prefix |= 0x04; + } + + // VEX.pp + switch (pp) { + case 0: + // SIMD Prefix - None + vex_prefix |= 0x00; + break; + case 1: + // SIMD Prefix - 66 + vex_prefix |= 0x01; + break; + case 2: + // SIMD Prefix - F3 + vex_prefix |= 0x02; + break; + case 3: + // SIMD Prefix - F2 + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown SIMD Prefix"; + } + + return vex_prefix; +} + void X86Assembler::call(Register reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xFF); @@ -179,6 +271,60 @@ void X86Assembler::movntl(const Address& dst, Register src) { EmitOperand(src, dst); } +void X86Assembler::blsi(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(dst), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(3, src); +} + +void X86Assembler::blsmsk(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false,
/*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(dst), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(2, src); +} + +void X86Assembler::blsr(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(dst), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(1, src); +} + void X86Assembler::bswapl(Register dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); @@ -913,6 +1059,78 @@ void X86Assembler::psubq(XmmRegister dst, XmmRegister src) { } +void X86Assembler::paddusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xEC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xDD); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xED); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD9); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE9); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -1195,6 +1413,25 @@ void X86Assembler::pand(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst, src); } +void X86Assembler::andn(Register dst, Register src1, Register src2) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + /*b=*/ false, + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ false, + /*l=*/ 128, + X86ManagedRegister::FromCpuRegister(src1), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + // Opcode field + EmitUint8(0xF2); + EmitRegisterOperand(dst, src2); +} + void X86Assembler::andnpd(XmmRegister dst, XmmRegister 
src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); @@ -1914,7 +2151,7 @@ void X86Assembler::cmpb(const Address& address, const Immediate& imm) { void X86Assembler::cmpw(const Address& address, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); - EmitComplex(7, address, imm, /* is_16_op */ true); + EmitComplex(7, address, imm, /* is_16_op= */ true); } @@ -2104,7 +2341,7 @@ void X86Assembler::addw(const Address& address, const Immediate& imm) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); CHECK(imm.is_uint16() || imm.is_int16()) << imm.value(); EmitUint8(0x66); - EmitComplex(0, address, imm, /* is_16_op */ true); + EmitComplex(0, address, imm, /* is_16_op= */ true); } diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index a085677083..275e5c1234 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -23,9 +23,9 @@ #include "base/array_ref.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "base/globals.h" #include "base/macros.h" #include "constants_x86.h" -#include "globals.h" #include "heap_poisoning.h" #include "managed_register_x86.h" #include "offsets.h" @@ -306,7 +306,7 @@ class ConstantArea { ArenaVector<int32_t> buffer_; }; -class X86Assembler FINAL : public Assembler { +class X86Assembler final : public Assembler { public: explicit X86Assembler(ArenaAllocator* allocator) : Assembler(allocator), constant_area_(allocator) {} @@ -337,6 +337,10 @@ class X86Assembler FINAL : public Assembler { void movntl(const Address& dst, Register src); + void blsi(Register dst, Register src); // no addr variant (for now) + void blsmsk(Register dst, Register src); // no addr variant (for now) + void blsr(Register dst, Register src); // no addr variant (for now) + void bswapl(Register dst); void bsfl(Register dst, Register src); @@ -449,6 +453,15 @@ class X86Assembler FINAL : public Assembler { void paddq(XmmRegister dst, XmmRegister src); void psubq(XmmRegister dst, XmmRegister src); + void paddusb(XmmRegister dst, XmmRegister src); + void paddsb(XmmRegister dst, XmmRegister src); + void paddusw(XmmRegister dst, XmmRegister src); + void paddsw(XmmRegister dst, XmmRegister src); + void psubusb(XmmRegister dst, XmmRegister src); + void psubsb(XmmRegister dst, XmmRegister src); + void psubusw(XmmRegister dst, XmmRegister src); + void psubsw(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, Register src); void cvtsi2sd(XmmRegister dst, Register src); @@ -491,6 +504,7 @@ class X86Assembler FINAL : public Assembler { void andps(XmmRegister dst, const Address& src); void pand(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void andn(Register dst, Register src1, Register src2); // no addr variant (for now) void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void andnps(XmmRegister dst, XmmRegister src); void pandn(XmmRegister dst, XmmRegister src); @@ -749,8 +763,8 @@ class X86Assembler FINAL : public Assembler { // int PreferredLoopAlignment() { return 16; } void Align(int alignment, int offset); - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { + void Bind(Label* label) override; + void Jump(Label* label) override { jmp(label); } void Bind(NearLabel* label); @@ -828,6 +842,11 @@ class X86Assembler FINAL : public Assembler { void EmitGenericShift(int rm, const Operand& operand, const Immediate& imm); void EmitGenericShift(int rm, const Operand& operand, Register shifter); + // Emit a 3
byte VEX Prefix + uint8_t EmitVexByteZero(bool is_two_byte); + uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm); + uint8_t EmitVexByte2(bool w , int l , X86ManagedRegister operand, int pp); + ConstantArea constant_area_; DISALLOW_COPY_AND_ASSIGN(X86Assembler); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 2fd1b27182..1d8bfe7fa7 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -17,13 +17,14 @@ #include "assembler_x86.h" #include "base/arena_allocator.h" +#include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "utils/assembler_test.h" namespace art { TEST(AssemblerX86, CreateBuffer) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); AssemblerBuffer buffer(&allocator); AssemblerBuffer::EnsureCapacity ensured(&buffer); @@ -43,26 +44,26 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, x86::XmmRegister, x86::Immediate> { public: - typedef AssemblerTest<x86::X86Assembler, - x86::Address, - x86::Register, - x86::XmmRegister, - x86::Immediate> Base; + using Base = AssemblerTest<x86::X86Assembler, + x86::Address, + x86::Register, + x86::XmmRegister, + x86::Immediate>; protected: - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "x86"; } - std::string GetAssemblerParameters() OVERRIDE { + std::string GetAssemblerParameters() override { return " --32"; } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mi386 --no-show-raw-insn"; } - void SetUpHelpers() OVERRIDE { + void SetUpHelpers() override { if (addresses_singleton_.size() == 0) { // One addressing mode to test the repeat drivers. 
addresses_singleton_.push_back(x86::Address(x86::EAX, x86::EBX, x86::TIMES_1, 2)); @@ -117,25 +118,25 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, } } - void TearDown() OVERRIDE { + void TearDown() override { AssemblerTest::TearDown(); STLDeleteElements(&registers_); STLDeleteElements(&fp_registers_); } - std::vector<x86::Address> GetAddresses() OVERRIDE { + std::vector<x86::Address> GetAddresses() override { return addresses_; } - std::vector<x86::Register*> GetRegisters() OVERRIDE { + std::vector<x86::Register*> GetRegisters() override { return registers_; } - std::vector<x86::XmmRegister*> GetFPRegisters() OVERRIDE { + std::vector<x86::XmmRegister*> GetFPRegisters() override { return fp_registers_; } - x86::Immediate CreateImmediate(int64_t imm_value) OVERRIDE { + x86::Immediate CreateImmediate(int64_t imm_value) override { return x86::Immediate(imm_value); } @@ -348,6 +349,18 @@ TEST_F(AssemblerX86Test, RepMovsw) { DriverStr(expected, "rep_movsw"); } +TEST_F(AssemblerX86Test, Blsmask) { + DriverStr(RepeatRR(&x86::X86Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}"), "blsmsk"); +} + +TEST_F(AssemblerX86Test, Blsi) { + DriverStr(RepeatRR(&x86::X86Assembler::blsi, "blsi %{reg2}, %{reg1}"), "blsi"); +} + +TEST_F(AssemblerX86Test, Blsr) { + DriverStr(RepeatRR(&x86::X86Assembler::blsr, "blsr %{reg2}, %{reg1}"), "blsr"); +} + TEST_F(AssemblerX86Test, Bsfl) { DriverStr(RepeatRR(&x86::X86Assembler::bsfl, "bsfl %{reg2}, %{reg1}"), "bsfl"); } @@ -600,6 +613,38 @@ TEST_F(AssemblerX86Test, PSubQ) { DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); } +TEST_F(AssemblerX86Test, PAddUSB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb"); +} + +TEST_F(AssemblerX86Test, PAddSB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb"); +} + +TEST_F(AssemblerX86Test, PAddUSW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw"); +} + +TEST_F(AssemblerX86Test, PAddSW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw"); +} + +TEST_F(AssemblerX86Test, PSubUSB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb"); +} + +TEST_F(AssemblerX86Test, PSubSB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb"); +} + +TEST_F(AssemblerX86Test, PSubUSW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw"); +} + +TEST_F(AssemblerX86Test, PSubSW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw"); +} + TEST_F(AssemblerX86Test, XorPD) { DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } @@ -624,6 +669,10 @@ TEST_F(AssemblerX86Test, PAnd) { DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); } +TEST_F(AssemblerX86Test, Andn) { + DriverStr(RepeatRRR(&x86::X86Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}"), "andn"); +} + TEST_F(AssemblerX86Test, AndnPD) { DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); } diff --git a/compiler/utils/x86/constants_x86.h b/compiler/utils/x86/constants_x86.h index 2e03b9fc3c..a782b16c6b 100644 --- a/compiler/utils/x86/constants_x86.h +++ b/compiler/utils/x86/constants_x86.h @@ -22,8 +22,8 @@ #include <android-base/logging.h> #include "arch/x86/registers_x86.h" +#include "base/globals.h" #include "base/macros.h" -#include "globals.h" namespace art {
namespace x86 { @@ -40,21 +40,6 @@ enum ByteRegister { kNoByteRegister = -1 // Signals an illegal register. }; - -enum XmmRegister { - XMM0 = 0, - XMM1 = 1, - XMM2 = 2, - XMM3 = 3, - XMM4 = 4, - XMM5 = 5, - XMM6 = 6, - XMM7 = 7, - kNumberOfXmmRegisters = 8, - kNoXmmRegister = -1 // Signals an illegal register. -}; -std::ostream& operator<<(std::ostream& os, const XmmRegister& reg); - enum X87Register { ST0 = 0, ST1 = 1, diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index 7e29c4aa26..540d72b28d 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -25,10 +25,10 @@ namespace art { namespace x86 { // Slowpath entered when Thread::Current()->_exception is non-null -class X86ExceptionSlowPath FINAL : public SlowPath { +class X86ExceptionSlowPath final : public SlowPath { public: explicit X86ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; + void Emit(Assembler *sp_asm) override; private: const size_t stack_adjust_; }; @@ -67,8 +67,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size, cfi().AdjustCFAOffset(kFramePointerSize); DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size); - for (size_t i = 0; i < entry_spills.size(); ++i) { - ManagedRegisterSpill spill = entry_spills.at(i); + for (const ManagedRegisterSpill& spill : entry_spills) { if (spill.AsX86().IsCpuRegister()) { int offset = frame_size + spill.getSpillOffset(); __ movl(Address(ESP, offset), spill.AsX86().AsCpuRegister()); diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h index 99219d8f88..a701080b4f 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.h +++ b/compiler/utils/x86/jni_macro_assembler_x86.h @@ -32,7 +32,7 @@ namespace x86 { class X86JNIMacroLabel; -class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> { +class X86JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> { public: explicit X86JNIMacroAssembler(ArenaAllocator* allocator) : JNIMacroAssemblerFwd(allocator) {} virtual ~X86JNIMacroAssembler() {} @@ -45,130 +45,130 @@ class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, Poi void BuildFrame(size_t frame_size, ManagedRegister method_reg, ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + const ManagedRegisterEntrySpills& entry_spills) override; // Emit code that will remove an activation from the stack void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, - bool may_suspend) OVERRIDE; + bool may_suspend) override; - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; // Store routines - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void StoreRef(FrameOffset dest, ManagedRegister src) override; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + void 
StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override; void StoreStackOffsetToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; - void StoreStackPointerToThread(ThreadOffset32 thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset32 thr_offs) override; void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; // Load routines - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + void Load(ManagedRegister dest, FrameOffset src, size_t size) override; - void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister dest, ThreadOffset32 src, size_t size) override; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) override; void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; + bool unpoison_reference) override; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset32 offs) override; // Copying routines - void Move(ManagedRegister dest, ManagedRegister src, size_t size) OVERRIDE; + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset32 thr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; void CopyRawPtrToThread(ThreadOffset32 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; + override; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override; void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; + ManagedRegister scratch, size_t size) override; void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, - ManagedRegister scratch, size_t size) OVERRIDE; + ManagedRegister scratch, size_t size) override; - void MemoryBarrier(ManagedRegister) OVERRIDE; + void MemoryBarrier(ManagedRegister) override; // Sign extension - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void SignExtend(ManagedRegister mreg, size_t size) override; // Zero extension - void ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void ZeroExtend(ManagedRegister mreg, size_t size) override; // Exploit fast access in managed code to Thread::Current() - void 
GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + void GetCurrentThread(ManagedRegister tr) override; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override; // Set up out_reg to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. in_reg holds a possibly stale reference // that can be used to avoid loading the handle scope entry to see if the value is // null. void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, - ManagedRegister in_reg, bool null_allowed) OVERRIDE; + ManagedRegister in_reg, bool null_allowed) override; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, - ManagedRegister scratch, bool null_allowed) OVERRIDE; + ManagedRegister scratch, bool null_allowed) override; // src holds a handle scope entry (Object**) load this into dst - void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; // Call to address held at [base+offset] - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) OVERRIDE; + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override; + void CallFromThread(ThreadOffset32 offset, ManagedRegister scratch) override; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override; // Create a new label that can be used with Jump/Bind calls. - std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + std::unique_ptr<JNIMacroLabel> CreateLabel() override; // Emit an unconditional jump to the label. - void Jump(JNIMacroLabel* label) OVERRIDE; + void Jump(JNIMacroLabel* label) override; // Emit a conditional jump to the label by applying a unary condition test to the register. - void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override; // Code at this offset will serve as the target for the Jump call. 
- void Bind(JNIMacroLabel* label) OVERRIDE; + void Bind(JNIMacroLabel* label) override; private: DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler); }; -class X86JNIMacroLabel FINAL +class X86JNIMacroLabel final : public JNIMacroLabelCommon<X86JNIMacroLabel, art::Label, InstructionSet::kX86> { diff --git a/compiler/utils/x86/managed_register_x86.cc b/compiler/utils/x86/managed_register_x86.cc index 69e6fce5c4..cc7cedf93e 100644 --- a/compiler/utils/x86/managed_register_x86.cc +++ b/compiler/utils/x86/managed_register_x86.cc @@ -16,7 +16,7 @@ #include "managed_register_x86.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace x86 { diff --git a/compiler/utils/x86/managed_register_x86.h b/compiler/utils/x86/managed_register_x86.h index c0c2b650e9..8810bfa2f1 100644 --- a/compiler/utils/x86/managed_register_x86.h +++ b/compiler/utils/x86/managed_register_x86.h @@ -18,7 +18,6 @@ #define ART_COMPILER_UTILS_X86_MANAGED_REGISTER_X86_H_ #include "constants_x86.h" -#include "debug/dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/x86/managed_register_x86_test.cc b/compiler/utils/x86/managed_register_x86_test.cc index 0ed5c36fe4..28af5313c7 100644 --- a/compiler/utils/x86/managed_register_x86_test.cc +++ b/compiler/utils/x86/managed_register_x86_test.cc @@ -16,7 +16,7 @@ #include "managed_register_x86.h" -#include "globals.h" +#include "base/globals.h" #include "gtest/gtest.h" namespace art { diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index ff5a357c5e..c118bc6fbe 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -17,8 +17,8 @@ #include "assembler_x86_64.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -64,6 +64,99 @@ std::ostream& operator<<(std::ostream& os, const Address& addr) { } } +uint8_t X86_64Assembler::EmitVexByteZero(bool is_two_byte) { + uint8_t vex_zero = 0xC0; + if (!is_two_byte) { + vex_zero |= 0xC4; + } else { + vex_zero |= 0xC5; + } + return vex_zero; +} + +uint8_t X86_64Assembler::EmitVexByte1(bool r, bool x, bool b, int mmmmm) { + // VEX Byte 1 + uint8_t vex_prefix = 0; + if (!r) { + vex_prefix |= 0x80; // VEX.R + } + if (!x) { + vex_prefix |= 0x40; // VEX.X + } + if (!b) { + vex_prefix |= 0x20; // VEX.B + } + + // VEX.mmmmm + switch (mmmmm) { + case 1: + // implied 0F leading opcode byte + vex_prefix |= 0x01; + break; + case 2: + // implied leading 0F 38 opcode byte + vex_prefix |= 0x02; + break; + case 3: + // implied leading OF 3A opcode byte + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown opcode bytes"; + } + + return vex_prefix; +} + +uint8_t X86_64Assembler::EmitVexByte2(bool w, int l, X86_64ManagedRegister operand, int pp) { + // VEX Byte 2 + uint8_t vex_prefix = 0; + if (w) { + vex_prefix |= 0x80; + } + // VEX.vvvv + if (operand.IsXmmRegister()) { + XmmRegister vvvv = operand.AsXmmRegister(); + int inverted_reg = 15-static_cast<int>(vvvv.AsFloatRegister()); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } else if (operand.IsCpuRegister()) { + CpuRegister vvvv = operand.AsCpuRegister(); + int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister()); + uint8_t reg = static_cast<uint8_t>(inverted_reg); + vex_prefix |= ((reg & 0x0F) << 3); + } + + // VEX.L + if (l == 256) { + vex_prefix |= 0x04; + } + + // VEX.pp + 
switch (pp) { + case 0: + // SIMD Pefix - None + vex_prefix |= 0x00; + break; + case 1: + // SIMD Prefix - 66 + vex_prefix |= 0x01; + break; + case 2: + // SIMD Prefix - F3 + vex_prefix |= 0x02; + break; + case 3: + // SIMD Prefix - F2 + vex_prefix |= 0x03; + break; + default: + LOG(FATAL) << "unknown SIMD Prefix"; + } + + return vex_prefix; +} + void X86_64Assembler::call(CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex32(reg); @@ -1011,6 +1104,86 @@ void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDC); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xEC); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xDD); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xED); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) { cvtsi2ss(dst, src, false); } @@ -1403,6 +1576,25 @@ void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(dst.NeedsRex(), + /*x=*/ false, + src2.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + // Opcode field + EmitUint8(0xF2); + EmitRegisterOperand(dst.LowBits(), src2.LowBits()); +} + void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) { 
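// Illustrative sketch, not ART code: the new EmitVexByteZero/1/2 helpers above assemble
// the three-byte VEX prefix used by the BMI1 emitters (andn, blsi, blsmsk, blsr). The
// helpers take REX-style r/x/b flags and the extra source register directly and store
// them inverted, as the VEX format requires. Recomputing the bytes with the same bit
// layout for the Intel-syntax instruction `andn rax, rcx, rdx` (dst = rax,
// VEX.vvvv = rcx, r/m = rdx, W = 1, mmmmm = 0F 38, no SIMD prefix):
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t byte0 = 0xC4;       // Three-byte VEX escape (is_two_byte == false).
  uint8_t byte1 = 0;
  byte1 |= 0x80;                    // VEX.R stored inverted: rax needs no REX.R.
  byte1 |= 0x40;                    // VEX.X stored inverted: no index register.
  byte1 |= 0x20;                    // VEX.B stored inverted: rdx needs no REX.B.
  byte1 |= 0x02;                    // mmmmm = 2, the implied 0F 38 leading bytes.
  uint8_t byte2 = 0;
  byte2 |= 0x80;                    // VEX.W = 1 for the 64-bit form.
  byte2 |= ((15 - 1) & 0x0F) << 3;  // VEX.vvvv = one's complement of rcx (register 1).
                                    // VEX.L and pp stay 0 (128-bit, no SIMD prefix).
  assert(byte0 == 0xC4 && byte1 == 0xE2 && byte2 == 0xF0);
  // Together with opcode 0xF2 and ModRM 0xC2 (reg = rax, rm = rdx), the emitted
  // sequence is C4 E2 F0 F2 C2, which disassembles to `andn rax, rcx, rdx`.
  return 0;
}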
AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -2199,7 +2391,7 @@ void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) { CHECK(imm.is_int32()); EmitOperandSizeOverride(); EmitOptionalRex32(address); - EmitComplex(7, address, imm, /* is_16_op */ true); + EmitComplex(7, address, imm, /* is_16_op= */ true); } @@ -2613,7 +2805,7 @@ void X86_64Assembler::addw(const Address& address, const Immediate& imm) { CHECK(imm.is_uint16() || imm.is_int16()) << imm.value(); EmitUint8(0x66); EmitOptionalRex32(address); - EmitComplex(0, address, imm, /* is_16_op */ true); + EmitComplex(0, address, imm, /* is_16_op= */ true); } @@ -3180,6 +3372,60 @@ void X86_64Assembler::setcc(Condition condition, CpuRegister dst) { EmitUint8(0xC0 + dst.LowBits()); } +void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + src.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(3, src.LowBits()); +} + +void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + src.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(2, src.LowBits()); +} + +void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + uint8_t byte_zero = EmitVexByteZero(/*is_two_byte=*/ false); + uint8_t byte_one = EmitVexByte1(/*r=*/ false, + /*x=*/ false, + src.NeedsRex(), + /*mmmmm=*/ 2); + uint8_t byte_two = EmitVexByte2(/*w=*/ true, + /*l=*/ 128, + X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), + /*pp=*/ 0); + EmitUint8(byte_zero); + EmitUint8(byte_one); + EmitUint8(byte_two); + EmitUint8(0xF3); + EmitRegisterOperand(1, src.LowBits()); +} + void X86_64Assembler::bswapl(CpuRegister dst) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitOptionalRex(false, false, false, false, dst.NeedsRex()); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 7a5fdb502f..ff13ea3293 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -22,9 +22,9 @@ #include "base/arena_containers.h" #include "base/array_ref.h" #include "base/bit_utils.h" +#include "base/globals.h" #include "base/macros.h" #include "constants_x86_64.h" -#include "globals.h" #include "heap_poisoning.h" #include "managed_register_x86_64.h" #include "offsets.h" @@ -351,7 +351,7 @@ class NearLabel : private Label { }; -class X86_64Assembler FINAL : public Assembler { +class X86_64Assembler final : public Assembler { public: explicit X86_64Assembler(ArenaAllocator* allocator) : Assembler(allocator), constant_area_(allocator) {} @@ -485,6 +485,15 @@ class X86_64Assembler FINAL : public Assembler { void paddq(XmmRegister dst, XmmRegister src); void psubq(XmmRegister dst, XmmRegister src); + 
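// Illustrative sketch, not ART code: scalar equivalents of the BMI1 instructions whose
// emitters appear above (andn, blsi, blsmsk, blsr), useful as a reference when reading
// the generated code or the disassembler-driven tests. Helper names are made up.
#include <cstdint>

constexpr uint64_t Andn(uint64_t src1, uint64_t src2) { return ~src1 & src2; }  // andn
constexpr uint64_t Blsi(uint64_t x) { return x & (~x + 1); }   // isolate lowest set bit
constexpr uint64_t Blsmsk(uint64_t x) { return x ^ (x - 1); }  // mask up to lowest set bit
constexpr uint64_t Blsr(uint64_t x) { return x & (x - 1); }    // clear lowest set bit

static_assert(Andn(0b1100, 0b1010) == 0b0010, "src2 with src1's bits cleared");
static_assert(Blsi(0b101100) == 0b000100, "only the lowest set bit survives");
static_assert(Blsmsk(0b101100) == 0b000111, "ones up to and including the lowest set bit");
static_assert(Blsr(0b101100) == 0b101000, "lowest set bit removed");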
void paddusb(XmmRegister dst, XmmRegister src); + void paddsb(XmmRegister dst, XmmRegister src); + void paddusw(XmmRegister dst, XmmRegister src); + void paddsw(XmmRegister dst, XmmRegister src); + void psubusb(XmmRegister dst, XmmRegister src); + void psubsb(XmmRegister dst, XmmRegister src); + void psubusw(XmmRegister dst, XmmRegister src); + void psubsw(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version. void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit); void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit); @@ -534,6 +543,7 @@ class X86_64Assembler FINAL : public Assembler { void andps(XmmRegister dst, XmmRegister src); // no addr variant (for now) void pand(XmmRegister dst, XmmRegister src); + void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2); void andnpd(XmmRegister dst, XmmRegister src); // no addr variant (for now) void andnps(XmmRegister dst, XmmRegister src); void pandn(XmmRegister dst, XmmRegister src); @@ -787,6 +797,10 @@ class X86_64Assembler FINAL : public Assembler { void bsfq(CpuRegister dst, CpuRegister src); void bsfq(CpuRegister dst, const Address& src); + void blsi(CpuRegister dst, CpuRegister src); // no addr variant (for now) + void blsmsk(CpuRegister dst, CpuRegister src); // no addr variant (for now) + void blsr(CpuRegister dst, CpuRegister src); // no addr variant (for now) + void bsrl(CpuRegister dst, CpuRegister src); void bsrl(CpuRegister dst, const Address& src); void bsrq(CpuRegister dst, CpuRegister src); @@ -835,8 +849,8 @@ class X86_64Assembler FINAL : public Assembler { // int PreferredLoopAlignment() { return 16; } void Align(int alignment, int offset); - void Bind(Label* label) OVERRIDE; - void Jump(Label* label) OVERRIDE { + void Bind(Label* label) override; + void Jump(Label* label) override { jmp(label); } void Bind(NearLabel* label); @@ -942,6 +956,11 @@ class X86_64Assembler FINAL : public Assembler { void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src); void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand); + // Emit a 3 byte VEX Prefix + uint8_t EmitVexByteZero(bool is_two_byte); + uint8_t EmitVexByte1(bool r, bool x, bool b, int mmmmm); + uint8_t EmitVexByte2(bool w , int l , X86_64ManagedRegister operand, int pp); + ConstantArea constant_area_; DISALLOW_COPY_AND_ASSIGN(X86_64Assembler); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 6b1e53c35a..461f028d9a 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -21,6 +21,7 @@ #include <random> #include "base/bit_utils.h" +#include "base/malloc_arena_pool.h" #include "base/stl_util.h" #include "jni_macro_assembler_x86_64.h" #include "utils/assembler_test.h" @@ -29,7 +30,7 @@ namespace art { TEST(AssemblerX86_64, CreateBuffer) { - ArenaPool pool; + MallocArenaPool pool; ArenaAllocator allocator(&pool); AssemblerBuffer buffer(&allocator); AssemblerBuffer::EnsureCapacity ensured(&buffer); @@ -136,23 +137,23 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, x86_64::XmmRegister, x86_64::Immediate> { public: - typedef AssemblerTest<x86_64::X86_64Assembler, - x86_64::Address, - x86_64::CpuRegister, - x86_64::XmmRegister, - x86_64::Immediate> Base; + using Base = AssemblerTest<x86_64::X86_64Assembler, + x86_64::Address, + x86_64::CpuRegister, + x86_64::XmmRegister, + x86_64::Immediate>; protected: 
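// Illustrative, standalone note (not part of the commit): the test fixture above swaps a
// `typedef` for an alias declaration. Both spell the same type; the `using` form reads
// left-to-right and, unlike `typedef`, can also be templated, which is the usual reason
// for preferring it in new code (stated as general C++ practice, not taken from the diff).
#include <map>
#include <string>
#include <type_traits>

typedef std::map<std::string, int> CountMapOld;
using CountMap = std::map<std::string, int>;

static_assert(std::is_same<CountMapOld, CountMap>::value, "identical types");

template <typename V>
using StringMap = std::map<std::string, V>;  // not expressible with a single typedef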
// Get the typically used name for this architecture, e.g., aarch64, x86-64, ... - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "x86_64"; } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn"; } - void SetUpHelpers() OVERRIDE { + void SetUpHelpers() override { if (addresses_singleton_.size() == 0) { // One addressing mode to test the repeat drivers. addresses_singleton_.push_back( @@ -290,39 +291,39 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, } } - void TearDown() OVERRIDE { + void TearDown() override { AssemblerTest::TearDown(); STLDeleteElements(®isters_); STLDeleteElements(&fp_registers_); } - std::vector<x86_64::Address> GetAddresses() { + std::vector<x86_64::Address> GetAddresses() override { return addresses_; } - std::vector<x86_64::CpuRegister*> GetRegisters() OVERRIDE { + std::vector<x86_64::CpuRegister*> GetRegisters() override { return registers_; } - std::vector<x86_64::XmmRegister*> GetFPRegisters() OVERRIDE { + std::vector<x86_64::XmmRegister*> GetFPRegisters() override { return fp_registers_; } - x86_64::Immediate CreateImmediate(int64_t imm_value) OVERRIDE { + x86_64::Immediate CreateImmediate(int64_t imm_value) override { return x86_64::Immediate(imm_value); } - std::string GetSecondaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + std::string GetSecondaryRegisterName(const x86_64::CpuRegister& reg) override { CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); return secondary_register_names_[reg]; } - std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + std::string GetTertiaryRegisterName(const x86_64::CpuRegister& reg) override { CHECK(tertiary_register_names_.find(reg) != tertiary_register_names_.end()); return tertiary_register_names_[reg]; } - std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) OVERRIDE { + std::string GetQuaternaryRegisterName(const x86_64::CpuRegister& reg) override { CHECK(quaternary_register_names_.find(reg) != quaternary_register_names_.end()); return quaternary_register_names_[reg]; } @@ -1282,6 +1283,38 @@ TEST_F(AssemblerX86_64Test, Psubq) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); } +TEST_F(AssemblerX86_64Test, Paddusb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusb, "paddusb %{reg2}, %{reg1}"), "paddusb"); +} + +TEST_F(AssemblerX86_64Test, Paddsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsb, "paddsb %{reg2}, %{reg1}"), "paddsb"); +} + +TEST_F(AssemblerX86_64Test, Paddusw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddusw, "paddusw %{reg2}, %{reg1}"), "paddusw"); +} + +TEST_F(AssemblerX86_64Test, Paddsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddsw, "paddsw %{reg2}, %{reg1}"), "paddsw"); +} + +TEST_F(AssemblerX86_64Test, Psubusb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusb, "psubusb %{reg2}, %{reg1}"), "psubusb"); +} + +TEST_F(AssemblerX86_64Test, Psubsb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsb, "psubsb %{reg2}, %{reg1}"), "psubsb"); +} + +TEST_F(AssemblerX86_64Test, Psubusw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubusw, "psubusw %{reg2}, %{reg1}"), "psubusw"); +} + +TEST_F(AssemblerX86_64Test, Psubsw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubsw, "psubsw %{reg2}, %{reg1}"), "psubsw"); +} + 
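// Illustrative sketch, not ART code: the Paddusb..Psubsw tests above exercise the newly
// added packed saturating arithmetic. Per 8-bit lane (the 16-bit forms are analogous,
// and the real instructions apply this to every lane of the 128-bit register), the
// reference behavior is:
#include <algorithm>
#include <cstdint>

inline uint8_t AddUnsignedSat8(uint8_t a, uint8_t b) {   // one lane of paddusb
  return static_cast<uint8_t>(std::min(int{a} + int{b}, 255));
}
inline int8_t AddSignedSat8(int8_t a, int8_t b) {        // one lane of paddsb
  return static_cast<int8_t>(std::max(-128, std::min(int{a} + int{b}, 127)));
}
inline uint8_t SubUnsignedSat8(uint8_t a, uint8_t b) {   // one lane of psubusb
  return a > b ? static_cast<uint8_t>(a - b) : 0;
}
// e.g. AddUnsignedSat8(250, 10) == 255, SubUnsignedSat8(3, 10) == 0, and
// AddSignedSat8(120, 20) == 127 instead of wrapping around to -116.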
TEST_F(AssemblerX86_64Test, Cvtsi2ss) { DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss"); } @@ -1381,7 +1414,9 @@ TEST_F(AssemblerX86_64Test, Andpd) { TEST_F(AssemblerX86_64Test, Pand) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand"); } - +TEST_F(AssemblerX86_64Test, Andn) { + DriverStr(RepeatRRR(&x86_64::X86_64Assembler::andn, "andn %{reg3}, %{reg2}, %{reg1}"), "andn"); +} TEST_F(AssemblerX86_64Test, andnpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd"); } @@ -1752,6 +1787,18 @@ TEST_F(AssemblerX86_64Test, RetAndLeave) { DriverFn(&ret_and_leave_fn, "retleave"); } +TEST_F(AssemblerX86_64Test, Blsmask) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::blsmsk, "blsmsk %{reg2}, %{reg1}"), "blsmsk"); +} + +TEST_F(AssemblerX86_64Test, Blsi) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::blsi, "blsi %{reg2}, %{reg1}"), "blsi"); +} + +TEST_F(AssemblerX86_64Test, Blsr) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::blsr, "blsr %{reg2}, %{reg1}"), "blsr"); +} + TEST_F(AssemblerX86_64Test, Bswapl) { DriverStr(Repeatr(&x86_64::X86_64Assembler::bswapl, "bswap %{reg}"), "bswapl"); } @@ -1969,11 +2016,11 @@ class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64J protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... - std::string GetArchitectureString() OVERRIDE { + std::string GetArchitectureString() override { return "x86_64"; } - std::string GetDisassembleParameters() OVERRIDE { + std::string GetDisassembleParameters() override { return " -D -bbinary -mi386:x86-64 -Mx86-64,addr64,data32 --no-show-raw-insn"; } @@ -2047,7 +2094,7 @@ std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_tes ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs); size_t frame_size = 10 * kStackAlignment; - assembler->RemoveFrame(frame_size, spill_regs, /* may_suspend */ true); + assembler->RemoveFrame(frame_size, spill_regs, /* may_suspend= */ true); // Construct assembly text counterpart. 
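// Illustrative note, not part of the commit: several hunks in these tests and in the JNI
// macro assembler change boolean argument comments from `/* name */` to `/* name= */`.
// The trailing '=' is the form that argument-comment tooling (for example clang-tidy's
// bugprone-argument-comment check) can validate against the callee's parameter name;
// that motivation is an assumption here, the diff itself only shows the style change.
// Hypothetical helper to show the shape:
inline void EmitFrameOp(int frame_size, bool may_suspend) {
  (void)frame_size;
  (void)may_suspend;
}

inline void Caller() {
  EmitFrameOp(64, /* may_suspend= */ true);  // comment names the parameter explicitly
}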
std::ostringstream str; diff --git a/compiler/utils/x86_64/constants_x86_64.h b/compiler/utils/x86_64/constants_x86_64.h index 2af3e7be16..b02e246842 100644 --- a/compiler/utils/x86_64/constants_x86_64.h +++ b/compiler/utils/x86_64/constants_x86_64.h @@ -22,8 +22,8 @@ #include <android-base/logging.h> #include "arch/x86_64/registers_x86_64.h" +#include "base/globals.h" #include "base/macros.h" -#include "globals.h" namespace art { namespace x86_64 { diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 5766f9d44b..5924a8bd08 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -17,8 +17,8 @@ #include "jni_macro_assembler_x86_64.h" #include "base/casts.h" +#include "base/memory_region.h" #include "entrypoints/quick/quick_entrypoints.h" -#include "memory_region.h" #include "thread.h" namespace art { @@ -75,8 +75,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size, __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister()); - for (size_t i = 0; i < entry_spills.size(); ++i) { - ManagedRegisterSpill spill = entry_spills.at(i); + for (const ManagedRegisterSpill& spill : entry_spills) { if (spill.AsX86_64().IsCpuRegister()) { if (spill.getSize() == 8) { __ movq(Address(CpuRegister(RSP), frame_size + spill.getSpillOffset()), @@ -575,10 +574,10 @@ void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset, ManagedRegist } // Slowpath entered when Thread::Current()->_exception is non-null -class X86_64ExceptionSlowPath FINAL : public SlowPath { +class X86_64ExceptionSlowPath final : public SlowPath { public: explicit X86_64ExceptionSlowPath(size_t stack_adjust) : stack_adjust_(stack_adjust) {} - virtual void Emit(Assembler *sp_asm) OVERRIDE; + void Emit(Assembler *sp_asm) override; private: const size_t stack_adjust_; }; diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h index d766ad4716..4c2fd8fc73 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h @@ -31,7 +31,7 @@ namespace art { namespace x86_64 { -class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assembler, +class X86_64JNIMacroAssembler final : public JNIMacroAssemblerFwd<X86_64Assembler, PointerSize::k64> { public: explicit X86_64JNIMacroAssembler(ArenaAllocator* allocator) @@ -46,107 +46,107 @@ class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assemble void BuildFrame(size_t frame_size, ManagedRegister method_reg, ArrayRef<const ManagedRegister> callee_save_regs, - const ManagedRegisterEntrySpills& entry_spills) OVERRIDE; + const ManagedRegisterEntrySpills& entry_spills) override; // Emit code that will remove an activation from the stack void RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> callee_save_regs, - bool may_suspend) OVERRIDE; + bool may_suspend) override; - void IncreaseFrameSize(size_t adjust) OVERRIDE; - void DecreaseFrameSize(size_t adjust) OVERRIDE; + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; // Store routines - void Store(FrameOffset offs, ManagedRegister src, size_t size) OVERRIDE; - void StoreRef(FrameOffset dest, ManagedRegister src) OVERRIDE; - void StoreRawPtr(FrameOffset dest, ManagedRegister src) OVERRIDE; + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void 
StoreRef(FrameOffset dest, ManagedRegister src) override; + void StoreRawPtr(FrameOffset dest, ManagedRegister src) override; - void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) OVERRIDE; + void StoreImmediateToFrame(FrameOffset dest, uint32_t imm, ManagedRegister scratch) override; void StoreStackOffsetToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; - void StoreStackPointerToThread(ThreadOffset64 thr_offs) OVERRIDE; + void StoreStackPointerToThread(ThreadOffset64 thr_offs) override; void StoreSpanning(FrameOffset dest, ManagedRegister src, FrameOffset in_off, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; // Load routines - void Load(ManagedRegister dest, FrameOffset src, size_t size) OVERRIDE; + void Load(ManagedRegister dest, FrameOffset src, size_t size) override; - void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) OVERRIDE; + void LoadFromThread(ManagedRegister dest, ThreadOffset64 src, size_t size) override; - void LoadRef(ManagedRegister dest, FrameOffset src) OVERRIDE; + void LoadRef(ManagedRegister dest, FrameOffset src) override; void LoadRef(ManagedRegister dest, ManagedRegister base, MemberOffset offs, - bool unpoison_reference) OVERRIDE; + bool unpoison_reference) override; - void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) OVERRIDE; + void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs) override; - void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) OVERRIDE; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; // Copying routines - void Move(ManagedRegister dest, ManagedRegister src, size_t size); + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; void CopyRawPtrFromThread(FrameOffset fr_offs, ThreadOffset64 thr_offs, - ManagedRegister scratch) OVERRIDE; + ManagedRegister scratch) override; void CopyRawPtrToThread(ThreadOffset64 thr_offs, FrameOffset fr_offs, ManagedRegister scratch) - OVERRIDE; + override; - void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) OVERRIDE; + void CopyRef(FrameOffset dest, FrameOffset src, ManagedRegister scratch) override; - void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) OVERRIDE; + void Copy(FrameOffset dest, FrameOffset src, ManagedRegister scratch, size_t size) override; void Copy(FrameOffset dest, ManagedRegister src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest_base, Offset dest_offset, FrameOffset src, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, FrameOffset src_base, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(ManagedRegister dest, Offset dest_offset, ManagedRegister src, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; void Copy(FrameOffset dest, Offset dest_offset, FrameOffset src, Offset src_offset, ManagedRegister scratch, - size_t size) OVERRIDE; + size_t size) override; - void MemoryBarrier(ManagedRegister) OVERRIDE; + void MemoryBarrier(ManagedRegister) override; // Sign extension - void SignExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void SignExtend(ManagedRegister mreg, size_t size) override; // Zero extension - void 
ZeroExtend(ManagedRegister mreg, size_t size) OVERRIDE; + void ZeroExtend(ManagedRegister mreg, size_t size) override; // Exploit fast access in managed code to Thread::Current() - void GetCurrentThread(ManagedRegister tr) OVERRIDE; - void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) OVERRIDE; + void GetCurrentThread(ManagedRegister tr) override; + void GetCurrentThread(FrameOffset dest_offset, ManagedRegister scratch) override; // Set up out_reg to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. in_reg holds a possibly stale reference @@ -155,46 +155,46 @@ class X86_64JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86_64Assemble void CreateHandleScopeEntry(ManagedRegister out_reg, FrameOffset handlescope_offset, ManagedRegister in_reg, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // Set up out_off to hold a Object** into the handle scope, or to be null if the // value is null and null_allowed. void CreateHandleScopeEntry(FrameOffset out_off, FrameOffset handlescope_offset, ManagedRegister scratch, - bool null_allowed) OVERRIDE; + bool null_allowed) override; // src holds a handle scope entry (Object**) load this into dst - virtual void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) OVERRIDE; + void LoadReferenceFromHandleScope(ManagedRegister dst, ManagedRegister src) override; // Heap::VerifyObject on src. In some cases (such as a reference to this) we // know that src may not be null. - void VerifyObject(ManagedRegister src, bool could_be_null) OVERRIDE; - void VerifyObject(FrameOffset src, bool could_be_null) OVERRIDE; + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; // Call to address held at [base+offset] - void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) OVERRIDE; - void Call(FrameOffset base, Offset offset, ManagedRegister scratch) OVERRIDE; - void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) OVERRIDE; + void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override; + void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override; + void CallFromThread(ThreadOffset64 offset, ManagedRegister scratch) override; // Generate code to check if Thread::Current()->exception_ is non-null // and branch to a ExceptionSlowPath if it is. - void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE; + void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) override; // Create a new label that can be used with Jump/Bind calls. - std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE; + std::unique_ptr<JNIMacroLabel> CreateLabel() override; // Emit an unconditional jump to the label. - void Jump(JNIMacroLabel* label) OVERRIDE; + void Jump(JNIMacroLabel* label) override; // Emit a conditional jump to the label by applying a unary condition test to the register. - void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE; + void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) override; // Code at this offset will serve as the target for the Jump call. 
- void Bind(JNIMacroLabel* label) OVERRIDE; + void Bind(JNIMacroLabel* label) override; private: DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler); }; -class X86_64JNIMacroLabel FINAL +class X86_64JNIMacroLabel final : public JNIMacroLabelCommon<X86_64JNIMacroLabel, art::Label, InstructionSet::kX86_64> { diff --git a/compiler/utils/x86_64/managed_register_x86_64.cc b/compiler/utils/x86_64/managed_register_x86_64.cc index b8c2db2d2e..c0eec9d86c 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.cc +++ b/compiler/utils/x86_64/managed_register_x86_64.cc @@ -16,7 +16,7 @@ #include "managed_register_x86_64.h" -#include "globals.h" +#include "base/globals.h" namespace art { namespace x86_64 { diff --git a/compiler/utils/x86_64/managed_register_x86_64.h b/compiler/utils/x86_64/managed_register_x86_64.h index 32af672670..6760882965 100644 --- a/compiler/utils/x86_64/managed_register_x86_64.h +++ b/compiler/utils/x86_64/managed_register_x86_64.h @@ -18,7 +18,6 @@ #define ART_COMPILER_UTILS_X86_64_MANAGED_REGISTER_X86_64_H_ #include "constants_x86_64.h" -#include "debug/dwarf/register.h" #include "utils/managed_register.h" namespace art { diff --git a/compiler/utils/x86_64/managed_register_x86_64_test.cc b/compiler/utils/x86_64/managed_register_x86_64_test.cc index e43d717385..46a405ffaf 100644 --- a/compiler/utils/x86_64/managed_register_x86_64_test.cc +++ b/compiler/utils/x86_64/managed_register_x86_64_test.cc @@ -15,7 +15,7 @@ */ #include "managed_register_x86_64.h" -#include "globals.h" +#include "base/globals.h" #include "gtest/gtest.h" namespace art { diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc deleted file mode 100644 index 76448d819c..0000000000 --- a/compiler/verifier_deps_test.cc +++ /dev/null @@ -1,1515 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Test is in compiler, as it uses compiler related code. 
-#include "verifier/verifier_deps.h" - -#include "art_method-inl.h" -#include "class_linker.h" -#include "common_compiler_test.h" -#include "compiler_callbacks.h" -#include "dex/dex_file-inl.h" -#include "dex/dex_file_types.h" -#include "dex/verification_results.h" -#include "dex/verified_method.h" -#include "driver/compiler_driver-inl.h" -#include "driver/compiler_options.h" -#include "handle_scope-inl.h" -#include "indenter.h" -#include "mirror/class_loader.h" -#include "runtime.h" -#include "scoped_thread_state_change-inl.h" -#include "thread.h" -#include "utils/atomic_dex_ref_map-inl.h" -#include "verifier/method_verifier-inl.h" - -namespace art { -namespace verifier { - -class VerifierDepsCompilerCallbacks : public CompilerCallbacks { - public: - VerifierDepsCompilerCallbacks() - : CompilerCallbacks(CompilerCallbacks::CallbackMode::kCompileApp), - deps_(nullptr) {} - - void MethodVerified(verifier::MethodVerifier* verifier ATTRIBUTE_UNUSED) OVERRIDE {} - void ClassRejected(ClassReference ref ATTRIBUTE_UNUSED) OVERRIDE {} - bool IsRelocationPossible() OVERRIDE { return false; } - - verifier::VerifierDeps* GetVerifierDeps() const OVERRIDE { return deps_; } - void SetVerifierDeps(verifier::VerifierDeps* deps) { deps_ = deps; } - - private: - verifier::VerifierDeps* deps_; -}; - -class VerifierDepsTest : public CommonCompilerTest { - public: - void SetUpRuntimeOptions(RuntimeOptions* options) { - CommonCompilerTest::SetUpRuntimeOptions(options); - callbacks_.reset(new VerifierDepsCompilerCallbacks()); - } - - mirror::Class* FindClassByName(const std::string& name, ScopedObjectAccess* soa) - REQUIRES_SHARED(Locks::mutator_lock_) { - StackHandleScope<1> hs(Thread::Current()); - Handle<mirror::ClassLoader> class_loader_handle( - hs.NewHandle(soa->Decode<mirror::ClassLoader>(class_loader_))); - mirror::Class* klass = class_linker_->FindClass(Thread::Current(), - name.c_str(), - class_loader_handle); - if (klass == nullptr) { - DCHECK(Thread::Current()->IsExceptionPending()); - Thread::Current()->ClearException(); - } - return klass; - } - - void SetupCompilerDriver() { - compiler_options_->boot_image_ = false; - compiler_driver_->InitializeThreadPools(); - } - - void VerifyWithCompilerDriver(verifier::VerifierDeps* deps) { - TimingLogger timings("Verify", false, false); - // The compiler driver handles the verifier deps in the callbacks, so - // remove what this class did for unit testing. - if (deps == nullptr) { - // Create some verifier deps by default if they are not already specified. - deps = new verifier::VerifierDeps(dex_files_); - verifier_deps_.reset(deps); - } - callbacks_->SetVerifierDeps(deps); - compiler_driver_->Verify(class_loader_, dex_files_, &timings); - callbacks_->SetVerifierDeps(nullptr); - // Clear entries in the verification results to avoid hitting a DCHECK that - // we always succeed inserting a new entry after verifying. 
- AtomicDexRefMap<MethodReference, const VerifiedMethod*>* map = - &compiler_driver_->GetVerificationResults()->atomic_verified_methods_; - map->Visit([](const DexFileReference& ref ATTRIBUTE_UNUSED, const VerifiedMethod* method) { - delete method; - }); - map->ClearEntries(); - } - - void SetVerifierDeps(const std::vector<const DexFile*>& dex_files) { - verifier_deps_.reset(new verifier::VerifierDeps(dex_files)); - VerifierDepsCompilerCallbacks* callbacks = - reinterpret_cast<VerifierDepsCompilerCallbacks*>(callbacks_.get()); - callbacks->SetVerifierDeps(verifier_deps_.get()); - } - - void LoadDexFile(ScopedObjectAccess* soa, const char* name1, const char* name2 = nullptr) - REQUIRES_SHARED(Locks::mutator_lock_) { - class_loader_ = (name2 == nullptr) ? LoadDex(name1) : LoadMultiDex(name1, name2); - dex_files_ = GetDexFiles(class_loader_); - primary_dex_file_ = dex_files_.front(); - - SetVerifierDeps(dex_files_); - StackHandleScope<1> hs(soa->Self()); - Handle<mirror::ClassLoader> loader = - hs.NewHandle(soa->Decode<mirror::ClassLoader>(class_loader_)); - for (const DexFile* dex_file : dex_files_) { - class_linker_->RegisterDexFile(*dex_file, loader.Get()); - } - for (const DexFile* dex_file : dex_files_) { - compiler_driver_->GetVerificationResults()->AddDexFile(dex_file); - } - compiler_driver_->SetDexFilesForOatFile(dex_files_); - } - - void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) { - LoadDexFile(soa, "VerifierDeps"); - CHECK_EQ(dex_files_.size(), 1u); - klass_Main_ = FindClassByName("LMain;", soa); - CHECK(klass_Main_ != nullptr); - } - - bool VerifyMethod(const std::string& method_name) { - ScopedObjectAccess soa(Thread::Current()); - LoadDexFile(&soa); - - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader_handle( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_))); - Handle<mirror::DexCache> dex_cache_handle(hs.NewHandle(klass_Main_->GetDexCache())); - - const DexFile::ClassDef* class_def = klass_Main_->GetClassDef(); - const uint8_t* class_data = primary_dex_file_->GetClassData(*class_def); - CHECK(class_data != nullptr); - - ClassDataItemIterator it(*primary_dex_file_, class_data); - it.SkipAllFields(); - - ArtMethod* method = nullptr; - while (it.HasNextDirectMethod()) { - ArtMethod* resolved_method = - class_linker_->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( - it.GetMemberIndex(), - dex_cache_handle, - class_loader_handle, - /* referrer */ nullptr, - it.GetMethodInvokeType(*class_def)); - CHECK(resolved_method != nullptr); - if (method_name == resolved_method->GetName()) { - method = resolved_method; - break; - } - it.Next(); - } - CHECK(method != nullptr); - - Thread::Current()->SetVerifierDeps(callbacks_->GetVerifierDeps()); - MethodVerifier verifier(Thread::Current(), - primary_dex_file_, - dex_cache_handle, - class_loader_handle, - *class_def, - it.GetMethodCodeItem(), - it.GetMemberIndex(), - method, - it.GetMethodAccessFlags(), - true /* can_load_classes */, - true /* allow_soft_failures */, - true /* need_precise_constants */, - false /* verify to dump */, - true /* allow_thread_suspension */); - verifier.Verify(); - Thread::Current()->SetVerifierDeps(nullptr); - return !verifier.HasFailures(); - } - - void VerifyDexFile(const char* multidex = nullptr) { - { - ScopedObjectAccess soa(Thread::Current()); - LoadDexFile(&soa, "VerifierDeps", multidex); - } - SetupCompilerDriver(); - VerifyWithCompilerDriver(/* verifier_deps */ nullptr); - } - - bool TestAssignabilityRecording(const 
std::string& dst, - const std::string& src, - bool is_strict, - bool is_assignable) { - ScopedObjectAccess soa(Thread::Current()); - LoadDexFile(&soa); - mirror::Class* klass_dst = FindClassByName(dst, &soa); - DCHECK(klass_dst != nullptr) << dst; - mirror::Class* klass_src = FindClassByName(src, &soa); - DCHECK(klass_src != nullptr) << src; - verifier_deps_->AddAssignability(*primary_dex_file_, - klass_dst, - klass_src, - is_strict, - is_assignable); - return true; - } - - // Check that the status of classes in `class_loader_` match the - // expected status in `deps`. - void VerifyClassStatus(const verifier::VerifierDeps& deps) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader_handle( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_))); - MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr)); - for (const DexFile* dex_file : dex_files_) { - const std::set<dex::TypeIndex>& unverified_classes = deps.GetUnverifiedClasses(*dex_file); - for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) { - const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); - const char* descriptor = dex_file->GetClassDescriptor(class_def); - cls.Assign(class_linker_->FindClass(soa.Self(), descriptor, class_loader_handle)); - if (cls == nullptr) { - CHECK(soa.Self()->IsExceptionPending()); - soa.Self()->ClearException(); - } else if (unverified_classes.find(class_def.class_idx_) == unverified_classes.end()) { - ASSERT_EQ(cls->GetStatus(), ClassStatus::kVerified); - } else { - ASSERT_LT(cls->GetStatus(), ClassStatus::kVerified); - } - } - } - } - - bool HasUnverifiedClass(const std::string& cls) { - return HasUnverifiedClass(cls, *primary_dex_file_); - } - - bool HasUnverifiedClass(const std::string& cls, const DexFile& dex_file) { - const DexFile::TypeId* type_id = dex_file.FindTypeId(cls.c_str()); - DCHECK(type_id != nullptr); - dex::TypeIndex index = dex_file.GetIndexForTypeId(*type_id); - for (const auto& dex_dep : verifier_deps_->dex_deps_) { - for (dex::TypeIndex entry : dex_dep.second->unverified_classes_) { - if (index == entry) { - return true; - } - } - } - return false; - } - - // Iterates over all assignability records and tries to find an entry which - // matches the expected destination/source pair. - bool HasAssignable(const std::string& expected_destination, - const std::string& expected_source, - bool expected_is_assignable) { - for (auto& dex_dep : verifier_deps_->dex_deps_) { - const DexFile& dex_file = *dex_dep.first; - auto& storage = expected_is_assignable ? dex_dep.second->assignable_types_ - : dex_dep.second->unassignable_types_; - for (auto& entry : storage) { - std::string actual_destination = - verifier_deps_->GetStringFromId(dex_file, entry.GetDestination()); - std::string actual_source = verifier_deps_->GetStringFromId(dex_file, entry.GetSource()); - if ((expected_destination == actual_destination) && (expected_source == actual_source)) { - return true; - } - } - } - return false; - } - - // Iterates over all class resolution records, finds an entry which matches - // the given class descriptor and tests its properties. 
- bool HasClass(const std::string& expected_klass, - bool expected_resolved, - const std::string& expected_access_flags = "") { - for (auto& dex_dep : verifier_deps_->dex_deps_) { - for (auto& entry : dex_dep.second->classes_) { - if (expected_resolved != entry.IsResolved()) { - continue; - } - - std::string actual_klass = dex_dep.first->StringByTypeIdx(entry.GetDexTypeIndex()); - if (expected_klass != actual_klass) { - continue; - } - - if (expected_resolved) { - // Test access flags. Note that PrettyJavaAccessFlags always appends - // a space after the modifiers. Add it to the expected access flags. - std::string actual_access_flags = PrettyJavaAccessFlags(entry.GetAccessFlags()); - if (expected_access_flags + " " != actual_access_flags) { - continue; - } - } - - return true; - } - } - return false; - } - - // Iterates over all field resolution records, finds an entry which matches - // the given field class+name+type and tests its properties. - bool HasField(const std::string& expected_klass, - const std::string& expected_name, - const std::string& expected_type, - bool expected_resolved, - const std::string& expected_access_flags = "", - const std::string& expected_decl_klass = "") { - for (auto& dex_dep : verifier_deps_->dex_deps_) { - for (auto& entry : dex_dep.second->fields_) { - if (expected_resolved != entry.IsResolved()) { - continue; - } - - const DexFile::FieldId& field_id = dex_dep.first->GetFieldId(entry.GetDexFieldIndex()); - - std::string actual_klass = dex_dep.first->StringByTypeIdx(field_id.class_idx_); - if (expected_klass != actual_klass) { - continue; - } - - std::string actual_name = dex_dep.first->StringDataByIdx(field_id.name_idx_); - if (expected_name != actual_name) { - continue; - } - - std::string actual_type = dex_dep.first->StringByTypeIdx(field_id.type_idx_); - if (expected_type != actual_type) { - continue; - } - - if (expected_resolved) { - // Test access flags. Note that PrettyJavaAccessFlags always appends - // a space after the modifiers. Add it to the expected access flags. - std::string actual_access_flags = PrettyJavaAccessFlags(entry.GetAccessFlags()); - if (expected_access_flags + " " != actual_access_flags) { - continue; - } - - std::string actual_decl_klass = verifier_deps_->GetStringFromId( - *dex_dep.first, entry.GetDeclaringClassIndex()); - if (expected_decl_klass != actual_decl_klass) { - continue; - } - } - - return true; - } - } - return false; - } - - // Iterates over all method resolution records, finds an entry which matches - // the given field kind+class+name+signature and tests its properties. 
- bool HasMethod(const std::string& expected_klass, - const std::string& expected_name, - const std::string& expected_signature, - bool expected_resolved, - const std::string& expected_access_flags = "", - const std::string& expected_decl_klass = "") { - for (auto& dex_dep : verifier_deps_->dex_deps_) { - for (const VerifierDeps::MethodResolution& entry : dex_dep.second->methods_) { - if (expected_resolved != entry.IsResolved()) { - continue; - } - - const DexFile::MethodId& method_id = dex_dep.first->GetMethodId(entry.GetDexMethodIndex()); - - std::string actual_klass = dex_dep.first->StringByTypeIdx(method_id.class_idx_); - if (expected_klass != actual_klass) { - continue; - } - - std::string actual_name = dex_dep.first->StringDataByIdx(method_id.name_idx_); - if (expected_name != actual_name) { - continue; - } - - std::string actual_signature = dex_dep.first->GetMethodSignature(method_id).ToString(); - if (expected_signature != actual_signature) { - continue; - } - - if (expected_resolved) { - // Test access flags. Note that PrettyJavaAccessFlags always appends - // a space after the modifiers. Add it to the expected access flags. - std::string actual_access_flags = PrettyJavaAccessFlags(entry.GetAccessFlags()); - if (expected_access_flags + " " != actual_access_flags) { - continue; - } - - std::string actual_decl_klass = verifier_deps_->GetStringFromId( - *dex_dep.first, entry.GetDeclaringClassIndex()); - if (expected_decl_klass != actual_decl_klass) { - continue; - } - } - - return true; - } - } - return false; - } - - size_t NumberOfCompiledDexFiles() { - return verifier_deps_->dex_deps_.size(); - } - - size_t HasEachKindOfRecord() { - bool has_strings = false; - bool has_assignability = false; - bool has_classes = false; - bool has_fields = false; - bool has_methods = false; - bool has_unverified_classes = false; - - for (auto& entry : verifier_deps_->dex_deps_) { - has_strings |= !entry.second->strings_.empty(); - has_assignability |= !entry.second->assignable_types_.empty(); - has_assignability |= !entry.second->unassignable_types_.empty(); - has_classes |= !entry.second->classes_.empty(); - has_fields |= !entry.second->fields_.empty(); - has_methods |= !entry.second->methods_.empty(); - has_unverified_classes |= !entry.second->unverified_classes_.empty(); - } - - return has_strings && - has_assignability && - has_classes && - has_fields && - has_methods && - has_unverified_classes; - } - - std::unique_ptr<verifier::VerifierDeps> verifier_deps_; - std::vector<const DexFile*> dex_files_; - const DexFile* primary_dex_file_; - jobject class_loader_; - mirror::Class* klass_Main_; -}; - -TEST_F(VerifierDepsTest, StringToId) { - ScopedObjectAccess soa(Thread::Current()); - LoadDexFile(&soa); - - dex::StringIndex id_Main1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;"); - ASSERT_LT(id_Main1.index_, primary_dex_file_->NumStringIds()); - ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main1)); - - dex::StringIndex id_Main2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;"); - ASSERT_LT(id_Main2.index_, primary_dex_file_->NumStringIds()); - ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main2)); - - dex::StringIndex id_Lorem1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum"); - ASSERT_GE(id_Lorem1.index_, primary_dex_file_->NumStringIds()); - ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem1)); - - dex::StringIndex id_Lorem2 = 
verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum"); - ASSERT_GE(id_Lorem2.index_, primary_dex_file_->NumStringIds()); - ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem2)); - - ASSERT_EQ(id_Main1, id_Main2); - ASSERT_EQ(id_Lorem1, id_Lorem2); - ASSERT_NE(id_Main1, id_Lorem1); -} - -TEST_F(VerifierDepsTest, Assignable_BothInBoot) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/util/TimeZone;", - /* src */ "Ljava/util/SimpleTimeZone;", - /* is_strict */ true, - /* is_assignable */ true)); - ASSERT_TRUE(HasAssignable("Ljava/util/TimeZone;", "Ljava/util/SimpleTimeZone;", true)); -} - -TEST_F(VerifierDepsTest, Assignable_DestinationInBoot1) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/net/Socket;", - /* src */ "LMySSLSocket;", - /* is_strict */ true, - /* is_assignable */ true)); - ASSERT_TRUE(HasAssignable("Ljava/net/Socket;", "Ljavax/net/ssl/SSLSocket;", true)); -} - -TEST_F(VerifierDepsTest, Assignable_DestinationInBoot2) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/util/TimeZone;", - /* src */ "LMySimpleTimeZone;", - /* is_strict */ true, - /* is_assignable */ true)); - ASSERT_TRUE(HasAssignable("Ljava/util/TimeZone;", "Ljava/util/SimpleTimeZone;", true)); -} - -TEST_F(VerifierDepsTest, Assignable_DestinationInBoot3) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/util/Collection;", - /* src */ "LMyThreadSet;", - /* is_strict */ true, - /* is_assignable */ true)); - ASSERT_TRUE(HasAssignable("Ljava/util/Collection;", "Ljava/util/Set;", true)); -} - -TEST_F(VerifierDepsTest, Assignable_BothArrays_Resolved) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "[[Ljava/util/TimeZone;", - /* src */ "[[Ljava/util/SimpleTimeZone;", - /* is_strict */ true, - /* is_assignable */ true)); - // If the component types of both arrays are resolved, we optimize the list of - // dependencies by recording a dependency on the component types. 
- ASSERT_FALSE(HasAssignable("[[Ljava/util/TimeZone;", "[[Ljava/util/SimpleTimeZone;", true)); - ASSERT_FALSE(HasAssignable("[Ljava/util/TimeZone;", "[Ljava/util/SimpleTimeZone;", true)); - ASSERT_TRUE(HasAssignable("Ljava/util/TimeZone;", "Ljava/util/SimpleTimeZone;", true)); -} - -TEST_F(VerifierDepsTest, NotAssignable_BothInBoot) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/lang/Exception;", - /* src */ "Ljava/util/SimpleTimeZone;", - /* is_strict */ true, - /* is_assignable */ false)); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "Ljava/util/SimpleTimeZone;", false)); -} - -TEST_F(VerifierDepsTest, NotAssignable_DestinationInBoot1) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/lang/Exception;", - /* src */ "LMySSLSocket;", - /* is_strict */ true, - /* is_assignable */ false)); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "Ljavax/net/ssl/SSLSocket;", false)); -} - -TEST_F(VerifierDepsTest, NotAssignable_DestinationInBoot2) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/lang/Exception;", - /* src */ "LMySimpleTimeZone;", - /* is_strict */ true, - /* is_assignable */ false)); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "Ljava/util/SimpleTimeZone;", false)); -} - -TEST_F(VerifierDepsTest, NotAssignable_BothArrays) { - ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "[Ljava/lang/Exception;", - /* src */ "[Ljava/util/SimpleTimeZone;", - /* is_strict */ true, - /* is_assignable */ false)); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "Ljava/util/SimpleTimeZone;", false)); -} - -TEST_F(VerifierDepsTest, ArgumentType_ResolvedClass) { - ASSERT_TRUE(VerifyMethod("ArgumentType_ResolvedClass")); - ASSERT_TRUE(HasClass("Ljava/lang/Thread;", true, "public")); -} - -TEST_F(VerifierDepsTest, ArgumentType_UnresolvedClass) { - ASSERT_TRUE(VerifyMethod("ArgumentType_UnresolvedClass")); - ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); -} - -TEST_F(VerifierDepsTest, ArgumentType_UnresolvedSuper) { - ASSERT_TRUE(VerifyMethod("ArgumentType_UnresolvedSuper")); - ASSERT_TRUE(HasClass("LMySetWithUnresolvedSuper;", false)); -} - -TEST_F(VerifierDepsTest, ReturnType_Reference) { - ASSERT_TRUE(VerifyMethod("ReturnType_Reference")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/lang/IllegalStateException;", true)); -} - -TEST_F(VerifierDepsTest, ReturnType_Array) { - ASSERT_FALSE(VerifyMethod("ReturnType_Array")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Integer;", "Ljava/lang/IllegalStateException;", false)); -} - -TEST_F(VerifierDepsTest, InvokeArgumentType) { - ASSERT_TRUE(VerifyMethod("InvokeArgumentType")); - ASSERT_TRUE(HasClass("Ljava/text/SimpleDateFormat;", true, "public")); - ASSERT_TRUE(HasClass("Ljava/util/SimpleTimeZone;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/text/SimpleDateFormat;", - "setTimeZone", - "(Ljava/util/TimeZone;)V", - /* expect_resolved */ true, - "public", - "Ljava/text/DateFormat;")); - ASSERT_TRUE(HasAssignable("Ljava/util/TimeZone;", "Ljava/util/SimpleTimeZone;", true)); -} - -TEST_F(VerifierDepsTest, MergeTypes_RegisterLines) { - ASSERT_TRUE(VerifyMethod("MergeTypes_RegisterLines")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "Ljava/net/SocketTimeoutException;", true)); - ASSERT_TRUE(HasAssignable( - "Ljava/lang/Exception;", "Ljava/util/concurrent/TimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, MergeTypes_IfInstanceOf) { - ASSERT_TRUE(VerifyMethod("MergeTypes_IfInstanceOf")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", 
"Ljava/net/SocketTimeoutException;", true)); - ASSERT_TRUE(HasAssignable( - "Ljava/lang/Exception;", "Ljava/util/concurrent/TimeoutException;", true)); - ASSERT_TRUE(HasAssignable("Ljava/net/SocketTimeoutException;", "Ljava/lang/Exception;", false)); -} - -TEST_F(VerifierDepsTest, MergeTypes_Unresolved) { - ASSERT_TRUE(VerifyMethod("MergeTypes_Unresolved")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "Ljava/net/SocketTimeoutException;", true)); - ASSERT_TRUE(HasAssignable( - "Ljava/lang/Exception;", "Ljava/util/concurrent/TimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, ConstClass_Resolved) { - ASSERT_TRUE(VerifyMethod("ConstClass_Resolved")); - ASSERT_TRUE(HasClass("Ljava/lang/IllegalStateException;", true, "public")); -} - -TEST_F(VerifierDepsTest, ConstClass_Unresolved) { - ASSERT_FALSE(VerifyMethod("ConstClass_Unresolved")); - ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); -} - -TEST_F(VerifierDepsTest, CheckCast_Resolved) { - ASSERT_TRUE(VerifyMethod("CheckCast_Resolved")); - ASSERT_TRUE(HasClass("Ljava/lang/IllegalStateException;", true, "public")); -} - -TEST_F(VerifierDepsTest, CheckCast_Unresolved) { - ASSERT_FALSE(VerifyMethod("CheckCast_Unresolved")); - ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); -} - -TEST_F(VerifierDepsTest, InstanceOf_Resolved) { - ASSERT_TRUE(VerifyMethod("InstanceOf_Resolved")); - ASSERT_TRUE(HasClass("Ljava/lang/IllegalStateException;", true, "public")); -} - -TEST_F(VerifierDepsTest, InstanceOf_Unresolved) { - ASSERT_FALSE(VerifyMethod("InstanceOf_Unresolved")); - ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); -} - -TEST_F(VerifierDepsTest, NewInstance_Resolved) { - ASSERT_TRUE(VerifyMethod("NewInstance_Resolved")); - ASSERT_TRUE(HasClass("Ljava/lang/IllegalStateException;", true, "public")); -} - -TEST_F(VerifierDepsTest, NewInstance_Unresolved) { - ASSERT_FALSE(VerifyMethod("NewInstance_Unresolved")); - ASSERT_TRUE(HasClass("LUnresolvedClass;", false)); -} - -TEST_F(VerifierDepsTest, NewArray_Unresolved) { - ASSERT_FALSE(VerifyMethod("NewArray_Unresolved")); - ASSERT_TRUE(HasClass("[LUnresolvedClass;", false)); -} - -TEST_F(VerifierDepsTest, Throw) { - ASSERT_TRUE(VerifyMethod("Throw")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/lang/IllegalStateException;", true)); -} - -TEST_F(VerifierDepsTest, MoveException_Resolved) { - ASSERT_TRUE(VerifyMethod("MoveException_Resolved")); - ASSERT_TRUE(HasClass("Ljava/io/InterruptedIOException;", true, "public")); - ASSERT_TRUE(HasClass("Ljava/net/SocketTimeoutException;", true, "public")); - ASSERT_TRUE(HasClass("Ljava/util/zip/ZipException;", true, "public")); - - // Testing that all exception types are assignable to Throwable. - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/io/InterruptedIOException;", true)); - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/net/SocketTimeoutException;", true)); - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/util/zip/ZipException;", true)); - - // Testing that the merge type is assignable to Throwable. - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/io/IOException;", true)); - - // Merging of exception types. 
- ASSERT_TRUE(HasAssignable("Ljava/io/IOException;", "Ljava/io/InterruptedIOException;", true)); - ASSERT_TRUE(HasAssignable("Ljava/io/IOException;", "Ljava/util/zip/ZipException;", true)); - ASSERT_TRUE(HasAssignable( - "Ljava/io/InterruptedIOException;", "Ljava/net/SocketTimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, MoveException_Unresolved) { - ASSERT_FALSE(VerifyMethod("MoveException_Unresolved")); - ASSERT_TRUE(HasClass("LUnresolvedException;", false)); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInReferenced) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public")); - ASSERT_TRUE(HasField("Ljava/lang/System;", - "out", - "Ljava/io/PrintStream;", - true, - "public static", - "Ljava/lang/System;")); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInSuperclass1) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljava/util/SimpleTimeZone;", true, "public")); - ASSERT_TRUE(HasField( - "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public static", "Ljava/util/TimeZone;")); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInSuperclass2) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass2")); - ASSERT_TRUE(HasField( - "LMySimpleTimeZone;", "SHORT", "I", true, "public static", "Ljava/util/TimeZone;")); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface1) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInInterface1")); - ASSERT_TRUE(HasClass("Ljavax/xml/transform/dom/DOMResult;", true, "public")); - ASSERT_TRUE(HasField("Ljavax/xml/transform/dom/DOMResult;", - "PI_ENABLE_OUTPUT_ESCAPING", - "Ljava/lang/String;", - true, - "public static", - "Ljavax/xml/transform/Result;")); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface2) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInInterface2")); - ASSERT_TRUE(HasField("LMyDOMResult;", - "PI_ENABLE_OUTPUT_ESCAPING", - "Ljava/lang/String;", - true, - "public static", - "Ljavax/xml/transform/Result;")); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface3) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInInterface3")); - ASSERT_TRUE(HasField("LMyResult;", - "PI_ENABLE_OUTPUT_ESCAPING", - "Ljava/lang/String;", - true, - "public static", - "Ljavax/xml/transform/Result;")); -} - -TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface4) { - ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInInterface4")); - ASSERT_TRUE(HasField("LMyDocument;", - "ELEMENT_NODE", - "S", - true, - "public static", - "Lorg/w3c/dom/Node;")); -} - -TEST_F(VerifierDepsTest, StaticField_Unresolved_ReferrerInBoot) { - ASSERT_TRUE(VerifyMethod("StaticField_Unresolved_ReferrerInBoot")); - ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public")); - ASSERT_TRUE(HasField("Ljava/util/TimeZone;", "x", "I", false)); -} - -TEST_F(VerifierDepsTest, StaticField_Unresolved_ReferrerInDex) { - ASSERT_TRUE(VerifyMethod("StaticField_Unresolved_ReferrerInDex")); - ASSERT_TRUE(HasField("LMyThreadSet;", "x", "I", false)); -} - -TEST_F(VerifierDepsTest, InstanceField_Resolved_DeclaredInReferenced) { - ASSERT_TRUE(VerifyMethod("InstanceField_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/io/InterruptedIOException;", true, "public")); - ASSERT_TRUE(HasField("Ljava/io/InterruptedIOException;", - "bytesTransferred", - "I", - true, - "public", - 
"Ljava/io/InterruptedIOException;")); - ASSERT_TRUE(HasAssignable( - "Ljava/io/InterruptedIOException;", "Ljava/net/SocketTimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, InstanceField_Resolved_DeclaredInSuperclass1) { - ASSERT_TRUE(VerifyMethod("InstanceField_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljava/net/SocketTimeoutException;", true, "public")); - ASSERT_TRUE(HasField("Ljava/net/SocketTimeoutException;", - "bytesTransferred", - "I", - true, - "public", - "Ljava/io/InterruptedIOException;")); - ASSERT_TRUE(HasAssignable( - "Ljava/io/InterruptedIOException;", "Ljava/net/SocketTimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, InstanceField_Resolved_DeclaredInSuperclass2) { - ASSERT_TRUE(VerifyMethod("InstanceField_Resolved_DeclaredInSuperclass2")); - ASSERT_TRUE(HasField("LMySocketTimeoutException;", - "bytesTransferred", - "I", - true, - "public", - "Ljava/io/InterruptedIOException;")); - ASSERT_TRUE(HasAssignable( - "Ljava/io/InterruptedIOException;", "Ljava/net/SocketTimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, InstanceField_Unresolved_ReferrerInBoot) { - ASSERT_TRUE(VerifyMethod("InstanceField_Unresolved_ReferrerInBoot")); - ASSERT_TRUE(HasClass("Ljava/io/InterruptedIOException;", true, "public")); - ASSERT_TRUE(HasField("Ljava/io/InterruptedIOException;", "x", "I", false)); -} - -TEST_F(VerifierDepsTest, InstanceField_Unresolved_ReferrerInDex) { - ASSERT_TRUE(VerifyMethod("InstanceField_Unresolved_ReferrerInDex")); - ASSERT_TRUE(HasField("LMyThreadSet;", "x", "I", false)); -} - -TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInReferenced) { - ASSERT_TRUE(VerifyMethod("InvokeStatic_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/net/Socket;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/net/Socket;", - "setSocketImplFactory", - "(Ljava/net/SocketImplFactory;)V", - /* expect_resolved */ true, - "public static", - "Ljava/net/Socket;")); -} - -TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInSuperclass1) { - ASSERT_TRUE(VerifyMethod("InvokeStatic_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); - ASSERT_TRUE(HasMethod("Ljavax/net/ssl/SSLSocket;", - "setSocketImplFactory", - "(Ljava/net/SocketImplFactory;)V", - /* expect_resolved */ true, - "public static", - "Ljava/net/Socket;")); -} - -TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInSuperclass2) { - ASSERT_TRUE(VerifyMethod("InvokeStatic_Resolved_DeclaredInSuperclass2")); - ASSERT_TRUE(HasMethod("LMySSLSocket;", - "setSocketImplFactory", - "(Ljava/net/SocketImplFactory;)V", - /* expect_resolved */ true, - "public static", - "Ljava/net/Socket;")); -} - -TEST_F(VerifierDepsTest, InvokeStatic_DeclaredInInterface1) { - ASSERT_TRUE(VerifyMethod("InvokeStatic_DeclaredInInterface1")); - ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public interface")); - ASSERT_TRUE(HasMethod("Ljava/util/Map$Entry;", - "comparingByKey", - "()Ljava/util/Comparator;", - /* expect_resolved */ true, - "public static", - "Ljava/util/Map$Entry;")); -} - -TEST_F(VerifierDepsTest, InvokeStatic_DeclaredInInterface2) { - ASSERT_FALSE(VerifyMethod("InvokeStatic_DeclaredInInterface2")); - ASSERT_TRUE(HasClass("Ljava/util/AbstractMap$SimpleEntry;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/util/AbstractMap$SimpleEntry;", - "comparingByKey", - "()Ljava/util/Comparator;", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeStatic_Unresolved1) { - 
ASSERT_FALSE(VerifyMethod("InvokeStatic_Unresolved1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); - ASSERT_TRUE(HasMethod("Ljavax/net/ssl/SSLSocket;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeStatic_Unresolved2) { - ASSERT_FALSE(VerifyMethod("InvokeStatic_Unresolved2")); - ASSERT_TRUE(HasMethod("LMySSLSocket;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInReferenced) { - ASSERT_TRUE(VerifyMethod("InvokeDirect_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/net/Socket;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/net/Socket;", - "<init>", - "()V", - /* expect_resolved */ true, - "public", - "Ljava/net/Socket;")); -} - -TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInSuperclass1) { - ASSERT_FALSE(VerifyMethod("InvokeDirect_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); - ASSERT_TRUE(HasMethod("Ljavax/net/ssl/SSLSocket;", - "checkOldImpl", - "()V", - /* expect_resolved */ true, - "private", - "Ljava/net/Socket;")); -} - -TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInSuperclass2) { - ASSERT_FALSE(VerifyMethod("InvokeDirect_Resolved_DeclaredInSuperclass2")); - ASSERT_TRUE(HasMethod("LMySSLSocket;", - "checkOldImpl", - "()V", - /* expect_resolved */ true, - "private", - "Ljava/net/Socket;")); -} - -TEST_F(VerifierDepsTest, InvokeDirect_Unresolved1) { - ASSERT_FALSE(VerifyMethod("InvokeDirect_Unresolved1")); - ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public")); - ASSERT_TRUE(HasMethod("Ljavax/net/ssl/SSLSocket;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeDirect_Unresolved2) { - ASSERT_FALSE(VerifyMethod("InvokeDirect_Unresolved2")); - ASSERT_TRUE(HasMethod("LMySSLSocket;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeVirtual_Resolved_DeclaredInReferenced) { - ASSERT_TRUE(VerifyMethod("InvokeVirtual_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/lang/Throwable;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/lang/Throwable;", - "getMessage", - "()Ljava/lang/String;", - /* expect_resolved */ true, - "public", - "Ljava/lang/Throwable;")); - // Type dependency on `this` argument. - ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/net/SocketTimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, InvokeVirtual_Resolved_DeclaredInSuperclass1) { - ASSERT_TRUE(VerifyMethod("InvokeVirtual_Resolved_DeclaredInSuperclass1")); - ASSERT_TRUE(HasClass("Ljava/io/InterruptedIOException;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/io/InterruptedIOException;", - "getMessage", - "()Ljava/lang/String;", - /* expect_resolved */ true, - "public", - "Ljava/lang/Throwable;")); - // Type dependency on `this` argument. 
- ASSERT_TRUE(HasAssignable("Ljava/lang/Throwable;", "Ljava/net/SocketTimeoutException;", true)); -} - -TEST_F(VerifierDepsTest, InvokeVirtual_Resolved_DeclaredInSuperclass2) { - ASSERT_TRUE(VerifyMethod("InvokeVirtual_Resolved_DeclaredInSuperclass2")); - ASSERT_TRUE(HasMethod("LMySocketTimeoutException;", - "getMessage", - "()Ljava/lang/String;", - /* expect_resolved */ true, - "public", - "Ljava/lang/Throwable;")); -} - -TEST_F(VerifierDepsTest, InvokeVirtual_Resolved_DeclaredInSuperinterface) { - ASSERT_TRUE(VerifyMethod("InvokeVirtual_Resolved_DeclaredInSuperinterface")); - ASSERT_TRUE(HasMethod("LMyThreadSet;", - "size", - "()I", - /* expect_resolved */ true, - "public", - "Ljava/util/Set;")); -} - -TEST_F(VerifierDepsTest, InvokeVirtual_Unresolved1) { - ASSERT_FALSE(VerifyMethod("InvokeVirtual_Unresolved1")); - ASSERT_TRUE(HasClass("Ljava/io/InterruptedIOException;", true, "public")); - ASSERT_TRUE(HasMethod("Ljava/io/InterruptedIOException;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeVirtual_Unresolved2) { - ASSERT_FALSE(VerifyMethod("InvokeVirtual_Unresolved2")); - ASSERT_TRUE(HasMethod("LMySocketTimeoutException;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInReferenced) { - ASSERT_TRUE(VerifyMethod("InvokeInterface_Resolved_DeclaredInReferenced")); - ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface")); - ASSERT_TRUE(HasMethod("Ljava/lang/Runnable;", - "run", - "()V", - /* expect_resolved */ true, - "public", - "Ljava/lang/Runnable;")); -} - -TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInSuperclass) { - ASSERT_FALSE(VerifyMethod("InvokeInterface_Resolved_DeclaredInSuperclass")); - // TODO: Maybe we should not record dependency if the invoke type does not match the lookup type. - ASSERT_TRUE(HasMethod("LMyThread;", - "join", - "()V", - /* expect_resolved */ true, - "public", - "Ljava/lang/Thread;")); -} - -TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInSuperinterface1) { - ASSERT_FALSE(VerifyMethod("InvokeInterface_Resolved_DeclaredInSuperinterface1")); - // TODO: Maybe we should not record dependency if the invoke type does not match the lookup type. 
- ASSERT_TRUE(HasMethod("LMyThreadSet;", - "run", - "()V", - /* expect_resolved */ true, - "public", - "Ljava/lang/Thread;")); -} - -TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInSuperinterface2) { - ASSERT_FALSE(VerifyMethod("InvokeInterface_Resolved_DeclaredInSuperinterface2")); - ASSERT_TRUE(HasMethod("LMyThreadSet;", - "isEmpty", - "()Z", - /* expect_resolved */ true, - "public", - "Ljava/util/Set;")); -} - -TEST_F(VerifierDepsTest, InvokeInterface_Unresolved1) { - ASSERT_FALSE(VerifyMethod("InvokeInterface_Unresolved1")); - ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface")); - ASSERT_TRUE(HasMethod("Ljava/lang/Runnable;", - "x", - "()V", - /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeInterface_Unresolved2) { - ASSERT_FALSE(VerifyMethod("InvokeInterface_Unresolved2")); - ASSERT_TRUE(HasMethod("LMyThreadSet;", "x", "()V", /* expect_resolved */ false)); -} - -TEST_F(VerifierDepsTest, InvokeSuper_ThisAssignable) { - ASSERT_TRUE(VerifyMethod("InvokeSuper_ThisAssignable")); - ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Runnable;", "Ljava/lang/Thread;", true)); - ASSERT_TRUE(HasMethod("Ljava/lang/Runnable;", - "run", - "()V", - /* expect_resolved */ true, - "public", - "Ljava/lang/Runnable;")); -} - -TEST_F(VerifierDepsTest, InvokeSuper_ThisNotAssignable) { - ASSERT_FALSE(VerifyMethod("InvokeSuper_ThisNotAssignable")); - ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public")); - ASSERT_TRUE(HasAssignable("Ljava/lang/Integer;", "Ljava/lang/Thread;", false)); - ASSERT_TRUE(HasMethod("Ljava/lang/Integer;", - "intValue", "()I", - /* expect_resolved */ true, - "public", "Ljava/lang/Integer;")); -} - -TEST_F(VerifierDepsTest, ArgumentType_ResolvedReferenceArray) { - ASSERT_TRUE(VerifyMethod("ArgumentType_ResolvedReferenceArray")); - ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public")); -} - -TEST_F(VerifierDepsTest, NewArray_Resolved) { - ASSERT_TRUE(VerifyMethod("NewArray_Resolved")); - ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public")); -} - -TEST_F(VerifierDepsTest, EncodeDecode) { - VerifyDexFile(); - - ASSERT_EQ(1u, NumberOfCompiledDexFiles()); - ASSERT_TRUE(HasEachKindOfRecord()); - - std::vector<uint8_t> buffer; - verifier_deps_->Encode(dex_files_, &buffer); - ASSERT_FALSE(buffer.empty()); - - VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer)); - ASSERT_TRUE(verifier_deps_->Equals(decoded_deps)); -} - -TEST_F(VerifierDepsTest, EncodeDecodeMulti) { - VerifyDexFile("MultiDex"); - - ASSERT_GT(NumberOfCompiledDexFiles(), 1u); - std::vector<uint8_t> buffer; - verifier_deps_->Encode(dex_files_, &buffer); - ASSERT_FALSE(buffer.empty()); - - // Create new DexFile, to mess with std::map order: the verifier deps used - // to iterate over the map, which doesn't guarantee insertion order. We fixed - // this by passing the expected order when encoding/decoding. - std::vector<std::unique_ptr<const DexFile>> first_dex_files = OpenTestDexFiles("VerifierDeps"); - std::vector<std::unique_ptr<const DexFile>> second_dex_files = OpenTestDexFiles("MultiDex"); - std::vector<const DexFile*> dex_files; - for (auto& dex_file : first_dex_files) { - dex_files.push_back(dex_file.get()); - } - for (auto& dex_file : second_dex_files) { - dex_files.push_back(dex_file.get()); - } - - // Dump the new verifier deps to ensure it can properly read the data. 
-  VerifierDeps decoded_deps(dex_files, ArrayRef<const uint8_t>(buffer));
-  std::ostringstream stream;
-  VariableIndentationOutputStream os(&stream);
-  decoded_deps.Dump(&os);
-}
-
-TEST_F(VerifierDepsTest, UnverifiedClasses) {
-  VerifyDexFile();
-  ASSERT_FALSE(HasUnverifiedClass("LMyThread;"));
-  // Test that a class with a soft failure is recorded.
-  ASSERT_TRUE(HasUnverifiedClass("LMain;"));
-  // Test that a class with hard failure is recorded.
-  ASSERT_TRUE(HasUnverifiedClass("LMyVerificationFailure;"));
-  // Test that a class with unresolved super is recorded.
-  ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuper;"));
-  // Test that a class with unresolved super and hard failure is recorded.
-  ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;"));
-}
-
-TEST_F(VerifierDepsTest, UnverifiedOrder) {
-  ScopedObjectAccess soa(Thread::Current());
-  jobject loader = LoadDex("VerifierDeps");
-  std::vector<const DexFile*> dex_files = GetDexFiles(loader);
-  ASSERT_GT(dex_files.size(), 0u);
-  const DexFile* dex_file = dex_files[0];
-  VerifierDeps deps1(dex_files);
-  Thread* const self = Thread::Current();
-  ASSERT_TRUE(self->GetVerifierDeps() == nullptr);
-  self->SetVerifierDeps(&deps1);
-  deps1.MaybeRecordVerificationStatus(*dex_file,
-      dex::TypeIndex(0),
-      verifier::FailureKind::kHardFailure);
-  deps1.MaybeRecordVerificationStatus(*dex_file,
-      dex::TypeIndex(1),
-      verifier::FailureKind::kHardFailure);
-  VerifierDeps deps2(dex_files);
-  self->SetVerifierDeps(nullptr);
-  self->SetVerifierDeps(&deps2);
-  deps2.MaybeRecordVerificationStatus(*dex_file,
-      dex::TypeIndex(1),
-      verifier::FailureKind::kHardFailure);
-  deps2.MaybeRecordVerificationStatus(*dex_file,
-      dex::TypeIndex(0),
-      verifier::FailureKind::kHardFailure);
-  self->SetVerifierDeps(nullptr);
-  std::vector<uint8_t> buffer1;
-  deps1.Encode(dex_files, &buffer1);
-  std::vector<uint8_t> buffer2;
-  deps2.Encode(dex_files, &buffer2);
-  EXPECT_EQ(buffer1, buffer2);
-}
-
-TEST_F(VerifierDepsTest, VerifyDeps) {
-  VerifyDexFile();
-
-  ASSERT_EQ(1u, NumberOfCompiledDexFiles());
-  ASSERT_TRUE(HasEachKindOfRecord());
-
-  // When validating, we create a new class loader, as
-  // the existing `class_loader_` may contain erroneous classes,
-  // that ClassLinker::FindClass won't return.
-
-  ScopedObjectAccess soa(Thread::Current());
-  StackHandleScope<1> hs(soa.Self());
-  MutableHandle<mirror::ClassLoader> new_class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
-  {
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_TRUE(verifier_deps_->ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  std::vector<uint8_t> buffer;
-  verifier_deps_->Encode(dex_files_, &buffer);
-  ASSERT_FALSE(buffer.empty());
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_TRUE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  // Fiddle with the dependencies to make sure we catch any change and fail to verify.
-
-  {
-    // Mess up with the assignable_types.
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    deps->assignable_types_.insert(*deps->unassignable_types_.begin());
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    // Mess up with the unassignable_types.
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    deps->unassignable_types_.insert(*deps->assignable_types_.begin());
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  // Mess up with classes.
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->classes_) {
-      if (entry.IsResolved()) {
-        deps->classes_.insert(VerifierDeps::ClassResolution(
-            entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->classes_) {
-      if (!entry.IsResolved()) {
-        deps->classes_.insert(VerifierDeps::ClassResolution(
-            entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker - 1));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->classes_) {
-      if (entry.IsResolved()) {
-        deps->classes_.insert(VerifierDeps::ClassResolution(
-            entry.GetDexTypeIndex(), entry.GetAccessFlags() - 1));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  // Mess up with fields.
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->fields_) {
-      if (entry.IsResolved()) {
-        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
-            VerifierDeps::kUnresolvedMarker,
-            entry.GetDeclaringClassIndex()));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->fields_) {
-      if (!entry.IsResolved()) {
-        constexpr dex::StringIndex kStringIndexZero(0);  // We know there is a class there.
-        deps->fields_.insert(VerifierDeps::FieldResolution(0 /* we know there is a field there */,
-            VerifierDeps::kUnresolvedMarker - 1,
-            kStringIndexZero));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->fields_) {
-      if (entry.IsResolved()) {
-        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
-            entry.GetAccessFlags() - 1,
-            entry.GetDeclaringClassIndex()));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    for (const auto& entry : deps->fields_) {
-      constexpr dex::StringIndex kNewTypeIndex(0);
-      if (entry.GetDeclaringClassIndex() != kNewTypeIndex) {
-        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
-            entry.GetAccessFlags(),
-            kNewTypeIndex));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  // Mess up with methods.
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    std::set<VerifierDeps::MethodResolution>* methods = &deps->methods_;
-    for (const auto& entry : *methods) {
-      if (entry.IsResolved()) {
-        methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
-            VerifierDeps::kUnresolvedMarker,
-            entry.GetDeclaringClassIndex()));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    std::set<VerifierDeps::MethodResolution>* methods = &deps->methods_;
-    for (const auto& entry : *methods) {
-      if (!entry.IsResolved()) {
-        constexpr dex::StringIndex kStringIndexZero(0);  // We know there is a class there.
-        methods->insert(VerifierDeps::MethodResolution(0 /* we know there is a method there */,
-            VerifierDeps::kUnresolvedMarker - 1,
-            kStringIndexZero));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    std::set<VerifierDeps::MethodResolution>* methods = &deps->methods_;
-    for (const auto& entry : *methods) {
-      if (entry.IsResolved()) {
-        methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
-            entry.GetAccessFlags() - 1,
-            entry.GetDeclaringClassIndex()));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-
-  {
-    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-    bool found = false;
-    std::set<VerifierDeps::MethodResolution>* methods = &deps->methods_;
-    for (const auto& entry : *methods) {
-      constexpr dex::StringIndex kNewTypeIndex(0);
-      if (entry.IsResolved() && entry.GetDeclaringClassIndex() != kNewTypeIndex) {
-        methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
-            entry.GetAccessFlags(),
-            kNewTypeIndex));
-        found = true;
-        break;
-      }
-    }
-    ASSERT_TRUE(found);
-    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
-    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
-  }
-}
-
-TEST_F(VerifierDepsTest, CompilerDriver) {
-  SetupCompilerDriver();
-
-  // Test both multi-dex and single-dex configuration.
-  for (const char* multi : { "MultiDex", static_cast<const char*>(nullptr) }) {
-    // Test that the compiler driver behaves as expected when the dependencies
-    // verify and when they don't verify.
-    for (bool verify_failure : { false, true }) {
-      {
-        ScopedObjectAccess soa(Thread::Current());
-        LoadDexFile(&soa, "VerifierDeps", multi);
-      }
-      VerifyWithCompilerDriver(/* verifier_deps */ nullptr);
-
-      std::vector<uint8_t> buffer;
-      verifier_deps_->Encode(dex_files_, &buffer);
-
-      {
-        ScopedObjectAccess soa(Thread::Current());
-        LoadDexFile(&soa, "VerifierDeps", multi);
-      }
-      verifier::VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
-      if (verify_failure) {
-        // Just taint the decoded VerifierDeps with one invalid entry.
-        VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
-        bool found = false;
-        for (const auto& entry : deps->classes_) {
-          if (entry.IsResolved()) {
-            deps->classes_.insert(VerifierDeps::ClassResolution(
-                entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
-            found = true;
-            break;
-          }
-        }
-        ASSERT_TRUE(found);
-      }
-      VerifyWithCompilerDriver(&decoded_deps);
-
-      if (verify_failure) {
-        ASSERT_FALSE(verifier_deps_ == nullptr);
-        ASSERT_FALSE(verifier_deps_->Equals(decoded_deps));
-      } else {
-        VerifyClassStatus(decoded_deps);
-      }
-    }
-  }
-}
-
-TEST_F(VerifierDepsTest, MultiDexVerification) {
-  VerifyDexFile("VerifierDepsMulti");
-  ASSERT_EQ(NumberOfCompiledDexFiles(), 2u);
-
-  ASSERT_TRUE(HasUnverifiedClass("LMySoftVerificationFailure;", *dex_files_[1]));
-  ASSERT_TRUE(HasUnverifiedClass("LMySub1SoftVerificationFailure;", *dex_files_[0]));
-  ASSERT_TRUE(HasUnverifiedClass("LMySub2SoftVerificationFailure;", *dex_files_[0]));
-
-  std::vector<uint8_t> buffer;
-  verifier_deps_->Encode(dex_files_, &buffer);
-  ASSERT_FALSE(buffer.empty());
-}
-
-TEST_F(VerifierDepsTest, NotAssignable_InterfaceWithClassInBoot) {
-  ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/lang/Exception;",
-      /* src */ "LIface;",
-      /* is_strict */ true,
-      /* is_assignable */ false));
-  ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "LIface;", false));
-}
-
-TEST_F(VerifierDepsTest, Assignable_Arrays) {
-  ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "[LIface;",
-      /* src */ "[LMyClassExtendingInterface;",
-      /* is_strict */ false,
-      /* is_assignable */ true));
-  ASSERT_FALSE(HasAssignable(
-      "LIface;", "LMyClassExtendingInterface;", /* expected_is_assignable */ true));
-  ASSERT_FALSE(HasAssignable(
-      "LIface;", "LMyClassExtendingInterface;", /* expected_is_assignable */ false));
-}
-
-}  // namespace verifier
-}  // namespace art