Diffstat (limited to 'compiler')
65 files changed, 2586 insertions, 1033 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index f0bf4997c6..458973684e 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -108,7 +108,8 @@ LIBART_COMPILER_SRC_FILES := \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
 	image_writer.cc \
-	oat_writer.cc
+	oat_writer.cc \
+	profile_assistant.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 278c49017e..b5fd1e074f 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -208,8 +208,8 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe
                                             false,
                                             timer_.get(),
                                             -1,
-                                            /* profile_file */ "",
-                                            /* dex_to_oat_map */ nullptr));
+                                            /* dex_to_oat_map */ nullptr,
+                                            /* profile_compilation_info */ nullptr));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index bcf20c7efa..12568a4ad4 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -92,7 +92,7 @@ class QuickCFITest : public CFITest {
                           false,
                           0,
                           -1,
-                          "",
+                          nullptr,
                           nullptr);
     ClassLinker* linker = nullptr;
     CompilationUnit cu(&pool, isa, &driver, linker);
diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc
index 3260a7a050..ebc9a2c9ea 100644
--- a/compiler/dex/quick/quick_compiler.cc
+++ b/compiler/dex/quick/quick_compiler.cc
@@ -520,7 +520,12 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set)
 // In the rare cases we compile experimental opcodes, the runtime has an option to enable it,
 // which will force scanning for any unsupported opcodes.
 static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
-  if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(runtime->AreExperimentalFlagsEnabled(ExperimentalFlags::kDefaultMethods))) {
+    // Always need to scan opcodes if we have default methods since invoke-super for interface
+    // methods is never going to be supported in the quick compiler.
+    return false;
+  } else if (UNLIKELY(kUnsupportedOpcodesSize[instruction_set] == 0U)) {
     // All opcodes are supported no matter what. Usually not the case
     // since experimental opcodes are not implemented in the quick compiler.
     return true;
@@ -538,8 +543,28 @@ static bool SkipScanningUnsupportedOpcodes(InstructionSet instruction_set) {
   }
 }
 
+bool QuickCompiler::CanCompileInstruction(const MIR* mir,
+                                          const DexFile& dex_file) const {
+  switch (mir->dalvikInsn.opcode) {
+    // Quick compiler won't support new instruction semantics to invoke-super into an interface
+    // method
+    case Instruction::INVOKE_SUPER:  // Fall-through
+    case Instruction::INVOKE_SUPER_RANGE: {
+      DCHECK(mir->dalvikInsn.IsInvoke());
+      uint32_t invoke_method_idx = mir->dalvikInsn.vB;
+      const DexFile::MethodId& method_id = dex_file.GetMethodId(invoke_method_idx);
+      const DexFile::ClassDef* class_def = dex_file.FindClassDef(method_id.class_idx_);
+      // False if we are an interface i.e. !(java_access_flags & kAccInterface)
+      return class_def != nullptr && ((class_def->GetJavaAccessFlags() & kAccInterface) == 0);
+    }
+    default:
+      return true;
+  }
+}
+
 // Skip the method that we do not support currently.
-bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_file,
+bool QuickCompiler::CanCompileMethod(uint32_t method_idx,
+                                     const DexFile& dex_file,
                                      CompilationUnit* cu) const {
   // This is a limitation in mir_graph. See MirGraph::SetNumSSARegs.
   if (cu->mir_graph->GetNumOfCodeAndTempVRs() > kMaxAllowedDalvikRegisters) {
@@ -580,6 +605,9 @@ bool QuickCompiler::CanCompileMethod(uint32_t method_idx, const DexFile& dex_fil
             << MIRGraph::extended_mir_op_names_[opcode - kMirOpFirst];
       }
       return false;
+    } else if (!CanCompileInstruction(mir, dex_file)) {
+      VLOG(compiler) << "Cannot compile dalvik opcode : " << mir->dalvikInsn.opcode;
+      return false;
     }
     // Check if it invokes a prototype that we cannot support.
     if (std::find(kInvokeOpcodes, kInvokeOpcodes + arraysize(kInvokeOpcodes), opcode)
diff --git a/compiler/dex/quick/quick_compiler.h b/compiler/dex/quick/quick_compiler.h
index d512b256cd..55f45f1ab0 100644
--- a/compiler/dex/quick/quick_compiler.h
+++ b/compiler/dex/quick/quick_compiler.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_QUICK_COMPILER_H_
 
 #include "compiler.h"
+#include "dex/mir_graph.h"
 
 namespace art {
 
@@ -74,6 +75,8 @@ class QuickCompiler : public Compiler {
   explicit QuickCompiler(CompilerDriver* driver);
 
  private:
+  bool CanCompileInstruction(const MIR* mir, const DexFile& dex_file) const;
+
   std::unique_ptr<PassManager> pre_opt_pass_manager_;
   std::unique_ptr<PassManager> post_opt_pass_manager_;
   DISALLOW_COPY_AND_ASSIGN(QuickCompiler);
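Note on the invoke-super change above: the Quick backend now rejects methods containing an invoke-super that resolves into an interface (default) method, since Quick never learned the new semantics for that case. A minimal sketch of the underlying dex-file predicate, using only the ART-internal DexFile API visible in the hunk (the standalone function name is illustrative):

    // Returns true when the invoke-super target is declared on a regular
    // class; false when the declaring type is an interface (a default
    // method), which the Quick compiler cannot handle.
    static bool IsSupportedInvokeSuperTarget(const art::DexFile& dex_file, uint32_t method_idx) {
      const art::DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
      // FindClassDef returns null for types defined in another dex file.
      const art::DexFile::ClassDef* class_def = dex_file.FindClassDef(method_id.class_idx_);
      return class_def != nullptr &&
             (class_def->GetJavaAccessFlags() & art::kAccInterface) == 0;
    }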
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index 9deabc02e9..b39fe4da4f 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -73,7 +73,7 @@ class QuickAssembleX86TestBase : public testing::Test {
         false,
         0,
         -1,
-        "",
+        nullptr,
         nullptr));
     cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
     DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 84fb4324b5..f18fa67ea5 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -45,7 +45,7 @@ TEST(CompiledMethodStorage, Deduplicate) {
                         false,
                         nullptr,
                         -1,
-                        "",
+                        nullptr,
                         nullptr);
   CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index afb4b71ccf..043bd93bd7 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -347,8 +347,8 @@ CompilerDriver::CompilerDriver(
     size_t thread_count, bool dump_stats, bool dump_passes,
     const std::string& dump_cfg_file_name, bool dump_cfg_append,
     CumulativeLogger* timer, int swap_fd,
-    const std::string& profile_file,
-    const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map)
+    const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+    const ProfileCompilationInfo* profile_compilation_info)
     : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
@@ -377,7 +377,8 @@ CompilerDriver::CompilerDriver(
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
       dex_file_oat_filename_map_(dex_to_oat_map),
-      compiled_method_storage_(swap_fd) {
+      compiled_method_storage_(swap_fd),
+      profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
@@ -385,12 +386,6 @@ CompilerDriver::CompilerDriver(
   compiler_->Init();
 
   CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
-
-  // Read the profile file if one is provided.
-  if (!profile_file.empty()) {
-    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
-    LOG(INFO) << "Using profile data from file " << profile_file;
-  }
 }
 
 CompilerDriver::~CompilerDriver() {
@@ -2306,15 +2301,11 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
-  if (profile_compilation_info_ != nullptr) {
-    if (!profile_compilation_info_->Load(dex_files)) {
-      LOG(WARNING) << "Failed to load offline profile info from "
-          << profile_compilation_info_->GetFilename()
-          << ". No methods will be compiled";
-    } else if (kDebugProfileGuidedCompilation) {
-      LOG(INFO) << "[ProfileGuidedCompilation] "
-          << profile_compilation_info_->DumpInfo();
-    }
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] " <<
+        ((profile_compilation_info_ == nullptr)
+            ? "null"
+            : profile_compilation_info_->DumpInfo(&dex_files));
   }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index fa0cb9a412..3847c8183e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,8 +97,8 @@ class CompilerDriver {
                  size_t thread_count, bool dump_stats, bool dump_passes,
                  const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
-                 const std::string& profile_file,
-                 const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map);
+                 const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+                 const ProfileCompilationInfo* profile_compilation_info);
 
   ~CompilerDriver();
 
@@ -657,9 +657,6 @@ class CompilerDriver {
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
-  // Info for profile guided compilation.
-  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
-
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
@@ -689,6 +686,9 @@ class CompilerDriver {
 
   CompiledMethodStorage compiled_method_storage_;
 
+  // Info for profile guided compilation.
+  const ProfileCompilationInfo* const profile_compilation_info_;
+
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
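With this change the driver no longer opens or loads the profile itself; ownership of the ProfileCompilationInfo moves to the caller (dex2oat and the new profile_assistant.cc), and the driver keeps only a read-only pointer. A hedged sketch of the resulting call-site shape (earlier constructor arguments elided; the profile object must outlive the driver):

    // Caller-side sketch: load the profile first, then hand the driver a
    // const pointer, or nullptr when no profile-guided data is available.
    ProfileCompilationInfo profile_info;  // assumed populated by the caller
    CompilerDriver driver(/* ...other options as before... */,
                          /* swap_fd */ -1,
                          /* dex_to_oat_map */ nullptr,
                          /* profile_compilation_info */ &profile_info);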
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 209bb5a3c2..385f34a9f9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -211,11 +211,9 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
     generate_debug_info_ = false;
   } else if (option == "--debuggable") {
     debuggable_ = true;
-    generate_debug_info_ = true;
   } else if (option == "--native-debuggable") {
     native_debuggable_ = true;
     debuggable_ = true;
-    generate_debug_info_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index f8032bb514..f14bdc4a2f 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -50,7 +50,7 @@ class CompilerOptions FINAL {
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultNativeDebuggable = false;
-  static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
+  static const bool kDefaultGenerateDebugInfo = false;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 32;
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
index a391e4d08a..e8ba9148e8 100644
--- a/compiler/dwarf/method_debug_info.h
+++ b/compiler/dwarf/method_debug_info.h
@@ -30,8 +30,8 @@ struct MethodDebugInfo {
   uint32_t access_flags_;
   const DexFile::CodeItem* code_item_;
   bool deduped_;
-  uint32_t low_pc_;
-  uint32_t high_pc_;
+  uintptr_t low_pc_;
+  uintptr_t high_pc_;
   CompiledMethod* compiled_method_;
 };
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bb07cc2913..a7461a5525 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -148,6 +148,12 @@ class ElfBuilder FINAL {
       }
     }
 
+    // Returns true if the section was written to disk.
+    // (Used to check whether we have .text when writing JIT debug info)
+    bool Exists() const {
+      return finished_;
+    }
+
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
      CHECK(started_);
@@ -247,16 +253,18 @@
     }
 
     // Buffer symbol for this section. It will be written later.
+    // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+    // (we use this in JIT to reference code which is stored outside the debug ELF file)
     void Add(Elf_Word name, const Section* section, Elf_Addr addr,
              bool is_relative, Elf_Word size, uint8_t binding, uint8_t type,
              uint8_t other = 0) {
-      CHECK(section != nullptr);
       Elf_Sym sym = Elf_Sym();
       sym.st_name = name;
       sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
       sym.st_size = size;
       sym.st_other = other;
-      sym.st_shndx = section->GetSectionIndex();
+      sym.st_shndx = (section != nullptr ? section->GetSectionIndex()
                                          : static_cast<Elf_Word>(SHN_ABS));
       sym.st_info = (binding << 4) + (type & 0xf);
       symbols_.push_back(sym);
     }
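The nullable-section case in Add() above exists so the JIT can emit symbols whose st_value is an absolute runtime address rather than an offset into a section of the debug ELF file itself. A sketch of such a call, using the signature from the hunk (address and size values are made up):

    // JIT-side sketch: the code lives in the code cache, outside this
    // in-memory ELF file, so pass a null section to get st_shndx = SHN_ABS.
    symtab->Add(strtab->Write("jitted_method"),
                /* section */ nullptr,    // null -> absolute (SHN_ABS)
                /* addr */ 0x70001000u,   // absolute address of the code
                /* is_relative */ false,  // no section base to add
                /* size */ 0x80u,
                STB_GLOBAL,
                STT_FUNC);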
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 2bc8c89f73..dd50f69b71 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -22,16 +22,20 @@
 #include "base/casts.h"
 #include "base/stl_util.h"
 #include "compiled_method.h"
-#include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
 #include "oat_writer.h"
-#include "utils.h"
 #include "stack_map.h"
+#include "utils.h"
 
 namespace art {
 namespace dwarf {
@@ -219,6 +223,10 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
   CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  if (method_infos.empty()) {
+    return;
+  }
+
   std::vector<uint32_t> binary_search_table;
   std::vector<uintptr_t> patch_locations;
   if (format == DW_EH_FRAME_FORMAT) {
@@ -234,7 +242,9 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr text_address = builder->GetText()->Exists()
+        ? builder->GetText()->GetAddress()
+        : 0;
     const Elf_Addr cfi_address = cfi_section->GetAddress();
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
@@ -305,8 +315,8 @@ namespace {
 struct CompilationUnit {
   std::vector<const MethodDebugInfo*> methods_;
   size_t debug_line_offset_ = 0;
-  uint32_t low_pc_ = 0xFFFFFFFFU;
-  uint32_t high_pc_ = 0;
+  uintptr_t low_pc_ = std::numeric_limits<uintptr_t>::max();
+  uintptr_t high_pc_ = 0;
 };
 
 typedef std::vector<DexFile::LocalInfo> LocalInfos;
@@ -439,14 +449,17 @@ class DebugInfoWriter {
 
   void Write(const CompilationUnit& compilation_unit) {
     CHECK(!compilation_unit.methods_.empty());
-    const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+        ? owner_->builder_->GetText()->GetAddress()
+        : 0;
+    const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
     info_.StartTag(DW_TAG_compile_unit);
     info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
     info_.WriteData1(DW_AT_language, DW_LANG_Java);
     info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
     info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
-    info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+    info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
     info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
 
     const char* last_dex_class_desc = nullptr;
@@ -464,8 +477,16 @@ class DebugInfoWriter {
         if (last_dex_class_desc != nullptr) {
           EndClassTag(last_dex_class_desc);
         }
-        size_t offset = StartClassTag(dex_class_desc);
-        type_cache_.emplace(dex_class_desc, offset);
+        // Write reference tag for the class we are about to declare.
+        size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+        type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+        size_t type_attrib_offset = info_.size();
+        info_.WriteRef4(DW_AT_type, 0);
+        info_.EndTag();
+        // Declare the class that owns this method.
+        size_t class_offset = StartClassTag(dex_class_desc);
+        info_.UpdateUint32(type_attrib_offset, class_offset);
+        info_.WriteFlag(DW_AT_declaration, true);
         // Check that each class is defined only once.
         bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
         CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -476,7 +497,7 @@ class DebugInfoWriter {
       info_.StartTag(DW_TAG_subprogram);
       WriteName(dex->GetMethodName(dex_method));
       info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
-      info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc_ - mi->low_pc_));
       uint8_t frame_base[] = { DW_OP_call_frame_cfa };
       info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
       WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
@@ -562,6 +583,92 @@ class DebugInfoWriter {
     owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
   }
 
+  void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    info_.StartTag(DW_TAG_compile_unit);
+    info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+    info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+    for (mirror::Class* type : types) {
+      if (type->IsPrimitive()) {
+        // For primitive types the definition and the declaration is the same.
+        if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+          WriteTypeDeclaration(type->GetDescriptor(nullptr));
+        }
+      } else if (type->IsArrayClass()) {
+        mirror::Class* element_type = type->GetComponentType();
+        uint32_t component_size = type->GetComponentSize();
+        uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+        uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+        info_.StartTag(DW_TAG_array_type);
+        std::string descriptor_string;
+        WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+        info_.WriteUdata(DW_AT_data_member_location, data_offset);
+        info_.StartTag(DW_TAG_subrange_type);
+        DCHECK_LT(length_offset, 32u);
+        uint8_t count[] = {
+          DW_OP_push_object_address,
+          static_cast<uint8_t>(DW_OP_lit0 + length_offset),
+          DW_OP_plus,
+          DW_OP_deref_size,
+          4  // Array length is always 32-bit wide.
+        };
+        info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+        info_.EndTag();  // DW_TAG_subrange_type.
+        info_.EndTag();  // DW_TAG_array_type.
+      } else {
+        std::string descriptor_string;
+        const char* desc = type->GetDescriptor(&descriptor_string);
+        StartClassTag(desc);
+
+        if (!type->IsVariableSize()) {
+          info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+        }
+
+        // Base class.
+        mirror::Class* base_class = type->GetSuperClass();
+        if (base_class != nullptr) {
+          info_.StartTag(DW_TAG_inheritance);
+          WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+          info_.WriteUdata(DW_AT_data_member_location, 0);
+          info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          info_.EndTag();  // DW_TAG_inheritance.
+        }
+
+        // Member variables.
+        for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+          ArtField* field = type->GetInstanceField(i);
+          info_.StartTag(DW_TAG_member);
+          WriteName(field->GetName());
+          WriteLazyType(field->GetTypeDescriptor());
+          info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+          uint32_t access_flags = field->GetAccessFlags();
+          if (access_flags & kAccPublic) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          } else if (access_flags & kAccProtected) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+          } else if (access_flags & kAccPrivate) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+          }
+          info_.EndTag();  // DW_TAG_member.
+        }
+
+        EndClassTag(desc);
+      }
+    }
+
+    CHECK_EQ(info_.Depth(), 1);
+    FinishLazyTypes();
+    info_.EndTag();  // DW_TAG_compile_unit.
+    std::vector<uint8_t> buffer;
+    buffer.reserve(info_.data()->size() + KB);
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    const size_t debug_abbrev_offset =
+        owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+    WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+    owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  }
+
   // Write table into .debug_loc which describes location of dex register.
   // The dex register might be valid only at some points and it might
   // move between machine registers and stack.
@@ -715,14 +822,14 @@ class DebugInfoWriter {
   // just define all types lazily at the end of compilation unit.
   void WriteLazyType(const char* type_descriptor) {
     if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
-      lazy_types_.emplace(type_descriptor, info_.size());
+      lazy_types_.emplace(std::string(type_descriptor), info_.size());
       info_.WriteRef4(DW_AT_type, 0);
     }
   }
 
   void FinishLazyTypes() {
     for (const auto& lazy_type : lazy_types_) {
-      info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+      info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
     }
     lazy_types_.clear();
   }
@@ -747,30 +854,39 @@ class DebugInfoWriter {
 
   // Convert dex type descriptor to DWARF.
   // Returns offset in the compilation unit.
-  size_t WriteType(const char* desc) {
+  size_t WriteTypeDeclaration(const std::string& desc) {
+    DCHECK(!desc.empty());
     const auto& it = type_cache_.find(desc);
     if (it != type_cache_.end()) {
       return it->second;
     }
 
     size_t offset;
-    if (*desc == 'L') {
+    if (desc[0] == 'L') {
       // Class type. For example: Lpackage/name;
-      offset = StartClassTag(desc);
+      size_t class_offset = StartClassTag(desc.c_str());
       info_.WriteFlag(DW_AT_declaration, true);
-      EndClassTag(desc);
+      EndClassTag(desc.c_str());
+      // Reference to the class type.
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef(DW_AT_type, class_offset);
+      info_.EndTag();
-    } else if (*desc == '[') {
+    } else if (desc[0] == '[') {
       // Array type.
-      size_t element_type = WriteType(desc + 1);
-      offset = info_.StartTag(DW_TAG_array_type);
+      size_t element_type = WriteTypeDeclaration(desc.substr(1));
+      size_t array_type = info_.StartTag(DW_TAG_array_type);
+      info_.WriteFlag(DW_AT_declaration, true);
       info_.WriteRef(DW_AT_type, element_type);
       info_.EndTag();
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef4(DW_AT_type, array_type);
+      info_.EndTag();
     } else {
       // Primitive types.
       const char* name;
       uint32_t encoding;
       uint32_t byte_size;
-      switch (*desc) {
+      switch (desc[0]) {
         case 'B':
          name = "byte";
          encoding = DW_ATE_signed;
@@ -815,7 +931,7 @@ class DebugInfoWriter {
           LOG(FATAL) << "Void type should not be encoded";
           UNREACHABLE();
         default:
-          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
       }
       offset = info_.StartTag(DW_TAG_base_type);
@@ -865,9 +981,10 @@ class DebugInfoWriter {
   // Temporary buffer to create and store the entries.
   DebugInfoEntryWriter<> info_;
   // Cache of already translated type descriptors.
-  std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+  std::map<std::string, size_t> type_cache_;  // type_desc -> definition_offset.
   // 32-bit references which need to be resolved to a type later.
-  std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+  // Given type may be used multiple times. Therefore we need a multimap.
+  std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
 };

 public:
@@ -883,6 +1000,11 @@ class DebugInfoWriter {
     writer.Write(compilation_unit);
   }
 
+  void WriteTypes(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    CompilationUnitWriter writer(this);
+    writer.Write(types);
+  }
+
   void End() {
     builder_->GetDebugInfo()->End();
     builder_->WritePatches(".debug_info.oat_patches",
@@ -924,7 +1046,9 @@ class DebugLineWriter {
   // Returns the number of bytes written.
   size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = builder_->GetText()->Exists()
+        ? builder_->GetText()->GetAddress()
+        : 0;
 
     compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
@@ -1102,9 +1226,27 @@ class DebugLineWriter {
   std::vector<uintptr_t> debug_line_patches;
 };
 
+// Get all types loaded by the runtime.
+static std::vector<mirror::Class*> GetLoadedRuntimeTypes() SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<mirror::Class*> result;
+  class CollectClasses : public ClassVisitor {
+   public:
+    virtual bool Visit(mirror::Class* klass) {
+      classes_->push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*>* classes_;
+  };
+  CollectClasses visitor;
+  visitor.classes_ = &result;
+  Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
+  return result;
+}
+
 template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                               bool write_loaded_runtime_types,
+                               const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1122,7 +1264,7 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .debug_line section.
-  {
+  if (!compilation_units.empty()) {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
     for (auto& compilation_unit : compilation_units) {
@@ -1132,12 +1274,19 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .debug_info section.
-  {
+  if (!compilation_units.empty() || write_loaded_runtime_types) {
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
+    if (write_loaded_runtime_types) {
+      Thread* self = Thread::Current();
+      // The lock prevents the classes being moved by the GC.
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      std::vector<mirror::Class*> types = GetLoadedRuntimeTypes();
+      info_writer.WriteTypes(ArrayRef<mirror::Class*>(types.data(), types.size()));
+    }
     info_writer.End();
   }
 }
@@ -1173,11 +1322,13 @@ void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
       name += " [DEDUPED]";
     }
 
+    const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
+    const bool is_relative = (text != nullptr);
     uint32_t low_pc = info.low_pc_;
     // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
     low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(strtab->Write(name), text, low_pc,
+                is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
 
     // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
     // instructions, so that disassembler tools can correctly disassemble.
     // requires it to match function symbol. Just address 0 does not work.
     if (info.compiled_method_->GetInstructionSet() == kThumb2) {
       if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), text, info.low_pc_ & ~1,
+                    is_relative, 0, STB_LOCAL, STT_NOTYPE);
         generated_mapping_symbol = true;
       }
     }
@@ -1202,25 +1353,89 @@
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format) {
-  if (!method_infos.empty()) {
-    // Add methods to .symtab.
-    WriteDebugSymbols(builder, method_infos);
-    // Generate CFI (stack unwinding information).
-    WriteCFISection(builder, method_infos, cfi_format);
-    // Write DWARF .debug_* sections.
-    WriteDebugSections(builder, method_infos);
-  }
+  // Add methods to .symtab.
+  WriteDebugSymbols(builder, method_infos);
+  // Generate CFI (stack unwinding information).
+  WriteCFISection(builder, method_infos, cfi_format);
+  // Write DWARF .debug_* sections.
+  WriteDebugSections(builder, write_loaded_runtime_types, method_infos);
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
+    const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+  WriteDebugInfo(builder.get(),
+                 false,
+                 ArrayRef<const MethodDebugInfo>(&method_info, 1),
+                 DW_DEBUG_FRAME_FORMAT);
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+  } else {
+    return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+  }
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassInternal(const InstructionSet isa,
+                                                                 mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+
+  DebugInfoWriter<ElfTypes> info_writer(builder.get());
+  info_writer.Start();
+  info_writer.WriteTypes(ArrayRef<mirror::Class*>(&type, 1));
+  info_writer.End();
+
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForClassInternal<ElfTypes64>(isa, type);
+  } else {
+    return WriteDebugElfFileForClassInternal<ElfTypes32>(isa, type);
+  }
 }
 
 // Explicit instantiations
 template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
 template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
     CFIFormat cfi_format);
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 7ec0be185a..91da00f97a 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,19 +17,30 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include "elf_builder.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
+#include "elf_builder.h"
 #include "utils/array_ref.h"
 
 namespace art {
+namespace mirror {
+class Class;
+}
 namespace dwarf {
+struct MethodDebugInfo;
 
 template <typename ElfTypes>
 void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
                     const ArrayRef<const MethodDebugInfo>& method_infos,
                     CFIFormat cfi_format);
 
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_);
+
 }  // namespace dwarf
 }  // namespace art
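WriteDebugElfFileForMethod above returns a new[]-allocated buffer wrapped in an ArrayRef, so the caller takes ownership. A hedged usage sketch; the registration hook is hypothetical, standing in for whatever debugger interface the JIT wires this into:

    // Sketch: build a one-method in-memory ELF file and hand it to a
    // (hypothetical) debugger-registration hook.
    void PublishJitDebugInfo(const art::dwarf::MethodDebugInfo& info) {
      art::ArrayRef<const uint8_t> elf = art::dwarf::WriteDebugElfFileForMethod(info);
      RegisterElfWithDebugger(elf.data(), elf.size());  // hypothetical hook
      // The buffer came from new[]; delete[] elf.data() once unregistered.
    }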
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 7b1bdd72e5..a67f3bd1a9 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -152,7 +152,7 @@ template <typename ElfTypes>
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    dwarf::WriteDebugInfo(builder_.get(), method_infos, kCFIFormat);
+    dwarf::WriteDebugInfo(builder_.get(), /* write_types */ true, method_infos, kCFIFormat);
   }
 }
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 17d0f61a34..d0bb201d69 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -76,23 +76,35 @@ static constexpr bool kBinObjects = true;
 
 // Return true if an object is already in an image space.
 bool ImageWriter::IsInBootImage(const void* obj) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   if (!compile_app_image_) {
-    DCHECK(boot_image_space_ == nullptr);
+    DCHECK(heap->GetBootImageSpaces().empty());
     return false;
   }
-  const uint8_t* image_begin = boot_image_space_->Begin();
-  // Real image end including ArtMethods and ArtField sections.
-  const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize();
-  return image_begin <= obj && obj < image_end;
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const uint8_t* image_begin = boot_image_space->Begin();
+    // Real image end including ArtMethods and ArtField sections.
+    const uint8_t* image_end = image_begin + boot_image_space->GetImageHeader().GetImageSize();
+    if (image_begin <= obj && obj < image_end) {
+      return true;
+    }
+  }
+  return false;
 }
 
 bool ImageWriter::IsInBootOatFile(const void* ptr) const {
+  gc::Heap* const heap = Runtime::Current()->GetHeap();
   if (!compile_app_image_) {
-    DCHECK(boot_image_space_ == nullptr);
+    DCHECK(heap->GetBootImageSpaces().empty());
     return false;
   }
-  const ImageHeader& image_header = boot_image_space_->GetImageHeader();
-  return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd();
+  for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) {
+    const ImageHeader& image_header = boot_image_space->GetImageHeader();
+    if (image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd()) {
+      return true;
+    }
+  }
+  return false;
 }
 
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
@@ -109,14 +121,6 @@ static void CheckNoDexObjects() {
 bool ImageWriter::PrepareImageAddressSpace() {
   target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet());
   gc::Heap* const heap = Runtime::Current()->GetHeap();
-  // Cache boot image space.
-  for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) {
-    if (space->IsImageSpace()) {
-      CHECK(compile_app_image_);
-      CHECK(boot_image_space_ == nullptr) << "Multiple image spaces";
-      boot_image_space_ = space->AsImageSpace();
-    }
-  }
   {
     ScopedObjectAccess soa(Thread::Current());
     PruneNonImageClasses();  // Remove junk
@@ -205,9 +209,6 @@ bool ImageWriter::Write(int image_fd,
         oat_header.GetQuickResolutionTrampolineOffset();
     image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] =
         oat_header.GetQuickToInterpreterBridgeOffset();
-  } else {
-    // Other oat files use the primary trampolines.
-    // TODO: Dummy values to protect usage? b/26317072
   }
@@ -635,11 +636,11 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const
 bool ImageWriter::AllocMemory() {
   for (const char* oat_filename : oat_filenames_) {
     ImageInfo& image_info = GetImageInfo(oat_filename);
-    const size_t length = RoundUp(image_objects_offset_begin_ +
-                                      GetBinSizeSum(image_info) +
-                                      intern_table_bytes_ +
-                                      class_table_bytes_,
-                                  kPageSize);
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    const size_t length = RoundUp(
+        image_info.CreateImageSections(target_ptr_size_, unused_sections),
+        kPageSize);
+
     std::string error_msg;
     image_info.image_.reset(MemMap::MapAnonymous("image writer image",
                                                  nullptr,
@@ -909,14 +910,17 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) {
   DCHECK(obj != nullptr);
   // if it is a string, we want to intern it if its not interned.
   if (obj->GetClass()->IsStringClass()) {
+    const char* oat_filename = GetOatFilename(obj);
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+
     // we must be an interned string that was forward referenced and already assigned
     if (IsImageBinSlotAssigned(obj)) {
-      DCHECK_EQ(obj, obj->AsString()->Intern());
+      DCHECK_EQ(obj, image_info.intern_table_->InternStrongImageString(obj->AsString()));
       return;
     }
     // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since
     // we are guaranteed to not have GC during image writing.
-    mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString(
+    mirror::String* const interned = image_info.intern_table_->InternStrongImageString(
         obj->AsString());
     if (obj != interned) {
       if (!IsImageBinSlotAssigned(interned)) {
@@ -1067,6 +1071,13 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) {
       };
       const char* oat_file = GetOatFilenameForDexCache(dex_cache);
       ImageInfo& image_info = GetImageInfo(oat_file);
+      {
+        // Note: This table is only accessed from the image writer, so the lock is technically
+        // unnecessary.
+        WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+        // Insert in the class table for this image.
+        image_info.class_table_->Insert(as_klass);
+      }
       for (LengthPrefixedArray<ArtField>* cur_fields : fields) {
         // Total array length including header.
         if (cur_fields != nullptr) {
@@ -1249,6 +1260,18 @@ void ImageWriter::CalculateNewObjectOffsets() {
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
 
+  // Calculate the sizes of the intern tables and class tables.
+  for (const char* oat_filename : oat_filenames_) {
+    ImageInfo& image_info = GetImageInfo(oat_filename);
+    // Calculate how big the intern table will be after being serialized.
+    InternTable* const intern_table = image_info.intern_table_.get();
+    CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
+    image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+    // Calculate the size of the class table.
+    ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
+  }
+
   // Calculate bin slot offsets.
   for (const char* oat_filename : oat_filenames_) {
     ImageInfo& image_info = GetImageInfo(oat_filename);
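The sizing added above leans on the two-pass serialization convention used by InternTable and ClassTable: WriteToMemory(nullptr) is a dry run that only measures. A minimal sketch of the pattern, assuming any table following that convention:

    // Pass 1 measures, pass 2 writes; the CHECK mirrors the ones in
    // CopyAndFixupNativeData() further down.
    const size_t expected = table->WriteToMemory(nullptr);  // dry run, no writes
    std::vector<uint8_t> buffer(expected);
    const size_t written = table->WriteToMemory(buffer.data());
    CHECK_EQ(written, expected);  // both passes must agree on the size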
@@ -1275,18 +1298,11 @@ void ImageWriter::CalculateNewObjectOffsets() {
     ImageInfo& image_info = GetImageInfo(oat_filename);
     image_info.image_begin_ = global_image_begin_ + image_offset;
     image_info.image_offset_ = image_offset;
-    size_t native_sections_size = image_info.bin_slot_sizes_[kBinArtField] +
-                                  image_info.bin_slot_sizes_[kBinArtMethodDirty] +
-                                  image_info.bin_slot_sizes_[kBinArtMethodClean] +
-                                  image_info.bin_slot_sizes_[kBinDexCacheArray] +
-                                  intern_table_bytes_ +
-                                  class_table_bytes_;
-    size_t image_objects = RoundUp(image_info.image_end_, kPageSize);
-    size_t bitmap_size =
-        RoundUp(gc::accounting::ContinuousSpaceBitmap::ComputeBitmapSize(image_objects), kPageSize);
-    size_t heap_size = gc::accounting::ContinuousSpaceBitmap::ComputeHeapSize(bitmap_size);
-    size_t max = std::max(heap_size, image_info.image_end_ + native_sections_size + bitmap_size);
-    image_info.image_size_ = RoundUp(max, kPageSize);
+    ImageSection unused_sections[ImageHeader::kSectionCount];
+    image_info.image_size_ = RoundUp(
+        image_info.CreateImageSections(target_ptr_size_, unused_sections),
+        kPageSize);
+    // There should be no gaps until the next image.
     image_offset += image_info.image_size_;
   }
@@ -1310,89 +1326,69 @@ void ImageWriter::CalculateNewObjectOffsets() {
     relocation.offset += image_info.bin_slot_offsets_[bin_type];
   }
 
-  /* TODO: Reenable the intern table and class table. b/26317072
-  // Calculate how big the intern table will be after being serialized.
-  InternTable* const intern_table = runtime->GetInternTable();
-  CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
-  intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
-
-  // Write out the class table.
-  ClassLinker* class_linker = runtime->GetClassLinker();
-  if (boot_image_space_ == nullptr) {
-    // Compiling the boot image, add null class loader.
-    class_loaders_.insert(nullptr);
-  }
-  // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image
-  // with no classes.
-  if (class_loaders_.size() == 1u) {
-    // Only write the class table if we have exactly one class loader. There may be cases where
-    // there are multiple class loaders if a class path is passed to dex2oat.
-    ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    for (mirror::ClassLoader* loader : class_loaders_) {
-      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-      CHECK(table != nullptr);
-      class_table_bytes_ += table->WriteToMemory(nullptr);
-    }
-  }
-  */
-
   // Note that image_info.image_end_ is left at end of used mirror object section.
 }
 
-void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
-  CHECK_NE(0U, oat_loaded_size);
-  const char* oat_filename = oat_file_->GetLocation().c_str();
-  ImageInfo& image_info = GetImageInfo(oat_filename);
-  const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
-  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
-  image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
-  const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
-  image_info.oat_size_ = oat_file_->Size();
-
-  // Create the image sections.
-  ImageSection sections[ImageHeader::kSectionCount];
+size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size,
+                                                   ImageSection* out_sections) const {
+  DCHECK(out_sections != nullptr);
   // Objects section
-  auto* objects_section = &sections[ImageHeader::kSectionObjects];
-  *objects_section = ImageSection(0u, image_info.image_end_);
+  auto* objects_section = &out_sections[ImageHeader::kSectionObjects];
+  *objects_section = ImageSection(0u, image_end_);
   size_t cur_pos = objects_section->End();
   // Add field section.
-  auto* field_section = &sections[ImageHeader::kSectionArtFields];
-  *field_section = ImageSection(cur_pos, image_info.bin_slot_sizes_[kBinArtField]);
-  CHECK_EQ(image_info.bin_slot_offsets_[kBinArtField], field_section->Offset());
+  auto* field_section = &out_sections[ImageHeader::kSectionArtFields];
+  *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]);
+  CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset());
   cur_pos = field_section->End();
   // Round up to the alignment the required by the method section.
-  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_));
+  cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size));
   // Add method section.
-  auto* methods_section = &sections[ImageHeader::kSectionArtMethods];
+  auto* methods_section = &out_sections[ImageHeader::kSectionArtMethods];
   *methods_section = ImageSection(cur_pos,
-                                  image_info.bin_slot_sizes_[kBinArtMethodClean] +
-                                  image_info.bin_slot_sizes_[kBinArtMethodDirty]);
-  CHECK_EQ(image_info.bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
+                                  bin_slot_sizes_[kBinArtMethodClean] +
+                                  bin_slot_sizes_[kBinArtMethodDirty]);
+  CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset());
   cur_pos = methods_section->End();
   // Add dex cache arrays section.
-  auto* dex_cache_arrays_section = &sections[ImageHeader::kSectionDexCacheArrays];
-  *dex_cache_arrays_section = ImageSection(cur_pos, image_info.bin_slot_sizes_[kBinDexCacheArray]);
-  CHECK_EQ(image_info.bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
+  auto* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
+  *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]);
+  CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset());
   cur_pos = dex_cache_arrays_section->End();
   // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the interned strings.
-  auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings];
+  auto* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
   *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
   cur_pos = interned_strings_section->End();
   // Round up to the alignment the class table expects. See HashSet::WriteToMemory.
   cur_pos = RoundUp(cur_pos, sizeof(uint64_t));
   // Calculate the size of the class table section.
-  auto* class_table_section = &sections[ImageHeader::kSectionClassTable];
+  auto* class_table_section = &out_sections[ImageHeader::kSectionClassTable];
   *class_table_section = ImageSection(cur_pos, class_table_bytes_);
   cur_pos = class_table_section->End();
   // Image end goes right before the start of the image bitmap.
-  const size_t image_end = static_cast<uint32_t>(cur_pos);
+  return cur_pos;
+}
+
+void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) {
+  CHECK_NE(0U, oat_loaded_size);
+  const char* oat_filename = oat_file_->GetLocation().c_str();
+  ImageInfo& image_info = GetImageInfo(oat_filename);
+  const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename);
+  const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size;
+  image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset;
+  const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size();
+  image_info.oat_size_ = oat_file_->Size();
+
+  // Create the image sections.
+  ImageSection sections[ImageHeader::kSectionCount];
+  const size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
+
   // Finally bitmap section.
   const size_t bitmap_bytes = image_info.image_bitmap_->Size();
   auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap];
-  *bitmap_section = ImageSection(RoundUp(cur_pos, kPageSize), RoundUp(bitmap_bytes, kPageSize));
-  cur_pos = bitmap_section->End();
+  *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize));
   if (VLOG_IS_ON(compiler)) {
     LOG(INFO) << "Creating header for " << oat_filename;
     size_t idx = 0;
@@ -1444,7 +1440,7 @@ class FixupRootVisitor : public RootVisitor {
   void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      *roots[i] = ImageAddress(*roots[i]);
+      *roots[i] = image_writer_->GetImageAddress(*roots[i]);
     }
   }
 
@@ -1452,19 +1448,12 @@ class FixupRootVisitor : public RootVisitor {
                   const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr()));
+      roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
     }
   }
 
  private:
   ImageWriter* const image_writer_;
-
-  mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) {
-    const size_t offset = image_writer_->GetImageOffset(obj);
-    auto* const dest = reinterpret_cast<Object*>(image_writer_->global_image_begin_ + offset);
-    VLOG(compiler) << "Update root from " << obj << " to " << dest;
-    return dest;
-  }
 };
 
 void ImageWriter::CopyAndFixupNativeData() {
@@ -1536,54 +1525,48 @@ void ImageWriter::CopyAndFixupNativeData() {
   }
   FixupRootVisitor root_visitor(this);
 
-  /* TODO: Reenable the intern table and class table
   // Write the intern table into the image.
-  const ImageSection& intern_table_section = image_header->GetImageSection(
-      ImageHeader::kSectionInternedStrings);
-  Runtime* const runtime = Runtime::Current();
-  InternTable* const intern_table = runtime->GetInternTable();
-  uint8_t* const intern_table_memory_ptr =
-      image_info.image_->Begin() + intern_table_section.Offset();
-  const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
-  CHECK_EQ(intern_table_bytes, intern_table_bytes_);
-  // Fixup the pointers in the newly written intern table to contain image addresses.
-  InternTable temp_intern_table;
-  // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
-  // the VisitRoots() will update the memory directly rather than the copies.
-  // This also relies on visit roots not doing any verification which could fail after we update
-  // the roots to be the image addresses.
-  temp_intern_table.ReadFromMemory(intern_table_memory_ptr);
-  CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
-  temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
-
+  if (image_info.intern_table_bytes_ > 0) {
+    const ImageSection& intern_table_section = image_header->GetImageSection(
+        ImageHeader::kSectionInternedStrings);
+    InternTable* const intern_table = image_info.intern_table_.get();
+    uint8_t* const intern_table_memory_ptr =
+        image_info.image_->Begin() + intern_table_section.Offset();
+    const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr);
+    CHECK_EQ(intern_table_bytes, image_info.intern_table_bytes_);
+    // Fixup the pointers in the newly written intern table to contain image addresses.
+    InternTable temp_intern_table;
+    // Note that we require that ReadFromMemory does not make an internal copy of the elements so that
+    // the VisitRoots() will update the memory directly rather than the copies.
+    // This also relies on visit roots not doing any verification which could fail after we update
+    // the roots to be the image addresses.
+    temp_intern_table.AddTableFromMemory(intern_table_memory_ptr);
+    CHECK_EQ(temp_intern_table.Size(), intern_table->Size());
+    temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots);
+  }
   // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
   // class loaders. Writing multiple class tables into the image is currently unsupported.
-  if (class_table_bytes_ > 0u) {
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  if (image_info.class_table_bytes_ > 0u) {
     const ImageSection& class_table_section = image_header->GetImageSection(
         ImageHeader::kSectionClassTable);
     uint8_t* const class_table_memory_ptr =
         image_info.image_->Begin() + class_table_section.Offset();
     ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    size_t class_table_bytes = 0;
-    for (mirror::ClassLoader* loader : class_loaders_) {
-      ClassTable* table = class_linker->ClassTableForClassLoader(loader);
-      CHECK(table != nullptr);
-      uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes;
-      class_table_bytes += table->WriteToMemory(memory_ptr);
-      // Fixup the pointers in the newly written class table to contain image addresses. See
-      // above comment for intern tables.
-      ClassTable temp_class_table;
-      temp_class_table.ReadFromMemory(memory_ptr);
-      CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
-               table->NumZygoteClasses());
-      BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
-                                                                      RootInfo(kRootUnknown));
-      temp_class_table.VisitRoots(buffered_visitor);
-    }
-    CHECK_EQ(class_table_bytes, class_table_bytes_);
+
+    ClassTable* table = image_info.class_table_.get();
+    CHECK(table != nullptr);
+    const size_t class_table_bytes = table->WriteToMemory(class_table_memory_ptr);
+    CHECK_EQ(class_table_bytes, image_info.class_table_bytes_);
+    // Fixup the pointers in the newly written class table to contain image addresses. See
+    // above comment for intern tables.
+    ClassTable temp_class_table;
+    temp_class_table.ReadFromMemory(class_table_memory_ptr);
+    CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() +
+             table->NumZygoteClasses());
+    BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor,
+                                                                    RootInfo(kRootUnknown));
+    temp_class_table.VisitRoots(buffered_visitor);
   }
-  */
 }
 
 void ImageWriter::CopyAndFixupObjects() {
@@ -1991,7 +1974,7 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
   copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
 
   const char* oat_filename;
-  if (orig->IsRuntimeMethod()) {
+  if (orig->IsRuntimeMethod() || compile_app_image_) {
     oat_filename = default_oat_filename_;
   } else {
     auto it = dex_file_oat_filename_map_.find(orig->GetDexFile());
@@ -2110,7 +2093,6 @@ uint32_t ImageWriter::BinSlot::GetIndex() const {
 }
 
 uint8_t* ImageWriter::GetOatFileBegin(const char* oat_filename) const {
-  // DCHECK_GT(intern_table_bytes_, 0u); TODO: Reenable intern table and class table.
   uintptr_t last_image_end = 0;
   for (const char* oat_fn : oat_filenames_) {
     const ImageInfo& image_info = GetConstImageInfo(oat_fn);
@@ -2197,4 +2179,37 @@ void ImageWriter::UpdateOatFile(const char* oat_filename) {
   }
 }
 
+ImageWriter::ImageWriter(
+    const CompilerDriver& compiler_driver,
+    uintptr_t image_begin,
+    bool compile_pic,
+    bool compile_app_image,
+    ImageHeader::StorageMode image_storage_mode,
+    const std::vector<const char*> oat_filenames,
+    const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map)
+    : compiler_driver_(compiler_driver),
+      global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)),
+      image_objects_offset_begin_(0),
+      oat_file_(nullptr),
+      compile_pic_(compile_pic),
+      compile_app_image_(compile_app_image),
+      target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())),
+      image_method_array_(ImageHeader::kImageMethodsCount),
+      dirty_methods_(0u),
+      clean_methods_(0u),
+      image_storage_mode_(image_storage_mode),
+      dex_file_oat_filename_map_(dex_file_oat_filename_map),
+      oat_filenames_(oat_filenames),
+      default_oat_filename_(oat_filenames[0]) {
+  CHECK_NE(image_begin, 0U);
+  for (const char* oat_filename : oat_filenames) {
+    image_info_map_.emplace(oat_filename, ImageInfo());
+  }
+  std::fill_n(image_methods_, arraysize(image_methods_), nullptr);
+}
+
+ImageWriter::ImageInfo::ImageInfo()
+    : intern_table_(new InternTable),
+      class_table_(new ClassTable) {}
+
 }  // namespace art
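Factoring CreateImageSections out means AllocMemory, CalculateNewObjectOffsets, and CreateHeader now all derive the layout from one function instead of three hand-maintained size formulas. For reference, the layout it produces for a single image, with the alignment rules taken from the code above:

    ImageSection sections[ImageHeader::kSectionCount];
    size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections);
    // sections[kSectionObjects]          offset 0, mirror objects
    // sections[kSectionArtFields]        immediately after objects
    // sections[kSectionArtMethods]       aligned to ArtMethod::Alignment()
    // sections[kSectionDexCacheArrays]   immediately after methods
    // sections[kSectionInternedStrings]  8-byte aligned (HashSet::WriteToMemory)
    // sections[kSectionClassTable]       8-byte aligned
    // image_end == end of the class table; the bitmap section is added
    // separately, page-aligned, and is not part of the returned size.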
@@ -58,33 +60,7 @@ class ImageWriter FINAL { bool compile_app_image, ImageHeader::StorageMode image_storage_mode, const std::vector<const char*> oat_filenames, - const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map) - : compiler_driver_(compiler_driver), - global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)), - image_objects_offset_begin_(0), - oat_file_(nullptr), - compile_pic_(compile_pic), - compile_app_image_(compile_app_image), - boot_image_space_(nullptr), - target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - intern_table_bytes_(0u), - image_method_array_(ImageHeader::kImageMethodsCount), - dirty_methods_(0u), - clean_methods_(0u), - class_table_bytes_(0u), - image_storage_mode_(image_storage_mode), - dex_file_oat_filename_map_(dex_file_oat_filename_map), - oat_filenames_(oat_filenames), - default_oat_filename_(oat_filenames[0]) { - CHECK_NE(image_begin, 0U); - for (const char* oat_filename : oat_filenames) { - image_info_map_.emplace(oat_filename, ImageInfo()); - } - std::fill_n(image_methods_, arraysize(image_methods_), nullptr); - } - - ~ImageWriter() { - } + const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map); bool PrepareImageAddressSpace(); @@ -237,41 +213,40 @@ class ImageWriter FINAL { }; struct ImageInfo { - explicit ImageInfo() - : image_begin_(nullptr), - image_end_(RoundUp(sizeof(ImageHeader), kObjectAlignment)), - image_roots_address_(0), - image_offset_(0), - image_size_(0), - oat_offset_(0), - bin_slot_sizes_(), - bin_slot_offsets_(), - bin_slot_count_() {} + ImageInfo(); + ImageInfo(ImageInfo&&) = default; + + // Create the image sections into the out sections variable, returns the size of the image + // excluding the bitmap. + size_t CreateImageSections(size_t target_ptr_size, ImageSection* out_sections) const; std::unique_ptr<MemMap> image_; // Memory mapped for generating the image. // Target begin of this image. Notes: It is not valid to write here, this is the address // of the target image, not necessarily where image_ is mapped. The address is only valid // after layouting (otherwise null). - uint8_t* image_begin_; + uint8_t* image_begin_ = nullptr; - size_t image_end_; // Offset to the free space in image_, initially size of image header. - uint32_t image_roots_address_; // The image roots address in the image. - size_t image_offset_; // Offset of this image from the start of the first image. + // Offset to the free space in image_, initially size of image header. + size_t image_end_ = RoundUp(sizeof(ImageHeader), kObjectAlignment); + uint32_t image_roots_address_ = 0; // The image roots address in the image. + size_t image_offset_ = 0; // Offset of this image from the start of the first image. // Image size is the *address space* covered by this image. As the live bitmap is aligned // to the page size, the live bitmap will cover more address space than necessary. But live // bitmaps may not overlap, so an image has a "shadow," which is accounted for in the size. // The next image may only start at image_begin_ + image_size_ (which is guaranteed to be // page-aligned). - size_t image_size_; + size_t image_size_ = 0; // Oat data. - size_t oat_offset_; // Offset of the oat file for this image from start of oat files. This is - // valid when the previous oat file has been written. - uint8_t* oat_data_begin_; // Start of oatdata in the corresponding oat file. This is - // valid when the images have been layed out. 
- size_t oat_size_; // Size of the corresponding oat data. + // Offset of the oat file for this image from start of oat files. This is + // valid when the previous oat file has been written. + size_t oat_offset_ = 0; + // Start of oatdata in the corresponding oat file. This is + // valid when the images have been laid out. + uint8_t* oat_data_begin_ = nullptr; + size_t oat_size_ = 0; // Size of the corresponding oat data. // Image bitmap which lets us know where the objects inside of the image reside. std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_; @@ -280,12 +255,24 @@ class ImageWriter FINAL { SafeMap<const DexFile*, size_t> dex_cache_array_starts_; // Offset from oat_data_begin_ to the stubs. - uint32_t oat_address_offsets_[kOatAddressCount]; + uint32_t oat_address_offsets_[kOatAddressCount] = {}; // Bin slot tracking for dirty object packing. - size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin. - size_t bin_slot_offsets_[kBinSize]; // Number of bytes in previous bins. - size_t bin_slot_count_[kBinSize]; // Number of objects in a bin. + size_t bin_slot_sizes_[kBinSize] = {}; // Number of bytes in a bin. + size_t bin_slot_offsets_[kBinSize] = {}; // Number of bytes in previous bins. + size_t bin_slot_count_[kBinSize] = {}; // Number of objects in a bin. + + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_ = 0; + + // Number of image class table bytes. + size_t class_table_bytes_ = 0; + + // Intern table associated with this image for serialization. + std::unique_ptr<InternTable> intern_table_; + + // Class table associated with this image for serialization. + std::unique_ptr<ClassTable> class_table_; }; // We use the lock word to store the offset of the object in the image. @@ -483,18 +470,12 @@ class ImageWriter FINAL { const bool compile_pic_; const bool compile_app_image_; - // Cache the boot image space in this class for faster lookups. - gc::space::ImageSpace* boot_image_space_; - // Size of pointers on the target architecture. size_t target_ptr_size_; // Mapping of oat filename to image data. std::unordered_map<std::string, ImageInfo> image_info_map_; - // Cached size of the intern table for when we allocate memory. - size_t intern_table_bytes_; - // ArtField, ArtMethod relocating map. These are allocated as an array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. @@ -528,9 +509,6 @@ class ImageWriter FINAL { // null is a valid entry. std::unordered_set<mirror::ClassLoader*> class_loaders_; - // Number of image class table bytes.
- size_t class_table_bytes_; - // Which mode the image is stored as, see image.h const ImageHeader::StorageMode image_storage_mode_; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index b323d24038..bc51ed6e6a 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -22,6 +22,7 @@ #include "base/stringpiece.h" #include "base/time_utils.h" #include "base/timing_logger.h" +#include "base/unix_file/fd_file.h" #include "compiler_callbacks.h" #include "dex/pass_manager.h" #include "dex/quick_compiler_callbacks.h" @@ -42,11 +43,12 @@ JitCompiler* JitCompiler::Create() { return new JitCompiler(); } -extern "C" void* jit_load(CompilerCallbacks** callbacks) { +extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) { VLOG(jit) << "loading jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); *callbacks = jit_compiler->GetCompilerCallbacks(); + *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); VLOG(jit) << "Done loading jit compiler"; return jit_compiler; } @@ -155,14 +157,33 @@ JitCompiler::JitCompiler() : total_time_(0) { /* dump_cfg_append */ false, cumulative_logger_.get(), /* swap_fd */ -1, - /* profile_file */ "", - /* dex to oat map */ nullptr)); + /* dex to oat map */ nullptr, + /* profile_compilation_info */ nullptr)); // Disable dedupe so we can remove compiled methods. compiler_driver_->SetDedupeEnabled(false); compiler_driver_->SetSupportBootImageFixup(false); + + if (compiler_options_->GetGenerateDebugInfo()) { +#ifdef __ANDROID__ + const char* prefix = GetAndroidData(); +#else + const char* prefix = "/tmp"; +#endif + DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u) + << "Generating debug info only works with one compiler thread"; + std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map"; + perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str())); + if (perf_file_ == nullptr) { + LOG(FATAL) << "Could not create perf file at " << perf_filename; + } + } } JitCompiler::~JitCompiler() { + if (perf_file_ != nullptr) { + UNUSED(perf_file_->Flush()); + UNUSED(perf_file_->Close()); + } } bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { @@ -188,6 +209,20 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); + if (success && compiler_options_->GetGenerateDebugInfo()) { + const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode(); + std::ostringstream stream; + stream << std::hex + << reinterpret_cast<uintptr_t>(ptr) + << " " + << code_cache->GetMemorySizeOfCodePointer(ptr) + << " " + << PrettyMethod(method_to_compile) + << std::endl; + std::string str = stream.str(); + bool res = perf_file_->WriteFully(str.c_str(), str.size()); + CHECK(res); + } } // Trim maps to reduce memory usage. 
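The perf map emitted above follows the plain-text format Linux perf expects for JIT-compiled code: one line per method, "start size name", with start and size in hex (std::hex stays in effect for both stream insertions in the hunk). A self-contained sketch of formatting one such entry; the helper name is illustrative:

    #include <cstdint>
    #include <sstream>
    #include <string>

    // Formats one perf map entry ("<start> <size> <name>\n", hex fields),
    // mirroring the inline stream code in JitCompiler::CompileMethod above.
    std::string MakePerfMapLine(uintptr_t code_begin, size_t code_size,
                                const std::string& method_name) {
      std::ostringstream stream;
      stream << std::hex << code_begin << " " << code_size << " "
             << method_name << std::endl;
      return stream.str();
    }

At runtime the file lands at /tmp/perf-<pid>.map (or under the Android data directory on device), which is where perf looks when symbolizing anonymous JIT code regions.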
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 913a6d00ae..037a18ac7a 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -43,6 +43,9 @@ class JitCompiler { size_t GetTotalCompileTime() const { return total_time_; } + CompilerOptions* GetCompilerOptions() const { + return compiler_options_.get(); + } private: uint64_t total_time_; @@ -53,6 +56,7 @@ class JitCompiler { std::unique_ptr<CompilerCallbacks> callbacks_; std::unique_ptr<CompilerDriver> compiler_driver_; std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<File> perf_file_; JitCompiler(); diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index 877a674674..b10cc3534c 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -47,7 +47,7 @@ class RelativePatcherTest : public testing::Test { driver_(&compiler_options_, &verification_results_, &inliner_map_, Compiler::kQuick, instruction_set, nullptr, false, nullptr, nullptr, nullptr, 1u, - false, false, "", false, nullptr, -1, "", nullptr), + false, false, "", false, nullptr, -1, nullptr, nullptr), error_msg_(), instruction_set_(instruction_set), features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 58f46d69a2..9f7ffa5ace 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -121,7 +121,7 @@ class OatTest : public CommonCompilerTest { false, timer_.get(), -1, - "", + nullptr, nullptr)); } diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index da2f9cbed5..eee6116098 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1142,7 +1142,7 @@ class BCEVisitor : public HGraphVisitor { loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) { SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { - HoistToPreheaderOrDeoptBlock(loop, array_get); + HoistToPreHeaderOrDeoptBlock(loop, array_get); } } } @@ -1280,7 +1280,8 @@ class BCEVisitor : public HGraphVisitor { // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests // correctly guard against any possible OOB (including arithmetic wrap-around cases). 
- HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, instruction); induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); if (lower != nullptr) { InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); @@ -1358,7 +1359,7 @@ class BCEVisitor : public HGraphVisitor { return true; } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { - HoistToPreheaderOrDeoptBlock(loop, length); + HoistToPreHeaderOrDeoptBlock(loop, length); return true; } } @@ -1376,7 +1377,8 @@ class BCEVisitor : public HGraphVisitor { HInstruction* array = check->InputAt(0); if (loop->IsDefinedOutOfTheLoop(array)) { // Generate: if (array == null) deoptimize; - HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); InsertDeopt(loop, block, cond); @@ -1423,6 +1425,28 @@ class BCEVisitor : public HGraphVisitor { return true; } + /** + * Returns the appropriate preheader for the loop, depending on whether the + * instruction appears in the loop header or in the loop body proper. + */ + HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) { + // Use the preheader unless there is an earlier generated deoptimization block, since + // hoisted expressions may depend on and/or be used by the deoptimization tests. + HBasicBlock* header = loop->GetHeader(); + const uint32_t loop_id = header->GetBlockId(); + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + HBasicBlock* block = it->second; + // If always taken, keep it that way by returning the original preheader, + // which can be found by following the predecessor of the true-block twice. + if (instruction->GetBlock() == header) { + return block->GetSinglePredecessor()->GetSinglePredecessor(); + } + return block; + } + return loop->GetPreHeader(); + } + /** Inserts a deoptimization test. */ void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); @@ -1437,28 +1461,17 @@ class BCEVisitor : public HGraphVisitor { } /** Hoists instruction out of the loop to preheader or deoptimization block. */ - void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { - // Use preheader unless there is an earlier generated deoptimization block since - // hoisted expressions may depend on and/or used by the deoptimization tests. - const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - HBasicBlock* preheader = loop->GetPreHeader(); - HBasicBlock* block = preheader; - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - block = it->second; - } - // Hoist the instruction. + void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { + HBasicBlock* block = GetPreHeader(loop, instruction); DCHECK(!instruction->HasEnvironment()); instruction->MoveBefore(block->GetLastInstruction()); } /** - * Adds a new taken-test structure to a loop if needed (and not already done). + * Adds a new taken-test structure to a loop if needed and not already done.
* The taken-test protects range analysis evaluation code to avoid any * deoptimization caused by incorrect trip-count evaluation in non-taken loops. * - * Returns block in which deoptimizations/invariants can be put. - * * old_preheader * | * if_block <- taken-test protects deoptimization block @@ -1490,16 +1503,11 @@ class BCEVisitor : public HGraphVisitor { * array[i] = 0; * } */ - HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { - // Not needed (can use preheader), or already done (can reuse)? + void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { + // Not needed (can use preheader) or already done (can reuse)? const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - if (!needs_taken_test) { - return loop->GetPreHeader(); - } else { - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - return it->second; - } + if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) { + return; } // Generate top test structure. @@ -1528,7 +1536,6 @@ class BCEVisitor : public HGraphVisitor { if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); taken_test_loop_.Put(loop_id, true_block); - return true_block; } /** @@ -1543,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor { * \ / * x_1 = phi(x_0, null) <- synthetic phi * | - * header + * new_preheader */ void InsertPhiNodes() { // Scan all new deoptimization blocks. diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 53d3615a41..ea0b9eca9a 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -997,6 +997,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, stack_map_stream_.EndStackMapEntry(); } +bool CodeGenerator::HasStackMapAtCurrentPc() { + uint32_t pc = GetAssembler()->CodeSize(); + size_t count = stack_map_stream_.GetNumberOfStackMaps(); + return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc; +} + void CodeGenerator::RecordCatchBlockInfo() { ArenaAllocator* arena = graph_->GetArena(); @@ -1320,12 +1326,6 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || - // Control flow would not come back into the code if a fatal slow - // path is taken, so we do not care if it triggers GC. - slow_path->IsFatal() || - // HDeoptimize is a special case: we know we are not coming back from - // it into the code. - instruction->IsDeoptimize() || // When read barriers are enabled, some instructions use a // slow path to emit a read barrier, which does not trigger // GC, is not fatal, nor is emitted by HDeoptimize diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index eade05d7b6..5958cd89bc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -269,6 +269,8 @@ class CodeGenerator { // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + // Check whether we have already recorded mapping at this PC. 
+ bool HasStackMapAtCurrentPc(); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -611,7 +613,7 @@ class CodeGenerator { ArenaVector<SlowPathCode*> slow_paths_; - // The current slow path that we're generating code for. + // The current slow-path that we're generating code for. SlowPathCode* current_slow_path_; // The current block index in `block_order_` of the block @@ -672,6 +674,122 @@ class CallingConvention { DISALLOW_COPY_AND_ASSIGN(CallingConvention); }; +/** + * A templated class SlowPathGenerator with a templated method NewSlowPath() + * that can be used by any code generator to share equivalent slow-paths with + * the objective of reducing generated code size. + * + * InstructionType: instruction that requires SlowPathCodeType + * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType*) + */ +template <typename InstructionType> +class SlowPathGenerator { + static_assert(std::is_base_of<HInstruction, InstructionType>::value, + "InstructionType is not a subclass of art::HInstruction"); + + public: + SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) + : graph_(graph), + codegen_(codegen), + slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {} + + // Creates and adds a new slow-path, if needed, or returns an existing one otherwise. + // Templating the method (rather than the whole class) on the slow-path type enables + // keeping this code at a generic, non-architecture-specific place. + // + // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. + // To relax this requirement, we would need some RTTI on the stored slow-paths, + // or template the class as a whole on SlowPathCodeType. + template <typename SlowPathCodeType> + SlowPathCodeType* NewSlowPath(InstructionType* instruction) { + static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, + "SlowPathCodeType is not a subclass of art::SlowPathCode"); + static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, + "SlowPathCodeType is not constructible from InstructionType*"); + // Iterate over potential candidates for sharing. Currently, only same-typed + // slow-paths with exactly the same dex-pc are viable candidates. + // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? + const uint32_t dex_pc = instruction->GetDexPc(); + auto iter = slow_path_map_.find(dex_pc); + if (iter != slow_path_map_.end()) { + const auto& candidates = iter->second; + for (const auto& it : candidates) { + InstructionType* other_instruction = it.first; + SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); + // Determine if the instructions allow for slow-path sharing. + if (HaveSameLiveRegisters(instruction, other_instruction) && + HaveSameStackMap(instruction, other_instruction)) { + // Can share: reuse existing one. + return other_slow_path; + } + } + } else { + // First time this dex-pc is seen. + iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}}); + } + // Cannot share: create and add new slow-path for this particular dex-pc. + SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction); + iter->second.emplace_back(std::make_pair(instruction, slow_path)); + codegen_->AddSlowPath(slow_path); + return slow_path; + } + + private: + // Tests if both instructions have the same set of live physical registers.
This ensures + // that the slow-path has exactly the same preamble for saving these registers to the stack. + bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { + const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); + const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); + RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); + RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); + return (((live1->GetCoreRegisters() & core_spill) == + (live2->GetCoreRegisters() & core_spill)) && + ((live1->GetFloatingPointRegisters() & fpu_spill) == + (live2->GetFloatingPointRegisters() & fpu_spill))); + } + + // Tests if both instructions have the same stack map. This ensures the interpreter + // will find exactly the same dex-registers at the same entries. + bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { + DCHECK(i1->HasEnvironment()); + DCHECK(i2->HasEnvironment()); + // We conservatively test if the two instructions have exactly the same instruction + // and location in each dex-register. This guarantees they will have the same stack map. + HEnvironment* e1 = i1->GetEnvironment(); + HEnvironment* e2 = i2->GetEnvironment(); + if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { + return false; + } + for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { + if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || + !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { + return false; + } + } + return true; + } + + HGraph* const graph_; + CodeGenerator* const codegen_; + + // Map from dex-pc to vector of already existing instruction/slow-path pairs. + ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); +}; + +class InstructionCodeGenerator : public HGraphVisitor { + public: + InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + deopt_slow_paths_(graph, codegen) {} + + protected: + // Add a slow-path generator for each instruction/slow-path combination that desires sharing. + // TODO: under the current regime, only deopt sharing makes sense; extend later.
+ SlowPathGenerator<HDeoptimize> deopt_slow_paths_; +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9a1f2b8717..d64b8784e1 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -350,24 +350,24 @@ class TypeCheckSlowPathARM : public SlowPathCode { class DeoptimizationSlowPathARM : public SlowPathCode { public: - explicit DeoptimizationSlowPathARM(HInstruction* instruction) + explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; @@ -417,6 +417,56 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path marking an object during a read barrier. +class ReadBarrierMarkSlowPathARM : public SlowPathCode { + public: + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. 
class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { public: @@ -438,7 +488,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { // to be instrumented, e.g.: // // __ LoadFromOffset(kLoadWord, out, out, offset); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. @@ -454,7 +504,9 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -596,14 +648,18 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { class ReadBarrierForRootSlowPathARM : public SlowPathCode { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -857,7 +913,7 @@ void CodeGeneratorARM::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1358,17 +1414,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) { - ShifterOperand operand; - if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) { - __ cmp(left, operand); - } else { - Register temp = IP; - __ LoadImmediate(temp, right); - __ cmp(left, ShifterOperand(temp)); - } -} - void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label) { @@ -1434,7 +1479,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, int32_t val_low = Low32Bits(value); int32_t val_high = High32Bits(value); - GenerateCompareWithImmediate(left_high, val_high); + __ CmpConstant(left_high, val_high); if (if_cond == kCondNE) { __ b(true_label, ARMCondition(true_high_cond)); } else if (if_cond == kCondEQ) { @@ -1444,7 +1489,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, __ b(false_label, ARMCondition(false_high_cond)); } // Must be equal high, so compare the lows. 
- GenerateCompareWithImmediate(left_low, val_low); + __ CmpConstant(left_low, val_low); } else { Register right_high = right.AsRegisterPairHigh<Register>(); Register right_low = right.AsRegisterPairLow<Register>(); @@ -1568,7 +1613,7 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio __ cmp(left, ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } if (true_target == nullptr) { __ b(false_target, ARMCondition(condition->GetOppositeCondition())); @@ -1610,8 +1655,7 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1623,6 +1667,10 @@ void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -1675,8 +1723,8 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left.AsRegister<Register>(), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left.AsRegister<Register>(), + CodeGenerator::GetInt32ValueOf(right.GetConstant())); } __ it(ARMCondition(cond->GetCondition()), kItElse); __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), @@ -1891,7 +1939,7 @@ void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { @@ -2846,8 +2894,7 @@ void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instructi Register dividend = locations->InAt(0).AsRegister<Register>(); Register temp = locations->GetTemp(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (ctz_imm == 1) { @@ -2923,7 +2970,7 @@ void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperatio // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2952,12 +2999,12 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = div->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3078,12 +3125,12 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3437,7 +3484,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); - // Arm doesn't mask the shift count so we need to do it ourselves. + // ARM doesn't mask the shift count so we need to do it ourselves. __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); if (op->IsShl()) { __ Lsl(out_reg, first_reg, out_reg); @@ -3449,7 +3496,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { } else { int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue); - if (shift_value == 0) { // arm does not support shifting with 0 immediate. + if (shift_value == 0) { // ARM does not support shifting with 0 immediate. __ Mov(out_reg, first_reg); } else if (op->IsShl()) { __ Lsl(out_reg, first_reg, shift_value); @@ -3796,9 +3843,9 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { - // TODO (ported from quick): revisit Arm barrier kinds - DmbOptions flavor = DmbOptions::ISH; // quiet c++ warnings +void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit ARM barrier kinds. + DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. switch (kind) { case MemBarrierKind::kAnyStore: case MemBarrierKind::kLoadAny: @@ -3879,11 +3926,11 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 
locations->AddTemp(Location::RequiresRegister()); } else if (generate_volatile) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // The ARM encoding has some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); @@ -3913,7 +3960,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } switch (field_type) { @@ -4005,7 +4052,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4039,14 +4086,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } if (volatile_for_double) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // The ARM encoding has some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister()); } } @@ -4105,33 +4156,52 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: { + case Primitive::kPrimBoolean: __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimByte: { + case Primitive::kPrimByte: __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimShort: { + case Primitive::kPrimShort: __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimChar: { + case Primitive::kPrimChar: __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; - } case Primitive::kPrimInt: - case Primitive::kPrimNot: { __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; } - case Primitive::kPrimLong: { + case Primitive::kPrimLong: if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), @@ -4140,12 +4210,10 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); } break; - } - case Primitive::kPrimFloat: { + case Primitive::kPrimFloat: __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; - } case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); @@ -4167,17 +4235,20 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - // Doubles are handled in the switch. - if (field_type != Primitive::kPrimDouble) { + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + // Potential implicit null checks, in the case of reference or + // double fields, are handled in the previous switch statement. + } else { codegen_->MaybeRecordImplicitNullCheck(instruction); } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. 
+ } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4340,6 +4411,11 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { @@ -4347,12 +4423,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + Location out_loc = locations->Out(); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4366,7 +4443,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4380,7 +4457,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4394,7 +4471,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4406,13 +4483,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -4424,44 +4497,79 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + 
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, out, IP, data_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Location out = locations->Out(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegister()); + SRegister out = out_loc.AsFpuRegister<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset); + __ LoadSFromOffset(out, obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset); + __ LoadSFromOffset(out, IP, data_offset); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegisterPair()); + SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset); + __ 
LoadDFromOffset(FromLowSToD(out), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + __ LoadDFromOffset(FromLowSToD(out), IP, data_offset); } break; } @@ -4470,20 +4578,12 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location out = locations->Out(); - if (index.IsConstant()) { - uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); - } + // Potential implicit null checks, in the case of reference + // arrays, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4574,6 +4674,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); DCHECK(!may_need_runtime_call_for_type_check); break; @@ -4615,12 +4716,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp1); // // /* HeapReference<Class> */ temp1 = temp1->component_type_ // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = value->klass_ // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); // // __ cmp(temp1, ShifterOperand(temp2)); @@ -4717,8 +4818,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } - - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4770,8 +4869,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { UNREACHABLE(); } - // Ints and objects are handled in the switch. - if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + // Objects are handled in the switch. 
+ if (value_type != Primitive::kPrimNot) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -5140,16 +5239,9 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ @@ -5157,17 +5249,8 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -5230,30 +5313,14 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::String> 
*/ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); @@ -5300,6 +5367,14 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5326,21 +5401,22 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5355,10 +5431,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ cmp(out, ShifterOperand(cls)); // Classes must be equal for the instanceof to succeed. @@ -5373,17 +5448,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -5401,17 +5467,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -5429,17 +5486,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadWord, out, out, component_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5478,6 +5526,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
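The instanceof and checkcast hunks above fold the repeated kEmitCompilerReadBarrier conditions into the new TypeCheckNeedsATemporary() predicate, which now also reserves a temporary whenever Baker read barriers are in use. A minimal standalone model of the predicate follows; the flag values and the main() driver are illustrative, and only the predicate body mirrors the diff:

  #include <iostream>

  // Illustrative stand-ins for ART's compile-time read-barrier flags.
  constexpr bool kEmitCompilerReadBarrier = true;
  constexpr bool kUseBakerReadBarrier = false;

  enum class TypeCheckKind {
    kExactCheck,
    kAbstractClassCheck,
    kClassHierarchyCheck,
    kArrayObjectCheck,
  };

  // Mirrors the new helper: with Baker read barriers every check kind
  // needs a temp; with other read barriers only the loop-based checks
  // do; with read barriers disabled, none do.
  static bool TypeCheckNeedsATemporary(TypeCheckKind kind) {
    return kEmitCompilerReadBarrier &&
        (kUseBakerReadBarrier ||
         kind == TypeCheckKind::kAbstractClassCheck ||
         kind == TypeCheckKind::kClassHierarchyCheck ||
         kind == TypeCheckKind::kArrayObjectCheck);
  }

  int main() {
    std::cout << TypeCheckNeedsATemporary(TypeCheckKind::kExactCheck)          // 0
              << TypeCheckNeedsATemporary(TypeCheckKind::kAbstractClassCheck)  // 1
              << '\n';
  }
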
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, /* is_fatal */ false); @@ -5532,27 +5587,27 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5571,8 +5626,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5589,18 +5643,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -5612,8 +5656,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -5629,18 +5672,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -5651,8 +5684,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5664,19 +5696,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ b(&done, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -5689,8 +5710,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
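To see what the loop-based paths above actually compute once the read-barrier plumbing is stripped away, here is a plain C++ sketch of the superclass walk (kAbstractClassCheck / kClassHierarchyCheck) and of the array-object check. The struct is a toy: the field names echo the mirror::Class members whose offsets the generated code loads, everything else is invented for illustration:

  #include <cstdint>

  // Toy stand-in for mirror::Class; field names echo the offsets the
  // generated code loads (super_class_, component_type_, primitive_type_).
  struct Class {
    const Class* super_class_ = nullptr;
    const Class* component_type_ = nullptr;  // non-null only for arrays
    uint16_t primitive_type_ = 0;            // 0 means "not primitive"
  };

  // kAbstractClassCheck / kClassHierarchyCheck: follow super_class_ until
  // the target class is found or the chain ends in null (failure), which
  // is exactly the compare-and-loop structure emitted above.
  bool IsSubclassOf(const Class* klass, const Class* target) {
    for (const Class* k = klass; k != nullptr; k = k->super_class_) {
      if (k == target) return true;
    }
    return false;
  }

  // kArrayObjectCheck: the object passes if its class is an array of
  // references, i.e. component_type_ is non-null and not primitive.
  bool IsObjectArrayClass(const Class& klass) {
    return klass.component_type_ != nullptr &&
           klass.component_type_->primitive_type_ == 0;
  }

  int main() {
    Class object_cls;
    Class string_cls{&object_cls};
    Class string_array{nullptr, &string_cls};
    bool ok = IsSubclassOf(&string_cls, &object_cls) &&
              IsObjectArrayClass(string_array);
    return ok ? 0 : 1;
  }
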
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -5699,8 +5719,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(temp, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5717,6 +5736,13 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5901,14 +5927,249 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ LoadFromOffset( + kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value()); + __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ AddConstant(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. 
+ } +} + +void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. + + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift); + __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). 
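The comment block above is the heart of this change. For reference, the same fast path written as portable C++: the lock-word layout constants below are placeholders rather than ART's real bit positions, and the acquire load stands in for the lfence / artificial address dependency used by the emitted code. (The GC-root variant earlier in the hunk differs in that it checks Thread::Current()->GetIsGcMarking() instead of a per-object bit.)

  #include <atomic>
  #include <cstdint>

  // Placeholder lock-word layout; the real shift/mask values come from
  // art::LockWord and art::ReadBarrier.
  constexpr uint32_t kReadBarrierStateShift = 28;
  constexpr uint32_t kReadBarrierStateMask = 0x3;
  constexpr uint32_t kGrayState = 1;  // stand-in for ReadBarrier::gray_ptr_

  struct Object {
    std::atomic<uint32_t> monitor_{0};
    Object* field_ = nullptr;
  };

  Object* Mark(Object* ref) { return ref; }  // no-op stand-in for the runtime entrypoint

  // Models GenerateReferenceLoadWithBakerReadBarrier: the lock word is
  // read *before* the reference, the two loads are kept ordered, and a
  // gray object routes the loaded reference through the mark entrypoint.
  Object* LoadFieldWithBakerBarrier(Object* obj) {
    uint32_t rb_state =
        (obj->monitor_.load(std::memory_order_acquire) >> kReadBarrierStateShift) &
        kReadBarrierStateMask;
    Object* ref = obj->field_;  // the original reference load
    if (rb_state == kGrayState) {
      ref = Mark(ref);  // slow path: the concurrent copying GC may move `ref`
    }
    return ref;
  }

  int main() {
    Object child, parent;
    parent.field_ = &child;
    return LoadFieldWithBakerBarrier(&parent) == &child ? 0 : 1;
  }
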
+ // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0 + __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + // obj is unchanged by this operation, but its value now depends on + // IP, which depends on temp_reg. + __ add(obj, obj, ShifterOperand(IP)); + + // The actual reference load. + if (index.IsValid()) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ ref = + // *(obj + offset + index * sizeof(HeapReference<Object>)) + if (index.IsConstant()) { + size_t computed_offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset; + __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); + } + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); + } + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path used to mark the object `ref` when it is gray. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref); + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::gray_ptr_) + // ref = ReadBarrier::Mark(ref); + __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_)); + __ b(slow_path->GetEntryLabel(), EQ); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the reference load. + // // If heap poisoning is enabled, the unpoisoning of the loaded // reference will be carried out by the runtime within the slow // path. @@ -5922,57 +6183,41 @@ void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); - // TODO: When read barrier has a fast path, add it here. - /* Currently the read barrier call is inserted after the original load. - * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the - * original load. This load-load ordering is required by the read barrier. - * The fast path/slow path (for Baker's algorithm) should look like: - * - * bool isGray = obj.LockWord & kReadBarrierMask; - * lfence; // load fence or artificial data dependence to prevent load-load reordering - * ref = obj.field; // this is the original load - * if (isGray) { - * ref = Mark(ref); // ideally the slow path just does Mark(ref) - * } - */ - __ b(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } -void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier). 
+ DCHECK(!kUseBakerReadBarrier); // If heap poisoning is enabled, unpoisoning will be taken care of // by the runtime within the slow path. - GenerateReadBarrier(instruction, out, ref, obj, offset, index); + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); } else if (kPoisonHeapReferences) { __ UnpoisonHeapReference(out.AsRegister<Register>()); } } -void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction, - Location out, - Location root) { +void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { DCHECK(kEmitCompilerReadBarrier); + // Insert a slow path based read barrier *after* the GC root load. + // // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root); AddSlowPath(slow_path); - // TODO: Implement a fast path for ReadBarrierForRoot, performing - // the following operation (for Baker's algorithm): - // - // if (thread.tls32_.is_gc_marking) { - // root = Mark(root); - // } - __ b(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -6304,7 +6549,7 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr) } if (num_entries - last_index == 2) { // The last missing case_value. - GenerateCompareWithImmediate(temp_reg, 1); + __ CmpConstant(temp_reg, 1); __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ); } @@ -6364,7 +6609,7 @@ void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysB void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) { if (!trg.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK_EQ(type, Primitive::kPrimVoid); return; } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b7c58e1248..26d6d63b31 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -188,7 +188,7 @@ class LocationsBuilderARM : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM); }; -class InstructionCodeGeneratorARM : public HGraphVisitor { +class InstructionCodeGeneratorARM : public InstructionCodeGenerator { public: InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen); @@ -222,24 +222,57 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { void HandleLongRotate(LocationSummary* locations); void HandleRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); - void GenerateMemoryBarrier(MemBarrierKind kind); + void GenerateWideAtomicStore(Register addr, uint32_t offset, Register value_lo, Register value_hi, Register temp1, Register temp2, HInstruction* instruction); void GenerateWideAtomicLoad(Register addr, uint32_t offset, Register out_lo, Register out_hi); + void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a read barrier. 
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // Register `temp` is used when generating a Baker's read barrier. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); + void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Label* true_target, Label* false_target); - void GenerateCompareWithImmediate(Register left, int32_t right); void GenerateCompareTestAndBranch(HCondition* condition, Label* true_target, Label* false_target); @@ -346,6 +379,8 @@ class CodeGeneratorARM : public CodeGenerator { // Emit a write barrier. void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); + void GenerateMemoryBarrier(MemBarrierKind kind); + Label* GetLabelOf(HBasicBlock* block) const { return CommonGetLabelOf<Label>(block_labels_, block); } @@ -406,7 +441,26 @@ class CodeGeneratorARM : public CodeGenerator { return &it->second; } - // Generate a read barrier for a heap reference within `instruction`. + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location out, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. // // A read barrier for an object reference read from the heap is // implemented as a call to the artReadBarrierSlow runtime entry @@ -423,23 +477,25 @@ class CodeGeneratorARM : public CodeGenerator { // When `index` is provided (i.e. for array accesses), the offset // value passed to artReadBarrierSlow is adjusted to take `index` // into account. - void GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // If read barriers are enabled, generate a read barrier for a heap reference. - // If heap poisoning is enabled, also unpoison the reference in `out`. - void MaybeGenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index = Location::NoLocation()); - - // Generate a read barrier for a GC root within `instruction`. 
+ void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. // // A read barrier for an object reference GC root is implemented as // a call to the artReadBarrierForRootSlow runtime entry point, @@ -449,9 +505,19 @@ class CodeGeneratorARM : public CodeGenerator { // // The `out` location contains the value returned by // artReadBarrierForRootSlow. - void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root); + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); private: + // Factored implementation of GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check); + Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index b49f42b6c8..a3150d3d22 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -477,24 +477,24 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: - explicit DeoptimizationSlowPathARM64(HInstruction* instruction) + explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; @@ -1605,7 +1605,7 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -2534,8 +2534,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc Register out = 
OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); UseScratchRegisterScope temps(GetVIXLAssembler()); @@ -2627,7 +2626,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2940,9 +2939,8 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathARM64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -2954,6 +2952,10 @@ void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 0e90ac6345..f2ff89488e 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -186,7 +186,7 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64); }; -class InstructionCodeGeneratorARM64 : public HGraphVisitor { +class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen); diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 4648606da8..322912976e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -444,19 +444,16 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit DeoptimizationSlowPathMIPS(HInstruction* instruction) + explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, - dex_pc, + instruction_->GetDexPc(), this, IsDirectEntrypoint(kQuickDeoptimize)); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); @@ -465,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS 
{ const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; @@ -608,9 +605,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR // unpredictable and the following mfch1 will fail. __ Mfc1(TMP, f1); - __ Mfhc1(AT, f1); + __ MoveFromFpuHigh(AT, f1); __ Mtc1(r2_l, f1); - __ Mthc1(r2_h, f1); + __ MoveToFpuHigh(r2_h, f1); __ Move(r2_l, TMP); __ Move(r2_h, AT); } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) { @@ -862,7 +859,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) { Register dst_low = destination.AsRegisterPairLow<Register>(); FRegister src = source.AsFpuRegister<FRegister>(); __ Mfc1(dst_low, src); - __ Mfhc1(dst_high, src); + __ MoveFromFpuHigh(dst_high, src); } else { DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; int32_t off = source.GetStackIndex(); @@ -875,7 +872,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) { Register src_high = source.AsRegisterPairHigh<Register>(); Register src_low = source.AsRegisterPairLow<Register>(); __ Mtc1(src_low, dst); - __ Mthc1(src_high, dst); + __ MoveToFpuHigh(src_high, dst); } else if (source.IsFpuRegister()) { __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); } else { @@ -1241,7 +1238,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1511,7 +1508,7 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) } void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1534,7 +1531,7 @@ void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte; void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1542,30 +1539,58 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { bool use_imm = rhs_location.IsConstant(); Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; - uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue; - uint32_t shift_value = rhs_imm & shift_mask; - // Is the INS (Insert Bit Field) instruction supported? - bool has_ins = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + const uint32_t shift_mask = (type == Primitive::kPrimInt) + ? 
kMaxIntShiftValue + : kMaxLongShiftValue; + const uint32_t shift_value = rhs_imm & shift_mask; + // Are the INS (Insert Bit Field) and ROTR instructions supported? + bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); switch (type) { case Primitive::kPrimInt: { Register dst = locations->Out().AsRegister<Register>(); Register lhs = locations->InAt(0).AsRegister<Register>(); if (use_imm) { - if (instr->IsShl()) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + if (has_ins_rotr) { + __ Rotr(dst, lhs, shift_value); + } else { + __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask); + __ Srl(dst, lhs, shift_value); + __ Or(dst, dst, TMP); + } } } else { if (instr->IsShl()) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + if (has_ins_rotr) { + __ Rotrv(dst, lhs, rhs_reg); + } else { + __ Subu(TMP, ZERO, rhs_reg); + // 32-bit shift instructions use the 5 least significant bits of the shift count, so + // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case + // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out + // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`, + // IOW, the OR'd values are equal. + __ Sllv(TMP, lhs, TMP); + __ Srlv(dst, lhs, rhs_reg); + __ Or(dst, dst, TMP); + } } } break; @@ -1580,7 +1605,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { if (shift_value == 0) { codegen_->Move64(locations->Out(), locations->InAt(0)); } else if (shift_value < kMipsBitsPerWord) { - if (has_ins) { + if (has_ins_rotr) { if (instr->IsShl()) { __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value); __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value); @@ -1589,10 +1614,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Srl(dst_low, lhs_low, shift_value); __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); __ Sra(dst_high, lhs_high, shift_value); + } else if (instr->IsUShr()) { + __ Srl(dst_low, lhs_low, shift_value); + __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); + __ Srl(dst_high, lhs_high, shift_value); } else { __ Srl(dst_low, lhs_low, shift_value); __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value); __ Srl(dst_high, lhs_high, shift_value); + __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value); } } else { if (instr->IsShl()) { @@ -1605,24 +1635,51 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); __ Srl(dst_low, lhs_low, shift_value); __ Or(dst_low, dst_low, TMP); - } else { + } else if (instr->IsUShr()) { __ Srl(dst_high, lhs_high, shift_value); __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value); __ Srl(dst_low, lhs_low, shift_value); __ Or(dst_low, dst_low, TMP); + } else { + __ Srl(TMP, lhs_low, shift_value); + __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value); + __ Or(dst_low, dst_low, TMP); + __ Srl(TMP, lhs_high, shift_value); + __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value); + __ Or(dst_high, dst_high, TMP); } } } else { - 
shift_value -= kMipsBitsPerWord; + const uint32_t shift_value_high = shift_value - kMipsBitsPerWord; if (instr->IsShl()) { - __ Sll(dst_high, lhs_low, shift_value); + __ Sll(dst_high, lhs_low, shift_value_high); __ Move(dst_low, ZERO); } else if (instr->IsShr()) { - __ Sra(dst_low, lhs_high, shift_value); + __ Sra(dst_low, lhs_high, shift_value_high); __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1); - } else { - __ Srl(dst_low, lhs_high, shift_value); + } else if (instr->IsUShr()) { + __ Srl(dst_low, lhs_high, shift_value_high); __ Move(dst_high, ZERO); + } else { + if (shift_value == kMipsBitsPerWord) { + // 64-bit rotation by 32 is just a swap. + __ Move(dst_low, lhs_high); + __ Move(dst_high, lhs_low); + } else { + if (has_ins_rotr) { + __ Srl(dst_low, lhs_high, shift_value_high); + __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high); + } else { + __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_low, lhs_high, shift_value_high); + __ Or(dst_low, dst_low, TMP); + __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high); + __ Srl(dst_high, lhs_low, shift_value_high); + __ Or(dst_high, dst_high, TMP); + } + } } } } else { @@ -1649,7 +1706,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Beqz(TMP, &done); __ Move(dst_low, dst_high); __ Sra(dst_high, dst_high, 31); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst_high, lhs_high, rhs_reg); __ Nor(AT, ZERO, rhs_reg); __ Sll(TMP, lhs_high, 1); @@ -1660,6 +1717,21 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Beqz(TMP, &done); __ Move(dst_low, dst_high); __ Move(dst_high, ZERO); + } else { + __ Nor(AT, ZERO, rhs_reg); + __ Srlv(TMP, lhs_low, rhs_reg); + __ Sll(dst_low, lhs_high, 1); + __ Sllv(dst_low, dst_low, AT); + __ Or(dst_low, dst_low, TMP); + __ Srlv(TMP, lhs_high, rhs_reg); + __ Sll(dst_high, lhs_low, 1); + __ Sllv(dst_high, dst_high, AT); + __ Or(dst_high, dst_high, TMP); + __ Andi(TMP, rhs_reg, kMipsBitsPerWord); + __ Beqz(TMP, &done); + __ Move(TMP, dst_high); + __ Move(dst_high, dst_low); + __ Move(dst_low, TMP); } __ Bind(&done); } @@ -2314,8 +2386,7 @@ void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruct Register out = locations->Out().AsRegister<Register>(); Register dividend = locations->InAt(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2418,7 +2489,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
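Stepping back from the Ror lowering above: pre-R2 MIPS has no ROTR/ROTRV, so the rotate is synthesized from shifts, and the 64-bit variant operates on two 32-bit halves, swapping them when bit 5 of the shift amount is set. The same arithmetic in portable C++, including the shift-by-one-then-shift-by-~r trick that keeps an amount of zero well defined:

  #include <cassert>
  #include <cstdint>
  #include <utility>

  // 32-bit rotate-right without ROTR: (x >> r) | (x << (32 - r)), with
  // amounts taken mod 32; r == 0 is fine because the OR'd values are equal.
  uint32_t Rotr32(uint32_t x, unsigned r) {
    r &= 31;
    return (x >> r) | (x << ((32 - r) & 31));
  }

  // x << (32 - s) for s in [0, 32) without an out-of-range shift:
  // (x << 1) << (31 - s), matching the Sll-by-1 + Sllv-by-~r sequence
  // above. For s == 0 this correctly yields 0.
  uint32_t ShlBy32Minus(uint32_t x, unsigned s) {
    return (x << 1) << (31 - s);
  }

  // 64-bit rotate-right from two 32-bit halves: rotate by the low five
  // bits, then swap halves if bit 5 is set ("rotation by 32 is a swap").
  uint64_t Rotr64(uint32_t lo, uint32_t hi, unsigned r) {
    unsigned s = r & 31;
    uint32_t new_lo = (lo >> s) | ShlBy32Minus(hi, s);
    uint32_t new_hi = (hi >> s) | ShlBy32Minus(lo, s);
    if (r & 32) std::swap(new_lo, new_hi);
    return (uint64_t{new_hi} << 32) | new_lo;
  }

  int main() {
    assert(Rotr32(0x80000001u, 1) == 0xC0000000u);
    assert(Rotr64(0x00000001u, 0x00000000u, 1) == 0x8000000000000000ull);
    assert(Rotr64(0xDEADBEEFu, 0x12345678u, 32) == 0xDEADBEEF12345678ull);
    return 0;
  }
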
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -3358,8 +3429,8 @@ void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -3371,6 +3442,10 @@ void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3457,8 +3532,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, // Need to move to FP regs since FP results are returned in core registers. __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), locations->Out().AsFpuRegister<FRegister>()); - __ Mthc1(locations->GetTemp(2).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); + __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + locations->Out().AsFpuRegister<FRegister>()); } } else { if (!Primitive::IsFloatingPointType(type)) { @@ -3578,8 +3653,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, // Pass FP parameters in core registers. 
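One more note on the div/rem hunks above: replacing std::abs(imm) with AbsOrMin(imm) matters because the most negative integer has no representable absolute value, so std::abs on it is undefined behavior. Assuming AbsOrMin does what its call sites here suggest (absolute value, except that the minimum maps to itself), the power-of-two test still holds for that corner case once the result is viewed as unsigned. A small sketch under that assumption:

  #include <cstdint>
  #include <limits>

  // Presumed behavior of art::AbsOrMin: |v| for every value except the
  // minimum, which maps to itself (its true absolute value overflows).
  int64_t AbsOrMin(int64_t v) {
    return (v == std::numeric_limits<int64_t>::min()) ? v : (v < 0 ? -v : v);
  }

  bool IsPowerOfTwo(uint64_t x) {
    return x != 0 && (x & (x - 1)) == 0;
  }

  int main() {
    // INT64_MIN reinterpreted as unsigned is 2^63, a power of two, so
    // DivRemByPowerOfTwo's shift-based lowering still applies to it.
    uint64_t abs_imm =
        static_cast<uint64_t>(AbsOrMin(std::numeric_limits<int64_t>::min()));
    return IsPowerOfTwo(abs_imm) ? 0 : 1;
  }
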
__ Mfc1(locations->GetTemp(1).AsRegister<Register>(), locations->InAt(1).AsFpuRegister<FRegister>()); - __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); + __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + locations->InAt(1).AsFpuRegister<FRegister>()); } codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store), instruction, @@ -4536,14 +4611,12 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS::VisitShl(HShl* shl) { @@ -4731,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { Primitive::Type input_type = conversion->GetInputType(); Primitive::Type result_type = conversion->GetResultType(); DCHECK_NE(input_type, result_type); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { @@ -4738,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { } LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { + if (!isR6 && + ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || + (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) { call_kind = LocationSummary::kCall; } @@ -4777,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type result_type = conversion->GetResultType(); Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4822,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { + if (input_type == Primitive::kPrimLong) { + if (isR6) { + // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register src_low = locations->InAt(0).AsRegisterPairLow<Register>(); + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + __ Mtc1(src_low, FTMP); + __ Mthc1(src_high, FTMP); + if (result_type == Primitive::kPrimFloat) { + __ Cvtsl(dst, FTMP); + } else { + __ Cvtdl(dst, FTMP); + } + } else { + int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? 
QUICK_ENTRY_POINT(pL2f) + : QUICK_ENTRY_POINT(pL2d); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) + : IsDirectEntrypoint(kQuickL2d); + codegen_->InvokeRuntime(entry_offset, + conversion, + conversion->GetDexPc(), + nullptr, + direct); + if (result_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + } else { + CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + } + } + } else { Register src = locations->InAt(0).AsRegister<Register>(); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src, FTMP); @@ -4831,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } else { __ Cvtdw(dst, FTMP); } - } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f) - : IsDirectEntrypoint(kQuickL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); - } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); - } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - bool direct; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz) - : IsDirectEntrypoint(kQuickD2iz); + if (result_type == Primitive::kPrimLong) { + if (isR6) { + // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary + // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); + Register dst_low = locations->Out().AsRegisterPairLow<Register>(); + MipsLabel truncate; + MipsLabel done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**-63 for truncating to long, -2**-31 for truncating to int). 
+ // If the input is greater than or equal to the minimum, it procedes to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); + } else { + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + __ Mthc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); + } + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ Move(dst_low, ZERO); + __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst_high, dst_high, TMP); + + __ B(&done); + + __ Bind(&truncate); + + if (input_type == Primitive::kPrimFloat) { + __ TruncLS(FTMP, src); + } else { + __ TruncLD(FTMP, src); + } + __ Mfc1(dst_low, FTMP); + __ Mfhc1(dst_high, FTMP); + + __ Bind(&done); + } else { + int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) + : QUICK_ENTRY_POINT(pD2l); + bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) + : IsDirectEntrypoint(kQuickD2l); + codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct); + if (input_type == Primitive::kPrimFloat) { + CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + } else { + CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + } + } } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); - direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l) - : IsDirectEntrypoint(kQuickD2l); - } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr, - direct); - if (result_type != Primitive::kPrimLong) { + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + Register dst = locations->Out().AsRegister<Register>(); + MipsLabel truncate; + MipsLabel done; + + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. + // + // For details see the large comment above for the truncation of float/double to long on R6. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. 
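Summing up the long NAN2008 comment above: whichever truncate behavior the hardware (or the emulator) implements, the emitted compare-before-truncate sequence pins down the Java conversion semantics. The net effect, written as portable C++ for the double-to-long case (the generated code reaches the same results branchlessly, via a compare mask, rather than with isnan):

  #include <cmath>
  #include <cstdint>
  #include <limits>

  // Net effect of the guarded truncation above for double -> long:
  // NaN -> 0, below range -> min, above range -> max, otherwise truncate.
  int64_t DoubleToLong(double v) {
    constexpr int64_t kMin = std::numeric_limits<int64_t>::min();
    constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
    if (std::isnan(v)) return 0;                      // the CmpEqD(src, src) leg
    if (v < static_cast<double>(kMin)) return kMin;   // the pre-truncate compare
    if (v >= static_cast<double>(kMax)) return kMax;  // trunc saturates up here
    return static_cast<int64_t>(v);                   // TruncLD on in-range input
  }

  int main() {
    bool ok = DoubleToLong(std::nan("")) == 0 &&
              DoubleToLong(-1e30) == std::numeric_limits<int64_t>::min() &&
              DoubleToLong(1e30) == std::numeric_limits<int64_t>::max() &&
              DoubleToLong(-2.5) == -2;
    return ok ? 0 : 1;
  }
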
if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, High32Bits(min_val)); + __ Mtc1(ZERO, FTMP); + if (fpu_32bit) { + __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); + } else { + __ Mthc1(TMP, FTMP); + } } - } else { + + if (isR6) { + if (input_type == Primitive::kPrimFloat) { + __ CmpLeS(FTMP, FTMP, src); + } else { + __ CmpLeD(FTMP, FTMP, src); + } + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + } else { + if (input_type == Primitive::kPrimFloat) { + __ ColeS(0, FTMP, src); + } else { + __ ColeD(0, FTMP, src); + } + __ Bc1t(0, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CeqS(0, src, src); + } else { + __ CeqD(0, src, src); + } + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + __ Movf(dst, ZERO, 0); + } + + __ B(&done); + + __ Bind(&truncate); + if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); + + __ Bind(&done); } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 38302ad315..c3d4851ee9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -197,7 +197,7 @@ class LocationsBuilderMIPS : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS); }; -class InstructionCodeGeneratorMIPS : public HGraphVisitor { +class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { public: InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 05834ff063..38c32cad06 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -391,24 +391,24 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction) + explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); 
} const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; @@ -1113,7 +1113,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1247,7 +1247,7 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio } void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1265,7 +1265,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { } void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1290,13 +1290,19 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue) : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue); - if (type == Primitive::kPrimInt) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (type == Primitive::kPrimInt) { if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + __ Rotr(dst, lhs, shift_value); } } else { if (shift_value < 32) { @@ -1304,8 +1310,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl(dst, lhs, shift_value); + } else { + __ Drotr(dst, lhs, shift_value); } } else { shift_value -= 32; @@ -1313,8 +1321,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll32(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra32(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl32(dst, lhs, shift_value); + } else { + __ Drotr32(dst, lhs, shift_value); } } } @@ -1324,16 +1334,20 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + __ Rotrv(dst, lhs, rhs_reg); } } else { if (instr->IsShl()) { __ Dsllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Dsrav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Dsrlv(dst, lhs, rhs_reg); + } else { + __ Drotrv(dst, lhs, rhs_reg); } } } @@ -1955,8 +1969,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - 
uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2138,7 +2151,7 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2736,9 +2749,8 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathMIPS64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -2750,6 +2762,10 @@ void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3722,14 +3738,12 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS64::VisitShl(HShl* shl) { @@ -3918,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { - call_kind = LocationSummary::kCall; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); - if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } - - if (Primitive::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); } else { - InvokeRuntimeCallingConvention calling_convention; - - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, 
Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
- locations->SetOut(calling_convention.GetReturnLocation(result_type));
+ if (Primitive::IsFloatingPointType(result_type)) {
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
}
@@ -3992,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
<< " to " << result_type;
}
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
- if (input_type != Primitive::kPrimLong) {
- FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
- GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
- __ Mtc1(src, FTMP);
+ FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+ GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>();
+ if (input_type == Primitive::kPrimLong) {
+ __ Dmtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- __ Cvtsw(dst, FTMP);
+ __ Cvtsl(dst, FTMP);
} else {
- __ Cvtdw(dst, FTMP);
+ __ Cvtdl(dst, FTMP);
}
} else {
- int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
- : QUICK_ENTRY_POINT(pL2d);
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
+ __ Mtc1(src, FTMP);
if (result_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ __ Cvtsw(dst, FTMP);
} else {
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ __ Cvtdw(dst, FTMP);
}
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
- int32_t entry_offset;
- if (result_type != Primitive::kPrimLong) {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
- : QUICK_ENTRY_POINT(pD2iz);
+ GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+ FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ Mips64Label truncate;
+ Mips64Label done;
+
+ // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+ // value when the input is either a NaN or is outside of the range of the output type
+ // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+ // the same result.
+ //
+ // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+ // value of the output type if the input is outside of the range after the truncation or
+ // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+ // results. This matches the desired float/double-to-int/long conversion exactly.
+ //
+ // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+ //
+ // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+ // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+ // even though it must be NAN2008=1 on R6.
+ //
+ // The code takes care of the different behaviors by first comparing the input to the
+ // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+ // If the input is greater than or equal to the minimum, it proceeds to the truncate
+ // instruction, which will handle such an input the same way irrespective of NAN2008.
+ // Otherwise the input is compared to itself to determine whether it is a NaN or not
+ // in order to return either zero or the minimum value.
+ //
+ // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+ // truncate instruction for MIPS64R6.
+ if (input_type == Primitive::kPrimFloat) {
+ uint32_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+ __ LoadConst32(TMP, min_val);
+ __ Mtc1(TMP, FTMP);
+ __ CmpLeS(FTMP, FTMP, src);
} else {
- entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
- : QUICK_ENTRY_POINT(pD2l);
+ uint64_t min_val = (result_type == Primitive::kPrimLong)
+ ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min())
+ : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+ __ LoadConst64(TMP, min_val);
+ __ Dmtc1(TMP, FTMP);
+ __ CmpLeD(FTMP, FTMP, src);
}
- codegen_->InvokeRuntime(entry_offset,
- conversion,
- conversion->GetDexPc(),
- nullptr);
- if (result_type != Primitive::kPrimLong) {
+
+ __ Bc1nez(FTMP, &truncate);
+
+ if (input_type == Primitive::kPrimFloat) {
+ __ CmpEqS(FTMP, src, src);
+ } else {
+ __ CmpEqD(FTMP, src, src);
+ }
+ if (result_type == Primitive::kPrimLong) {
+ __ LoadConst64(dst, std::numeric_limits<int64_t>::min());
+ } else {
+ __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+ }
+ __ Mfc1(TMP, FTMP);
+ __ And(dst, dst, TMP);
+
+ __ Bc(&done);
+
+ __ Bind(&truncate);
+
+ if (result_type == Primitive::kPrimLong) {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ __ TruncLS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ __ TruncLD(FTMP, src);
}
+ __ Dmfc1(dst, FTMP);
} else {
if (input_type == Primitive::kPrimFloat) {
- CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ __ TruncWS(FTMP, src);
} else {
- CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ __ TruncWD(FTMP, src);
}
+ __ Mfc1(dst, FTMP);
}
+
+ __ Bind(&done);
} else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 60ff96dc43..7182e8e987 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -201,7 +201,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64);
};
-class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
public:
InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 86327fb741..6ab3aaff4b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -365,11 +365,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
class DeoptimizationSlowPathX86 : public SlowPathCode {
public:
- explicit DeoptimizationSlowPathX86(HInstruction* instruction)
+ explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
:
instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - DCHECK(instruction_->IsDeoptimize()); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); @@ -383,7 +382,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; @@ -892,7 +891,7 @@ void CodeGeneratorX86::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1611,9 +1610,7 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1625,6 +1622,10 @@ void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3223,11 +3224,12 @@ void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { Register out_register = locations->Out().AsRegister<Register>(); Register input_register = locations->InAt(0).AsRegister<Register>(); int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - DCHECK(IsPowerOfTwo(std::abs(imm))); Register num = locations->GetTemp(0).AsRegister<Register>(); - __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ leal(num, Address(input_register, abs_imm - 1)); __ testl(input_register, input_register); __ cmovl(kGreaterEqual, num, input_register); int shift = CTZ(imm); @@ -3340,7 +3342,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. 
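The std::abs-to-AbsOrMin changes here in the x86 backend, and the matching ones in the MIPS64 and x86-64 hunks, exist because std::abs is undefined on the most negative value (e.g. INT_MIN), which is a legal power-of-two divisor. Assuming ART's helper follows the usual shape (a sketch, not the verbatim utility):

    #include <cstdlib>
    #include <limits>

    // Absolute value, except that the non-representable |min| case returns
    // min itself; the unsigned cast at the call site then yields the
    // 0x8000... power-of-two magnitude needed for the CTZ/shift computation.
    template <typename T>
    T AbsOrMin(T value) {
      return (value == std::numeric_limits<T>::min()) ? value : std::abs(value);
    }

For example, AbsOrMin(INT32_MIN) returns INT32_MIN, whose uint32_t bit pattern 0x80000000 still satisfies IsPowerOfTwo, so the DCHECKs and the shift-based division remain valid without invoking undefined behavior.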
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index df7347658b..c65c423eae 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -178,7 +178,7 @@ class LocationsBuilderX86 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; -class InstructionCodeGeneratorX86 : public HGraphVisitor { +class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 76a4ce2e93..294b40e3d4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -387,18 +387,16 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: - explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - deoptimize->GetDexPc(), + instruction_, + instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -406,7 +404,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; @@ -1000,7 +998,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1594,9 +1592,7 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86_64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), @@ -1608,6 +1604,10 @@ void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { } void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. 
+ __ nop(); + } codegen_->RecordPcInfo(info, info->GetDexPc()); } @@ -3350,13 +3350,13 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - - DCHECK(IsPowerOfTwo(std::abs(imm))); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (instruction->GetResultType() == Primitive::kPrimInt) { - __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ leal(tmp, Address(numerator, abs_imm - 1)); __ testl(numerator, numerator); __ cmov(kGreaterEqual, tmp, numerator); int shift = CTZ(imm); @@ -3371,7 +3371,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - codegen_->Load64BitValue(rdx, std::abs(imm) - 1); + codegen_->Load64BitValue(rdx, abs_imm - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -3529,7 +3529,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index c5e8a04da6..505c9dcdad 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -183,7 +183,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); }; -class InstructionCodeGeneratorX86_64 : public HGraphVisitor { +class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index c504ded54c..b90afb1d73 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -211,19 +211,6 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, // Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation. bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. 
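The backend gating removed in this hunk (and from SimplifyRotate further down) was only needed while the MIPS backends lacked rotate support; with the Rotr/Drotr handling added to HandleShift above, every backend can now accept HRor. For reference, the shift pair that TryReplaceWithRotate collapses, written as plain C++ (illustrative names only, not ART code):

    #include <cstdint>

    // The simplifier rewrites (x >>> d) | (x << (32 - d)) into a single
    // rotate-right by d; this function is the scalar meaning of both forms.
    uint32_t RotateRight32(uint32_t x, uint32_t d) {
      d &= 31;  // Only the low five bits of the distance are significant.
      return (x >> d) | (x << ((32 - d) & 31));
    }

After the rewrite, the whole expression lowers to one rotrv (or drotrv in the 64-bit case) instead of two shifts and an or.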
- const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return false; - } DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); HInstruction* left = op->GetLeft(); HInstruction* right = op->GetRight(); @@ -1261,19 +1248,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. - const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return; - } HInstruction* value = invoke->InputAt(0); HInstruction* distance = invoke->InputAt(1); // Replace the invoke with an HRor. diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 4683aee603..b1fbf28204 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -502,9 +502,6 @@ static void GenUnsafeGet(HInvoke* invoke, bool is_volatile, CodeGeneratorARM* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); Location base_loc = locations->InAt(1); Register base = base_loc.AsRegister<Register>(); // Object pointer. @@ -512,30 +509,67 @@ static void GenUnsafeGet(HInvoke* invoke, Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. 
Location trg_loc = locations->Out(); - if (type == Primitive::kPrimLong) { - Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); - __ add(IP, base, ShifterOperand(offset)); - if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); - __ ldrexd(trg_lo, trg_hi, IP); - } else { - __ ldrd(trg_lo, Address(IP)); + switch (type) { + case Primitive::kPrimInt: { + Register trg = trg_loc.AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + break; } - } else { - Register trg = trg_loc.AsRegister<Register>(); - __ ldr(trg, Address(base, offset)); - } - if (is_volatile) { - __ dmb(ISH); - } + case Primitive::kPrimNot: { + Register trg = trg_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile) { + __ dmb(ISH); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + __ MaybeUnpoisonHeapReference(trg); + } + break; + } - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + case Primitive::kPrimLong: { + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + if (is_volatile) { + __ dmb(ISH); + } + break; + } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -548,25 +582,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
@@ -808,6 +847,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
}
// Prevent reordering with prior memory operations.
+ // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+ // latter allows a preceding load to be delayed past the STXR
+ // instruction below.
__ dmb(ISH);
__ add(tmp_ptr, base, ShifterOperand(offset));
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f723940444..81cab86c83 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1035,7 +1035,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
__ Cbnz(tmp_32, &loop_head);
} else {
- __ Dmb(InnerShareable, BarrierWrites);
+ // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
+ // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
+ // one, as the latter allows a preceding load to be delayed past
+ // the STXR instruction below.
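Both barrier strengthenings in this pair of files close the same window: the DMB ISH emitted for ARM above and the BarrierAll form that follows for ARM64 order the earlier load of the old field value against the exclusive store of the CAS loop, which a store-store barrier cannot do. An illustrative analogue in C++11 atomics (hypothetical code, not ART's):

    #include <atomic>
    #include <cstdint>

    // The full fence plays the role of DMB ISH: it keeps the earlier load
    // from being delayed past the compare-and-swap. DMB ISHST orders only
    // store-store pairs, so it would not provide this guarantee.
    bool CasAfterLoad(std::atomic<int32_t>& field, int32_t desired) {
      int32_t expected = field.load(std::memory_order_relaxed);  // Prior load.
      std::atomic_thread_fence(std::memory_order_seq_cst);       // Full barrier.
      return field.compare_exchange_strong(expected, desired,
                                           std::memory_order_relaxed);
    }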
+ __ Dmb(InnerShareable, BarrierAll); __ Bind(&loop_head); // TODO: When `type == Primitive::kPrimNot`, add a read barrier for // the reference stored in the object before attempting the CAS, diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 06fab616ad..bc126a2716 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -43,14 +43,18 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { return codegen_->GetGraph()->GetArena(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() { +inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const { return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR6() { +inline bool IntrinsicCodeGeneratorMIPS::IsR6() const { return codegen_->GetInstructionSetFeatures().IsR6(); } +inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { + return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -162,7 +166,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); __ Mfc1(out_lo, in); - __ Mfhc1(out_hi, in); + __ MoveFromFpuHigh(out_hi, in); } else { Register out = locations->Out().AsRegister<Register>(); @@ -204,7 +208,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); __ Mtc1(in_lo, out); - __ Mthc1(in_hi, out); + __ MoveToFpuHigh(in_hi, out); } else { Register in = locations->InAt(0).AsRegister<Register>(); diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index f86b0efe4a..575a7d0a23 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -67,8 +67,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS - bool IsR2OrNewer(void); - bool IsR6(void); + bool IsR2OrNewer() const; + bool IsR6() const; + bool Is32BitFPU() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index f5a7048b01..b80c6bde82 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2183,10 +2183,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, IntrinsicExceptions exceptions) { intrinsic_ = intrinsic; IntrinsicOptimizations opt(this); - if (needs_env_or_cache == kNoEnvironmentOrCache) { - opt.SetDoesNotNeedDexCache(); - opt.SetDoesNotNeedEnvironment(); - } + // Adjust method's side effects from intrinsic table. switch (side_effects) { case kNoSideEffects: SetSideEffects(SideEffects::None()); break; @@ -2194,6 +2191,14 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic, case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break; case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break; } + + if (needs_env_or_cache == kNoEnvironmentOrCache) { + opt.SetDoesNotNeedDexCache(); + opt.SetDoesNotNeedEnvironment(); + } else { + // If we need an environment, that means there will be a call, which can trigger GC. + SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC())); + } // Adjust method's exception status from intrinsic table. 
switch (exceptions) { case kNoThrow: SetCanThrow(false); break; @@ -2325,4 +2330,19 @@ HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* } } +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) { + os << "[" + << " source=" << rhs.GetSource() + << " destination=" << rhs.GetDestination() + << " type=" << rhs.GetType() + << " instruction="; + if (rhs.GetInstruction() != nullptr) { + os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId(); + } else { + os << "null"; + } + os << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 59c07690b1..23132308f0 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1881,6 +1881,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return false; } + virtual bool IsActualObject() const { + return GetType() == Primitive::kPrimNot; + } + void SetReferenceTypeInfo(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { @@ -2500,8 +2504,10 @@ class HTryBoundary : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition. class HDeoptimize : public HTemplateInstruction<1> { public: + // We set CanTriggerGC to prevent any intermediate address to be live + // at the point of the `HDeoptimize`. HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cond); } @@ -4017,8 +4023,10 @@ class HRem : public HBinaryOperation { class HDivZeroCheck : public HExpression<1> { public: + // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` + // constructor. HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4539,8 +4547,10 @@ class HPhi : public HInstruction { class HNullCheck : public HExpression<1> { public: + // `HNullCheck` can trigger GC, as it may call the `NullPointerException` + // constructor. HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4861,8 +4871,10 @@ class HArrayLength : public HExpression<1> { class HBoundsCheck : public HExpression<2> { public: + // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` + // constructor. HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc) - : HExpression(index->GetType(), SideEffects::None(), dex_pc) { + : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { DCHECK(index->GetType() == Primitive::kPrimInt); SetRawInputAt(0, index); SetRawInputAt(1, length); @@ -5626,8 +5638,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { } bool IsPending() const { - DCHECK(!source_.IsInvalid() || destination_.IsInvalid()); - return destination_.IsInvalid() && !source_.IsInvalid(); + DCHECK(source_.IsValid() || destination_.IsInvalid()); + return destination_.IsInvalid() && source_.IsValid(); } // True if this blocks a move from the given location. 
@@ -5671,6 +5683,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { HInstruction* instruction_; }; +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs); + static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 18405f2623..445cdab191 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -107,6 +107,7 @@ class HArm64IntermediateAddress : public HExpression<2> { bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } + bool IsActualObject() const OVERRIDE { return false; } HInstruction* GetBaseAddress() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index cafc6c5440..bb840eabdd 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -17,6 +17,7 @@ #include "optimizing_compiler.h" #include <fstream> +#include <memory> #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm64 @@ -52,6 +53,8 @@ #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" +#include "dwarf/method_debug_info.h" +#include "elf_writer_debug.h" #include "elf_writer_quick.h" #include "graph_checker.h" #include "graph_visualizer.h" @@ -60,6 +63,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/debugger_interface.h" #include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" @@ -68,6 +72,7 @@ #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator.h" +#include "oat_quick_method_header.h" #include "sharpening.h" #include "side_effects_analysis.h" #include "ssa_builder.h" @@ -965,6 +970,39 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } + if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) { + const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); + const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); + CompiledMethod compiled_method( + GetCompilerDriver(), + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(code_allocator.GetMemory()), + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + ArrayRef<const SrcMapElem>(), + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map_data, stack_map_size), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); + dwarf::MethodDebugInfo method_debug_info { + dex_file, + class_def_idx, + method_idx, + access_flags, + code_item, + false, // deduped. 
+ code_address, + code_address + code_allocator.GetSize(), + &compiled_method + }; + ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info); + CreateJITCodeEntryForAddress(code_address, + std::unique_ptr<const uint8_t[]>(elf_file.data()), + elf_file.size()); + } + return true; } diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 176c50ce21..9d136f3ae6 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include <iostream> #include "parallel_move_resolver.h" @@ -172,7 +171,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { i = -1; } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the - // move by just returning from this `PerforrmMove`. + // move by just returning from this `PerformMove`. moves_[index]->ClearPending(destination); return required_swap; } @@ -201,7 +200,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else { for (MoveOperands* other_move : moves_) { if (other_move->Blocks(destination)) { - DCHECK(other_move->IsPending()); + DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move; if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index d1770b75ab..63ef600756 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -96,7 +96,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class) { + } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. 
load_class->SetMustGenerateClinitCheck(true); diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 5ab4547e22..2bae4bc5c8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1679,6 +1679,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LocationSummary* locations = safepoint_position->GetLocations(); if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } @@ -1691,6 +1694,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { maximum_number_of_live_fp_registers_); } if (current->GetType() == Primitive::kPrimNot) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetRegisterBit(source.reg()); } break; diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index c60a4eacaa..4784de1380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); } - if (entry.num_dex_registers == 0) { + if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap); } else { diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 560502fde6..604787fd92 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); + number_of_dex_registers = 1; + stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { CodeInfo code_info(region); StackMapEncoding encoding = code_info.ExtractEncoding(); ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); @@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); + + stack_map = code_info.GetStackMapAt(1, encoding); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding))); + ASSERT_EQ(1u, stack_map.GetDexPc(encoding)); + ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding)); + ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); } TEST(StackMapTest, InlineTest) { diff --git 
a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc new file mode 100644 index 0000000000..81f2a5692d --- /dev/null +++ b/compiler/profile_assistant.cc @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profile_assistant.h" + +namespace art { + +// Minimum number of new methods that profiles must contain to enable recompilation. +static constexpr const uint32_t kMinNewMethodsForCompilation = 10; + +bool ProfileAssistant::ProcessProfiles( + const std::vector<std::string>& profile_files, + const std::vector<std::string>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info) { + DCHECK(!profile_files.empty()); + DCHECK(reference_profile_files.empty() || + (profile_files.size() == reference_profile_files.size())); + + std::vector<ProfileCompilationInfo> new_info(profile_files.size()); + bool should_compile = false; + // Read the main profile files. + for (size_t i = 0; i < profile_files.size(); i++) { + if (!new_info[i].Load(profile_files[i])) { + LOG(WARNING) << "Could not load profile file: " << profile_files[i]; + return false; + } + // Do we have enough new profiled methods that will make the compilation worthwhile? + should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation); + } + if (!should_compile) { + *profile_compilation_info = nullptr; + return true; + } + + std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo()); + for (size_t i = 0; i < new_info.size(); i++) { + // Merge all data into a single object. + result->Load(new_info[i]); + // If we have any reference profile information merge their information with + // the current profiles and save them back to disk. + if (!reference_profile_files.empty()) { + if (!new_info[i].Load(reference_profile_files[i])) { + LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i]; + return false; + } + if (!new_info[i].Save(reference_profile_files[i])) { + LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i]; + return false; + } + } + } + *profile_compilation_info = result.release(); + return true; +} + +} // namespace art diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h new file mode 100644 index 0000000000..088c8bd1c7 --- /dev/null +++ b/compiler/profile_assistant.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_PROFILE_ASSISTANT_H_
+#define ART_COMPILER_PROFILE_ASSISTANT_H_
+
+#include <string>
+#include <vector>
+
+#include "jit/offline_profiling_info.h"
+
+namespace art {
+
+class ProfileAssistant {
+ public:
+ // Process the profile information present in the given files. Returns true
+ // if the analysis ended up successfully (i.e. no errors during reading,
+ // merging or writing of profile files).
+ //
+ // If the returned value is true and there is a significant difference between
+ // profile_files and reference_profile_files:
+ // - profile_compilation_info is set to a non-null object that
+ // can be used to drive compilation. It will be the merge of all the data
+ // found in profile_files and reference_profile_files.
+ // - the data from profile_files[i] is merged into
+ // reference_profile_files[i] and the corresponding backing file is
+ // updated.
+ //
+ // If the returned value is false or the difference is insignificant,
+ // profile_compilation_info will be set to null.
+ //
+ // Additional notes:
+ // - as mentioned above, this function may update the content of the files
+ // passed with the reference_profile_files.
+ // - if reference_profile_files is not empty it must be the same size as
+ // profile_files.
+ static bool ProcessProfiles(
+ const std::vector<std::string>& profile_files,
+ const std::vector<std::string>& reference_profile_files,
+ /*out*/ ProfileCompilationInfo** profile_compilation_info);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ProfileAssistant);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_PROFILE_ASSISTANT_H_
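As a usage sketch of the contract documented above, a hypothetical call site (the function and the file paths are invented for illustration):

    #include <string>
    #include <vector>

    #include "profile_assistant.h"

    // Merge the current profiles into the reference profiles and decide
    // whether recompilation is worthwhile.
    bool MaybeRecompile() {
      std::vector<std::string> profiles = { "/data/misc/profiles/cur/app.prof" };
      std::vector<std::string> reference = { "/data/misc/profiles/ref/app.prof" };
      art::ProfileCompilationInfo* info = nullptr;
      if (!art::ProfileAssistant::ProcessProfiles(profiles, reference, &info)) {
        return false;  // Read/merge/write error; do not compile.
      }
      if (info == nullptr) {
        return true;   // Difference insignificant; nothing to recompile.
      }
      // Hand `info` to the compiler, then release it.
      delete info;
      return true;
    }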
rd : IP; - if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) { + if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) { mvn(temp, shifter_op, cond, kCcKeep); add(rd, rn, ShifterOperand(temp), cond, set_cc); - } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) { + } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) { mvn(temp, shifter_op, cond, kCcKeep); sub(rd, rn, ShifterOperand(temp), cond, set_cc); } else if (High16Bits(-value) == 0) { @@ -3449,22 +3449,32 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, } void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { - // We prefer to select the shorter code sequence rather than selecting add for - // positive values and sub for negatives ones, which would slightly improve - // the readability of generated code for some constants. + // We prefer to select the shorter code sequence rather than using plain cmp and cmn + // which would slightly improve the readability of generated code for some constants. ShifterOperand shifter_op; if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) { cmp(rn, shifter_op, cond); - } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) { + } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, -value, kCcSet, &shifter_op)) { cmn(rn, shifter_op, cond); } else { CHECK(rn != IP); - movw(IP, Low16Bits(value), cond); - uint16_t value_high = High16Bits(value); - if (value_high != 0) { - movt(IP, value_high, cond); + if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) { + mvn(IP, shifter_op, cond, kCcKeep); + cmp(rn, ShifterOperand(IP), cond); + } else if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) { + mvn(IP, shifter_op, cond, kCcKeep); + cmn(rn, ShifterOperand(IP), cond); + } else if (High16Bits(-value) == 0) { + movw(IP, Low16Bits(-value), cond); + cmn(rn, ShifterOperand(IP), cond); + } else { + movw(IP, Low16Bits(value), cond); + uint16_t value_high = High16Bits(value); + if (value_high != 0) { + movt(IP, value_high, cond); + } + cmp(rn, ShifterOperand(IP), cond); } - cmp(rn, ShifterOperand(IP), cond); } } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 0ef0dc19e6..2df9b177bf 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1626,6 +1626,76 @@ TEST(Thumb2AssemblerTest, AddConstant) { EmitAndCheck(&assembler, "AddConstant"); } +TEST(Thumb2AssemblerTest, CmpConstant) { + arm::Thumb2Assembler assembler; + + __ CmpConstant(R0, 0); // 16-bit CMP. + __ CmpConstant(R1, 1); // 16-bit CMP. + __ CmpConstant(R0, 7); // 16-bit CMP. + __ CmpConstant(R1, 8); // 16-bit CMP. + __ CmpConstant(R0, 255); // 16-bit CMP. + __ CmpConstant(R1, 256); // 32-bit CMP. + __ CmpConstant(R0, 257); // MNV+CMN. + __ CmpConstant(R1, 0xfff); // MOVW+CMP. + __ CmpConstant(R0, 0x1000); // 32-bit CMP. + __ CmpConstant(R1, 0x1001); // MNV+CMN. + __ CmpConstant(R0, 0x1002); // MOVW+CMP. + __ CmpConstant(R1, 0xffff); // MOVW+CMP. + __ CmpConstant(R0, 0x10000); // 32-bit CMP. + __ CmpConstant(R1, 0x10001); // 32-bit CMP. + __ CmpConstant(R0, 0x10002); // MVN+CMN. + __ CmpConstant(R1, 0x10003); // MOVW+MOVT+CMP. + __ CmpConstant(R0, -1); // 32-bit CMP. + __ CmpConstant(R1, -7); // CMN. + __ CmpConstant(R0, -8); // CMN. + __ CmpConstant(R1, -255); // CMN. 
+ __ CmpConstant(R0, -256); // CMN. + __ CmpConstant(R1, -257); // MVN+CMP. + __ CmpConstant(R0, -0xfff); // MOVW+CMN. + __ CmpConstant(R1, -0x1000); // CMN. + __ CmpConstant(R0, -0x1001); // MVN+CMP. + __ CmpConstant(R1, -0x1002); // MOVW+CMN. + __ CmpConstant(R0, -0xffff); // MOVW+CMN. + __ CmpConstant(R1, -0x10000); // CMN. + __ CmpConstant(R0, -0x10001); // CMN. + __ CmpConstant(R1, -0x10002); // MVN+CMP. + __ CmpConstant(R0, -0x10003); // MOVW+MOVT+CMP. + + __ CmpConstant(R8, 0); // 32-bit CMP. + __ CmpConstant(R9, 1); // 32-bit CMP. + __ CmpConstant(R8, 7); // 32-bit CMP. + __ CmpConstant(R9, 8); // 32-bit CMP. + __ CmpConstant(R8, 255); // 32-bit CMP. + __ CmpConstant(R9, 256); // 32-bit CMP. + __ CmpConstant(R8, 257); // MVN+CMN. + __ CmpConstant(R9, 0xfff); // MOVW+CMP. + __ CmpConstant(R8, 0x1000); // 32-bit CMP. + __ CmpConstant(R9, 0x1001); // MVN+CMN. + __ CmpConstant(R8, 0x1002); // MOVW+CMP. + __ CmpConstant(R9, 0xffff); // MOVW+CMP. + __ CmpConstant(R8, 0x10000); // 32-bit CMP. + __ CmpConstant(R9, 0x10001); // 32-bit CMP. + __ CmpConstant(R8, 0x10002); // MVN+CMN. + __ CmpConstant(R9, 0x10003); // MOVW+MOVT+CMP. + __ CmpConstant(R8, -1); // 32-bit CMP. + __ CmpConstant(R9, -7); // CMN. + __ CmpConstant(R8, -8); // CMN. + __ CmpConstant(R9, -255); // CMN. + __ CmpConstant(R8, -256); // CMN. + __ CmpConstant(R9, -257); // MVN+CMP. + __ CmpConstant(R8, -0xfff); // MOVW+CMN. + __ CmpConstant(R9, -0x1000); // CMN. + __ CmpConstant(R8, -0x1001); // MVN+CMP. + __ CmpConstant(R9, -0x1002); // MOVW+CMN. + __ CmpConstant(R8, -0xffff); // MOVW+CMN. + __ CmpConstant(R9, -0x10000); // CMN. + __ CmpConstant(R8, -0x10001); // CMN. + __ CmpConstant(R9, -0x10002); // MVN+CMP. + __ CmpConstant(R8, -0x10003); // MOVW+MOVT+CMP. + + EmitAndCheck(&assembler, "CmpConstant"); +} + #undef __ } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index f07f8c74d7..6736015bf1 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -1,4 +1,4 @@ -const char* SimpleMovResults[] = { +const char* const SimpleMovResults[] = { " 0: 0008 movs r0, r1\n", " 2: 4608 mov r0, r1\n", " 4: 46c8 mov r8, r9\n", @@ -6,18 +6,18 @@ const char* SimpleMovResults[] = { " 8: f04f 0809 mov.w r8, #9\n", nullptr }; -const char* SimpleMov32Results[] = { +const char* const SimpleMov32Results[] = { " 0: ea4f 0001 mov.w r0, r1\n", " 4: ea4f 0809 mov.w r8, r9\n", nullptr }; -const char* SimpleMovAddResults[] = { +const char* const SimpleMovAddResults[] = { " 0: 4608 mov r0, r1\n", " 2: 1888 adds r0, r1, r2\n", " 4: 1c08 adds r0, r1, #0\n", nullptr }; -const char* DataProcessingRegisterResults[] = { +const char* const DataProcessingRegisterResults[] = { " 0: ea6f 0001 mvn.w r0, r1\n", " 4: eb01 0002 add.w r0, r1, r2\n", " 8: eba1 0002 sub.w r0, r1, r2\n", @@ -129,7 +129,7 @@ const char* DataProcessingRegisterResults[] = { " 120: eb01 0c00 add.w ip, r1, r0\n", nullptr }; -const char* DataProcessingImmediateResults[] = { +const char* const DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", @@ -154,7 +154,7 @@ const char* DataProcessingImmediateResults[] = { " 48: 1f48 subs r0, r1, #5\n", nullptr }; -const char* DataProcessingModifiedImmediateResults[] = { +const char* const DataProcessingModifiedImmediateResults[] = { " 0: f04f 1055 mov.w r0, #5570645 ;
0x550055\n", " 4: f06f 1055 mvn.w r0, #5570645 ; 0x550055\n", " 8: f101 1055 add.w r0, r1, #5570645 ; 0x550055\n", @@ -173,7 +173,7 @@ const char* DataProcessingModifiedImmediateResults[] = { " 3c: f110 1f55 cmn.w r0, #5570645 ; 0x550055\n", nullptr }; -const char* DataProcessingModifiedImmediatesResults[] = { +const char* const DataProcessingModifiedImmediatesResults[] = { " 0: f04f 1055 mov.w r0, #5570645 ; 0x550055\n", " 4: f04f 2055 mov.w r0, #1426085120 ; 0x55005500\n", " 8: f04f 3055 mov.w r0, #1431655765 ; 0x55555555\n", @@ -183,7 +183,7 @@ const char* DataProcessingModifiedImmediatesResults[] = { " 18: f44f 70d4 mov.w r0, #424 ; 0x1a8\n", nullptr }; -const char* DataProcessingShiftedRegisterResults[] = { +const char* const DataProcessingShiftedRegisterResults[] = { " 0: 0123 lsls r3, r4, #4\n", " 2: 0963 lsrs r3, r4, #5\n", " 4: 11a3 asrs r3, r4, #6\n", @@ -201,7 +201,7 @@ const char* DataProcessingShiftedRegisterResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; -const char* ShiftImmediateResults[] = { +const char* const ShiftImmediateResults[] = { " 0: 0123 lsls r3, r4, #4\n", " 2: 0963 lsrs r3, r4, #5\n", " 4: 11a3 asrs r3, r4, #6\n", @@ -219,7 +219,7 @@ const char* ShiftImmediateResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; -const char* BasicLoadResults[] = { +const char* const BasicLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: 7e23 ldrb r3, [r4, #24]\n", " 4: 8b23 ldrh r3, [r4, #24]\n", @@ -233,7 +233,7 @@ const char* BasicLoadResults[] = { " 20: f9b4 8018 ldrsh.w r8, [r4, #24]\n", nullptr }; -const char* BasicStoreResults[] = { +const char* const BasicStoreResults[] = { " 0: 61a3 str r3, [r4, #24]\n", " 2: 7623 strb r3, [r4, #24]\n", " 4: 8323 strh r3, [r4, #24]\n", @@ -243,7 +243,7 @@ const char* BasicStoreResults[] = { " 10: f8a4 8018 strh.w r8, [r4, #24]\n", nullptr }; -const char* ComplexLoadResults[] = { +const char* const ComplexLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: f854 3f18 ldr.w r3, [r4, #24]!\n", " 6: f854 3b18 ldr.w r3, [r4], #24\n", @@ -276,7 +276,7 @@ const char* ComplexLoadResults[] = { " 6e: f934 3918 ldrsh.w r3, [r4], #-24\n", nullptr }; -const char* ComplexStoreResults[] = { +const char* const ComplexStoreResults[] = { " 0: 61a3 str r3, [r4, #24]\n", " 2: f844 3f18 str.w r3, [r4, #24]!\n", " 6: f844 3b18 str.w r3, [r4], #24\n", @@ -297,7 +297,7 @@ const char* ComplexStoreResults[] = { " 3e: f824 3918 strh.w r3, [r4], #-24\n", nullptr }; -const char* NegativeLoadStoreResults[] = { +const char* const NegativeLoadStoreResults[] = { " 0: f854 3c18 ldr.w r3, [r4, #-24]\n", " 4: f854 3d18 ldr.w r3, [r4, #-24]!\n", " 8: f854 3918 ldr.w r3, [r4], #-24\n", @@ -348,12 +348,12 @@ const char* NegativeLoadStoreResults[] = { " bc: f824 3b18 strh.w r3, [r4], #24\n", nullptr }; -const char* SimpleLoadStoreDualResults[] = { +const char* const SimpleLoadStoreDualResults[] = { " 0: e9c0 2306 strd r2, r3, [r0, #24]\n", " 4: e9d0 2306 ldrd r2, r3, [r0, #24]\n", nullptr }; -const char* ComplexLoadStoreDualResults[] = { +const char* const ComplexLoadStoreDualResults[] = { " 0: e9c0 2306 strd r2, r3, [r0, #24]\n", " 4: e9e0 2306 strd r2, r3, [r0, #24]!\n", " 8: e8e0 2306 strd r2, r3, [r0], #24\n", @@ -368,7 +368,7 @@ const char* ComplexLoadStoreDualResults[] = { " 2c: e870 2306 ldrd r2, r3, [r0], #-24\n", nullptr }; -const char* NegativeLoadStoreDualResults[] = { +const char* const NegativeLoadStoreDualResults[] = { " 0: e940 2306 strd r2, r3, [r0, #-24]\n", " 4: e960 2306 strd r2, r3, [r0, #-24]!\n", " 8: e860 2306 strd r2, 
r3, [r0], #-24\n", @@ -383,7 +383,7 @@ const char* NegativeLoadStoreDualResults[] = { " 2c: e8f0 2306 ldrd r2, r3, [r0], #24\n", nullptr }; -const char* SimpleBranchResults[] = { +const char* const SimpleBranchResults[] = { " 0: 2002 movs r0, #2\n", " 2: 2101 movs r1, #1\n", " 4: e7fd b.n 2 <SimpleBranch+0x2>\n", @@ -403,7 +403,7 @@ const char* SimpleBranchResults[] = { " 20: 2006 movs r0, #6\n", nullptr }; -const char* LongBranchResults[] = { +const char* const LongBranchResults[] = { " 0: f04f 0002 mov.w r0, #2\n", " 4: f04f 0101 mov.w r1, #1\n", " 8: f7ff bffc b.w 4 <LongBranch+0x4>\n", @@ -423,14 +423,14 @@ const char* LongBranchResults[] = { " 40: f04f 0006 mov.w r0, #6\n", nullptr }; -const char* LoadMultipleResults[] = { +const char* const LoadMultipleResults[] = { " 0: cc09 ldmia r4!, {r0, r3}\n", " 2: e934 4800 ldmdb r4!, {fp, lr}\n", " 6: e914 4800 ldmdb r4, {fp, lr}\n", " a: f854 5b04 ldr.w r5, [r4], #4\n", nullptr }; -const char* StoreMultipleResults[] = { +const char* const StoreMultipleResults[] = { " 0: c409 stmia r4!, {r0, r3}\n", " 2: e8a4 4800 stmia.w r4!, {fp, lr}\n", " 6: e884 4800 stmia.w r4, {fp, lr}\n", @@ -438,7 +438,7 @@ const char* StoreMultipleResults[] = { " e: f844 5d04 str.w r5, [r4, #-4]!\n", nullptr }; -const char* MovWMovTResults[] = { +const char* const MovWMovTResults[] = { " 0: f240 0400 movw r4, #0\n", " 4: f240 0434 movw r4, #52 ; 0x34\n", " 8: f240 0934 movw r9, #52 ; 0x34\n", @@ -449,7 +449,7 @@ const char* MovWMovTResults[] = { " 1c: f6cf 71ff movt r1, #65535 ; 0xffff\n", nullptr }; -const char* SpecialAddSubResults[] = { +const char* const SpecialAddSubResults[] = { " 0: aa14 add r2, sp, #80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", @@ -463,7 +463,7 @@ const char* SpecialAddSubResults[] = { " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", nullptr }; -const char* LoadFromOffsetResults[] = { +const char* const LoadFromOffsetResults[] = { " 0: 68e2 ldr r2, [r4, #12]\n", " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", @@ -514,7 +514,7 @@ const char* LoadFromOffsetResults[] = { " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; -const char* StoreToOffsetResults[] = { +const char* const StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", @@ -563,7 +563,7 @@ const char* StoreToOffsetResults[] = { " a4: 7322 strb r2, [r4, #12]\n", nullptr }; -const char* IfThenResults[] = { +const char* const IfThenResults[] = { " 0: bf08 it eq\n", " 2: 2101 moveq r1, #1\n", " 4: bf04 itt eq\n", @@ -587,7 +587,7 @@ const char* IfThenResults[] = { " 28: 2404 movne r4, #4\n", nullptr }; -const char* CbzCbnzResults[] = { +const char* const CbzCbnzResults[] = { " 0: b10a cbz r2, 6 <CbzCbnz+0x6>\n", " 2: 2103 movs r1, #3\n", " 4: 2203 movs r2, #3\n", @@ -598,7 +598,7 @@ const char* CbzCbnzResults[] = { " 10: 2204 movs r2, #4\n", nullptr }; -const char* MultiplyResults[] = { +const char* const MultiplyResults[] = { " 0: 4348 muls r0, r1\n", " 2: fb01 f002 mul.w r0, r1, r2\n", " 6: fb09 f808 mul.w r8, r9, r8\n", @@ -611,21 +611,21 @@ const char* MultiplyResults[] = { " 22: fbaa 890b umull r8, r9, sl, fp\n", nullptr }; -const char* DivideResults[] = { +const char* const DivideResults[] = { " 0: fb91 f0f2 sdiv r0, r1, r2\n", " 4: fb99 f8fa sdiv r8, r9, sl\n", " 8: fbb1 f0f2 udiv r0, r1, r2\n", " c: fbb9 f8fa udiv r8, r9, sl\n", nullptr }; -const char* VMovResults[] = { +const char* 
const VMovResults[] = { " 0: eef7 0a00 vmov.f32 s1, #112 ; 0x70\n", " 4: eeb7 1b00 vmov.f64 d1, #112 ; 0x70\n", " 8: eef0 0a41 vmov.f32 s1, s2\n", " c: eeb0 1b42 vmov.f64 d1, d2\n", nullptr }; -const char* BasicFloatingPointResults[] = { +const char* const BasicFloatingPointResults[] = { " 0: ee30 0a81 vadd.f32 s0, s1, s2\n", " 4: ee30 0ac1 vsub.f32 s0, s1, s2\n", " 8: ee20 0a81 vmul.f32 s0, s1, s2\n", @@ -646,7 +646,7 @@ const char* BasicFloatingPointResults[] = { " 44: eeb1 0bc1 vsqrt.f64 d0, d1\n", nullptr }; -const char* FloatingPointConversionsResults[] = { +const char* const FloatingPointConversionsResults[] = { " 0: eeb7 1bc2 vcvt.f32.f64 s2, d2\n", " 4: eeb7 2ac1 vcvt.f64.f32 d2, s2\n", " 8: eefd 0ac1 vcvt.s32.f32 s1, s2\n", @@ -659,35 +659,35 @@ const char* FloatingPointConversionsResults[] = { " 24: eeb8 1b41 vcvt.f64.u32 d1, s2\n", nullptr }; -const char* FloatingPointComparisonsResults[] = { +const char* const FloatingPointComparisonsResults[] = { " 0: eeb4 0a60 vcmp.f32 s0, s1\n", " 4: eeb4 0b41 vcmp.f64 d0, d1\n", " 8: eeb5 1a40 vcmp.f32 s2, #0.0\n", " c: eeb5 2b40 vcmp.f64 d2, #0.0\n", nullptr }; -const char* CallsResults[] = { +const char* const CallsResults[] = { " 0: 47f0 blx lr\n", " 2: 4770 bx lr\n", nullptr }; -const char* BreakpointResults[] = { +const char* const BreakpointResults[] = { " 0: be00 bkpt 0x0000\n", nullptr }; -const char* StrR1Results[] = { +const char* const StrR1Results[] = { " 0: 9111 str r1, [sp, #68] ; 0x44\n", " 2: f8cd 142c str.w r1, [sp, #1068] ; 0x42c\n", nullptr }; -const char* VPushPopResults[] = { +const char* const VPushPopResults[] = { " 0: ed2d 1a04 vpush {s2-s5}\n", " 4: ed2d 2b08 vpush {d2-d5}\n", " 8: ecbd 1a04 vpop {s2-s5}\n", " c: ecbd 2b08 vpop {d2-d5}\n", nullptr }; -const char* Max16BitBranchResults[] = { +const char* const Max16BitBranchResults[] = { " 0: e3ff b.n 802 <Max16BitBranch+0x802>\n", " 2: 2300 movs r3, #0\n", " 4: 2302 movs r3, #2\n", @@ -1716,7 +1716,7 @@ const char* Max16BitBranchResults[] = { " 802: 4611 mov r1, r2\n", nullptr }; -const char* Branch32Results[] = { +const char* const Branch32Results[] = { " 0: f000 bc01 b.w 806 <Branch32+0x806>\n", " 4: 2300 movs r3, #0\n", " 6: 2302 movs r3, #2\n", @@ -2746,7 +2746,7 @@ const char* Branch32Results[] = { " 806: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchMaxResults[] = { +const char* const CompareAndBranchMaxResults[] = { " 0: b3fc cbz r4, 82 <CompareAndBranchMax+0x82>\n", " 2: 2300 movs r3, #0\n", " 4: 2302 movs r3, #2\n", @@ -2815,7 +2815,7 @@ const char* CompareAndBranchMaxResults[] = { " 82: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchRelocation16Results[] = { +const char* const CompareAndBranchRelocation16Results[] = { " 0: 2c00 cmp r4, #0\n", " 2: d040 beq.n 86 <CompareAndBranchRelocation16+0x86>\n", " 4: 2300 movs r3, #0\n", @@ -2886,7 +2886,7 @@ const char* CompareAndBranchRelocation16Results[] = { " 86: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchRelocation32Results[] = { +const char* const CompareAndBranchRelocation32Results[] = { " 0: 2c00 cmp r4, #0\n", " 2: f000 8401 beq.w 808 <CompareAndBranchRelocation32+0x808>\n", " 6: 2300 movs r3, #0\n", @@ -3917,7 +3917,7 @@ const char* CompareAndBranchRelocation32Results[] = { " 808: 4611 mov r1, r2\n", nullptr }; -const char* MixedBranch32Results[] = { +const char* const MixedBranch32Results[] = { " 0: f000 bc03 b.w 80a <MixedBranch32+0x80a>\n", " 4: 2300 movs r3, #0\n", " 6: 2302 movs r3, #2\n", @@ -4948,7 +4948,7 @@ const char* MixedBranch32Results[] = { " 80a: 
4611 mov r1, r2\n", nullptr }; -const char* ShiftsResults[] = { +const char* const ShiftsResults[] = { " 0: 0148 lsls r0, r1, #5\n", " 2: 0948 lsrs r0, r1, #5\n", " 4: 1148 asrs r0, r1, #5\n", @@ -4997,7 +4997,7 @@ const char* ShiftsResults[] = { " 98: fa51 f008 asrs.w r0, r1, r8\n", nullptr }; -const char* LoadStoreRegOffsetResults[] = { +const char* const LoadStoreRegOffsetResults[] = { " 0: 5888 ldr r0, [r1, r2]\n", " 2: 5088 str r0, [r1, r2]\n", " 4: f851 0012 ldr.w r0, [r1, r2, lsl #1]\n", @@ -5012,7 +5012,7 @@ const char* LoadStoreRegOffsetResults[] = { " 28: f841 0008 str.w r0, [r1, r8]\n", nullptr }; -const char* LoadStoreLiteralResults[] = { +const char* const LoadStoreLiteralResults[] = { " 0: 4801 ldr r0, [pc, #4] ; (8 <LoadStoreLiteral+0x8>)\n", " 2: f8cf 0004 str.w r0, [pc, #4] ; 8 <LoadStoreLiteral+0x8>\n", " 6: f85f 0008 ldr.w r0, [pc, #-8] ; 0 <LoadStoreLiteral>\n", @@ -5023,7 +5023,7 @@ const char* LoadStoreLiteralResults[] = { " 18: f8cf 07ff str.w r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n", nullptr }; -const char* LoadStoreLimitsResults[] = { +const char* const LoadStoreLimitsResults[] = { " 0: 6fe0 ldr r0, [r4, #124] ; 0x7c\n", " 2: f8d4 0080 ldr.w r0, [r4, #128] ; 0x80\n", " 6: 7fe0 ldrb r0, [r4, #31]\n", @@ -5042,7 +5042,7 @@ const char* LoadStoreLimitsResults[] = { " 30: f8a4 0040 strh.w r0, [r4, #64] ; 0x40\n", nullptr }; -const char* CompareAndBranchResults[] = { +const char* const CompareAndBranchResults[] = { " 0: b130 cbz r0, 10 <CompareAndBranch+0x10>\n", " 2: f1bb 0f00 cmp.w fp, #0\n", " 6: d003 beq.n 10 <CompareAndBranch+0x10>\n", @@ -5052,7 +5052,7 @@ const char* CompareAndBranchResults[] = { nullptr }; -const char* AddConstantResults[] = { +const char* const AddConstantResults[] = { " 0: 4608 mov r0, r1\n", " 2: 1c48 adds r0, r1, #1\n", " 4: 1dc8 adds r0, r1, #7\n", @@ -5370,6 +5370,104 @@ const char* AddConstantResults[] = { nullptr }; +const char* const CmpConstantResults[] = { + " 0: 2800 cmp r0, #0\n", + " 2: 2901 cmp r1, #1\n", + " 4: 2807 cmp r0, #7\n", + " 6: 2908 cmp r1, #8\n", + " 8: 28ff cmp r0, #255 ; 0xff\n", + " a: f5b1 7f80 cmp.w r1, #256 ; 0x100\n", + " e: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 12: eb10 0f0c cmn.w r0, ip\n", + " 16: f640 7cff movw ip, #4095 ; 0xfff\n", + " 1a: 4561 cmp r1, ip\n", + " 1c: f5b0 5f80 cmp.w r0, #4096 ; 0x1000\n", + " 20: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 24: eb11 0f0c cmn.w r1, ip\n", + " 28: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 2c: 4560 cmp r0, ip\n", + " 2e: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 32: 4561 cmp r1, ip\n", + " 34: f5b0 3f80 cmp.w r0, #65536 ; 0x10000\n", + " 38: f1b1 1f01 cmp.w r1, #65537 ; 0x10001\n", + " 3c: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 40: eb10 0f0c cmn.w r0, ip\n", + " 44: f240 0c03 movw ip, #3\n", + " 48: f2c0 0c01 movt ip, #1\n", + " 4c: 4561 cmp r1, ip\n", + " 4e: f1b0 3fff cmp.w r0, #4294967295 ; 0xffffffff\n", + " 52: f111 0f07 cmn.w r1, #7\n", + " 56: f110 0f08 cmn.w r0, #8\n", + " 5a: f111 0fff cmn.w r1, #255 ; 0xff\n", + " 5e: f510 7f80 cmn.w r0, #256 ; 0x100\n", + " 62: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 66: 4561 cmp r1, ip\n", + " 68: f640 7cff movw ip, #4095 ; 0xfff\n", + " 6c: eb10 0f0c cmn.w r0, ip\n", + " 70: f511 5f80 cmn.w r1, #4096 ; 0x1000\n", + " 74: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 78: 4560 cmp r0, ip\n", + " 7a: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 7e: eb11 0f0c cmn.w r1, ip\n", + " 82: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 86: eb10 0f0c cmn.w r0, ip\n", + " 8a: f511 3f80 cmn.w r1, #65536 ; 0x10000\n", + 
" 8e: f110 1f01 cmn.w r0, #65537 ; 0x10001\n", + " 92: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 96: 4561 cmp r1, ip\n", + " 98: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 9c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " a0: 4560 cmp r0, ip\n", + " a2: f1b8 0f00 cmp.w r8, #0\n", + " a6: f1b9 0f01 cmp.w r9, #1\n", + " aa: f1b8 0f07 cmp.w r8, #7\n", + " ae: f1b9 0f08 cmp.w r9, #8\n", + " b2: f1b8 0fff cmp.w r8, #255 ; 0xff\n", + " b6: f5b9 7f80 cmp.w r9, #256 ; 0x100\n", + " ba: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " be: eb18 0f0c cmn.w r8, ip\n", + " c2: f640 7cff movw ip, #4095 ; 0xfff\n", + " c6: 45e1 cmp r9, ip\n", + " c8: f5b8 5f80 cmp.w r8, #4096 ; 0x1000\n", + " cc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " d0: eb19 0f0c cmn.w r9, ip\n", + " d4: f241 0c02 movw ip, #4098 ; 0x1002\n", + " d8: 45e0 cmp r8, ip\n", + " da: f64f 7cff movw ip, #65535 ; 0xffff\n", + " de: 45e1 cmp r9, ip\n", + " e0: f5b8 3f80 cmp.w r8, #65536 ; 0x10000\n", + " e4: f1b9 1f01 cmp.w r9, #65537 ; 0x10001\n", + " e8: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " ec: eb18 0f0c cmn.w r8, ip\n", + " f0: f240 0c03 movw ip, #3\n", + " f4: f2c0 0c01 movt ip, #1\n", + " f8: 45e1 cmp r9, ip\n", + " fa: f1b8 3fff cmp.w r8, #4294967295 ; 0xffffffff\n", + " fe: f119 0f07 cmn.w r9, #7\n", + " 102: f118 0f08 cmn.w r8, #8\n", + " 106: f119 0fff cmn.w r9, #255 ; 0xff\n", + " 10a: f518 7f80 cmn.w r8, #256 ; 0x100\n", + " 10e: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 112: 45e1 cmp r9, ip\n", + " 114: f640 7cff movw ip, #4095 ; 0xfff\n", + " 118: eb18 0f0c cmn.w r8, ip\n", + " 11c: f519 5f80 cmn.w r9, #4096 ; 0x1000\n", + " 120: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 124: 45e0 cmp r8, ip\n", + " 126: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 12a: eb19 0f0c cmn.w r9, ip\n", + " 12e: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 132: eb18 0f0c cmn.w r8, ip\n", + " 136: f519 3f80 cmn.w r9, #65536 ; 0x10000\n", + " 13a: f118 1f01 cmn.w r8, #65537 ; 0x10001\n", + " 13e: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 142: 45e1 cmp r9, ip\n", + " 144: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 148: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 14c: 45e0 cmp r8, ip\n", + nullptr +}; + std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -5421,4 +5519,5 @@ void setup_results() { test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; test_results["AddConstant"] = AddConstantResults; + test_results["CmpConstant"] = CmpConstantResults; } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 0dc307c9ac..ac9c097892 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -1035,6 +1035,22 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) { EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); } +void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); +} + +void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); +} + void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); 
} @@ -1051,6 +1067,14 @@ void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); } +void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); +} + +void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); +} + void MipsAssembler::Mfc1(Register rt, FRegister fs) { EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } @@ -1067,6 +1091,24 @@ void MipsAssembler::Mthc1(Register rt, FRegister fs) { EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } +void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) { + if (Is32BitFPU()) { + CHECK_EQ(fs % 2, 0) << fs; + Mfc1(rt, static_cast<FRegister>(fs + 1)); + } else { + Mfhc1(rt, fs); + } +} + +void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) { + if (Is32BitFPU()) { + CHECK_EQ(fs % 2, 0) << fs; + Mtc1(rt, static_cast<FRegister>(fs + 1)); + } else { + Mthc1(rt, fs); + } +} + void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) { EmitI(0x31, rs, static_cast<Register>(ft), imm16); } @@ -1213,10 +1255,10 @@ void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) { Mtc1(temp, rd); } if (high == 0) { - Mthc1(ZERO, rd); + MoveToFpuHigh(ZERO, rd); } else { LoadConst32(temp, high); - Mthc1(temp, rd); + MoveToFpuHigh(temp, rd); } } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 066e7b0014..01c6490f88 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -265,15 +265,23 @@ class MipsAssembler FINAL : public Assembler { void Movf(Register rd, Register rs, int cc); // R2 void Movt(Register rd, Register rs, int cc); // R2 + void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncWS(FRegister fd, FRegister fs); + void TruncWD(FRegister fd, FRegister fs); void Cvtsw(FRegister fd, FRegister fs); void Cvtdw(FRegister fd, FRegister fs); void Cvtsd(FRegister fd, FRegister fs); void Cvtds(FRegister fd, FRegister fs); + void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1 + void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1 void Mfc1(Register rt, FRegister fs); void Mtc1(Register rt, FRegister fs); void Mfhc1(Register rt, FRegister fs); void Mthc1(Register rt, FRegister fs); + void MoveFromFpuHigh(Register rt, FRegister fs); + void MoveToFpuHigh(Register rt, FRegister fs); void Lwc1(FRegister ft, Register rs, uint16_t imm16); void Ldc1(FRegister ft, Register rs, uint16_t imm16); void Swc1(FRegister ft, Register rs, uint16_t imm16); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 4361843c54..5fc3deebd3 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -599,6 +599,14 @@ TEST_F(AssemblerMIPSTest, CvtDW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW"); } +TEST_F(AssemblerMIPSTest, CvtSL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL"); +} + +TEST_F(AssemblerMIPSTest, CvtDL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL"); +} + TEST_F(AssemblerMIPSTest, CvtSD) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD"); } @@ -607,6 +615,22 @@ 
TEST_F(AssemblerMIPSTest, CvtDS) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS"); } +TEST_F(AssemblerMIPSTest, TruncWS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS"); +} + +TEST_F(AssemblerMIPSTest, TruncWD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD"); +} + +TEST_F(AssemblerMIPSTest, TruncLS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS"); +} + +TEST_F(AssemblerMIPSTest, TruncLD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD"); +} + TEST_F(AssemblerMIPSTest, Mfc1) { DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index cfd8421e93..f9ff2df8bb 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -771,6 +771,22 @@ void Mips64Assembler::RoundWD(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc); } +void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + +void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 883f013f87..3262640ce7 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -250,6 +250,10 @@ class Mips64Assembler FINAL : public Assembler { void RoundLD(FpuRegister fd, FpuRegister fs); void RoundWS(FpuRegister fd, FpuRegister fs); void RoundWD(FpuRegister fd, FpuRegister fs); + void TruncLS(FpuRegister fd, FpuRegister fs); + void TruncLD(FpuRegister fd, FpuRegister fs); + void TruncWS(FpuRegister fd, FpuRegister fs); + void TruncWD(FpuRegister fd, FpuRegister fs); void CeilLS(FpuRegister fd, FpuRegister fs); void CeilLD(FpuRegister fd, FpuRegister fs); void CeilWS(FpuRegister fd, FpuRegister fs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index bac4375b35..7d79be2731 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -527,6 +527,22 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w"); } +TEST_F(AssemblerMIPS64Test, TruncWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d"); +} + +TEST_F(AssemblerMIPS64Test, TruncLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d 
${reg1}, ${reg2}"), "trunc.l.d"); +} + //////////////// // CALL / JMP // //////////////// diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 42ed8810f8..244a5fedbe 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -18,6 +18,7 @@ #include <algorithm> #include <numeric> +#include <sys/mman.h> #include "base/logging.h" #include "base/macros.h" @@ -44,23 +45,17 @@ static void DumpFreeMap(const FreeBySizeSet& free_by_size) { } } -template <typename FreeByStartSet, typename FreeBySizeSet> -static void RemoveChunk(FreeByStartSet* free_by_start, - FreeBySizeSet* free_by_size, - typename FreeBySizeSet::const_iterator free_by_size_pos) { +void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) { auto free_by_start_pos = free_by_size_pos->second; - free_by_size->erase(free_by_size_pos); - free_by_start->erase(free_by_start_pos); + free_by_size_.erase(free_by_size_pos); + free_by_start_.erase(free_by_start_pos); } -template <typename FreeByStartSet, typename FreeBySizeSet> -static void InsertChunk(FreeByStartSet* free_by_start, - FreeBySizeSet* free_by_size, - const SpaceChunk& chunk) { +inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) { DCHECK_NE(chunk.size, 0u); - auto insert_result = free_by_start->insert(chunk); + auto insert_result = free_by_start_.insert(chunk); DCHECK(insert_result.second); - free_by_size->emplace(chunk.size, insert_result.first); + free_by_size_.emplace(chunk.size, insert_result.first); } SwapSpace::SwapSpace(int fd, size_t initial_size) @@ -69,10 +64,18 @@ SwapSpace::SwapSpace(int fd, size_t initial_size) lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) { // Assume that the file is unlinked. - InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size)); + InsertChunk(NewFileChunk(initial_size)); } SwapSpace::~SwapSpace() { + // Unmap all mmapped chunks. Nothing should be allocated anymore at + // this point, so there should be only full size chunks in free_by_start_. + for (const SpaceChunk& chunk : free_by_start_) { + if (munmap(chunk.ptr, chunk.size) != 0) { + PLOG(ERROR) << "Failed to unmap swap space chunk at " + << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size; + } + } // All arenas are backed by the same file. Just close the descriptor. close(fd_); } @@ -113,7 +116,7 @@ void* SwapSpace::Alloc(size_t size) { : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() }); if (it != free_by_size_.end()) { old_chunk = *it->second; - RemoveChunk(&free_by_start_, &free_by_size_, it); + RemoveChunk(it); } else { // Not a big enough free chunk, need to increase file size. old_chunk = NewFileChunk(size); @@ -124,13 +127,13 @@ void* SwapSpace::Alloc(size_t size) { if (old_chunk.size != size) { // Insert the remainder. SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size }; - InsertChunk(&free_by_start_, &free_by_size_, new_chunk); + InsertChunk(new_chunk); } return ret; } -SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { +SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { #if !defined(__APPLE__) size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize)); int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part)); @@ -159,7 +162,7 @@ SpaceChunk SwapSpace::NewFileChunk(size_t min_size) { } // TODO: Full coalescing. 
-void SwapSpace::Free(void* ptrV, size_t size) { +void SwapSpace::Free(void* ptr, size_t size) { MutexLock lock(Thread::Current(), lock_); size = RoundUp(size, 8U); @@ -168,7 +171,7 @@ void SwapSpace::Free(void* ptrV, size_t size) { free_before = CollectFree(free_by_start_, free_by_size_); } - SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size }; + SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size }; auto it = free_by_start_.lower_bound(chunk); if (it != free_by_start_.begin()) { auto prev = it; @@ -180,7 +183,7 @@ void SwapSpace::Free(void* ptrV, size_t size) { chunk.ptr -= prev->size; auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev }); DCHECK(erase_pos != free_by_size_.end()); - RemoveChunk(&free_by_start_, &free_by_size_, erase_pos); + RemoveChunk(erase_pos); // "prev" is invalidated but "it" remains valid. } } @@ -191,11 +194,11 @@ void SwapSpace::Free(void* ptrV, size_t size) { chunk.size += it->size; auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it }); DCHECK(erase_pos != free_by_size_.end()); - RemoveChunk(&free_by_start_, &free_by_size_, erase_pos); + RemoveChunk(erase_pos); // "it" is invalidated but we don't need it anymore. } } - InsertChunk(&free_by_start_, &free_by_size_, chunk); + InsertChunk(chunk); if (kCheckFreeMaps) { size_t free_after = CollectFree(free_by_start_, free_by_size_); diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index 9127b6b096..b659f1d3c7 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -19,42 +19,17 @@ #include <cstdlib> #include <list> +#include <vector> #include <set> #include <stdint.h> #include <stddef.h> -#include "base/debug_stack.h" #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" -#include "mem_map.h" namespace art { -// Chunk of space. -struct SpaceChunk { - uint8_t* ptr; - size_t size; - - uintptr_t Start() const { - return reinterpret_cast<uintptr_t>(ptr); - } - uintptr_t End() const { - return reinterpret_cast<uintptr_t>(ptr) + size; - } -}; - -inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) { - return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr); -} - -class SortChunkByPtr { - public: - bool operator()(const SpaceChunk& a, const SpaceChunk& b) const { - return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr); - } -}; - // An arena pool that creates arenas backed by an mmaped file. class SwapSpace { public: @@ -68,17 +43,27 @@ class SwapSpace { } private: - SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_); + // Chunk of space. + struct SpaceChunk { + uint8_t* ptr; + size_t size; - int fd_; - size_t size_; - std::list<SpaceChunk> maps_; + uintptr_t Start() const { + return reinterpret_cast<uintptr_t>(ptr); + } + uintptr_t End() const { + return reinterpret_cast<uintptr_t>(ptr) + size; + } + }; - // NOTE: Boost.Bimap would be useful for the two following members. + class SortChunkByPtr { + public: + bool operator()(const SpaceChunk& a, const SpaceChunk& b) const { + return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr); + } + }; - // Map start of a free chunk to its size. typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet; - FreeByStartSet free_by_start_ GUARDED_BY(lock_); // Map size to an iterator to free_by_start_'s entry. 
typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry; @@ -92,6 +77,21 @@ class SwapSpace { } }; typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet; + + SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_); + + void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_); + void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_); + + int fd_; + size_t size_; + std::list<SpaceChunk> maps_; + + // NOTE: Boost.Bimap would be useful for the two following members. + + // Map start of a free chunk to its size. + FreeByStartSet free_by_start_ GUARDED_BY(lock_); + + // Free chunks ordered by size. FreeBySizeSet free_by_size_ GUARDED_BY(lock_); mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; @@ -126,6 +126,9 @@ class SwapAllocator<void> { template <typename U> friend class SwapAllocator; + + template <typename U> + friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs); }; template <typename T> @@ -201,9 +204,22 @@ class SwapAllocator { template <typename U> friend class SwapAllocator; + + template <typename U> + friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs); }; template <typename T> +inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) { + return lhs.swap_space_ == rhs.swap_space_; +} + +template <typename T> +inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) { + return !(lhs == rhs); +} + +template <typename T> using SwapVector = std::vector<T, SwapAllocator<T>>; template <typename T, typename Comparator> using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
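The ProcessProfiles() contract in the new profile_assistant.h distinguishes three outcomes: an error, success with no significant profile change, and success with a merged ProfileCompilationInfo. A minimal caller sketch follows; it assumes the out-parameter transfers ownership to the caller (the header comment does not spell this out), and ShouldCompileWithProfiles is a hypothetical helper, not ART code.

#include <memory>
#include <string>
#include <vector>

#include "jit/offline_profiling_info.h"
#include "profile_assistant.h"

// Returns true only when the new profile data is worth acting on.
static bool ShouldCompileWithProfiles(
    const std::vector<std::string>& profile_files,
    const std::vector<std::string>& reference_profile_files) {
  art::ProfileCompilationInfo* info = nullptr;
  if (!art::ProfileAssistant::ProcessProfiles(profile_files,
                                              reference_profile_files,
                                              &info)) {
    return false;  // Reading, merging, or writing a profile file failed.
  }
  // A null result means the difference against the reference profiles was
  // insignificant; a non-null object carries the merged data for compilation.
  std::unique_ptr<art::ProfileCompilationInfo> merged(info);
  return merged != nullptr;
}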
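The MVN fallbacks added to Thumb2Assembler::CmpConstant() lean on a two's-complement identity: mvn ip, #imm materializes ~imm, and choosing imm = ~(-value) (which equals value - 1) leaves exactly -value in IP, so cmn rn, ip sets flags from rn + (-value), just as the unencodable cmp rn, #value would. A standalone sketch of the arithmetic (plain C++, not assembler):

#include <cassert>
#include <cstdint>

int main() {
  int32_t value = 257;  // Not expressible as a Thumb2 modified immediate.
  // ~(-257) == 256, which IS encodable, so the assembler emits mvn.w ip, #256.
  uint32_t imm = ~static_cast<uint32_t>(-value);
  assert(imm == 256u);
  uint32_t ip = ~imm;  // What "mvn ip, #256" leaves in IP: 0xfffffeff == -257.
  // "cmn rn, ip" computes the flags of rn + (-257), i.e. behaves like
  // cmp rn, #257, matching the "mvn.w ip, #256" / "cmn.w r0, ip" pair in
  // CmpConstantResults above.
  assert(static_cast<int32_t>(ip) == -value);
  return 0;
}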
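The new MIPS and MIPS64 Trunc*/Cvt* emitters all funnel into EmitFR() with COP1 (0x11) as the opcode, a format field (0x10 = single, 0x11 = double, 0x15 = long), and a function code (0x09 = trunc.l, 0x0d = trunc.w, 0x20/0x21 = cvt.s/cvt.d). A sketch of that bit layout, assuming the standard MIPS FPU R-type encoding; this EncodeFR helper is illustrative, not the ART EmitFR implementation:

#include <cstdint>
#include <cstdio>

// Standard MIPS FPU R-type word: opcode(6) | fmt(5) | ft(5) | fs(5) | fd(5) | funct(6).
static uint32_t EncodeFR(uint32_t opcode, uint32_t fmt, uint32_t ft,
                         uint32_t fs, uint32_t fd, uint32_t funct) {
  return (opcode << 26) | (fmt << 21) | (ft << 16) | (fs << 11) | (fd << 6) | funct;
}

int main() {
  // trunc.w.s $f0, $f2: COP1 (0x11), fmt S (0x10), funct 0x0d -> 0x4600100d.
  std::printf("trunc.w.s f0, f2 = 0x%08x\n", EncodeFR(0x11, 0x10, 0, 2, 0, 0x0d));
  // trunc.l.d $f4, $f6: COP1 (0x11), fmt D (0x11), funct 0x09.
  std::printf("trunc.l.d f4, f6 = 0x%08x\n", EncodeFR(0x11, 0x11, 0, 6, 4, 0x09));
  return 0;
}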
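The operator== overloads added to swap_space.h implement the C++ allocator-equality requirement: storage obtained from one allocator may be released through another only if the two compare equal, which here means sharing the same backing SwapSpace (or both holding the null, malloc/free fallback state). A usage sketch; it assumes SwapAllocator's SwapSpace* constructor and CHECK() from base/logging.h, and the include path and swap_fd parameter are illustrative:

#include "base/logging.h"
#include "swap_space.h"

void SwapAllocatorEqualityDemo(int swap_fd) {
  art::SwapSpace swap(swap_fd, /* initial_size */ 1024 * 1024);
  art::SwapAllocator<int> a(&swap);
  art::SwapAllocator<int> b(&swap);
  // Same backing SwapSpace: memory from `a` may be freed through `b`, so
  // e.g. swapping two SwapVector<int> instances built on them is well-defined.
  CHECK(a == b);
  art::SwapAllocator<int> fallback(nullptr);  // Null space: plain malloc/free.
  CHECK(a != fallback);
  art::SwapVector<int> v(a);
  v.push_back(42);  // Backed by the mmapped swap file, not the native heap.
}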