Diffstat (limited to 'compiler')
51 files changed, 2892 insertions, 2026 deletions
diff --git a/compiler/buffered_output_stream.cc b/compiler/buffered_output_stream.cc
index 0940a80cc1..3ca518b686 100644
--- a/compiler/buffered_output_stream.cc
+++ b/compiler/buffered_output_stream.cc
@@ -25,12 +25,13 @@ BufferedOutputStream::BufferedOutputStream(OutputStream* out)
 bool BufferedOutputStream::WriteFully(const void* buffer, size_t byte_count) {
   if (byte_count > kBufferSize) {
-    Flush();
+    if (!Flush()) {
+      return false;
+    }
     return out_->WriteFully(buffer, byte_count);
   }
   if (used_ + byte_count > kBufferSize) {
-    bool success = Flush();
-    if (!success) {
+    if (!Flush()) {
       return false;
     }
   }
diff --git a/compiler/buffered_output_stream.h b/compiler/buffered_output_stream.h
index 15fc0335a9..b447f41e21 100644
--- a/compiler/buffered_output_stream.h
+++ b/compiler/buffered_output_stream.h
@@ -36,11 +36,11 @@ class BufferedOutputStream FINAL : public OutputStream {
   virtual off_t Seek(off_t offset, Whence whence);
 
+  bool Flush();
+
  private:
   static const size_t kBufferSize = 8 * KB;
 
-  bool Flush();
-
   OutputStream* const out_;
 
   uint8_t buffer_[kBufferSize];
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 6fd457599f..508b04a16f 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -48,11 +48,11 @@ class CFITest : public dwarf::DwarfTest {
     // Pretty-print CFI opcodes.
     constexpr bool is64bit = false;
     dwarf::DebugFrameOpCodeWriter<> initial_opcodes;
-    dwarf::WriteDebugFrameCIE(is64bit, dwarf::DW_EH_PE_absptr, dwarf::Reg(8),
-                              initial_opcodes, kCFIFormat, &debug_frame_data_);
+    dwarf::WriteCIE(is64bit, dwarf::Reg(8),
+                    initial_opcodes, kCFIFormat, &debug_frame_data_);
     std::vector<uintptr_t> debug_frame_patches;
-    dwarf::WriteDebugFrameFDE(is64bit, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
-                              kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+    dwarf::WriteFDE(is64bit, 0, 0, 0, actual_asm.size(), ArrayRef<const uint8_t>(actual_cfi),
+                    kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
     ReformatCfi(Objdump(false, "-W"), &lines);
     // Pretty-print assembly.
     auto* opts = new DisassemblerOptions(false, actual_asm.data(), true);
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 8788dc1950..3a9ce1bc0e 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -22,6 +22,10 @@
 
 namespace art {
 
+namespace jit {
+  class JitCodeCache;
+}
+
 class ArtMethod;
 class Backend;
 struct CompilationUnit;
@@ -58,6 +62,13 @@ class Compiler {
                                      uint32_t method_idx,
                                      const DexFile& dex_file) const = 0;
 
+  virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
+                          jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
+                          ArtMethod* method ATTRIBUTE_UNUSED)
+      SHARED_REQUIRES(Locks::mutator_lock_) {
+    return false;
+  }
+
   virtual uintptr_t GetEntryPointOf(ArtMethod* method) const
       SHARED_REQUIRES(Locks::mutator_lock_) = 0;
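Because the new JitCompile hook returns false by default, existing backends keep working unchanged and only a JIT-capable compiler needs to override it. A minimal, self-contained sketch of this default-deny virtual-hook pattern (the class names below are illustrative stand-ins, not ART's real JIT wiring):

    #include <iostream>

    // Stand-in for art::Compiler: the base class grows a JIT entry point
    // with a safe default, so existing subclasses need no changes.
    struct Compiler {
      virtual ~Compiler() {}
      // Default: this backend cannot JIT-compile; the caller falls back.
      virtual bool JitCompile() { return false; }
    };

    // Stand-in for a JIT-capable backend such as OptimizingCompiler.
    struct JitCapableCompiler : Compiler {
      bool JitCompile() override { return true; }  // Pretend compilation succeeded.
    };

    int main() {
      Compiler base;
      JitCapableCompiler jit;
      std::cout << base.JitCompile() << " " << jit.JitCompile() << "\n";  // Prints: 0 1
    }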
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 036da2e2b2..b1acf5e691 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -447,7 +447,7 @@ void Arm64Mir2Lir::GenSpecialExitForSuspend() {
 static bool Arm64UseRelativeCall(CompilationUnit* cu, const MethodReference& target_method) {
   // Emit relative calls anywhere in the image or within a dex file otherwise.
-  return cu->compiler_driver->IsImage() || cu->dex_file == target_method.dex_file;
+  return cu->compiler_driver->IsBootImage() || cu->dex_file == target_method.dex_file;
 }
 
 /*
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index d055b37ea7..aa5e411ba8 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -341,7 +341,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
                                Compiler::Kind compiler_kind,
                                InstructionSet instruction_set,
                                const InstructionSetFeatures* instruction_set_features,
-                               bool image, std::unordered_set<std::string>* image_classes,
+                               bool boot_image, std::unordered_set<std::string>* image_classes,
                                std::unordered_set<std::string>* compiled_classes,
                                std::unordered_set<std::string>* compiled_methods,
                                size_t thread_count, bool dump_stats, bool dump_passes,
@@ -361,7 +361,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
       compiled_methods_lock_("compiled method lock"),
       compiled_methods_(MethodTable::key_compare()),
       non_relative_linker_patch_count_(0u),
-      image_(image),
+      boot_image_(boot_image),
       image_classes_(image_classes),
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
@@ -383,7 +383,7 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
 
   compiler_->Init();
 
-  CHECK_EQ(image_, image_classes_.get() != nullptr);
+  CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
 
   // Read the profile file if one is provided.
   if (!profile_file.empty()) {
@@ -559,7 +559,7 @@ static void CompileMethod(Thread* self,
     }
   } else if ((access_flags & kAccAbstract) != 0) {
     // Abstract methods don't have code.
-  } else if (Runtime::Current()->IsAotCompiler()) {
+  } else {
     const VerifiedMethod* verified_method =
         driver->GetVerificationResults()->GetVerifiedMethod(method_ref);
     bool compile = compilation_enabled &&
@@ -598,13 +598,6 @@ static void CompileMethod(Thread* self,
           ? dex_to_dex_compilation_level
           : optimizer::DexToDexCompilationLevel::kRequired);
     }
-  } else {
-    // This is for the JIT compiler, which has already ensured the class is verified.
-    // We can go straight to compiling.
-    DCHECK(Runtime::Current()->UseJit());
-    compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type,
-                                                     class_def_idx, method_idx, class_loader,
-                                                     dex_file, dex_cache);
   }
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
@@ -696,42 +689,6 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }
 
-CompiledMethod* CompilerDriver::CompileArtMethod(Thread* self, ArtMethod* method) {
-  DCHECK_EQ(method,
-            method->GetInterfaceMethodIfProxy(
-                Runtime::Current()->GetClassLinker()->GetImagePointerSize()));
-  const uint32_t method_idx = method->GetDexMethodIndex();
-  const uint32_t access_flags = method->GetAccessFlags();
-  const InvokeType invoke_type = method->GetInvokeType();
-  StackHandleScope<2> hs(self);
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
-      method->GetDeclaringClass()->GetClassLoader()));
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
-  jobject jclass_loader = class_loader.ToJObject();
-  const DexFile* dex_file = method->GetDexFile();
-  const uint16_t class_def_idx = method->GetClassDefIndex();
-  const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-  optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
-      GetDexToDexCompilationLevel(self, *this, class_loader, *dex_file, class_def);
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-  // Go to native so that we don't block GC during compilation.
-  ScopedThreadSuspension sts(self, kNative);
-  CompileMethod(self,
-                this,
-                code_item,
-                access_flags,
-                invoke_type,
-                class_def_idx,
-                method_idx,
-                jclass_loader,
-                *dex_file,
-                dex_to_dex_compilation_level,
-                true,
-                dex_cache);
-  auto* compiled_method = GetCompiledMethod(MethodReference(dex_file, method_idx));
-  return compiled_method;
-}
-
 void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
@@ -781,7 +738,7 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De
 }
 
 bool CompilerDriver::IsImageClass(const char* descriptor) const {
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     // NOTE: Currently unreachable, all callers check IsImage().
     return false;
   } else {
@@ -790,7 +747,7 @@ bool CompilerDriver::IsImageClass(const char* descriptor) const {
 }
 
 bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
+  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
     return true;
   }
 
@@ -801,7 +758,7 @@ bool CompilerDriver::IsClassToCompile(const char* descriptor) const {
 }
 
 bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const {
-  if (kRestrictCompilationFiltersToImage && !IsImage()) {
+  if (kRestrictCompilationFiltersToImage && !IsBootImage()) {
     return true;
   }
 
@@ -889,7 +846,7 @@ class RecordImageClassesVisitor : public ClassVisitor {
 // Make a list of descriptors for classes to include in the image
 void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
   CHECK(timings != nullptr);
-  if (!IsImage()) {
+  if (!IsBootImage()) {
     return;
   }
 
@@ -1118,7 +1075,7 @@ class ClinitImageUpdate {
 };
 
 void CompilerDriver::UpdateImageClasses(TimingLogger* timings) {
-  if (IsImage()) {
+  if (IsBootImage()) {
     TimingLogger::ScopedTiming t("UpdateImageClasses", timings);
 
     Runtime* runtime = Runtime::Current();
@@ -1145,7 +1102,7 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) {
     // Having the klass reference here implies that the klass is already loaded.
     return true;
   }
-  if (!IsImage()) {
+  if (!IsBootImage()) {
    // Assume loaded only if klass is in the boot image. App classes cannot be assumed
    // loaded because we don't even know what class loader will be used to load them.
    bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace();
@@ -1157,7 +1114,7 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) {
 }
 
 bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(const DexFile& dex_file, uint32_t type_idx) {
-  if (IsImage() &&
+  if (IsBootImage() &&
       IsImageClass(dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_))) {
     {
       ScopedObjectAccess soa(Thread::Current());
@@ -1183,7 +1140,7 @@ bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
   // See also Compiler::ResolveDexFile
 
   bool result = false;
-  if (IsImage()) {
+  if (IsBootImage()) {
     // We resolve all const-string strings when building for the image.
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
@@ -1300,7 +1257,7 @@ bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_i
   if (compiling_boot) {
     // boot -> boot class pointers.
     // True if the class is in the image at boot compiling time.
-    const bool is_image_class = IsImage() && IsImageClass(
+    const bool is_image_class = IsBootImage() && IsImageClass(
         dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
     // True if pc relative load works.
     if (is_image_class && support_boot_image_fixup) {
@@ -1548,7 +1505,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType
   }
   if (!use_dex_cache && force_relocations) {
     bool is_in_image;
-    if (IsImage()) {
+    if (IsBootImage()) {
       is_in_image = IsImageClass(method->GetDeclaringClassDescriptor());
     } else {
       is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 &&
@@ -2019,7 +1976,7 @@ void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_fil
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
-  if (IsImage()) {
+  if (IsBootImage()) {
     // For images we resolve all types, such as array, whereas for applications just those with
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
@@ -2101,8 +2058,8 @@ class VerifyClassVisitor : public CompilationVisitor {
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
       // we rely on things working OK without verification when the decryption dialog is brought up.
       // So abort in a debug build if we find this violated.
-      DCHECK(!manager_->GetCompiler()->IsImage() || klass->IsVerified()) << "Boot classpath class "
-          << PrettyClass(klass.Get()) << " failed to fully verify.";
+      DCHECK(!manager_->GetCompiler()->IsBootImage() || klass->IsVerified())
+          << "Boot classpath class " << PrettyClass(klass.Get()) << " failed to fully verify.";
     }
     soa.Self()->AssertNoPendingException();
   }
@@ -2222,7 +2179,7 @@ class InitializeClassVisitor : public CompilationVisitor {
     if (!klass->IsInitialized()) {
       // We need to initialize static fields, we only do this for image classes that aren't
       // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-      bool can_init_static_fields = manager_->GetCompiler()->IsImage() &&
+      bool can_init_static_fields = manager_->GetCompiler()->IsBootImage() &&
           manager_->GetCompiler()->IsImageClass(descriptor) &&
           !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
       if (can_init_static_fields) {
@@ -2286,7 +2243,7 @@ void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile&
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   size_t thread_count;
-  if (IsImage()) {
+  if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
     thread_count = 1U;
   } else {
@@ -2304,7 +2261,7 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
     CHECK(dex_file != nullptr);
     InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
-  if (IsImage()) {
+  if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
     Runtime::Current()->GetHeap()->CollectGarbage(true);
   }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 4ed4dc60d2..5683b03a71 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -92,7 +92,7 @@ class CompilerDriver {
                  Compiler::Kind compiler_kind,
                  InstructionSet instruction_set,
                  const InstructionSetFeatures* instruction_set_features,
-                 bool image, std::unordered_set<std::string>* image_classes,
+                 bool boot_image, std::unordered_set<std::string>* image_classes,
                  std::unordered_set<std::string>* compiled_classes,
                  std::unordered_set<std::string>* compiled_methods,
                  size_t thread_count, bool dump_stats, bool dump_passes,
@@ -119,9 +119,6 @@ class CompilerDriver {
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
-  CompiledMethod* CompileArtMethod(Thread* self, ArtMethod*)
-      SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!compiled_methods_lock_) WARN_UNUSED;
-
   // Compile a single Method.
   void CompileOne(Thread* self, ArtMethod* method, TimingLogger* timings)
       SHARED_REQUIRES(Locks::mutator_lock_)
@@ -156,8 +153,8 @@ class CompilerDriver {
   }
 
   // Are we compiling and creating an image file?
-  bool IsImage() const {
-    return image_;
+  bool IsBootImage() const {
+    return boot_image_;
   }
 
   const std::unordered_set<std::string>* GetImageClasses() const {
@@ -637,7 +634,7 @@ class CompilerDriver {
   // in the .oat_patches ELF section if requested in the compiler options.
   size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_);
 
-  const bool image_;
+  const bool boot_image_;
 
   // If image_ is true, specifies the classes that will be included in
   // the image. Note if image_classes_ is null, all classes are
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 3ba380e9db..a412a99d98 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -122,12 +122,12 @@ TEST_F(DwarfTest, DebugFrame) {
   DW_CHECK_NEXT("DW_CFA_restore: r5 (ebp)");
 
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(is64bit ? 16 : 8),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(is64bit ? 16 : 8),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 28 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x01000000, 0x01000000, ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   EXPECT_EQ(expected_patches, debug_frame_patches);
   CheckObjdumpOutput(is64bit, "-W");
@@ -136,14 +136,14 @@ TEST_F(DwarfTest, DebugFrame) {
 TEST_F(DwarfTest, DebugFrame64) {
   constexpr bool is64bit = true;
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   DebugFrameOpCodeWriter<> opcodes;
   std::vector<uintptr_t> debug_frame_patches;
   std::vector<uintptr_t> expected_patches { 32 };  // NOLINT
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   DW_CHECK("FDE cie=00000000 pc=100000000000000..300000000000000");
   EXPECT_EQ(expected_patches, debug_frame_patches);
@@ -176,12 +176,12 @@ TEST_F(DwarfTest, x86_64_RegisterMapping) {
   DW_CHECK_NEXT("DW_CFA_offset: r14 (r14)");
   DW_CHECK_NEXT("DW_CFA_offset: r15 (r15)");
   DebugFrameOpCodeWriter<> initial_opcodes;
-  WriteDebugFrameCIE(is64bit, DW_EH_PE_absptr, Reg(16),
-                     initial_opcodes, kCFIFormat, &debug_frame_data_);
+  WriteCIE(is64bit, Reg(16),
+           initial_opcodes, kCFIFormat, &debug_frame_data_);
   std::vector<uintptr_t> debug_frame_patches;
-  WriteDebugFrameFDE(is64bit, 0, 0x0100000000000000, 0x0200000000000000,
-                     ArrayRef<const uint8_t>(*opcodes.data()),
-                     kCFIFormat, &debug_frame_data_, &debug_frame_patches);
+  WriteFDE(is64bit, 0, 0, 0x0100000000000000, 0x0200000000000000,
+           ArrayRef<const uint8_t>(*opcodes.data()),
+           kCFIFormat, 0, &debug_frame_data_, &debug_frame_patches);
 
   CheckObjdumpOutput(is64bit, "-W");
 }
diff --git a/compiler/dwarf/dwarf_test.h b/compiler/dwarf/dwarf_test.h
index f819c49cee..5464ed9c49 100644
--- a/compiler/dwarf/dwarf_test.h
+++ b/compiler/dwarf/dwarf_test.h
@@ -59,38 +59,27 @@ class DwarfTest : public CommonRuntimeTest {
   std::vector<std::string> Objdump(const char* args) {
     // Write simple elf file with just the DWARF sections.
     InstructionSet isa = (sizeof(typename ElfTypes::Addr) == 8) ? kX86_64 : kX86;
-    class NoCode : public CodeOutput {
-      bool Write(OutputStream*) OVERRIDE { return true; }  // NOLINT
-    } no_code;
-    ElfBuilder<ElfTypes> builder(isa, 0, &no_code, 0, &no_code, 0);
-    typedef typename ElfBuilder<ElfTypes>::RawSection RawSection;
-    RawSection debug_info(".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_abbrev(".debug_abbrev", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_str(".debug_str", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_line(".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0);
-    RawSection debug_frame(".debug_frame", SHT_PROGBITS, 0, nullptr, 0, 8, 0);
+    ScratchFile file;
+    FileOutputStream output_stream(file.GetFile());
+    ElfBuilder<ElfTypes> builder(isa, &output_stream);
+    builder.Start();
     if (!debug_info_data_.empty()) {
-      debug_info.SetBuffer(debug_info_data_);
-      builder.RegisterSection(&debug_info);
+      builder.WriteSection(".debug_info", &debug_info_data_);
     }
     if (!debug_abbrev_data_.empty()) {
-      debug_abbrev.SetBuffer(debug_abbrev_data_);
-      builder.RegisterSection(&debug_abbrev);
+      builder.WriteSection(".debug_abbrev", &debug_abbrev_data_);
     }
     if (!debug_str_data_.empty()) {
-      debug_str.SetBuffer(debug_str_data_);
-      builder.RegisterSection(&debug_str);
+      builder.WriteSection(".debug_str", &debug_str_data_);
    }
     if (!debug_line_data_.empty()) {
-      debug_line.SetBuffer(debug_line_data_);
-      builder.RegisterSection(&debug_line);
+      builder.WriteSection(".debug_line", &debug_line_data_);
     }
     if (!debug_frame_data_.empty()) {
-      debug_frame.SetBuffer(debug_frame_data_);
-      builder.RegisterSection(&debug_frame);
+      builder.WriteSection(".debug_frame", &debug_frame_data_);
     }
-    ScratchFile file;
-    builder.Write(file.GetFile());
+    builder.End();
+    EXPECT_TRUE(builder.Good());
     // Read the elf file back using objdump.
     std::vector<std::string> lines;
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index f3fba4b1fa..883d756885 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -38,15 +38,14 @@ namespace dwarf {
 
 // Write common information entry (CIE) to .debug_frame or .eh_frame section.
 template<typename Vector>
-void WriteDebugFrameCIE(bool is64bit,
-                        ExceptionHeaderValueApplication address_type,
-                        Reg return_address_register,
-                        const DebugFrameOpCodeWriter<Vector>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame) {
+void WriteCIE(bool is64bit,
+              Reg return_address_register,
+              const DebugFrameOpCodeWriter<Vector>& opcodes,
+              CFIFormat format,
+              std::vector<uint8_t>* buffer) {
   static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
-  Writer<> writer(debug_frame);
+  Writer<> writer(buffer);
   size_t cie_header_start_ = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   writer.PushUint32((format == DW_EH_FRAME_FORMAT) ? 0 : 0xFFFFFFFF);  // CIE id.
@@ -57,17 +56,17 @@ void WriteDebugFrameCIE(bool is64bit,
   writer.PushUleb128(return_address_register.num());  // ubyte in DWARF2.
   writer.PushUleb128(1);  // z: Augmentation data size.
   if (is64bit) {
-    if (address_type == DW_EH_PE_pcrel) {
+    if (format == DW_EH_FRAME_FORMAT) {
       writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata8);  // R: Pointer encoding.
     } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata8);  // R: Pointer encoding.
     }
   } else {
-    if (address_type == DW_EH_PE_pcrel) {
+    if (format == DW_EH_FRAME_FORMAT) {
       writer.PushUint8(DW_EH_PE_pcrel | DW_EH_PE_sdata4);  // R: Pointer encoding.
     } else {
-      DCHECK(address_type == DW_EH_PE_absptr);
+      DCHECK(format == DW_DEBUG_FRAME_FORMAT);
       writer.PushUint8(DW_EH_PE_absptr | DW_EH_PE_udata4);  // R: Pointer encoding.
     }
   }
@@ -78,30 +77,44 @@ void WriteDebugFrameCIE(bool is64bit,
 
 // Write frame description entry (FDE) to .debug_frame or .eh_frame section.
 inline
-void WriteDebugFrameFDE(bool is64bit, size_t cie_offset,
-                        uint64_t initial_address, uint64_t address_range,
-                        const ArrayRef<const uint8_t>& opcodes,
-                        CFIFormat format,
-                        std::vector<uint8_t>* debug_frame,
-                        std::vector<uintptr_t>* debug_frame_patches) {
-  Writer<> writer(debug_frame);
+void WriteFDE(bool is64bit,
+              uint64_t section_address,  // Absolute address of the section.
+              uint64_t cie_address,  // Absolute address of last CIE.
+              uint64_t code_address,
+              uint64_t code_size,
+              const ArrayRef<const uint8_t>& opcodes,
+              CFIFormat format,
+              uint64_t buffer_address,  // Address of buffer in linked application.
+              std::vector<uint8_t>* buffer,
+              std::vector<uintptr_t>* patch_locations) {
+  CHECK_GE(cie_address, section_address);
+  CHECK_GE(buffer_address, section_address);
+
+  Writer<> writer(buffer);
   size_t fde_header_start = writer.data()->size();
   writer.PushUint32(0);  // Length placeholder.
   if (format == DW_EH_FRAME_FORMAT) {
-    uint32_t cie_pointer = writer.data()->size() - cie_offset;
+    uint32_t cie_pointer = (buffer_address + buffer->size()) - cie_address;
     writer.PushUint32(cie_pointer);
   } else {
-    uint32_t cie_pointer = cie_offset;
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    uint32_t cie_pointer = cie_address - section_address;
     writer.PushUint32(cie_pointer);
   }
-  // Relocate initial_address, but not address_range (it is size).
-  debug_frame_patches->push_back(writer.data()->size());
+  if (format == DW_EH_FRAME_FORMAT) {
+    // .eh_frame encodes the location as a relative address.
+    code_address -= buffer_address + buffer->size();
+  } else {
+    DCHECK(format == DW_DEBUG_FRAME_FORMAT);
+    // Relocate code_address, as it is stored as an absolute value.
+    patch_locations->push_back(buffer_address + buffer->size() - section_address);
+  }
   if (is64bit) {
-    writer.PushUint64(initial_address);
-    writer.PushUint64(address_range);
+    writer.PushUint64(code_address);
+    writer.PushUint64(code_size);
   } else {
-    writer.PushUint32(initial_address);
-    writer.PushUint32(address_range);
+    writer.PushUint32(code_address);
+    writer.PushUint32(code_size);
   }
   writer.PushUleb128(0);  // Augmentation data size.
   writer.PushData(opcodes);
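The new WriteFDE takes absolute section/CIE/buffer addresses instead of a buffer-relative CIE offset. A hedged sketch of a .debug_frame caller under the new signature (addresses and the empty opcode stream are chosen for illustration; this mirrors what cfi_test.h and dwarf_test.cc pass):

    // Illustrative only: one CIE plus one FDE in a .debug_frame buffer.
    // With DW_DEBUG_FRAME_FORMAT the section/CIE/buffer addresses can all
    // be 0 because the section is not loaded at runtime.
    std::vector<uint8_t> debug_frame;
    std::vector<uintptr_t> patches;
    std::vector<uint8_t> opcode_bytes;  // CFI opcodes for the method (empty here).
    dwarf::DebugFrameOpCodeWriter<> no_opcodes;
    dwarf::WriteCIE(/* is64bit */ false, dwarf::Reg(8), no_opcodes,
                    dwarf::DW_DEBUG_FRAME_FORMAT, &debug_frame);
    dwarf::WriteFDE(/* is64bit */ false,
                    /* section_address */ 0,    // Section start.
                    /* cie_address */ 0,        // The CIE was written at offset 0.
                    /* code_address */ 0x1000,  // Start of the method's code (placeholder).
                    /* code_size */ 0x100,
                    ArrayRef<const uint8_t>(opcode_bytes),
                    dwarf::DW_DEBUG_FRAME_FORMAT,
                    /* buffer_address */ 0,     // Buffer coincides with section start.
                    &debug_frame, &patches);
    // `patches` now holds the offset of the code_address field for relocation.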
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bbd962fae2..895dfccfe3 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -21,27 +21,58 @@
 
 #include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/casts.h"
 #include "base/unix_file/fd_file.h"
 #include "buffered_output_stream.h"
 #include "elf_utils.h"
 #include "file_output_stream.h"
+#include "leb128.h"
 
 namespace art {
 
-class CodeOutput {
- public:
-  virtual bool Write(OutputStream* out) = 0;
-  virtual ~CodeOutput() {}
-};
-
 // Writes ELF file.
-// The main complication is that the sections often want to reference
-// each other.  We solve this by writing the ELF file in two stages:
-// * Sections are asked about their size, and overall layout is calculated.
-// * Sections do the actual writes which may use offsets of other sections.
+//
+// The basic layout of the elf file:
+//   Elf_Ehdr                  - The ELF header.
+//   Elf_Phdr[]                - Program headers for the linker.
+//   .rodata                   - DEX files and oat metadata.
+//   .text                     - Compiled code.
+//   .bss                      - Zero-initialized writeable section.
+//   .dynstr                   - Names for .dynsym.
+//   .dynsym                   - A few oat-specific dynamic symbols.
+//   .hash                     - Hash-table for .dynsym.
+//   .dynamic                  - Tags which let the linker locate .dynsym.
+//   .strtab                   - Names for .symtab.
+//   .symtab                   - Debug symbols.
+//   .eh_frame                 - Unwind information (CFI).
+//   .eh_frame_hdr             - Index of .eh_frame.
+//   .debug_frame              - Unwind information (CFI).
+//   .debug_frame.oat_patches  - Addresses for relocation.
+//   .debug_info               - Debug information.
+//   .debug_info.oat_patches   - Addresses for relocation.
+//   .debug_abbrev             - Decoding information for .debug_info.
+//   .debug_str                - Strings for .debug_info.
+//   .debug_line               - Line number tables.
+//   .debug_line.oat_patches   - Addresses for relocation.
+//   .text.oat_patches         - Addresses for relocation.
+//   .shstrtab                 - Names of ELF sections.
+//   Elf_Shdr[]                - Section headers.
+//
+// Some sections are optional (the debug sections in particular).
+//
+// We try to write the section data directly into the file without much
+// in-memory buffering.  This means we generally write sections based on the
+// dependency order (e.g. .dynamic points to .dynsym which points to .text).
+//
+// In the cases where we need to buffer, we write the larger section first
+// and buffer the smaller one (e.g. .strtab is bigger than .symtab).
+//
+// The debug sections are written last for easier stripping.
+//
 template <typename ElfTypes>
 class ElfBuilder FINAL {
  public:
+  static constexpr size_t kMaxProgramHeaders = 16;
   using Elf_Addr = typename ElfTypes::Addr;
   using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
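Given this layout, the new builder is stream-oriented: each section is opened, written, and closed in file order. A hedged sketch of the intended calling convention (method names are from this patch; ElfTypes32, kArm, `out`, `oat_data`, `oat_code`, and `elf_file_path` are illustrative placeholders, and the real callers are elf_writer_quick and the dwarf_test.h change above):

    // Hypothetical driver code for the streaming ElfBuilder API.
    ElfBuilder<ElfTypes32> builder(kArm, out);   // `out` is an art::OutputStream.
    builder.Start();                             // Reserves Ehdr + Phdr space.
    auto* rodata = builder.GetRoData();
    rodata->Start();
    rodata->WriteFully(oat_data.data(), oat_data.size());
    rodata->End();
    auto* text = builder.GetText();
    text->Start();
    text->WriteFully(oat_code.data(), oat_code.size());
    text->End();
    builder.WriteDynamicSection(elf_file_path);  // .dynstr/.dynsym/.hash/.dynamic
    builder.End();                               // Section headers + ELF header.
    CHECK(builder.Good());                       // All writes/seeks succeeded?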
@@ -53,776 +84,420 @@ class ElfBuilder FINAL {
   using Elf_Dyn = typename ElfTypes::Dyn;
 
   // Base class of all sections.
-  class Section {
+  class Section : public OutputStream {
    public:
-    Section(const std::string& name, Elf_Word type, Elf_Word flags,
-            const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize)
-        : header_(), section_index_(0), name_(name), link_(link) {
+    Section(ElfBuilder<ElfTypes>* owner, const std::string& name,
+            Elf_Word type, Elf_Word flags, const Section* link,
+            Elf_Word info, Elf_Word align, Elf_Word entsize)
+        : OutputStream(name), owner_(owner), header_(),
+          section_index_(0), name_(name), link_(link),
+          started_(false), finished_(false), phdr_flags_(PF_R), phdr_type_(0) {
+      DCHECK_GE(align, 1u);
       header_.sh_type = type;
       header_.sh_flags = flags;
       header_.sh_info = info;
       header_.sh_addralign = align;
       header_.sh_entsize = entsize;
     }
-    virtual ~Section() {}
-
-    // Returns the size of the content of this section.  It is used to
-    // calculate file offsets of all sections before doing any writes.
-    virtual Elf_Word GetSize() const = 0;
-
-    // Write the content of this section to the given file.
-    // This must write exactly the number of bytes returned by GetSize().
-    // Offsets of all sections are known when this method is called.
-    virtual bool Write(File* elf_file) = 0;
-
-    Elf_Word GetLink() const {
-      return (link_ != nullptr) ? link_->GetSectionIndex() : 0;
-    }
-
-    const Elf_Shdr* GetHeader() const {
-      return &header_;
-    }
-
-    Elf_Shdr* GetHeader() {
-      return &header_;
-    }
-
-    Elf_Word GetSectionIndex() const {
-      DCHECK_NE(section_index_, 0u);
-      return section_index_;
-    }
-
-    void SetSectionIndex(Elf_Word section_index) {
-      section_index_ = section_index;
-    }
-
-    const std::string& GetName() const {
-      return name_;
-    }
-
-   private:
-    Elf_Shdr header_;
-    Elf_Word section_index_;
-    const std::string name_;
-    const Section* const link_;
-
-    DISALLOW_COPY_AND_ASSIGN(Section);
-  };
-
-  // Writer of .dynamic section.
-  class DynamicSection FINAL : public Section {
-   public:
-    void AddDynamicTag(Elf_Sword tag, Elf_Word value, const Section* section) {
-      DCHECK_NE(tag, static_cast<Elf_Sword>(DT_NULL));
-      dynamics_.push_back({tag, value, section});
-    }
-
-    DynamicSection(const std::string& name, Section* link)
-        : Section(name, SHT_DYNAMIC, SHF_ALLOC,
-                  link, 0, kPageSize, sizeof(Elf_Dyn)) {}
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (dynamics_.size() + 1 /* DT_NULL */) * sizeof(Elf_Dyn);
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Dyn> buffer;
-      buffer.reserve(dynamics_.size() + 1u);
-      for (const ElfDynamicState& it : dynamics_) {
-        if (it.section_ != nullptr) {
-          // We are adding an address relative to a section.
-          buffer.push_back(
-              {it.tag_, {it.value_ + it.section_->GetHeader()->sh_addr}});
-        } else {
-          buffer.push_back({it.tag_, {it.value_}});
-        }
+    virtual ~Section() {
+      if (started_) {
+        CHECK(finished_);
       }
-      buffer.push_back({DT_NULL, {0}});
-      return WriteArray(elf_file, buffer.data(), buffer.size());
     }
 
-   private:
-    struct ElfDynamicState {
-      Elf_Sword tag_;
-      Elf_Word value_;
-      const Section* section_;
-    };
-    std::vector<ElfDynamicState> dynamics_;
-  };
-
-  using PatchFn = void (*)(const std::vector<uintptr_t>& patch_locations,
-                           Elf_Addr buffer_address,
-                           Elf_Addr base_address,
-                           std::vector<uint8_t>* buffer);
-
-  // Section with content based on simple memory buffer.
-  // The buffer can be optionally patched before writing.
-  class RawSection FINAL : public Section {
-   public:
-    RawSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align, Elf_Word entsize,
-               PatchFn patch = nullptr, const Section* patch_base_section = nullptr)
-        : Section(name, type, flags, link, info, align, entsize),
-          patched_(false), patch_(patch), patch_base_section_(patch_base_section) {
-    }
-
-    RawSection(const std::string& name, Elf_Word type)
-        : RawSection(name, type, 0, nullptr, 0, 1, 0, nullptr, nullptr) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
+    // Start writing of this section.
+    void Start() {
+      CHECK(!started_);
+      CHECK(!finished_);
+      started_ = true;
+      auto& sections = owner_->sections_;
+      // Check that the previous section is complete.
+      CHECK(sections.empty() || sections.back()->finished_);
+      // The first ELF section index is 1. Index 0 is reserved for NULL.
+      section_index_ = sections.size() + 1;
+      // Push this section on the list of written sections.
+      sections.push_back(this);
+      // Align file position.
+      if (header_.sh_type != SHT_NOBITS) {
+        header_.sh_offset = RoundUp(owner_->Seek(0, kSeekCurrent), header_.sh_addralign);
+        owner_->Seek(header_.sh_offset, kSeekSet);
+      }
+      // Align virtual memory address.
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        header_.sh_addr = RoundUp(owner_->virtual_address_, header_.sh_addralign);
+        owner_->virtual_address_ = header_.sh_addr;
+      }
     }
 
-    bool Write(File* elf_file) OVERRIDE {
-      if (!patch_locations_.empty()) {
-        DCHECK(!patched_);  // Do not patch twice.
-        DCHECK(patch_ != nullptr);
-        DCHECK(patch_base_section_ != nullptr);
-        patch_(patch_locations_,
-               this->GetHeader()->sh_addr,
-               patch_base_section_->GetHeader()->sh_addr,
-               &buffer_);
-        patched_ = true;
+    // Finish writing of this section.
+    void End() {
+      CHECK(started_);
+      CHECK(!finished_);
+      finished_ = true;
+      if (header_.sh_type == SHT_NOBITS) {
+        CHECK_GT(header_.sh_size, 0u);
+      } else {
+        // Use the current file position to determine section size.
+        off_t file_offset = owner_->Seek(0, kSeekCurrent);
+        CHECK_GE(file_offset, (off_t)header_.sh_offset);
+        header_.sh_size = file_offset - header_.sh_offset;
+      }
+      if ((header_.sh_flags & SHF_ALLOC) != 0) {
+        owner_->virtual_address_ += header_.sh_size;
       }
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
     }
 
-    bool IsEmpty() const {
-      return buffer_.size() == 0;
+    // Get the location of this section in virtual memory.
+    Elf_Addr GetAddress() const {
+      CHECK(started_);
+      return header_.sh_addr;
     }
 
-    std::vector<uint8_t>* GetBuffer() {
-      return &buffer_;
+    // Returns the size of the content of this section.
+    Elf_Word GetSize() const {
+      CHECK(finished_);
+      return header_.sh_size;
     }
 
-    void SetBuffer(const std::vector<uint8_t>& buffer) {
-      buffer_ = buffer;
+    // Set desired allocation size for .bss section.
+    void SetSize(Elf_Word size) {
+      CHECK_EQ(header_.sh_type, (Elf_Word)SHT_NOBITS);
+      header_.sh_size = size;
     }
 
-    std::vector<uintptr_t>* GetPatchLocations() {
-      return &patch_locations_;
+    // This function always succeeds to simplify code.
+    // Use builder's Good() to check the actual status.
+    bool WriteFully(const void* buffer, size_t byte_count) OVERRIDE {
+      CHECK(started_);
+      CHECK(!finished_);
+      owner_->WriteFully(buffer, byte_count);
+      return true;
     }
 
-   private:
-    std::vector<uint8_t> buffer_;
-    std::vector<uintptr_t> patch_locations_;
-    bool patched_;
-    // User-provided function to do the actual patching.
-    PatchFn patch_;
-    // The section that we patch against (usually .text).
-    const Section* patch_base_section_;
-  };
-
-  // Writer of .rodata section or .text section.
-  // The write is done lazily using the provided CodeOutput.
-  class OatSection FINAL : public Section {
-   public:
-    OatSection(const std::string& name, Elf_Word type, Elf_Word flags,
-               const Section* link, Elf_Word info, Elf_Word align,
-               Elf_Word entsize, Elf_Word size, CodeOutput* code_output)
-        : Section(name, type, flags, link, info, align, entsize),
-          size_(size), code_output_(code_output) {
+    // This function always succeeds to simplify code.
+    // Use builder's Good() to check the actual status.
+    off_t Seek(off_t offset, Whence whence) OVERRIDE {
+      // Forward the seek as-is and trust the caller to use it reasonably.
+      return owner_->Seek(offset, whence);
     }
 
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      // The BufferedOutputStream class contains the buffer as field,
-      // therefore it is too big to allocate on the stack.
-      std::unique_ptr<BufferedOutputStream> output_stream(
-          new BufferedOutputStream(new FileOutputStream(elf_file)));
-      return code_output_->Write(output_stream.get());
+    Elf_Word GetSectionIndex() const {
+      DCHECK(started_);
+      DCHECK_NE(section_index_, 0u);
+      return section_index_;
     }
 
    private:
-    Elf_Word size_;
-    CodeOutput* code_output_;
-  };
-
-  // Writer of .bss section.
-  class NoBitsSection FINAL : public Section {
-   public:
-    NoBitsSection(const std::string& name, Elf_Word size)
-        : Section(name, SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
-          size_(size) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return size_;
-    }
+    ElfBuilder<ElfTypes>* owner_;
+    Elf_Shdr header_;
+    Elf_Word section_index_;
+    const std::string name_;
+    const Section* const link_;
+    bool started_;
+    bool finished_;
+    Elf_Word phdr_flags_;
+    Elf_Word phdr_type_;
 
-    bool Write(File* elf_file ATTRIBUTE_UNUSED) OVERRIDE {
-      LOG(ERROR) << "This section should not be written to the ELF file";
-      return false;
-    }
+    friend class ElfBuilder;
 
-   private:
-    Elf_Word size_;
+    DISALLOW_COPY_AND_ASSIGN(Section);
   };
 
   // Writer of .dynstr .strtab and .shstrtab sections.
-  class StrtabSection FINAL : public Section {
+  class StringSection FINAL : public Section {
    public:
-    StrtabSection(const std::string& name, Elf_Word flags)
-        : Section(name, SHT_STRTAB, flags, nullptr, 0, 1, 0) {
-      buffer_.reserve(4 * KB);
-      // The first entry of strtab must be empty string.
-      buffer_ += '\0';
+    StringSection(ElfBuilder<ElfTypes>* owner, const std::string& name,
+                  Elf_Word flags, Elf_Word align)
+        : Section(owner, name, SHT_STRTAB, flags, nullptr, 0, align, 0),
+          current_offset_(0) {
     }
 
-    Elf_Word AddName(const std::string& name) {
-      Elf_Word offset = buffer_.size();
-      buffer_ += name;
-      buffer_ += '\0';
+    Elf_Word Write(const std::string& name) {
+      if (current_offset_ == 0) {
+        DCHECK(name.empty());
+      }
+      Elf_Word offset = current_offset_;
+      this->WriteFully(name.c_str(), name.length() + 1);
+      current_offset_ += name.length() + 1;
       return offset;
     }
 
-    Elf_Word GetSize() const OVERRIDE {
-      return buffer_.size();
-    }
-
-    bool Write(File* elf_file) OVERRIDE {
-      return WriteArray(elf_file, buffer_.data(), buffer_.size());
-    }
-
    private:
-    std::string buffer_;
+    Elf_Word current_offset_;
   };
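Write returns the offset that callers later store in st_name/sh_name fields, and the first call must write the empty string so that offset 0 means "no name". A worked example of the resulting layout (names taken from WriteDynamicSection further below):

    Write("")         -> returns 0   table: "\0"
    Write("oatdata")  -> returns 1   table: "\0oatdata\0"
    Write("oatexec")  -> returns 9   table: "\0oatdata\0oatexec\0"

so 9 = 1 + strlen("oatdata") + 1, and a symbol with st_name == 9 names "oatexec".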
-  class HashSection;
-
   // Writer of .dynsym and .symtab sections.
-  class SymtabSection FINAL : public Section {
+  class SymbolSection FINAL : public Section {
    public:
-    // Add a symbol with given name to this symtab. The symbol refers to
-    // 'relative_addr' within the given section and has the given attributes.
-    void AddSymbol(const std::string& name, const Section* section,
-                   Elf_Addr addr, bool is_relative, Elf_Word size,
-                   uint8_t binding, uint8_t type, uint8_t other = 0) {
-      CHECK(section != nullptr);
-      Elf_Word name_idx = strtab_->AddName(name);
-      symbols_.push_back({ name, section, addr, size, is_relative,
-                           MakeStInfo(binding, type), other, name_idx });
+    SymbolSection(ElfBuilder<ElfTypes>* owner, const std::string& name,
+                  Elf_Word type, Elf_Word flags, StringSection* strtab)
+        : Section(owner, name, type, flags, strtab, 0,
+                  sizeof(Elf_Off), sizeof(Elf_Sym)) {
     }
 
-    SymtabSection(const std::string& name, Elf_Word type, Elf_Word flags,
-                  StrtabSection* strtab)
-        : Section(name, type, flags, strtab, 0, sizeof(Elf_Off), sizeof(Elf_Sym)),
-          strtab_(strtab) {
-    }
-
-    bool IsEmpty() const {
-      return symbols_.empty();
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      return (1 /* NULL */ + symbols_.size()) * sizeof(Elf_Sym);
+    // Buffer symbol for this section.  It will be written later.
+    void Add(Elf_Word name, const Section* section,
+             Elf_Addr addr, bool is_relative, Elf_Word size,
+             uint8_t binding, uint8_t type, uint8_t other = 0) {
+      CHECK(section != nullptr);
+      Elf_Sym sym = Elf_Sym();
+      sym.st_name = name;
+      sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
+      sym.st_size = size;
+      sym.st_other = other;
+      sym.st_shndx = section->GetSectionIndex();
+      sym.st_info = (binding << 4) + (type & 0xf);
+      symbols_.push_back(sym);
     }
 
-    bool Write(File* elf_file) OVERRIDE {
-      std::vector<Elf_Sym> buffer;
-      buffer.reserve(1u + symbols_.size());
-      buffer.push_back(Elf_Sym());  // NULL.
-      for (const ElfSymbolState& it : symbols_) {
-        Elf_Sym sym = Elf_Sym();
-        sym.st_name = it.name_idx_;
-        if (it.is_relative_) {
-          sym.st_value = it.addr_ + it.section_->GetHeader()->sh_addr;
-        } else {
-          sym.st_value = it.addr_;
-        }
-        sym.st_size = it.size_;
-        sym.st_other = it.other_;
-        sym.st_shndx = it.section_->GetSectionIndex();
-        sym.st_info = it.info_;
-        buffer.push_back(sym);
-      }
-      return WriteArray(elf_file, buffer.data(), buffer.size());
+    void Write() {
+      // The symbol table always has to start with NULL symbol.
+      Elf_Sym null_symbol = Elf_Sym();
+      this->WriteFully(&null_symbol, sizeof(null_symbol));
+      this->WriteFully(symbols_.data(), symbols_.size() * sizeof(symbols_[0]));
+      symbols_.clear();
+      symbols_.shrink_to_fit();
     }
 
    private:
-    struct ElfSymbolState {
-      const std::string name_;
-      const Section* section_;
-      Elf_Addr addr_;
-      Elf_Word size_;
-      bool is_relative_;
-      uint8_t info_;
-      uint8_t other_;
-      Elf_Word name_idx_;  // index in the strtab.
-    };
-
-    static inline constexpr uint8_t MakeStInfo(uint8_t binding, uint8_t type) {
-      return ((binding) << 4) + ((type) & 0xf);
-    }
-
-    // The symbols in the same order they will be in the symbol table.
-    std::vector<ElfSymbolState> symbols_;
-    StrtabSection* strtab_;
-
-    friend class HashSection;
+    std::vector<Elf_Sym> symbols_;
   };
 
-  // TODO: Consider removing.
-  // We use it only for the dynsym section which has only 5 symbols.
-  // We do not use it for symtab, and we probably do not have to
-  // since we use those symbols only to print backtraces.
-  class HashSection FINAL : public Section {
-   public:
-    HashSection(const std::string& name, Elf_Word flags, SymtabSection* symtab)
-        : Section(name, SHT_HASH, flags, symtab,
-                  0, sizeof(Elf_Word), sizeof(Elf_Word)),
-          symtab_(symtab) {
-    }
-
-    Elf_Word GetSize() const OVERRIDE {
-      Elf_Word nbuckets = GetNumBuckets();
-      Elf_Word chain_size = symtab_->symbols_.size() + 1 /* NULL */;
-      return (2 /* header */ + nbuckets + chain_size) * sizeof(Elf_Word);
-    }
-
-    bool Write(File* const elf_file) OVERRIDE {
-      // Here is how The ELF hash table works.
-      // There are 3 arrays to worry about.
-      // * The symbol table where the symbol information is.
-      // * The bucket array which is an array of indexes into the symtab and chain.
-      // * The chain array which is also an array of indexes into the symtab and chain.
-      //
-      // Lets say the state is something like this.
-      // +--------+       +--------+      +-----------+
-      // | symtab |       | bucket |      |   chain   |
-      // |  null  |       |    1   |      | STN_UNDEF |
-      // | <sym1> |       |    4   |      |     2     |
-      // | <sym2> |       |        |      |     5     |
-      // | <sym3> |       |        |      | STN_UNDEF |
-      // | <sym4> |       |        |      |     3     |
-      // | <sym5> |       |        |      | STN_UNDEF |
-      // +--------+       +--------+      +-----------+
-      //
-      // The lookup process (in python psudocode) is
-      //
-      // def GetSym(name):
-      //   # NB STN_UNDEF == 0
-      //   indx = bucket[elfhash(name) % num_buckets]
-      //   while indx != STN_UNDEF:
-      //     if GetSymbolName(symtab[indx]) == name:
-      //       return symtab[indx]
-      //     indx = chain[indx]
-      //   return SYMBOL_NOT_FOUND
-      //
-      // Between bucket and chain arrays every symtab index must be present exactly
-      // once (except for STN_UNDEF, which must be present 1 + num_bucket times).
-      const auto& symbols = symtab_->symbols_;
-      // Select number of buckets.
-      // This is essentially arbitrary.
-      Elf_Word nbuckets = GetNumBuckets();
-      // 1 is for the implicit NULL symbol.
-      Elf_Word chain_size = (symbols.size() + 1);
-      std::vector<Elf_Word> hash;
-      hash.push_back(nbuckets);
-      hash.push_back(chain_size);
-      uint32_t bucket_offset = hash.size();
-      uint32_t chain_offset = bucket_offset + nbuckets;
-      hash.resize(hash.size() + nbuckets + chain_size, 0);
-
-      Elf_Word* buckets = hash.data() + bucket_offset;
-      Elf_Word* chain = hash.data() + chain_offset;
-
-      // Set up the actual hash table.
-      for (Elf_Word i = 0; i < symbols.size(); i++) {
-        // Add 1 since we need to have the null symbol that is not in the symbols
-        // list.
-        Elf_Word index = i + 1;
-        Elf_Word hash_val = static_cast<Elf_Word>(elfhash(symbols[i].name_.c_str())) % nbuckets;
-        if (buckets[hash_val] == 0) {
-          buckets[hash_val] = index;
-        } else {
-          hash_val = buckets[hash_val];
-          CHECK_LT(hash_val, chain_size);
-          while (chain[hash_val] != 0) {
-            hash_val = chain[hash_val];
-            CHECK_LT(hash_val, chain_size);
-          }
-          chain[hash_val] = index;
-          // Check for loops. Works because if this is non-empty then there must be
-          // another cell which already contains the same symbol index as this one,
-          // which means some symbol has more then one name, which isn't allowed.
-          CHECK_EQ(chain[index], static_cast<Elf_Word>(0));
-        }
-      }
-      return WriteArray(elf_file, hash.data(), hash.size());
-    }
-
-   private:
-    Elf_Word GetNumBuckets() const {
-      const auto& symbols = symtab_->symbols_;
-      if (symbols.size() < 8) {
-        return 2;
-      } else if (symbols.size() < 32) {
-        return 4;
-      } else if (symbols.size() < 256) {
-        return 16;
-      } else {
-        // Have about 32 ids per bucket.
-        return RoundUp(symbols.size()/32, 2);
-      }
-    }
-
-    // from bionic
-    static inline unsigned elfhash(const char *_name) {
-      const unsigned char *name = (const unsigned char *) _name;
-      unsigned h = 0, g;
+  ElfBuilder(InstructionSet isa, OutputStream* output)
+      : isa_(isa),
+        output_(output),
+        output_good_(true),
+        output_offset_(0),
+        rodata_(this, ".rodata", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        text_(this, ".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR, nullptr, 0, kPageSize, 0),
+        bss_(this, ".bss", SHT_NOBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        dynstr_(this, ".dynstr", SHF_ALLOC, kPageSize),
+        dynsym_(this, ".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
+        hash_(this, ".hash", SHT_HASH, SHF_ALLOC, &dynsym_, 0, sizeof(Elf_Word), sizeof(Elf_Word)),
+        dynamic_(this, ".dynamic", SHT_DYNAMIC, SHF_ALLOC, &dynstr_, 0, kPageSize, sizeof(Elf_Dyn)),
+        eh_frame_(this, ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0),
+        eh_frame_hdr_(this, ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0),
+        strtab_(this, ".strtab", 0, kPageSize),
+        symtab_(this, ".symtab", SHT_SYMTAB, 0, &strtab_),
+        debug_frame_(this, ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, sizeof(Elf_Addr), 0),
+        shstrtab_(this, ".shstrtab", 0, 1),
+        virtual_address_(0) {
+    text_.phdr_flags_ = PF_R | PF_X;
+    bss_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_flags_ = PF_R | PF_W;
+    dynamic_.phdr_type_ = PT_DYNAMIC;
+    eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME;
+  }
+  ~ElfBuilder() {}
 
-      while (*name) {
-        h = (h << 4) + *name++;
-        g = h & 0xf0000000;
-        h ^= g;
-        h ^= g >> 24;
-      }
-      return h;
-    }
+  InstructionSet GetIsa() { return isa_; }
+  Section* GetRoData() { return &rodata_; }
+  Section* GetText() { return &text_; }
+  Section* GetBss() { return &bss_; }
+  StringSection* GetStrTab() { return &strtab_; }
+  SymbolSection* GetSymTab() { return &symtab_; }
+  Section* GetEhFrame() { return &eh_frame_; }
+  Section* GetEhFrameHdr() { return &eh_frame_hdr_; }
+  Section* GetDebugFrame() { return &debug_frame_; }
+
+  // Encode patch locations as LEB128 list of deltas between consecutive addresses.
+  // (exposed publicly for tests)
+  static void EncodeOatPatches(const std::vector<uintptr_t>& locations,
+                               std::vector<uint8_t>* buffer) {
+    buffer->reserve(buffer->size() + locations.size() * 2);  // guess 2 bytes per ULEB128.
+    uintptr_t address = 0;  // relative to start of section.
+    for (uintptr_t location : locations) {
+      DCHECK_GE(location, address) << "Patch locations are not in sorted order";
+      EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address));
+      address = location;
+    }
+  }
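Since EncodeOatPatches stores each location as a ULEB128 delta from the previous one, a consumer rebuilds the absolute offsets with a running sum. A hedged sketch of the matching decoder (DecodeUnsignedLeb128 is the existing helper from runtime/leb128.h; this decoder itself is not part of the patch):

    // Sketch of the inverse of EncodeOatPatches: turn delta-encoded
    // .oat_patches data back into absolute, section-relative locations.
    static void DecodeOatPatches(const uint8_t* data, const uint8_t* end,
                                 std::vector<uintptr_t>* locations) {
      uintptr_t address = 0;  // Running sum, relative to start of section.
      while (data < end) {
        address += DecodeUnsignedLeb128(&data);  // Advances `data`.
        locations->push_back(address);
      }
    }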
-    SymtabSection* symtab_;
-
-    DISALLOW_COPY_AND_ASSIGN(HashSection);
-  };
+  void WritePatches(const char* name, const std::vector<uintptr_t>* patch_locations) {
+    std::vector<uint8_t> buffer;
+    EncodeOatPatches(*patch_locations, &buffer);
+    std::unique_ptr<Section> s(new Section(this, name, SHT_OAT_PATCH, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer.data(), buffer.size());
+    s->End();
+    other_sections_.push_back(std::move(s));
+  }
 
-  ElfBuilder(InstructionSet isa,
-             Elf_Word rodata_size, CodeOutput* rodata_writer,
-             Elf_Word text_size, CodeOutput* text_writer,
-             Elf_Word bss_size)
-      : isa_(isa),
-        dynstr_(".dynstr", SHF_ALLOC),
-        dynsym_(".dynsym", SHT_DYNSYM, SHF_ALLOC, &dynstr_),
-        hash_(".hash", SHF_ALLOC, &dynsym_),
-        rodata_(".rodata", SHT_PROGBITS, SHF_ALLOC,
-                nullptr, 0, kPageSize, 0, rodata_size, rodata_writer),
-        text_(".text", SHT_PROGBITS, SHF_ALLOC | SHF_EXECINSTR,
-              nullptr, 0, kPageSize, 0, text_size, text_writer),
-        bss_(".bss", bss_size),
-        dynamic_(".dynamic", &dynstr_),
-        strtab_(".strtab", 0),
-        symtab_(".symtab", SHT_SYMTAB, 0, &strtab_),
-        shstrtab_(".shstrtab", 0) {
+  void WriteSection(const char* name, const std::vector<uint8_t>* buffer) {
+    std::unique_ptr<Section> s(new Section(this, name, SHT_PROGBITS, 0, nullptr, 0, 1, 0));
+    s->Start();
+    s->WriteFully(buffer->data(), buffer->size());
+    s->End();
+    other_sections_.push_back(std::move(s));
   }
-  ~ElfBuilder() {}
 
-  OatSection* GetText() { return &text_; }
-  SymtabSection* GetSymtab() { return &symtab_; }
-
-  bool Write(File* elf_file) {
-    // Since the .text section of an oat file contains relative references to .rodata
-    // and (optionally) .bss, we keep these 2 or 3 sections together. This creates
-    // a non-traditional layout where the .bss section is mapped independently of the
-    // .dynamic section and needs its own program header with LOAD RW.
-    //
-    // The basic layout of the elf file. Order may be different in final output.
-    // +-------------------------+
-    // | Elf_Ehdr                |
-    // +-------------------------+
-    // | Elf_Phdr PHDR           |
-    // | Elf_Phdr LOAD R         | .dynsym .dynstr .hash .rodata
-    // | Elf_Phdr LOAD R X       | .text
-    // | Elf_Phdr LOAD RW        | .bss (Optional)
-    // | Elf_Phdr LOAD RW        | .dynamic
-    // | Elf_Phdr DYNAMIC        | .dynamic
-    // | Elf_Phdr LOAD R         | .eh_frame .eh_frame_hdr
-    // | Elf_Phdr EH_FRAME R     | .eh_frame_hdr
-    // +-------------------------+
-    // | .dynsym                 |
-    // | Elf_Sym  STN_UNDEF      |
-    // | Elf_Sym  oatdata        |
-    // | Elf_Sym  oatexec        |
-    // | Elf_Sym  oatlastword    |
-    // | Elf_Sym  oatbss         | (Optional)
-    // | Elf_Sym  oatbsslastword | (Optional)
-    // +-------------------------+
-    // | .dynstr                 |
-    // | names for .dynsym       |
-    // +-------------------------+
-    // | .hash                   |
-    // | hashtable for dynsym    |
-    // +-------------------------+
-    // | .rodata                 |
-    // | oatdata..oatexec-4      |
-    // +-------------------------+
-    // | .text                   |
-    // | oatexec..oatlastword    |
-    // +-------------------------+
-    // | .dynamic                |
-    // | Elf_Dyn DT_HASH         |
-    // | Elf_Dyn DT_STRTAB       |
-    // | Elf_Dyn DT_SYMTAB       |
-    // | Elf_Dyn DT_SYMENT       |
-    // | Elf_Dyn DT_STRSZ        |
-    // | Elf_Dyn DT_SONAME       |
-    // | Elf_Dyn DT_NULL         |
-    // +-------------------------+ (Optional)
-    // | .symtab                 | (Optional)
-    // | program symbols         | (Optional)
-    // +-------------------------+ (Optional)
-    // | .strtab                 | (Optional)
-    // | names for .symtab       | (Optional)
-    // +-------------------------+ (Optional)
-    // | .eh_frame               | (Optional)
-    // +-------------------------+ (Optional)
-    // | .eh_frame_hdr           | (Optional)
-    // +-------------------------+ (Optional)
-    // | .debug_info             | (Optional)
-    // +-------------------------+ (Optional)
-    // | .debug_abbrev           | (Optional)
-    // +-------------------------+ (Optional)
-    // | .debug_str              | (Optional)
-    // +-------------------------+ (Optional)
-    // | .debug_line             | (Optional)
-    // +-------------------------+
-    // | .shstrtab               |
-    // | names of sections       |
-    // +-------------------------+
-    // | Elf_Shdr null           |
-    // | Elf_Shdr .dynsym        |
-    // | Elf_Shdr .dynstr        |
-    // | Elf_Shdr .hash          |
-    // | Elf_Shdr .rodata        |
-    // | Elf_Shdr .text          |
-    // | Elf_Shdr .bss           | (Optional)
-    // | Elf_Shdr .dynamic       |
-    // | Elf_Shdr .symtab        | (Optional)
-    // | Elf_Shdr .strtab        | (Optional)
-    // | Elf_Shdr .eh_frame      | (Optional)
-    // | Elf_Shdr .eh_frame_hdr  | (Optional)
-    // | Elf_Shdr .debug_info    | (Optional)
-    // | Elf_Shdr .debug_abbrev  | (Optional)
-    // | Elf_Shdr .debug_str     | (Optional)
-    // | Elf_Shdr .debug_line    | (Optional)
-    // | Elf_Shdr .oat_patches   | (Optional)
-    // | Elf_Shdr .shstrtab      |
-    // +-------------------------+
-    constexpr bool debug_logging_ = false;
-
-    // Create a list of all section which we want to write.
-    // This is the order in which they will be written.
-    std::vector<Section*> sections;
-    sections.push_back(&dynsym_);
-    sections.push_back(&dynstr_);
-    sections.push_back(&hash_);
-    sections.push_back(&rodata_);
-    sections.push_back(&text_);
-    if (bss_.GetSize() != 0u) {
-      sections.push_back(&bss_);
-    }
-    sections.push_back(&dynamic_);
-    if (!symtab_.IsEmpty()) {
-      sections.push_back(&symtab_);
-      sections.push_back(&strtab_);
-    }
-    for (Section* section : other_sections_) {
-      sections.push_back(section);
-    }
-    sections.push_back(&shstrtab_);
-    for (size_t i = 0; i < sections.size(); i++) {
-      // The first section index is 1. Index 0 is reserved for NULL.
-      // Section index is used for relative symbols and for section links.
-      sections[i]->SetSectionIndex(i + 1);
-      // Add section name to .shstrtab.
-      Elf_Word name_offset = shstrtab_.AddName(sections[i]->GetName());
-      sections[i]->GetHeader()->sh_name = name_offset;
-    }
+  void Start() {
+    // Reserve space for ELF header and program headers.
+    // We do not know the number of headers until later, so
+    // it is easiest to just reserve a fixed amount of space.
+    int size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) * kMaxProgramHeaders;
+    Seek(size, kSeekSet);
+    virtual_address_ += size;
+  }
 
-    // The running program does not have access to section headers
-    // and the loader is not supposed to use them either.
-    // The dynamic sections therefore replicates some of the layout
-    // information like the address and size of .rodata and .text.
-    // It also contains other metadata like the SONAME.
-    // The .dynamic section is found using the PT_DYNAMIC program header.
-    BuildDynsymSection();
-    BuildDynamicSection(elf_file->GetPath());
-
-    // We do not know the number of headers until the final stages of write.
-    // It is easiest to just reserve a fixed amount of space for them.
-    constexpr size_t kMaxProgramHeaders = 8;
-    constexpr size_t kProgramHeadersOffset = sizeof(Elf_Ehdr);
-
-    // Layout of all sections - determine the final file offsets and addresses.
-    // This must be done after we have built all sections and know their size.
-    Elf_Off file_offset = kProgramHeadersOffset + sizeof(Elf_Phdr) * kMaxProgramHeaders;
-    Elf_Addr load_address = file_offset;
-    std::vector<Elf_Shdr> section_headers;
-    section_headers.reserve(1u + sections.size());
-    section_headers.push_back(Elf_Shdr());  // NULL at index 0.
-    for (auto* section : sections) {
-      Elf_Shdr* header = section->GetHeader();
-      Elf_Off alignment = header->sh_addralign > 0 ? header->sh_addralign : 1;
-      header->sh_size = section->GetSize();
-      header->sh_link = section->GetLink();
-      // Allocate memory for the section in the file.
-      if (header->sh_type != SHT_NOBITS) {
-        header->sh_offset = RoundUp(file_offset, alignment);
-        file_offset = header->sh_offset + header->sh_size;
-      }
-      // Allocate memory for the section during program execution.
-      if ((header->sh_flags & SHF_ALLOC) != 0) {
-        header->sh_addr = RoundUp(load_address, alignment);
-        load_address = header->sh_addr + header->sh_size;
+  void End() {
+    // Write section names and finish the section headers.
+    shstrtab_.Start();
+    shstrtab_.Write("");
+    for (auto* section : sections_) {
+      section->header_.sh_name = shstrtab_.Write(section->name_);
+      if (section->link_ != nullptr) {
+        section->header_.sh_link = section->link_->GetSectionIndex();
       }
-      if (debug_logging_) {
-        LOG(INFO) << "Section " << section->GetName() << ":" << std::hex
-                  << " offset=0x" << header->sh_offset
-                  << " addr=0x" << header->sh_addr
-                  << " size=0x" << header->sh_size;
-      }
-      // Collect section headers into continuous array for convenience.
-      section_headers.push_back(*header);
-    }
-    Elf_Off section_headers_offset = RoundUp(file_offset, sizeof(Elf_Off));
-
-    // Create program headers now that we know the layout of the whole file.
-    // Each segment contains one or more sections which are mapped together.
-    // Not all sections are mapped during the execution of the program.
-    // PT_LOAD does the mapping.  Other PT_* types allow the program to locate
-    // interesting parts of memory and their addresses overlap with PT_LOAD.
-    std::vector<Elf_Phdr> program_headers;
-    program_headers.push_back(Elf_Phdr());  // Placeholder for PT_PHDR.
-    // Create the main LOAD R segment which spans all sections up to .rodata.
-    const Elf_Shdr* rodata = rodata_.GetHeader();
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R,
-        0, rodata->sh_offset + rodata->sh_size, rodata->sh_addralign));
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_X, text_));
-    if (bss_.GetHeader()->sh_size != 0u) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, bss_));
     }
-    program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R | PF_W, dynamic_));
-    program_headers.push_back(MakeProgramHeader(PT_DYNAMIC, PF_R | PF_W, dynamic_));
-    const Section* eh_frame = FindSection(".eh_frame");
-    if (eh_frame != nullptr) {
-      program_headers.push_back(MakeProgramHeader(PT_LOAD, PF_R, *eh_frame));
-      const Section* eh_frame_hdr = FindSection(".eh_frame_hdr");
-      if (eh_frame_hdr != nullptr) {
-        // Check layout: eh_frame is before eh_frame_hdr and there is no gap.
-        CHECK_LE(eh_frame->GetHeader()->sh_offset, eh_frame_hdr->GetHeader()->sh_offset);
-        CHECK_EQ(eh_frame->GetHeader()->sh_offset + eh_frame->GetHeader()->sh_size,
-                 eh_frame_hdr->GetHeader()->sh_offset);
-        // Extend the PT_LOAD of .eh_frame to include the .eh_frame_hdr as well.
-        program_headers.back().p_filesz += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.back().p_memsz += eh_frame_hdr->GetHeader()->sh_size;
-        program_headers.push_back(MakeProgramHeader(PT_GNU_EH_FRAME, PF_R, *eh_frame_hdr));
-      }
+    shstrtab_.End();
+
+    // Write section headers at the end of the ELF file.
+    std::vector<Elf_Shdr> shdrs;
+    shdrs.reserve(1u + sections_.size());
+    shdrs.push_back(Elf_Shdr());  // NULL at index 0.
+    for (auto* section : sections_) {
+      shdrs.push_back(section->header_);
     }
-    DCHECK_EQ(program_headers[0].p_type, 0u);  // Check placeholder.
-    program_headers[0] = MakeProgramHeader(PT_PHDR, PF_R,
-        kProgramHeadersOffset, program_headers.size() * sizeof(Elf_Phdr), sizeof(Elf_Off));
-    CHECK_LE(program_headers.size(), kMaxProgramHeaders);
+    Elf_Off section_headers_offset;
+    section_headers_offset = RoundUp(Seek(0, kSeekCurrent), sizeof(Elf_Off));
+    Seek(section_headers_offset, kSeekSet);
+    WriteFully(shdrs.data(), shdrs.size() * sizeof(shdrs[0]));
 
-    // Create the main ELF header.
+    // Write the initial file headers.
+    std::vector<Elf_Phdr> phdrs = MakeProgramHeaders();
     Elf_Ehdr elf_header = MakeElfHeader(isa_);
-    elf_header.e_phoff = kProgramHeadersOffset;
+    elf_header.e_phoff = sizeof(Elf_Ehdr);
     elf_header.e_shoff = section_headers_offset;
-    elf_header.e_phnum = program_headers.size();
-    elf_header.e_shnum = section_headers.size();
+    elf_header.e_phnum = phdrs.size();
+    elf_header.e_shnum = shdrs.size();
     elf_header.e_shstrndx = shstrtab_.GetSectionIndex();
+    Seek(0, kSeekSet);
+    WriteFully(&elf_header, sizeof(elf_header));
+    WriteFully(phdrs.data(), phdrs.size() * sizeof(phdrs[0]));
+  }
 
-    // Write all headers and section content to the file.
-    // Depending on the implementations of Section::Write, this
-    // might be just memory copies or some more elaborate operations.
-    if (!WriteArray(elf_file, &elf_header, 1)) {
-      LOG(INFO) << "Failed to write the ELF header";
-      return false;
-    }
-    if (!WriteArray(elf_file, program_headers.data(), program_headers.size())) {
-      LOG(INFO) << "Failed to write the program headers";
-      return false;
-    }
-    for (Section* section : sections) {
-      const Elf_Shdr* header = section->GetHeader();
-      if (header->sh_type != SHT_NOBITS) {
-        if (!SeekTo(elf_file, header->sh_offset) || !section->Write(elf_file)) {
-          LOG(INFO) << "Failed to write section " << section->GetName();
-          return false;
-        }
-        Elf_Word current_offset = lseek(elf_file->Fd(), 0, SEEK_CUR);
-        CHECK_EQ(current_offset, header->sh_offset + header->sh_size)
-            << "The number of bytes written does not match GetSize()";
-      }
-    }
-    if (!SeekTo(elf_file, section_headers_offset) ||
-        !WriteArray(elf_file, section_headers.data(), section_headers.size())) {
-      LOG(INFO) << "Failed to write the section headers";
-      return false;
+  // The running program does not have access to section headers
+  // and the loader is not supposed to use them either.
+  // The .dynamic section therefore replicates some of the layout
+  // information like the address and size of .rodata and .text.
+  // It also contains other metadata like the SONAME.
+  // The .dynamic section is found using the PT_DYNAMIC program header.
+  void WriteDynamicSection(const std::string& elf_file_path) {
+    std::string soname(elf_file_path);
+    size_t directory_separator_pos = soname.rfind('/');
+    if (directory_separator_pos != std::string::npos) {
+      soname = soname.substr(directory_separator_pos + 1);
    }
-    return true;
-  }
-
-  // Adds the given section to the builder.  It does not take ownership.
-  void RegisterSection(Section* section) {
-    other_sections_.push_back(section);
+    dynstr_.Start();
+    dynstr_.Write("");  // dynstr should start with an empty string.
+    dynsym_.Add(dynstr_.Write("oatdata"), &rodata_, 0, true,
+                rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
+    if (text_.GetSize() != 0u) {
+      dynsym_.Add(dynstr_.Write("oatexec"), &text_, 0, true,
+                  text_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(dynstr_.Write("oatlastword"), &text_, text_.GetSize() - 4,
+                  true, 4, STB_GLOBAL, STT_OBJECT);
+    } else if (rodata_.GetSize() != 0) {
+      // rodata_ can be size 0 for dwarf_test.
+      dynsym_.Add(dynstr_.Write("oatlastword"), &rodata_, rodata_.GetSize() - 4,
+                  true, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    if (bss_.finished_) {
+      dynsym_.Add(dynstr_.Write("oatbss"), &bss_,
+                  0, true, bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
+      dynsym_.Add(dynstr_.Write("oatbsslastword"), &bss_,
+                  bss_.GetSize() - 4, true, 4, STB_GLOBAL, STT_OBJECT);
+    }
+    Elf_Word soname_offset = dynstr_.Write(soname);
+    dynstr_.End();
+
+    dynsym_.Start();
+    dynsym_.Write();
+    dynsym_.End();
+
+    // We do not really need a hash table since there are so few entries.
+    // However, the hash table is the only way the linker can actually
+    // determine the number of symbols in .dynsym, so it is required.
+    hash_.Start();
+    int count = dynsym_.GetSize() / sizeof(Elf_Sym);  // Includes NULL.
+    std::vector<Elf_Word> hash;
+    hash.push_back(1);  // Number of buckets.
+    hash.push_back(count);  // Number of chains.
+    // Buckets.  Having just one makes lookup a linear search.
+    hash.push_back(1);  // Point to the first non-NULL symbol.
+    // Chains.  This creates a linked list of symbols.
+    hash.push_back(0);  // Dummy entry for the NULL symbol.
+    for (int i = 1; i < count - 1; i++) {
+      hash.push_back(i + 1);  // Each symbol points to the next one.
+    }
+    hash.push_back(0);  // Last symbol terminates the chain.
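+    // For example, with the symbols {NULL, oatdata, oatexec, oatlastword}
+    // (count == 4) the vector built above is { 1, 4, 1, 0, 2, 3, 0 }:
+    // nbuckets == 1, nchains == 4, bucket[0] == 1, chain == {0, 2, 3, 0}.
+    // A lookup starts at bucket[0] and walks the chain 1 -> 2 -> 3 until
+    // it hits the 0 terminator.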
+    hash_.WriteFully(hash.data(), hash.size() * sizeof(hash[0]));
+    hash_.End();
+
+    dynamic_.Start();
+    Elf_Dyn dyns[] = {
+      { DT_HASH, { hash_.GetAddress() } },
+      { DT_STRTAB, { dynstr_.GetAddress() } },
+      { DT_SYMTAB, { dynsym_.GetAddress() } },
+      { DT_SYMENT, { sizeof(Elf_Sym) } },
+      { DT_STRSZ, { dynstr_.GetSize() } },
+      { DT_SONAME, { soname_offset } },
+      { DT_NULL, { 0 } },
+    };
+    dynamic_.WriteFully(&dyns, sizeof(dyns));
+    dynamic_.End();
  }

-  const Section* FindSection(const char* name) {
-    for (const auto* section : other_sections_) {
-      if (section->GetName() == name) {
-        return section;
-      }
-    }
-    return nullptr;
+  // Returns true if all writes and seeks on the output stream succeeded.
+  bool Good() {
+    return output_good_;
  }

 private:
-  static bool SeekTo(File* elf_file, Elf_Word offset) {
-    DCHECK_LE(lseek(elf_file->Fd(), 0, SEEK_CUR), static_cast<off_t>(offset))
-        << "Seeking backwards";
-    if (static_cast<off_t>(offset) != lseek(elf_file->Fd(), offset, SEEK_SET)) {
-      PLOG(ERROR) << "Failed to seek in file " << elf_file->GetPath();
-      return false;
+  // This function never reports failure, in order to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  void WriteFully(const void* buffer, size_t byte_count) {
+    if (output_good_) {
+      if (!output_->WriteFully(buffer, byte_count)) {
+        PLOG(ERROR) << "Failed to write " << byte_count
+                    << " bytes to ELF file at offset " << output_offset_;
+        output_good_ = false;
+      }
    }
-    return true;
+    output_offset_ += byte_count;
  }

-  template<typename T>
-  static bool WriteArray(File* elf_file, const T* data, size_t count) {
-    if (count != 0) {
-      DCHECK(data != nullptr);
-      if (!elf_file->WriteFully(data, count * sizeof(T))) {
-        PLOG(ERROR) << "Failed to write to file " << elf_file->GetPath();
-        return false;
+  // This function never reports failure, in order to simplify the calling code.
+  // Use Good() to check the actual status of the output stream.
+  off_t Seek(off_t offset, Whence whence) {
+    // We keep a shadow copy of the offset so that we return
+    // the expected value even if the output stream failed.
+    off_t new_offset;
+    switch (whence) {
+      case kSeekSet:
+        new_offset = offset;
+        break;
+      case kSeekCurrent:
+        new_offset = output_offset_ + offset;
+        break;
+      default:
+        LOG(FATAL) << "Unsupported seek type: " << whence;
+        UNREACHABLE();
+    }
+    if (output_good_) {
+      off_t actual_offset = output_->Seek(offset, whence);
+      if (actual_offset == (off_t)-1) {
+        PLOG(ERROR) << "Failed to seek in ELF file. Offset=" << offset
+                    << " whence=" << whence << " new_offset=" << new_offset;
+        output_good_ = false;
+      }
+      DCHECK_EQ(actual_offset, new_offset);
    }
-    return true;
-  }
-
-  // Helper - create segment header based on memory range.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    Elf_Off offset, Elf_Word size, Elf_Word align) {
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type = type;
-    phdr.p_flags = flags;
-    phdr.p_offset = offset;
-    phdr.p_vaddr = offset;
-    phdr.p_paddr = offset;
-    phdr.p_filesz = size;
-    phdr.p_memsz = size;
-    phdr.p_align = align;
-    return phdr;
-  }
-
-  // Helper - create segment header based on section header.
-  static Elf_Phdr MakeProgramHeader(Elf_Word type, Elf_Word flags,
-                                    const Section& section) {
-    const Elf_Shdr* shdr = section.GetHeader();
-    // Only run-time allocated sections should be in segment headers.
-    CHECK_NE(shdr->sh_flags & SHF_ALLOC, 0u);
-    Elf_Phdr phdr = Elf_Phdr();
-    phdr.p_type = type;
-    phdr.p_flags = flags;
-    phdr.p_offset = shdr->sh_offset;
-    phdr.p_vaddr = shdr->sh_addr;
-    phdr.p_paddr = shdr->sh_addr;
-    phdr.p_filesz = shdr->sh_type != SHT_NOBITS ? shdr->sh_size : 0u;
-    phdr.p_memsz = shdr->sh_size;
-    phdr.p_align = shdr->sh_addralign;
-    return phdr;
+    output_offset_ = new_offset;
+    return new_offset;
  }

  static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
@@ -869,6 +544,10 @@ class ElfBuilder FINAL {
      }
      case kNone: {
        LOG(FATAL) << "No instruction set";
+        break;
+      }
+      default: {
+        LOG(FATAL) << "Unknown instruction set " << isa;
      }
    }

@@ -892,50 +571,110 @@ class ElfBuilder FINAL {
    return elf_header;
  }

-  void BuildDynamicSection(const std::string& elf_file_path) {
-    std::string soname(elf_file_path);
-    size_t directory_separator_pos = soname.rfind('/');
-    if (directory_separator_pos != std::string::npos) {
-      soname = soname.substr(directory_separator_pos + 1);
+  // Create program headers based on written sections.
+  std::vector<Elf_Phdr> MakeProgramHeaders() {
+    CHECK(!sections_.empty());
+    std::vector<Elf_Phdr> phdrs;
+    {
+      // The program headers must start with PT_PHDR, which is used in
+      // the loaded process to determine the number of program headers.
+      Elf_Phdr phdr = Elf_Phdr();
+      phdr.p_type = PT_PHDR;
+      phdr.p_flags = PF_R;
+      phdr.p_offset = phdr.p_vaddr = phdr.p_paddr = sizeof(Elf_Ehdr);
+      phdr.p_filesz = phdr.p_memsz = 0;  // We need to fill this later.
+      phdr.p_align = sizeof(Elf_Off);
+      phdrs.push_back(phdr);
+      // Tell the linker to mmap the start of the file to memory.
+      Elf_Phdr load = Elf_Phdr();
+      load.p_type = PT_LOAD;
+      load.p_flags = PF_R;
+      load.p_offset = load.p_vaddr = load.p_paddr = 0;
+      load.p_filesz = load.p_memsz = sections_[0]->header_.sh_offset;
+      load.p_align = kPageSize;
+      phdrs.push_back(load);
+    }
+    // Create program headers for sections.
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // PT_LOAD tells the linker to mmap part of the file.
+        // The linker can only mmap page-aligned sections.
+        // A single PT_LOAD may contain several ELF sections.
+        Elf_Phdr& prev = phdrs.back();
+        Elf_Phdr load = Elf_Phdr();
+        load.p_type = PT_LOAD;
+        load.p_flags = section->phdr_flags_;
+        load.p_offset = shdr.sh_offset;
+        load.p_vaddr = load.p_paddr = shdr.sh_addr;
+        load.p_filesz = (shdr.sh_type != SHT_NOBITS ? shdr.sh_size : 0u);
+        load.p_memsz = shdr.sh_size;
+        load.p_align = shdr.sh_addralign;
+        if (prev.p_type == load.p_type &&
+            prev.p_flags == load.p_flags &&
+            prev.p_filesz == prev.p_memsz &&  // Do not merge .bss
+            load.p_filesz == load.p_memsz) {  // Do not merge .bss
+          // Merge this PT_LOAD with the previous one.
+          Elf_Word size = shdr.sh_offset + shdr.sh_size - prev.p_offset;
+          prev.p_filesz = size;
+          prev.p_memsz = size;
+        } else {
+          // If we are adding a new load, it must be aligned.
+          CHECK_EQ(shdr.sh_addralign, (Elf_Word)kPageSize);
+          phdrs.push_back(load);
+        }
+      }
    }
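+    // Note on the merge check above: a PT_LOAD whose p_filesz differs from
+    // p_memsz (i.e. .bss) is never merged, because its zero-initialized tail
+    // exists only in memory and has no backing bytes in the file.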
-    // NB: We must add the name before adding DT_STRSZ.
-    Elf_Word soname_offset = dynstr_.AddName(soname);
-
-    dynamic_.AddDynamicTag(DT_HASH, 0, &hash_);
-    dynamic_.AddDynamicTag(DT_STRTAB, 0, &dynstr_);
-    dynamic_.AddDynamicTag(DT_SYMTAB, 0, &dynsym_);
-    dynamic_.AddDynamicTag(DT_SYMENT, sizeof(Elf_Sym), nullptr);
-    dynamic_.AddDynamicTag(DT_STRSZ, dynstr_.GetSize(), nullptr);
-    dynamic_.AddDynamicTag(DT_SONAME, soname_offset, nullptr);
-  }
-
-  void BuildDynsymSection() {
-    dynsym_.AddSymbol("oatdata", &rodata_, 0, true,
-                      rodata_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatexec", &text_, 0, true,
-                      text_.GetSize(), STB_GLOBAL, STT_OBJECT);
-    dynsym_.AddSymbol("oatlastword", &text_, text_.GetSize() - 4,
-                      true, 4, STB_GLOBAL, STT_OBJECT);
-    if (bss_.GetSize() != 0u) {
-      dynsym_.AddSymbol("oatbss", &bss_, 0, true,
-                        bss_.GetSize(), STB_GLOBAL, STT_OBJECT);
-      dynsym_.AddSymbol("oatbsslastword", &bss_, bss_.GetSize() - 4,
-                        true, 4, STB_GLOBAL, STT_OBJECT);
+    for (auto* section : sections_) {
+      const Elf_Shdr& shdr = section->header_;
+      if ((shdr.sh_flags & SHF_ALLOC) != 0 && shdr.sh_size != 0) {
+        // Other PT_* types allow the program to locate interesting
+        // parts of memory at runtime.  They must overlap with PT_LOAD.
+        if (section->phdr_type_ != 0) {
+          Elf_Phdr phdr = Elf_Phdr();
+          phdr.p_type = section->phdr_type_;
+          phdr.p_flags = section->phdr_flags_;
+          phdr.p_offset = shdr.sh_offset;
+          phdr.p_vaddr = phdr.p_paddr = shdr.sh_addr;
+          phdr.p_filesz = phdr.p_memsz = shdr.sh_size;
+          phdr.p_align = shdr.sh_addralign;
+          phdrs.push_back(phdr);
+        }
+      }
    }
+    // Set the size of the initial PT_PHDR.
+    CHECK_EQ(phdrs[0].p_type, (Elf_Word)PT_PHDR);
+    phdrs[0].p_filesz = phdrs[0].p_memsz = phdrs.size() * sizeof(Elf_Phdr);
+
+    return phdrs;
  }

  InstructionSet isa_;
-  StrtabSection dynstr_;
-  SymtabSection dynsym_;
-  HashSection hash_;
-  OatSection rodata_;
-  OatSection text_;
-  NoBitsSection bss_;
-  DynamicSection dynamic_;
-  StrtabSection strtab_;
-  SymtabSection symtab_;
-  std::vector<Section*> other_sections_;
-  StrtabSection shstrtab_;
+
+  OutputStream* output_;
+  bool output_good_;  // True if all writes to output succeeded.
+  off_t output_offset_;  // Keep track of the current position in the stream.
+
+  Section rodata_;
+  Section text_;
+  Section bss_;
+  StringSection dynstr_;
+  SymbolSection dynsym_;
+  Section hash_;
+  Section dynamic_;
+  Section eh_frame_;
+  Section eh_frame_hdr_;
+  StringSection strtab_;
+  SymbolSection symtab_;
+  Section debug_frame_;
+  StringSection shstrtab_;
+  std::vector<std::unique_ptr<Section>> other_sections_;
+
+  // List of used sections in the order in which they were written.
+  std::vector<Section*> sections_;
+
+  // Used for allocation of virtual address space.
+  Elf_Addr virtual_address_;

  DISALLOW_COPY_AND_ASSIGN(ElfBuilder);
};

diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 3a9e312225..90db7eb41a 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -24,16 +24,16 @@
#include "dex_file-inl.h"
#include "dwarf/headers.h"
#include "dwarf/register.h"
+#include "elf_builder.h"
#include "oat_writer.h"
#include "utils.h"

namespace art {
namespace dwarf {

-static void WriteDebugFrameCIE(InstructionSet isa,
-                               ExceptionHeaderValueApplication addr_type,
-                               CFIFormat format,
-                               std::vector<uint8_t>* eh_frame) {
+static void WriteCIE(InstructionSet isa,
+                     CFIFormat format,
+                     std::vector<uint8_t>* buffer) {
  // Scratch registers should be marked as undefined.  This tells the
  // debugger that their values in the previous frame are not recoverable.
bool is64bit = Is64BitInstructionSet(isa); @@ -59,8 +59,7 @@ static void WriteDebugFrameCIE(InstructionSet isa, } } auto return_reg = Reg::ArmCore(14); // R14(LR). - WriteDebugFrameCIE(is64bit, addr_type, return_reg, - opcodes, format, eh_frame); + WriteCIE(is64bit, return_reg, opcodes, format, buffer); return; } case kArm64: { @@ -83,8 +82,7 @@ static void WriteDebugFrameCIE(InstructionSet isa, } } auto return_reg = Reg::Arm64Core(30); // R30(LR). - WriteDebugFrameCIE(is64bit, addr_type, return_reg, - opcodes, format, eh_frame); + WriteCIE(is64bit, return_reg, opcodes, format, buffer); return; } case kMips: @@ -100,8 +98,7 @@ static void WriteDebugFrameCIE(InstructionSet isa, } } auto return_reg = Reg::MipsCore(31); // R31(RA). - WriteDebugFrameCIE(is64bit, addr_type, return_reg, - opcodes, format, eh_frame); + WriteCIE(is64bit, return_reg, opcodes, format, buffer); return; } case kX86: { @@ -127,8 +124,7 @@ static void WriteDebugFrameCIE(InstructionSet isa, } } auto return_reg = Reg::X86Core(8); // R8(EIP). - WriteDebugFrameCIE(is64bit, addr_type, return_reg, - opcodes, format, eh_frame); + WriteCIE(is64bit, return_reg, opcodes, format, buffer); return; } case kX86_64: { @@ -154,8 +150,7 @@ static void WriteDebugFrameCIE(InstructionSet isa, } } auto return_reg = Reg::X86_64Core(16); // R16(RIP). - WriteDebugFrameCIE(is64bit, addr_type, return_reg, - opcodes, format, eh_frame); + WriteCIE(is64bit, return_reg, opcodes, format, buffer); return; } case kNone: @@ -165,36 +160,69 @@ static void WriteDebugFrameCIE(InstructionSet isa, UNREACHABLE(); } -void WriteCFISection(const CompilerDriver* compiler, - const OatWriter* oat_writer, - ExceptionHeaderValueApplication address_type, - CFIFormat format, - std::vector<uint8_t>* debug_frame, - std::vector<uintptr_t>* debug_frame_patches, - std::vector<uint8_t>* eh_frame_hdr, - std::vector<uintptr_t>* eh_frame_hdr_patches) { - const auto& method_infos = oat_writer->GetMethodDebugInfo(); - const InstructionSet isa = compiler->GetInstructionSet(); +template<typename ElfTypes> +void WriteCFISection(ElfBuilder<ElfTypes>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos, + CFIFormat format) { + CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT || + format == dwarf::DW_EH_FRAME_FORMAT); + typedef typename ElfTypes::Addr Elf_Addr; + + std::vector<uint32_t> binary_search_table; + std::vector<uintptr_t> patch_locations; + if (format == DW_EH_FRAME_FORMAT) { + binary_search_table.reserve(2 * method_infos.size()); + } else { + patch_locations.reserve(method_infos.size()); + } // Write .eh_frame/.debug_frame section. - std::map<uint32_t, size_t> address_to_fde_offset_map; - size_t cie_offset = debug_frame->size(); - WriteDebugFrameCIE(isa, address_type, format, debug_frame); - for (const OatWriter::DebugInfo& mi : method_infos) { - if (!mi.deduped_) { // Only one FDE per unique address. - ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo(); - if (!opcodes.empty()) { - address_to_fde_offset_map.emplace(mi.low_pc_, debug_frame->size()); - WriteDebugFrameFDE(Is64BitInstructionSet(isa), cie_offset, - mi.low_pc_, mi.high_pc_ - mi.low_pc_, - opcodes, format, debug_frame, debug_frame_patches); + auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT + ? 
builder->GetDebugFrame()
+                          : builder->GetEhFrame());
+  {
+    cfi_section->Start();
+    const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
+    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr cfi_address = cfi_section->GetAddress();
+    const Elf_Addr cie_address = cfi_address;
+    Elf_Addr buffer_address = cfi_address;
+    std::vector<uint8_t> buffer;  // Small temporary buffer.
+    WriteCIE(builder->GetIsa(), format, &buffer);
+    cfi_section->WriteFully(buffer.data(), buffer.size());
+    buffer_address += buffer.size();
+    buffer.clear();
+    for (const OatWriter::DebugInfo& mi : method_infos) {
+      if (!mi.deduped_) {  // Only one FDE per unique address.
+        ArrayRef<const uint8_t> opcodes = mi.compiled_method_->GetCFIInfo();
+        if (!opcodes.empty()) {
+          const Elf_Addr code_address = text_address + mi.low_pc_;
+          if (format == DW_EH_FRAME_FORMAT) {
+            binary_search_table.push_back(
+                dchecked_integral_cast<uint32_t>(code_address));
+            binary_search_table.push_back(
+                dchecked_integral_cast<uint32_t>(buffer_address));
+          }
+          WriteFDE(is64bit, cfi_address, cie_address,
+                   code_address, mi.high_pc_ - mi.low_pc_,
+                   opcodes, format, buffer_address, &buffer,
+                   &patch_locations);
+          cfi_section->WriteFully(buffer.data(), buffer.size());
+          buffer_address += buffer.size();
+          buffer.clear();
+        }
      }
    }
+    cfi_section->End();
  }

  if (format == DW_EH_FRAME_FORMAT) {
+    auto* header_section = builder->GetEhFrameHdr();
+    header_section->Start();
+    uint32_t header_address = dchecked_integral_cast<uint32_t>(header_section->GetAddress());
    // Write .eh_frame_hdr section.
-    Writer<> header(eh_frame_hdr);
+    std::vector<uint8_t> buffer;
+    Writer<> header(&buffer);
    header.PushUint8(1);  // Version.
    // Encoding of .eh_frame pointer - libunwind does not honor datarel here,
    // so we have to use pcrel which means relative to the pointer's location.
@@ -204,47 +232,29 @@ void WriteCFISection(const CompilerDriver* compiler,
    // Encoding of binary search table addresses - libunwind supports only this
    // specific combination, which means relative to the start of .eh_frame_hdr.
    header.PushUint8(DW_EH_PE_datarel | DW_EH_PE_sdata4);
-    // .eh_frame pointer - .eh_frame_hdr section is after .eh_frame section
-    const int32_t relative_eh_frame_begin = -static_cast<int32_t>(debug_frame->size());
-    header.PushInt32(relative_eh_frame_begin - 4U);
+    // .eh_frame pointer
+    header.PushInt32(cfi_section->GetAddress() - (header_address + 4u));
    // Binary search table size (number of entries).
-    header.PushUint32(dchecked_integral_cast<uint32_t>(address_to_fde_offset_map.size()));
+    header.PushUint32(dchecked_integral_cast<uint32_t>(binary_search_table.size() / 2));
+    header_section->WriteFully(buffer.data(), buffer.size());
    // Binary search table.
-    for (const auto& address_to_fde_offset : address_to_fde_offset_map) {
-      u_int32_t code_address = address_to_fde_offset.first;
-      int32_t fde_address = dchecked_integral_cast<int32_t>(address_to_fde_offset.second);
-      eh_frame_hdr_patches->push_back(header.data()->size());
-      header.PushUint32(code_address);
-      // We know the exact layout (eh_frame is immediately before eh_frame_hdr)
-      // and the data is relative to the start of the eh_frame_hdr,
-      // so patching isn't necessary (in contrast to the code address above).
-      header.PushInt32(relative_eh_frame_begin + fde_address);
+    for (size_t i = 0; i < binary_search_table.size(); i++) {
+      // Make addresses section-relative since we know the header address now.
+      binary_search_table[i] -= header_address;
    }
+    header_section->WriteFully(binary_search_table.data(),
+                               binary_search_table.size() * sizeof(binary_search_table[0]));
+    header_section->End();
+  } else {
+    builder->WritePatches(".debug_frame.oat_patches", &patch_locations);
  }
}

-/*
- * @brief Generate the DWARF sections.
- * @param oat_writer The Oat file Writer.
- * @param eh_frame Call Frame Information.
- * @param debug_info Compilation unit information.
- * @param debug_info_patches Address locations to be patched.
- * @param debug_abbrev Abbreviations used to generate dbg_info.
- * @param debug_str Debug strings.
- * @param debug_line Line number table.
- * @param debug_line_patches Address locations to be patched.
- */
-void WriteDebugSections(const CompilerDriver* compiler,
-                        const OatWriter* oat_writer,
-                        std::vector<uint8_t>* debug_info,
-                        std::vector<uintptr_t>* debug_info_patches,
-                        std::vector<uint8_t>* debug_abbrev,
-                        std::vector<uint8_t>* debug_str,
-                        std::vector<uint8_t>* debug_line,
-                        std::vector<uintptr_t>* debug_line_patches) {
-  const std::vector<OatWriter::DebugInfo>& method_infos = oat_writer->GetMethodDebugInfo();
-  const InstructionSet isa = compiler->GetInstructionSet();
-  const bool is64bit = Is64BitInstructionSet(isa);
+template<typename ElfTypes>
+void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                        const std::vector<OatWriter::DebugInfo>& method_infos) {
+  typedef typename ElfTypes::Addr Elf_Addr;
+  const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
+  Elf_Addr text_address = builder->GetText()->GetAddress();

  // Find all addresses (low_pc) which contain deduped methods.
  // The first instance of a method is not marked deduped_, but the rest are.
@@ -273,6 +283,12 @@ void WriteDebugSections(const CompilerDriver* compiler,
  }

  // Write .debug_info section.
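+  // Shape of the tree emitted below for each compilation unit:
+  //   DW_TAG_compile_unit
+  //     DW_AT_producer, DW_AT_language, DW_AT_low_pc, DW_AT_high_pc,
+  //     DW_AT_stmt_list  (offset of this unit's .debug_line program)
+  //     DW_TAG_subprogram  (one child per method)
+  //       DW_AT_name, DW_AT_low_pc, DW_AT_high_pc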
+  std::vector<uint8_t> debug_info;
+  std::vector<uintptr_t> debug_info_patches;
+  std::vector<uint8_t> debug_abbrev;
+  std::vector<uint8_t> debug_str;
+  std::vector<uint8_t> debug_line;
+  std::vector<uintptr_t> debug_line_patches;
  for (const auto& compilation_unit : compilation_units) {
    uint32_t cunit_low_pc = 0xFFFFFFFFU;
    uint32_t cunit_high_pc = 0;
@@ -281,14 +297,14 @@
      cunit_high_pc = std::max(cunit_high_pc, method_info->high_pc_);
    }

-    size_t debug_abbrev_offset = debug_abbrev->size();
-    DebugInfoEntryWriter<> info(is64bit, debug_abbrev);
+    size_t debug_abbrev_offset = debug_abbrev.size();
+    DebugInfoEntryWriter<> info(is64bit, &debug_abbrev);
    info.StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
-    info.WriteStrp(DW_AT_producer, "Android dex2oat", debug_str);
+    info.WriteStrp(DW_AT_producer, "Android dex2oat", &debug_str);
    info.WriteData1(DW_AT_language, DW_LANG_Java);
-    info.WriteAddr(DW_AT_low_pc, cunit_low_pc);
-    info.WriteAddr(DW_AT_high_pc, cunit_high_pc);
-    info.WriteData4(DW_AT_stmt_list, debug_line->size());
+    info.WriteAddr(DW_AT_low_pc, text_address + cunit_low_pc);
+    info.WriteAddr(DW_AT_high_pc, text_address + cunit_high_pc);
+    info.WriteData4(DW_AT_stmt_list, debug_line.size());
    for (auto method_info : compilation_unit) {
      std::string method_name = PrettyMethod(method_info->dex_method_index_,
                                             *method_info->dex_file_, true);
@@ -296,13 +312,13 @@
        method_name += " [DEDUPED]";
      }
      info.StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
-      info.WriteStrp(DW_AT_name, method_name.data(), debug_str);
-      info.WriteAddr(DW_AT_low_pc, method_info->low_pc_);
-      info.WriteAddr(DW_AT_high_pc, method_info->high_pc_);
+      info.WriteStrp(DW_AT_name, method_name.data(), &debug_str);
+      info.WriteAddr(DW_AT_low_pc, text_address + method_info->low_pc_);
+      info.WriteAddr(DW_AT_high_pc, text_address + method_info->high_pc_);
      info.EndTag();  // DW_TAG_subprogram
    }
    info.EndTag();  // DW_TAG_compile_unit
-    WriteDebugInfoCU(debug_abbrev_offset, info, debug_info, debug_info_patches);
+    WriteDebugInfoCU(debug_abbrev_offset, info, &debug_info, &debug_info_patches);

    // Write .debug_line section.
    std::vector<FileEntry> files;
@@ -311,7 +327,7 @@
    std::unordered_map<std::string, size_t> directories_map;
    int code_factor_bits_ = 0;
    int dwarf_isa = -1;
-    switch (isa) {
+    switch (builder->GetIsa()) {
      case kArm:  // arm actually means thumb2.
      case kThumb2:
        code_factor_bits_ = 1;  // 16-bit instructions
@@ -328,7 +344,7 @@
        break;
    }
    DebugLineOpCodeWriter<> opcodes(is64bit, code_factor_bits_);
-    opcodes.SetAddress(cunit_low_pc);
+    opcodes.SetAddress(text_address + cunit_low_pc);
    if (dwarf_isa != -1) {
      opcodes.SetISA(dwarf_isa);
    }
@@ -342,6 +358,8 @@
        DefaultSrcMap dex2line_;
      } debug_info_callbacks;

+      Elf_Addr method_address = text_address + mi->low_pc_;
+
      const DexFile* dex = mi->dex_file_;
      if (mi->code_item_ != nullptr) {
        dex->DecodeDebugInfo(mi->code_item_,
@@ -414,26 +432,48 @@
            int first_line = dex2line_map.front().to_;
            // Prologue is not a sensible place for a breakpoint.
opcodes.NegateStmt(); - opcodes.AddRow(mi->low_pc_, first_line); + opcodes.AddRow(method_address, first_line); opcodes.NegateStmt(); opcodes.SetPrologueEnd(); } - opcodes.AddRow(mi->low_pc_ + pc, line); + opcodes.AddRow(method_address + pc, line); } else if (line != opcodes.CurrentLine()) { - opcodes.AddRow(mi->low_pc_ + pc, line); + opcodes.AddRow(method_address + pc, line); } } } } else { // line 0 - instruction cannot be attributed to any source line. - opcodes.AddRow(mi->low_pc_, 0); + opcodes.AddRow(method_address, 0); } } - opcodes.AdvancePC(cunit_high_pc); + opcodes.AdvancePC(text_address + cunit_high_pc); opcodes.EndSequence(); - WriteDebugLineTable(directories, files, opcodes, debug_line, debug_line_patches); + WriteDebugLineTable(directories, files, opcodes, &debug_line, &debug_line_patches); } + builder->WriteSection(".debug_info", &debug_info); + builder->WritePatches(".debug_info.oat_patches", &debug_info_patches); + builder->WriteSection(".debug_abbrev", &debug_abbrev); + builder->WriteSection(".debug_str", &debug_str); + builder->WriteSection(".debug_line", &debug_line); + builder->WritePatches(".debug_line.oat_patches", &debug_line_patches); } +// Explicit instantiations +template void WriteCFISection<ElfTypes32>( + ElfBuilder<ElfTypes32>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos, + CFIFormat format); +template void WriteCFISection<ElfTypes64>( + ElfBuilder<ElfTypes64>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos, + CFIFormat format); +template void WriteDebugSections<ElfTypes32>( + ElfBuilder<ElfTypes32>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos); +template void WriteDebugSections<ElfTypes64>( + ElfBuilder<ElfTypes64>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos); + } // namespace dwarf } // namespace art diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h index 69f7e0d811..e58fd0a390 100644 --- a/compiler/elf_writer_debug.h +++ b/compiler/elf_writer_debug.h @@ -19,29 +19,21 @@ #include <vector> +#include "elf_builder.h" #include "dwarf/dwarf_constants.h" #include "oat_writer.h" namespace art { namespace dwarf { -void WriteCFISection(const CompilerDriver* compiler, - const OatWriter* oat_writer, - ExceptionHeaderValueApplication address_type, - CFIFormat format, - std::vector<uint8_t>* debug_frame, - std::vector<uintptr_t>* debug_frame_patches, - std::vector<uint8_t>* eh_frame_hdr, - std::vector<uintptr_t>* eh_frame_hdr_patches); - -void WriteDebugSections(const CompilerDriver* compiler, - const OatWriter* oat_writer, - std::vector<uint8_t>* debug_info, - std::vector<uintptr_t>* debug_info_patches, - std::vector<uint8_t>* debug_abbrev, - std::vector<uint8_t>* debug_str, - std::vector<uint8_t>* debug_line, - std::vector<uintptr_t>* debug_line_patches); +template<typename ElfTypes> +void WriteCFISection(ElfBuilder<ElfTypes>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos, + CFIFormat format); + +template<typename ElfTypes> +void WriteDebugSections(ElfBuilder<ElfTypes>* builder, + const std::vector<OatWriter::DebugInfo>& method_infos); } // namespace dwarf } // namespace art diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index dce1e861b4..5c059e1e82 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -70,190 +70,78 @@ bool ElfWriterQuick<ElfTypes>::Create(File* elf_file, template <typename ElfTypes> static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer); -// Encode 
patch locations as LEB128 list of deltas between consecutive addresses. template <typename ElfTypes> -void ElfWriterQuick<ElfTypes>::EncodeOatPatches(const std::vector<uintptr_t>& locations, - std::vector<uint8_t>* buffer) { - buffer->reserve(buffer->size() + locations.size() * 2); // guess 2 bytes per ULEB128. - uintptr_t address = 0; // relative to start of section. - for (uintptr_t location : locations) { - DCHECK_GE(location, address) << "Patch locations are not in sorted order"; - EncodeUnsignedLeb128(buffer, dchecked_integral_cast<uint32_t>(location - address)); - address = location; - } -} - -class RodataWriter FINAL : public CodeOutput { - public: - explicit RodataWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {} - - bool Write(OutputStream* out) OVERRIDE { - return oat_writer_->WriteRodata(out); - } +bool ElfWriterQuick<ElfTypes>::Write( + OatWriter* oat_writer, + const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED, + const std::string& android_root_unused ATTRIBUTE_UNUSED, + bool is_host_unused ATTRIBUTE_UNUSED) { + const InstructionSet isa = compiler_driver_->GetInstructionSet(); + std::unique_ptr<BufferedOutputStream> output_stream( + new BufferedOutputStream(new FileOutputStream(elf_file_))); + std::unique_ptr<ElfBuilder<ElfTypes>> builder( + new ElfBuilder<ElfTypes>(isa, output_stream.get())); - private: - OatWriter* oat_writer_; -}; + builder->Start(); -class TextWriter FINAL : public CodeOutput { - public: - explicit TextWriter(OatWriter* oat_writer) : oat_writer_(oat_writer) {} + auto* rodata = builder->GetRoData(); + auto* text = builder->GetText(); + auto* bss = builder->GetBss(); - bool Write(OutputStream* out) OVERRIDE { - return oat_writer_->WriteCode(out); + rodata->Start(); + if (!oat_writer->WriteRodata(rodata)) { + return false; } + rodata->End(); - private: - OatWriter* oat_writer_; -}; - -enum PatchResult { - kAbsoluteAddress, // Absolute memory location. - kPointerRelativeAddress, // Offset relative to the location of the pointer. - kSectionRelativeAddress, // Offset relative to start of containing section. -}; - -// Patch memory addresses within a buffer. -// It assumes that the unpatched addresses are offsets relative to base_address. 
-// (which generally means method's low_pc relative to the start of .text) -template <typename Elf_Addr, typename Address, PatchResult kPatchResult> -static void Patch(const std::vector<uintptr_t>& patch_locations, - Elf_Addr buffer_address, Elf_Addr base_address, - std::vector<uint8_t>* buffer) { - for (uintptr_t location : patch_locations) { - typedef __attribute__((__aligned__(1))) Address UnalignedAddress; - auto* to_patch = reinterpret_cast<UnalignedAddress*>(buffer->data() + location); - switch (kPatchResult) { - case kAbsoluteAddress: - *to_patch = (base_address + *to_patch); - break; - case kPointerRelativeAddress: - *to_patch = (base_address + *to_patch) - (buffer_address + location); - break; - case kSectionRelativeAddress: - *to_patch = (base_address + *to_patch) - buffer_address; - break; - } + text->Start(); + if (!oat_writer->WriteCode(text)) { + return false; } -} + text->End(); -template <typename ElfTypes> -bool ElfWriterQuick<ElfTypes>::Write( - OatWriter* oat_writer, - const std::vector<const DexFile*>& dex_files_unused ATTRIBUTE_UNUSED, - const std::string& android_root_unused ATTRIBUTE_UNUSED, - bool is_host_unused ATTRIBUTE_UNUSED) { - using Elf_Addr = typename ElfTypes::Addr; - const InstructionSet isa = compiler_driver_->GetInstructionSet(); + if (oat_writer->GetBssSize() != 0) { + bss->Start(); + bss->SetSize(oat_writer->GetBssSize()); + bss->End(); + } - // Setup the builder with the main OAT sections (.rodata .text .bss). - const size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset(); - const size_t text_size = oat_writer->GetSize() - rodata_size; - const size_t bss_size = oat_writer->GetBssSize(); - RodataWriter rodata_writer(oat_writer); - TextWriter text_writer(oat_writer); - std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>( - isa, rodata_size, &rodata_writer, text_size, &text_writer, bss_size)); + builder->WriteDynamicSection(elf_file_->GetPath()); - // Add debug sections. - // They are allocated here (in the same scope as the builder), - // but they are registered with the builder only if they are used. - using RawSection = typename ElfBuilder<ElfTypes>::RawSection; - const auto* text = builder->GetText(); - const bool is64bit = Is64BitInstructionSet(isa); - const int pointer_size = GetInstructionSetPointerSize(isa); - std::unique_ptr<RawSection> eh_frame(new RawSection( - ".eh_frame", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, kPageSize, 0, - is64bit ? Patch<Elf_Addr, uint64_t, kPointerRelativeAddress> : - Patch<Elf_Addr, uint32_t, kPointerRelativeAddress>, - text)); - std::unique_ptr<RawSection> eh_frame_hdr(new RawSection( - ".eh_frame_hdr", SHT_PROGBITS, SHF_ALLOC, nullptr, 0, 4, 0, - Patch<Elf_Addr, uint32_t, kSectionRelativeAddress>, text)); - std::unique_ptr<RawSection> debug_frame(new RawSection( - ".debug_frame", SHT_PROGBITS, 0, nullptr, 0, pointer_size, 0, - is64bit ? 
Patch<Elf_Addr, uint64_t, kAbsoluteAddress> : - Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, - text)); - std::unique_ptr<RawSection> debug_frame_oat_patches(new RawSection( - ".debug_frame.oat_patches", SHT_OAT_PATCH)); - std::unique_ptr<RawSection> debug_info(new RawSection( - ".debug_info", SHT_PROGBITS, 0, nullptr, 0, 1, 0, - Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text)); - std::unique_ptr<RawSection> debug_info_oat_patches(new RawSection( - ".debug_info.oat_patches", SHT_OAT_PATCH)); - std::unique_ptr<RawSection> debug_abbrev(new RawSection( - ".debug_abbrev", SHT_PROGBITS)); - std::unique_ptr<RawSection> debug_str(new RawSection( - ".debug_str", SHT_PROGBITS)); - std::unique_ptr<RawSection> debug_line(new RawSection( - ".debug_line", SHT_PROGBITS, 0, nullptr, 0, 1, 0, - Patch<Elf_Addr, uint32_t, kAbsoluteAddress>, text)); - std::unique_ptr<RawSection> debug_line_oat_patches(new RawSection( - ".debug_line.oat_patches", SHT_OAT_PATCH)); - if (!oat_writer->GetMethodDebugInfo().empty()) { - if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) { - // Generate CFI (stack unwinding information). - if (kCFIFormat == dwarf::DW_EH_FRAME_FORMAT) { - dwarf::WriteCFISection( - compiler_driver_, oat_writer, - dwarf::DW_EH_PE_pcrel, kCFIFormat, - eh_frame->GetBuffer(), eh_frame->GetPatchLocations(), - eh_frame_hdr->GetBuffer(), eh_frame_hdr->GetPatchLocations()); - builder->RegisterSection(eh_frame.get()); - builder->RegisterSection(eh_frame_hdr.get()); - } else { - DCHECK(kCFIFormat == dwarf::DW_DEBUG_FRAME_FORMAT); - dwarf::WriteCFISection( - compiler_driver_, oat_writer, - dwarf::DW_EH_PE_absptr, kCFIFormat, - debug_frame->GetBuffer(), debug_frame->GetPatchLocations(), - nullptr, nullptr); - builder->RegisterSection(debug_frame.get()); - EncodeOatPatches(*debug_frame->GetPatchLocations(), - debug_frame_oat_patches->GetBuffer()); - builder->RegisterSection(debug_frame_oat_patches.get()); - } + if (compiler_driver_->GetCompilerOptions().GetGenerateDebugInfo()) { + const auto& method_infos = oat_writer->GetMethodDebugInfo(); + if (!method_infos.empty()) { // Add methods to .symtab. WriteDebugSymbols(builder.get(), oat_writer); - // Generate DWARF .debug_* sections. - dwarf::WriteDebugSections( - compiler_driver_, oat_writer, - debug_info->GetBuffer(), debug_info->GetPatchLocations(), - debug_abbrev->GetBuffer(), - debug_str->GetBuffer(), - debug_line->GetBuffer(), debug_line->GetPatchLocations()); - builder->RegisterSection(debug_info.get()); - EncodeOatPatches(*debug_info->GetPatchLocations(), - debug_info_oat_patches->GetBuffer()); - builder->RegisterSection(debug_info_oat_patches.get()); - builder->RegisterSection(debug_abbrev.get()); - builder->RegisterSection(debug_str.get()); - builder->RegisterSection(debug_line.get()); - EncodeOatPatches(*debug_line->GetPatchLocations(), - debug_line_oat_patches->GetBuffer()); - builder->RegisterSection(debug_line_oat_patches.get()); + // Generate CFI (stack unwinding information). + dwarf::WriteCFISection(builder.get(), method_infos, kCFIFormat); + // Write DWARF .debug_* sections. + dwarf::WriteDebugSections(builder.get(), method_infos); } } // Add relocation section for .text. - std::unique_ptr<RawSection> text_oat_patches(new RawSection( - ".text.oat_patches", SHT_OAT_PATCH)); if (compiler_driver_->GetCompilerOptions().GetIncludePatchInformation()) { // Note that ElfWriter::Fixup will be called regardless and therefore // we need to include oat_patches for debug sections unconditionally. 
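+    // WritePatches (below) encodes the sorted patch locations as a ULEB128
+    // list of deltas between consecutive addresses, e.g.
+    // {0x10, 0x18, 0x125} -> deltas {0x10, 0x08, 0x10D} -> bytes 10 08 8D 02.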
-    EncodeOatPatches(oat_writer->GetAbsolutePatchLocations(),
-                     text_oat_patches->GetBuffer());
-    builder->RegisterSection(text_oat_patches.get());
+    builder->WritePatches(".text.oat_patches", &oat_writer->GetAbsolutePatchLocations());
  }

-  return builder->Write(elf_file_);
+  builder->End();
+
+  return builder->Good() && output_stream->Flush();
}

template <typename ElfTypes>
static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writer) {
  const std::vector<OatWriter::DebugInfo>& method_info = oat_writer->GetMethodDebugInfo();
  bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_info.empty()) {
+    return;
+  }

  // Find all addresses (low_pc) which contain deduped methods.
  // The first instance of a method is not marked deduped_, but the rest are.
@@ -264,7 +152,8 @@ static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder, OatWriter* oat_writ
    }
  }

-  auto* symtab = builder->GetSymtab();
+  strtab->Start();
+  strtab->Write("");  // strtab should start with an empty string.
  for (auto it = method_info.begin(); it != method_info.end(); ++it) {
    if (it->deduped_) {
      continue;  // Add symbol only for the first instance.
@@ -277,8 +166,8 @@
    uint32_t low_pc = it->low_pc_;
    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
    low_pc += it->compiled_method_->CodeDelta();
-    symtab->AddSymbol(name, builder->GetText(), low_pc,
-                      true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);
+    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
+                true, it->high_pc_ - it->low_pc_, STB_GLOBAL, STT_FUNC);

    // Conforming to AAELF, add $t mapping symbol to indicate start of a sequence of thumb2
    // instructions, so that disassembler tools can correctly disassemble.
@@ -286,12 +175,19 @@
    // requires it to match function symbol.  Just address 0 does not work.
    if (it->compiled_method_->GetInstructionSet() == kThumb2) {
      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->AddSymbol("$t", builder->GetText(), it->low_pc_ & ~1, true,
-                          0, STB_LOCAL, STT_NOTYPE);
+        symtab->Add(strtab->Write("$t"), builder->GetText(), it->low_pc_ & ~1,
+                    true, 0, STB_LOCAL, STT_NOTYPE);
        generated_mapping_symbol = true;
      }
    }
  }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
}

// Explicit instantiations
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index ccf34b816b..b413a9eb7b 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -21,6 +21,7 @@
#include "common_compiler_test.h"
#include "elf_file.h"
#include "elf_file_impl.h"
+#include "elf_builder.h"
#include "elf_writer_quick.h"
#include "oat.h"
#include "utils.h"
@@ -100,7 +101,7 @@ TEST_F(ElfWriterTest, EncodeDecodeOatPatches) {

  // Encode patch locations.
  std::vector<uint8_t> oat_patches;
-  ElfWriterQuick32::EncodeOatPatches(patch_locations, &oat_patches);
+  ElfBuilder<ElfTypes32>::EncodeOatPatches(patch_locations, &oat_patches);

  // Create buffer to be patched.
std::vector<uint8_t> initial_data(256); diff --git a/compiler/image_test.cc b/compiler/image_test.cc index fd6cd82f7c..a38e1f54c0 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -64,8 +64,10 @@ TEST_F(ImageTest, WriteRead) { ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str())); const uintptr_t requested_image_base = ART_BASE_ADDRESS; - std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_, requested_image_base, - /*compile_pic*/false)); + std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_, + requested_image_base, + /*compile_pic*/false, + /*compile_app_image*/false)); // TODO: compile_pic should be a test argument. { { @@ -81,8 +83,15 @@ TEST_F(ImageTest, WriteRead) { t.NewTiming("WriteElf"); SafeMap<std::string, std::string> key_value_store; - OatWriter oat_writer(class_linker->GetBootClassPath(), 0, 0, 0, compiler_driver_.get(), - writer.get(), &timings, &key_value_store); + OatWriter oat_writer(class_linker->GetBootClassPath(), + 0, + 0, + 0, + compiler_driver_.get(), + writer.get(), + /*compiling_boot_image*/true, + &timings, + &key_value_store); bool success = writer->PrepareImageAddressSpace() && compiler_driver_->WriteElf(GetTestAndroidRoot(), !kIsTargetBuild, diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index af2a4f9426..0c85323805 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -20,6 +20,7 @@ #include <memory> #include <numeric> +#include <unordered_set> #include <vector> #include "art_field-inl.h" @@ -72,6 +73,27 @@ namespace art { // Separate objects into multiple bins to optimize dirty memory use. static constexpr bool kBinObjects = true; +// Return true if an object is already in an image space. +bool ImageWriter::IsInBootImage(const void* obj) const { + if (!compile_app_image_) { + DCHECK(boot_image_space_ == nullptr); + return false; + } + const uint8_t* image_begin = boot_image_space_->Begin(); + // Real image end including ArtMethods and ArtField sections. + const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize(); + return image_begin <= obj && obj < image_end; +} + +bool ImageWriter::IsInBootOatFile(const void* ptr) const { + if (!compile_app_image_) { + DCHECK(boot_image_space_ == nullptr); + return false; + } + const ImageHeader& image_header = boot_image_space_->GetImageHeader(); + return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd(); +} + static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) SHARED_REQUIRES(Locks::mutator_lock_) { Class* klass = obj->GetClass(); @@ -85,12 +107,20 @@ static void CheckNoDexObjects() { bool ImageWriter::PrepareImageAddressSpace() { target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet()); + gc::Heap* const heap = Runtime::Current()->GetHeap(); + // Cache boot image space. + for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) { + if (space->IsImageSpace()) { + CHECK(compile_app_image_); + CHECK(boot_image_space_ == nullptr) << "Multiple image spaces"; + boot_image_space_ = space->AsImageSpace(); + } + } { ScopedObjectAccess soa(Thread::Current()); PruneNonImageClasses(); // Remove junk ComputeLazyFieldsForImageClasses(); // Add useful information } - gc::Heap* heap = Runtime::Current()->GetHeap(); heap->CollectGarbage(false); // Remove garbage. // Dex caches must not have their dex fields set in the image. 
These are memory buffers of mapped @@ -144,21 +174,21 @@ bool ImageWriter::Write(int image_fd, Runtime::Current()->GetOatFileManager().RegisterOatFile( std::unique_ptr<const OatFile>(oat_file_)); - interpreter_to_interpreter_bridge_offset_ = - oat_file_->GetOatHeader().GetInterpreterToInterpreterBridgeOffset(); - interpreter_to_compiled_code_bridge_offset_ = - oat_file_->GetOatHeader().GetInterpreterToCompiledCodeBridgeOffset(); - - jni_dlsym_lookup_offset_ = oat_file_->GetOatHeader().GetJniDlsymLookupOffset(); - - quick_generic_jni_trampoline_offset_ = - oat_file_->GetOatHeader().GetQuickGenericJniTrampolineOffset(); - quick_imt_conflict_trampoline_offset_ = - oat_file_->GetOatHeader().GetQuickImtConflictTrampolineOffset(); - quick_resolution_trampoline_offset_ = - oat_file_->GetOatHeader().GetQuickResolutionTrampolineOffset(); - quick_to_interpreter_bridge_offset_ = - oat_file_->GetOatHeader().GetQuickToInterpreterBridgeOffset(); + const OatHeader& oat_header = oat_file_->GetOatHeader(); + oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] = + oat_header.GetInterpreterToInterpreterBridgeOffset(); + oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] = + oat_header.GetInterpreterToCompiledCodeBridgeOffset(); + oat_address_offsets_[kOatAddressJNIDlsymLookup] = + oat_header.GetJniDlsymLookupOffset(); + oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] = + oat_header.GetQuickGenericJniTrampolineOffset(); + oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] = + oat_header.GetQuickImtConflictTrampolineOffset(); + oat_address_offsets_[kOatAddressQuickResolutionTrampoline] = + oat_header.GetQuickResolutionTrampolineOffset(); + oat_address_offsets_[kOatAddressQuickToInterpreterBridge] = + oat_header.GetQuickToInterpreterBridgeOffset(); size_t oat_loaded_size = 0; size_t oat_data_offset = 0; @@ -307,7 +337,7 @@ void ImageWriter::PrepareDexCacheArraySlots() { for (jobject weak_root : class_linker->GetDexCaches()) { mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root)); - if (dex_cache == nullptr) { + if (dex_cache == nullptr || IsInBootImage(dex_cache)) { continue; } const DexFile* dex_file = dex_cache->GetDexFile(); @@ -331,6 +361,7 @@ void ImageWriter::PrepareDexCacheArraySlots() { void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) { if (array != nullptr) { + DCHECK(!IsInBootImage(array)); native_object_relocations_.emplace( array, NativeObjectRelocation { offset, kNativeObjectRelocationTypeDexCacheArray }); @@ -344,8 +375,8 @@ void ImageWriter::AddMethodPointerArray(mirror::PointerArray* arr) { auto* method = arr->GetElementPtrSize<ArtMethod*>(i, target_ptr_size_); if (method != nullptr && !method->IsRuntimeMethod()) { auto* klass = method->GetDeclaringClass(); - CHECK(klass == nullptr || IsImageClass(klass)) << PrettyClass(klass) - << " should be an image class"; + CHECK(klass == nullptr || KeepClass(klass)) + << PrettyClass(klass) << " should be a kept class"; } } } @@ -539,10 +570,66 @@ void ImageWriter::ComputeLazyFieldsForImageClasses() { class_linker->VisitClassesWithoutClassesLock(&visitor); } -bool ImageWriter::IsImageClass(Class* klass) { +static bool IsBootClassLoaderClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { + return klass->GetClassLoader() == nullptr; +} + +bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) { + return IsBootClassLoaderClass(klass) && !IsInBootImage(klass); +} + +bool 
ImageWriter::ContainsBootClassLoaderNonImageClass(mirror::Class* klass) {
+  if (klass == nullptr) {
+    return false;
+  }
+  auto found = prune_class_memo_.find(klass);
+  if (found != prune_class_memo_.end()) {
+    // Already computed, return the found value.
+    return found->second;
+  }
+  // Placeholder value to prevent infinite recursion.
+  prune_class_memo_.emplace(klass, false);
+  bool result = IsBootClassLoaderNonImageClass(klass);
+  if (!result) {
+    // Check interfaces since these won't be visited through VisitReferences.
+    mirror::IfTable* if_table = klass->GetIfTable();
+    for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+      result = result || ContainsBootClassLoaderNonImageClass(if_table->GetInterface(i));
+    }
+  }
+  // Check static fields and their classes.
+  size_t num_static_fields = klass->NumReferenceStaticFields();
+  if (num_static_fields != 0 && klass->IsResolved()) {
+    // Presumably GC can happen while we are cross compiling; doing the
+    // pointer-size logic here should not cause performance problems.
+    MemberOffset field_offset = klass->GetFirstReferenceStaticFieldOffset(
+        Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+    for (size_t i = 0u; i < num_static_fields; ++i) {
+      mirror::Object* ref = klass->GetFieldObject<mirror::Object>(field_offset);
+      if (ref != nullptr) {
+        if (ref->IsClass()) {
+          result = result || ContainsBootClassLoaderNonImageClass(ref->AsClass());
+        }
+        result = result || ContainsBootClassLoaderNonImageClass(ref->GetClass());
+      }
+      field_offset = MemberOffset(field_offset.Uint32Value() +
+                                  sizeof(mirror::HeapReference<mirror::Object>));
+    }
+  }
+  result = result || ContainsBootClassLoaderNonImageClass(klass->GetSuperClass());
+  prune_class_memo_[klass] = result;
+  return result;
+}
+
+bool ImageWriter::KeepClass(Class* klass) {
  if (klass == nullptr) {
    return false;
  }
+  if (compile_app_image_) {
+    // For app images, we need to prune boot loader classes that are not in the boot image since
+    // these may have already been loaded when the app image is loaded.
+    return !ContainsBootClassLoaderNonImageClass(klass);
+  }
  std::string temp;
  return compiler_driver_.IsImageClass(klass->GetDescriptor(&temp));
}
@@ -552,21 +639,17 @@ class NonImageClassesVisitor : public ClassVisitor {
  explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}

  bool Visit(Class* klass) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) {
-    if (!image_writer_->IsImageClass(klass)) {
-      std::string temp;
-      non_image_classes_.insert(klass->GetDescriptor(&temp));
+    if (!image_writer_->KeepClass(klass)) {
+      classes_to_prune_.insert(klass);
    }
    return true;
  }

-  std::set<std::string> non_image_classes_;
+  std::unordered_set<mirror::Class*> classes_to_prune_;

  ImageWriter* const image_writer_;
};

void ImageWriter::PruneNonImageClasses() {
-  if (compiler_driver_.GetImageClasses() == nullptr) {
-    return;
-  }
  Runtime* runtime = Runtime::Current();
  ClassLinker* class_linker = runtime->GetClassLinker();
  Thread* self = Thread::Current();
@@ -576,8 +659,14 @@ void ImageWriter::PruneNonImageClasses() {
  class_linker->VisitClasses(&visitor);

  // Remove the undesired classes from the class roots.
- for (const std::string& it : visitor.non_image_classes_) { - bool result = class_linker->RemoveClass(it.c_str(), nullptr); + for (mirror::Class* klass : visitor.classes_to_prune_) { + std::string temp; + const char* name = klass->GetDescriptor(&temp); + VLOG(compiler) << "Pruning class " << name; + if (!compile_app_image_) { + DCHECK(IsBootClassLoaderClass(klass)); + } + bool result = class_linker->RemoveClass(name, klass->GetClassLoader()); DCHECK(result); } @@ -594,7 +683,7 @@ void ImageWriter::PruneNonImageClasses() { } for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) { Class* klass = dex_cache->GetResolvedType(i); - if (klass != nullptr && !IsImageClass(klass)) { + if (klass != nullptr && !KeepClass(klass)) { dex_cache->SetResolvedType(i, nullptr); } } @@ -607,7 +696,7 @@ void ImageWriter::PruneNonImageClasses() { // Miranda methods may be held live by a class which was not an image class but have a // declaring class which is an image class. Set it to the resolution method to be safe and // prevent dangling pointers. - if (method->IsMiranda() || !IsImageClass(declaring_class)) { + if (method->IsMiranda() || !KeepClass(declaring_class)) { mirror::DexCache::SetElementPtrSize(resolved_methods, i, resolution_method, @@ -621,7 +710,7 @@ void ImageWriter::PruneNonImageClasses() { } for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) { ArtField* field = dex_cache->GetResolvedField(i, target_ptr_size_); - if (field != nullptr && !IsImageClass(field->GetDeclaringClass())) { + if (field != nullptr && !KeepClass(field->GetDeclaringClass())) { dex_cache->SetResolvedField(i, nullptr, target_ptr_size_); } } @@ -632,6 +721,9 @@ void ImageWriter::PruneNonImageClasses() { // Drop the array class cache in the ClassLinker, as these are roots holding those classes live. class_linker->DropFindArrayClassCache(); + + // Clear to save RAM. + prune_class_memo_.clear(); } void ImageWriter::CheckNonImageClassesRemoved() { @@ -643,13 +735,13 @@ void ImageWriter::CheckNonImageClassesRemoved() { void ImageWriter::CheckNonImageClassesRemovedCallback(Object* obj, void* arg) { ImageWriter* image_writer = reinterpret_cast<ImageWriter*>(arg); - if (obj->IsClass()) { + if (obj->IsClass() && !image_writer->IsInBootImage(obj)) { Class* klass = obj->AsClass(); - if (!image_writer->IsImageClass(klass)) { + if (!image_writer->KeepClass(klass)) { image_writer->DumpImageClasses(); std::string temp; - CHECK(image_writer->IsImageClass(klass)) << klass->GetDescriptor(&temp) - << " " << PrettyDescriptor(klass); + CHECK(image_writer->KeepClass(klass)) << klass->GetDescriptor(&temp) + << " " << PrettyDescriptor(klass); } } } @@ -703,25 +795,35 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots() const { // ObjectArray, we lock the dex lock twice, first to get the number // of dex caches first and then lock it again to copy the dex // caches. We check that the number of dex caches does not change. - size_t dex_cache_count; + size_t dex_cache_count = 0; { ReaderMutexLock mu(self, *class_linker->DexLock()); - dex_cache_count = class_linker->GetDexCacheCount(); + // Count number of dex caches not in the boot image. + for (jobject weak_root : class_linker->GetDexCaches()) { + mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root)); + dex_cache_count += IsInBootImage(dex_cache) ? 
0u : 1u; + } } Handle<ObjectArray<Object>> dex_caches( - hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), - dex_cache_count))); + hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), dex_cache_count))); CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array."; { ReaderMutexLock mu(self, *class_linker->DexLock()); - CHECK_EQ(dex_cache_count, class_linker->GetDexCacheCount()) - << "The number of dex caches changed."; + size_t non_image_dex_caches = 0; + // Re-count number of non image dex caches. + for (jobject weak_root : class_linker->GetDexCaches()) { + mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root)); + non_image_dex_caches += IsInBootImage(dex_cache) ? 0u : 1u; + } + CHECK_EQ(dex_cache_count, non_image_dex_caches) + << "The number of non-image dex caches changed."; size_t i = 0; for (jobject weak_root : class_linker->GetDexCaches()) { - mirror::DexCache* dex_cache = - down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root)); - dex_caches->Set<false>(i, dex_cache); - ++i; + mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(weak_root)); + if (!IsInBootImage(dex_cache)) { + dex_caches->Set<false>(i, dex_cache); + ++i; + } } } @@ -761,6 +863,10 @@ void ImageWriter::WalkInstanceFields(mirror::Object* obj, mirror::Class* klass) // For an unvisited object, visit it then all its children found via fields. void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { + if (IsInBootImage(obj)) { + // Object is in the image, don't need to fix it up. + return; + } // Use our own visitor routine (instead of GC visitor) to get better locality between // an object and its fields if (!IsImageBinSlotAssigned(obj)) { @@ -797,6 +903,7 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields << " already forwarded"; size_t& offset = bin_slot_sizes_[kBinArtField]; + DCHECK(!IsInBootImage(cur_fields)); native_object_relocations_.emplace( cur_fields, NativeObjectRelocation { offset, kNativeObjectRelocationTypeArtFieldArray }); @@ -808,6 +915,7 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { auto it2 = native_object_relocations_.find(field); CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i << " already assigned " << PrettyField(field) << " static=" << field->IsStatic(); + DCHECK(!IsInBootImage(field)); native_object_relocations_.emplace( field, NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField }); offset += sizeof(ArtField); @@ -843,6 +951,7 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { CHECK(it == native_object_relocations_.end()) << "Method array " << array << " already forwarded"; size_t& offset = bin_slot_sizes_[bin_type]; + DCHECK(!IsInBootImage(array)); native_object_relocations_.emplace(array, NativeObjectRelocation { offset, any_dirty ? 
kNativeObjectRelocationTypeArtMethodArrayDirty : kNativeObjectRelocationTypeArtMethodArrayClean }); @@ -867,6 +976,7 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) { + DCHECK(!IsInBootImage(method)); auto it = native_object_relocations_.find(method); CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned " << PrettyMethod(method); @@ -884,10 +994,13 @@ void ImageWriter::WalkFieldsCallback(mirror::Object* obj, void* arg) { void ImageWriter::UnbinObjectsIntoOffsetCallback(mirror::Object* obj, void* arg) { ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg); DCHECK(writer != nullptr); - writer->UnbinObjectsIntoOffset(obj); + if (!writer->IsInBootImage(obj)) { + writer->UnbinObjectsIntoOffset(obj); + } } void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) { + DCHECK(!IsInBootImage(obj)); CHECK(obj != nullptr); // We know the bin slot, and the total bin sizes for all objects by now, @@ -925,13 +1038,15 @@ void ImageWriter::CalculateNewObjectOffsets() { image_methods_[ImageHeader::kRefsAndArgsSaveMethod] = runtime->GetCalleeSaveMethod(Runtime::kRefsAndArgs); - // Add room for fake length prefixed array. + // Add room for fake length prefixed array for holding the image methods. const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean; auto it = native_object_relocations_.find(&image_method_array_); CHECK(it == native_object_relocations_.end()); size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)]; - native_object_relocations_.emplace(&image_method_array_, - NativeObjectRelocation { offset, image_method_type }); + if (!compile_app_image_) { + native_object_relocations_.emplace(&image_method_array_, + NativeObjectRelocation { offset, image_method_type }); + } size_t method_alignment = ArtMethod::Alignment(target_ptr_size_); const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize( 0, ArtMethod::Size(target_ptr_size_), method_alignment); @@ -940,7 +1055,10 @@ void ImageWriter::CalculateNewObjectOffsets() { for (auto* m : image_methods_) { CHECK(m != nullptr); CHECK(m->IsRuntimeMethod()); - AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean); + DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image"; + if (!IsInBootImage(m)) { + AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean); + } } // Calculate size of the dex cache arrays slot and prepare offsets. 
PrepareDexCacheArraySlots(); @@ -1090,6 +1208,7 @@ void ImageWriter::CopyAndFixupNativeData() { NativeObjectRelocation& relocation = pair.second; auto* dest = image_->Begin() + relocation.offset; DCHECK_GE(dest, image_->Begin() + image_end_); + DCHECK(!IsInBootImage(pair.first)); switch (relocation.type) { case kNativeObjectRelocationTypeArtField: { memcpy(dest, pair.first, sizeof(ArtField)); @@ -1126,16 +1245,18 @@ auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); const ImageSection& methods_section = image_header->GetMethodsSection(); for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { - auto* m = image_methods_[i]; - CHECK(m != nullptr); - auto it = native_object_relocations_.find(m); - CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(m); - NativeObjectRelocation& relocation = it->second; - CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in " - << methods_section; - CHECK(relocation.IsArtMethodRelocation()) << relocation.type; - auto* dest = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); - image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), dest); + ArtMethod* method = image_methods_[i]; + CHECK(method != nullptr); + if (!IsInBootImage(method)) { + auto it = native_object_relocations_.find(method); + CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(method); + NativeObjectRelocation& relocation = it->second; + CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in " + << methods_section; + CHECK(relocation.IsArtMethodRelocation()) << relocation.type; + method = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); + } + image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method); } // Write the intern table into the image.
const ImageSection& intern_table_section = image_header->GetImageSection( @@ -1183,8 +1304,8 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a dst->SetClass(GetImageAddress(arr->GetClass())); auto* dest_array = down_cast<mirror::PointerArray*>(dst); for (size_t i = 0, count = num_elements; i < count; ++i) { - auto* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_); - if (elem != nullptr) { + void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_); + if (elem != nullptr && !IsInBootImage(elem)) { auto it = native_object_relocations_.find(elem); if (UNLIKELY(it == native_object_relocations_.end())) { if (it->second.IsArtMethodRelocation()) { @@ -1209,6 +1330,9 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } void ImageWriter::CopyAndFixupObject(Object* obj) { + if (IsInBootImage(obj)) { + return; + } size_t offset = GetImageOffset(obj); auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset); DCHECK_LT(offset, image_end_); @@ -1282,18 +1406,19 @@ class FixupClassVisitor FINAL : public FixupVisitor { uintptr_t ImageWriter::NativeOffsetInImage(void* obj) { DCHECK(obj != nullptr); + DCHECK(!IsInBootImage(obj)); auto it = native_object_relocations_.find(obj); - CHECK(it != native_object_relocations_.end()) << obj; + CHECK(it != native_object_relocations_.end()) << obj << " spaces " + << Runtime::Current()->GetHeap()->DumpSpaces(); const NativeObjectRelocation& relocation = it->second; return relocation.offset; } template <typename T> T* ImageWriter::NativeLocationInImage(T* obj) { - if (obj == nullptr) { - return nullptr; - } - return reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj)); + return (obj == nullptr || IsInBootImage(obj)) + ? obj + : reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj)); } void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) { @@ -1306,18 +1431,22 @@ void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) { // Update dex cache strings. copy->SetDexCacheStrings(NativeLocationInImage(orig->GetDexCacheStrings())); // Fix up embedded tables. - if (orig->ShouldHaveEmbeddedImtAndVTable()) { - for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) { - auto it = native_object_relocations_.find(orig->GetEmbeddedVTableEntry(i, target_ptr_size_)); - CHECK(it != native_object_relocations_.end()) << PrettyClass(orig); - copy->SetEmbeddedVTableEntryUnchecked( - i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_); - } - for (size_t i = 0; i < mirror::Class::kImtSize; ++i) { - auto it = native_object_relocations_.find(orig->GetEmbeddedImTableEntry(i, target_ptr_size_)); - CHECK(it != native_object_relocations_.end()) << PrettyClass(orig); - copy->SetEmbeddedImTableEntry( - i, reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset), target_ptr_size_); + if (!orig->IsTemp()) { + // TODO: Why do we have temp classes in some cases? 
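The GetImageAddress and NativeLocationInImage hunks above both encode the same app-image relocation rule: a pointer that already targets the immutable boot image is returned unchanged, and anything else is rebased from its current address onto the image being written. A minimal stand-alone model of that rule, with all names illustrative rather than ART API:

    #include <cstdint>

    struct Range { uintptr_t begin; uintptr_t end; };  // [begin, end)

    inline bool InRange(const Range& r, const void* p) {
      uintptr_t v = reinterpret_cast<uintptr_t>(p);
      return r.begin <= v && v < r.end;
    }

    // Rebase obj by delta unless it already lives in the (shared, immutable)
    // boot image; null is passed through untouched.
    template <typename T>
    T* Relocate(T* obj, const Range& boot_image, uintptr_t delta) {
      return (obj == nullptr || InRange(boot_image, obj))
          ? obj
          : reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(obj) + delta);
    }

This is why IsInBootImage checks appear at every fixup site in this patch: boot-image objects are mapped at the same address in every process, so only the app image's own objects need forwarding.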
+ if (orig->ShouldHaveEmbeddedImtAndVTable()) { + for (int32_t i = 0; i < orig->GetEmbeddedVTableLength(); ++i) { + ArtMethod* orig_method = orig->GetEmbeddedVTableEntry(i, target_ptr_size_); + copy->SetEmbeddedVTableEntryUnchecked( + i, + NativeLocationInImage(orig_method), + target_ptr_size_); + } + for (size_t i = 0; i < mirror::Class::kImtSize; ++i) { + copy->SetEmbeddedImTableEntry( + i, + NativeLocationInImage(orig->GetEmbeddedImTableEntry(i, target_ptr_size_)), + target_ptr_size_); + } } } FixupClassVisitor visitor(this, copy); @@ -1419,7 +1548,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, reinterpret_cast<ArtMethod**>(image_->Begin() + copy_methods_offset); for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) { ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_); - ArtMethod* copy = NativeLocationInImage(orig); + ArtMethod* copy = IsInBootImage(orig) ? orig : NativeLocationInImage(orig); mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_); } } @@ -1432,15 +1561,51 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, ArtField** copy_fields = reinterpret_cast<ArtField**>(image_->Begin() + copy_fields_offset); for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) { ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_); - ArtField* copy = NativeLocationInImage(orig); + ArtField* copy = IsInBootImage(orig) ? orig : NativeLocationInImage(orig); mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_); } } } +const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const { + DCHECK_LT(type, kOatAddressCount); + // If we are compiling an app image, we need to use the stubs of the boot image. + if (compile_app_image_) { + // Use the current image pointers. + gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetImageSpace(); + DCHECK(image_space != nullptr); + const OatFile* oat_file = image_space->GetOatFile(); + CHECK(oat_file != nullptr); + const OatHeader& header = oat_file->GetOatHeader(); + switch (type) { + // TODO: We could maybe clean this up if we stored them in an array in the oat header. 
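The TODO just above suggests replacing this switch with a table lookup. A sketch of that shape, under the assumption that the oat header grows an indexable trampoline table (the accessor below does not exist; it is the thing the TODO proposes adding):

    // Hypothetical: OatHeader keeps its trampoline entry points in an array
    // indexed by OatAddress, collapsing the per-type getters and this switch.
    const uint8_t* GetBootOatAddress(const OatHeader& header, OatAddress type) {
      DCHECK_LT(static_cast<size_t>(type), static_cast<size_t>(kOatAddressCount));
      return header.GetTrampolineTable()[type];  // assumed accessor
    }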
+ case kOatAddressQuickGenericJNITrampoline: + return static_cast<const uint8_t*>(header.GetQuickGenericJniTrampoline()); + case kOatAddressInterpreterToInterpreterBridge: + return static_cast<const uint8_t*>(header.GetInterpreterToInterpreterBridge()); + case kOatAddressInterpreterToCompiledCodeBridge: + return static_cast<const uint8_t*>(header.GetInterpreterToCompiledCodeBridge()); + case kOatAddressJNIDlsymLookup: + return static_cast<const uint8_t*>(header.GetJniDlsymLookup()); + case kOatAddressQuickIMTConflictTrampoline: + return static_cast<const uint8_t*>(header.GetQuickImtConflictTrampoline()); + case kOatAddressQuickResolutionTrampoline: + return static_cast<const uint8_t*>(header.GetQuickResolutionTrampoline()); + case kOatAddressQuickToInterpreterBridge: + return static_cast<const uint8_t*>(header.GetQuickToInterpreterBridge()); + default: + UNREACHABLE(); + } + } + return GetOatAddressForOffset(oat_address_offsets_[type]); +} + const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) { - DCHECK(!method->IsResolutionMethod() && !method->IsImtConflictMethod() && - !method->IsImtUnimplementedMethod() && !method->IsAbstract()) << PrettyMethod(method); + DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method); + DCHECK(!method->IsImtConflictMethod()) << PrettyMethod(method); + DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method); + DCHECK(!method->IsAbstract()) << PrettyMethod(method); + DCHECK(!IsInBootImage(method)) << PrettyMethod(method); // Use original code if it exists. Otherwise, set the code pointer to the resolution // trampoline. @@ -1448,27 +1613,26 @@ const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_inter // Quick entrypoint: uint32_t quick_oat_code_offset = PointerToLowMemUInt32( method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_)); - const uint8_t* quick_code = GetOatAddress(quick_oat_code_offset); + const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset); *quick_is_interpreted = false; if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) { // We have code for a non-static or initialized method, just use the code. - DCHECK_GE(quick_code, oat_data_begin_); } else if (quick_code == nullptr && method->IsNative() && (!method->IsStatic() || method->GetDeclaringClass()->IsInitialized())) { // Non-static or initialized native method missing compiled code, use generic JNI version. - quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_); - DCHECK_GE(quick_code, oat_data_begin_); + quick_code = GetOatAddress(kOatAddressQuickGenericJNITrampoline); } else if (quick_code == nullptr && !method->IsNative()) { // We don't have code at all for a non-native method, use the interpreter. - quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_); + quick_code = GetOatAddress(kOatAddressQuickToInterpreterBridge); *quick_is_interpreted = true; - DCHECK_GE(quick_code, oat_data_begin_); } else { CHECK(!method->GetDeclaringClass()->IsInitialized()); // We have code for a static method, but need to go through the resolution stub for class // initialization. 
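Condensed, GetQuickCode picks one of four entry points. The sketch below mirrors the if/else chain exactly (a decision table, not ART API; can_run means instance method, constructor, or declaring class already initialized):

    enum class EntryPoint {
      kCompiledCode, kGenericJniTrampoline, kInterpreterBridge, kResolutionTrampoline
    };

    EntryPoint ChooseQuickEntryPoint(bool has_code, bool is_native, bool can_run) {
      if (has_code && can_run) return EntryPoint::kCompiledCode;          // use the code
      if (!has_code && is_native && can_run) return EntryPoint::kGenericJniTrampoline;
      if (!has_code && !is_native) return EntryPoint::kInterpreterBridge;  // interpret it
      return EntryPoint::kResolutionTrampoline;  // run class init first, then retry
    }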
- quick_code = GetOatAddress(quick_resolution_trampoline_offset_); + quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline); + } + if (!IsInBootOatFile(quick_code)) { DCHECK_GE(quick_code, oat_data_begin_); } return quick_code; @@ -1479,16 +1643,16 @@ const uint8_t* ImageWriter::GetQuickEntryPoint(ArtMethod* method) { // The resolution method has a special trampoline to call. Runtime* runtime = Runtime::Current(); if (UNLIKELY(method == runtime->GetResolutionMethod())) { - return GetOatAddress(quick_resolution_trampoline_offset_); + return GetOatAddress(kOatAddressQuickResolutionTrampoline); } else if (UNLIKELY(method == runtime->GetImtConflictMethod() || method == runtime->GetImtUnimplementedMethod())) { - return GetOatAddress(quick_imt_conflict_trampoline_offset_); + return GetOatAddress(kOatAddressQuickIMTConflictTrampoline); } else { // We assume all methods have code. If they don't currently then we set them to use the // resolution trampoline. Abstract methods never have code and so we need to make sure their // use results in an AbstractMethodError. We use the interpreter to achieve this. if (UNLIKELY(method->IsAbstract())) { - return GetOatAddress(quick_to_interpreter_bridge_offset_); + return GetOatAddress(kOatAddressQuickToInterpreterBridge); } else { bool quick_is_interpreted; return GetQuickCode(method, &quick_is_interpreted); @@ -1513,11 +1677,11 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) { Runtime* runtime = Runtime::Current(); if (UNLIKELY(orig == runtime->GetResolutionMethod())) { copy->SetEntryPointFromQuickCompiledCodePtrSize( - GetOatAddress(quick_resolution_trampoline_offset_), target_ptr_size_); + GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_); } else if (UNLIKELY(orig == runtime->GetImtConflictMethod() || orig == runtime->GetImtUnimplementedMethod())) { copy->SetEntryPointFromQuickCompiledCodePtrSize( - GetOatAddress(quick_imt_conflict_trampoline_offset_), target_ptr_size_); + GetOatAddress(kOatAddressQuickIMTConflictTrampoline), target_ptr_size_); } else if (UNLIKELY(orig->IsRuntimeMethod())) { bool found_one = false; for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) { @@ -1535,7 +1699,7 @@ // use results in an AbstractMethodError. We use the interpreter to achieve this. if (UNLIKELY(orig->IsAbstract())) { copy->SetEntryPointFromQuickCompiledCodePtrSize( - GetOatAddress(quick_to_interpreter_bridge_offset_), target_ptr_size_); + GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_); } else { bool quick_is_interpreted; const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted); @@ -1546,7 +1710,7 @@ // The native method's pointer is set to a stub to look up via dlsym. // Note this is not the code_ pointer, that is handled above. copy->SetEntryPointFromJniPtrSize( - GetOatAddress(jni_dlsym_lookup_offset_), target_ptr_size_); + GetOatAddress(kOatAddressJNIDlsymLookup), target_ptr_size_); } } } diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 7a2febcea1..120de97620 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -40,27 +40,42 @@ #include "utils.h" namespace art { +namespace gc { +namespace space { +class ImageSpace; +} // namespace space +} // namespace gc static constexpr int kInvalidImageFd = -1; // Write a Space built during compilation for use during execution.
class ImageWriter FINAL { public: - ImageWriter(const CompilerDriver& compiler_driver, uintptr_t image_begin, - bool compile_pic) - : compiler_driver_(compiler_driver), image_begin_(reinterpret_cast<uint8_t*>(image_begin)), - image_end_(0), image_objects_offset_begin_(0), image_roots_address_(0), oat_file_(nullptr), - oat_data_begin_(nullptr), interpreter_to_interpreter_bridge_offset_(0), - interpreter_to_compiled_code_bridge_offset_(0), jni_dlsym_lookup_offset_(0), - quick_generic_jni_trampoline_offset_(0), - quick_imt_conflict_trampoline_offset_(0), quick_resolution_trampoline_offset_(0), - quick_to_interpreter_bridge_offset_(0), compile_pic_(compile_pic), + ImageWriter(const CompilerDriver& compiler_driver, + uintptr_t image_begin, + bool compile_pic, + bool compile_app_image) + : compiler_driver_(compiler_driver), + image_begin_(reinterpret_cast<uint8_t*>(image_begin)), + image_end_(0), + image_objects_offset_begin_(0), + image_roots_address_(0), + oat_file_(nullptr), + oat_data_begin_(nullptr), + compile_pic_(compile_pic), + compile_app_image_(compile_app_image), + boot_image_space_(nullptr), target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - bin_slot_sizes_(), bin_slot_offsets_(), bin_slot_count_(), - intern_table_bytes_(0u), image_method_array_(ImageHeader::kImageMethodsCount), - dirty_methods_(0u), clean_methods_(0u) { + bin_slot_sizes_(), + bin_slot_offsets_(), + bin_slot_count_(), + intern_table_bytes_(0u), + image_method_array_(ImageHeader::kImageMethodsCount), + dirty_methods_(0u), + clean_methods_(0u) { CHECK_NE(image_begin, 0U); - std::fill(image_methods_, image_methods_ + arraysize(image_methods_), nullptr); + std::fill_n(image_methods_, arraysize(image_methods_), nullptr); + std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0); } ~ImageWriter() { @@ -74,8 +89,9 @@ class ImageWriter FINAL { template <typename T> T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) { - return object == nullptr ? nullptr : - reinterpret_cast<T*>(image_begin_ + GetImageOffset(object)); + return (object == nullptr || IsInBootImage(object)) + ? object + : reinterpret_cast<T*>(image_begin_ + GetImageOffset(object)); } ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_); @@ -150,6 +166,19 @@ class ImageWriter FINAL { }; friend std::ostream& operator<<(std::ostream& stream, const NativeObjectRelocationType& type); + enum OatAddress { + kOatAddressInterpreterToInterpreterBridge, + kOatAddressInterpreterToCompiledCodeBridge, + kOatAddressJNIDlsymLookup, + kOatAddressQuickGenericJNITrampoline, + kOatAddressQuickIMTConflictTrampoline, + kOatAddressQuickResolutionTrampoline, + kOatAddressQuickToInterpreterBridge, + // Number of elements in the enum. + kOatAddressCount, + }; + friend std::ostream& operator<<(std::ostream& stream, const OatAddress& oat_address); + static constexpr size_t kBinBits = MinimumBitsToStore<uint32_t>(kBinMirrorCount - 1); // uint32 = typeof(lockword_) // Subtract read barrier bits since we want these to remain 0, or else it may result in DCHECK @@ -215,7 +244,10 @@ class ImageWriter FINAL { return reinterpret_cast<mirror::Object*>(dst); } - const uint8_t* GetOatAddress(uint32_t offset) const { + // Returns the address in the boot image if we are compiling the app image. 
+ const uint8_t* GetOatAddress(OatAddress type) const; + + const uint8_t* GetOatAddressForOffset(uint32_t offset) const { // With Quick, code is within the OatFile, as they are all in one // .o ELF object. DCHECK_LE(offset, oat_file_->Size()); @@ -224,7 +256,7 @@ } // Returns true if the class was in the original requested image classes list. - bool IsImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + bool KeepClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); // Debug aid that lists the requested image classes. void DumpImageClasses(); @@ -299,6 +331,11 @@ void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) SHARED_REQUIRES(Locks::mutator_lock_); + bool IsBootClassLoaderNonImageClass(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + + bool ContainsBootClassLoaderNonImageClass(mirror::Class* klass) + SHARED_REQUIRES(Locks::mutator_lock_); + static Bin BinTypeForNativeRelocationType(NativeObjectRelocationType type); uintptr_t NativeOffsetInImage(void* obj); @@ -306,6 +343,13 @@ template <typename T> T* NativeLocationInImage(T* obj); + // Return true if obj is inside of the boot image space. This may only return true if we are + // compiling an app image. + bool IsInBootImage(const void* obj) const; + + // Return true if ptr is within the boot oat file. + bool IsInBootOatFile(const void* ptr) const; + const CompilerDriver& compiler_driver_; // Beginning target image address for the output image. @@ -344,14 +388,14 @@ std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_; // Offset from oat_data_begin_ to the stubs. - uint32_t interpreter_to_interpreter_bridge_offset_; - uint32_t interpreter_to_compiled_code_bridge_offset_; - uint32_t jni_dlsym_lookup_offset_; - uint32_t quick_generic_jni_trampoline_offset_; - uint32_t quick_imt_conflict_trampoline_offset_; - uint32_t quick_resolution_trampoline_offset_; - uint32_t quick_to_interpreter_bridge_offset_; + uint32_t oat_address_offsets_[kOatAddressCount]; + + // Boolean flags. const bool compile_pic_; + const bool compile_app_image_; + + // Boot image space for fast lookups. + gc::space::ImageSpace* boot_image_space_; // Size of pointers on the target architecture. size_t target_ptr_size_; @@ -388,6 +432,10 @@ uint64_t dirty_methods_; uint64_t clean_methods_; + // Prune class memoization table. + std::unordered_map<mirror::Class*, bool> prune_class_memo_; + + friend class ContainsBootClassLoaderNonImageClassVisitor; friend class FixupClassVisitor; friend class FixupRootVisitor; friend class FixupVisitor; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index d520208d32..2125c9a26a 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -177,7 +177,8 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { } // Don't compile the method if we are supposed to be deoptimized. - if (runtime->GetInstrumentation()->AreAllMethodsDeoptimized()) { + instrumentation::Instrumentation* instrumentation = runtime->GetInstrumentation(); + if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) { return false; } @@ -189,13 +190,14 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { } // Do the compilation.
- CompiledMethod* compiled_method = nullptr; + JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); + bool success = false; { TimingLogger::ScopedTiming t2("Compiling", &logger); // If we get a request to compile a proxy method, we pass the actual Java method // of that proxy method, as the compiler does not expect a proxy method. ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); - compiled_method = compiler_driver_->CompileArtMethod(self, method_to_compile); + success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); } // Trim maps to reduce memory usage. @@ -205,105 +207,14 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { runtime->GetArenaPool()->TrimMaps(); } - // Check if we failed compiling. - if (compiled_method == nullptr) { - return false; - } - total_time_ += NanoTime() - start_time; - bool result = false; - const void* code = runtime->GetClassLinker()->GetOatMethodQuickCodeFor(method); - - if (code != nullptr) { - // Already have some compiled code, just use this instead of linking. - // TODO: Fix recompilation. - method->SetEntryPointFromQuickCompiledCode(code); - result = true; - } else { - TimingLogger::ScopedTiming t2("LinkCode", &logger); - if (AddToCodeCache(method, compiled_method)) { - result = true; - } - } - - // Remove the compiled method to save memory. - compiler_driver_->RemoveCompiledMethod( - MethodReference(h_class->GetDexCache()->GetDexFile(), method->GetDexMethodIndex())); runtime->GetJit()->AddTimingLogger(logger); - return result; + return success; } CompilerCallbacks* JitCompiler::GetCompilerCallbacks() const { return callbacks_.get(); } -bool JitCompiler::AddToCodeCache(ArtMethod* method, - const CompiledMethod* compiled_method) { - Runtime* runtime = Runtime::Current(); - JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); - auto const quick_code = compiled_method->GetQuickCode(); - if (quick_code.empty()) { - return false; - } - const auto code_size = quick_code.size(); - Thread* const self = Thread::Current(); - auto const mapping_table = compiled_method->GetMappingTable(); - auto const vmap_table = compiled_method->GetVmapTable(); - auto const gc_map = compiled_method->GetGcMap(); - uint8_t* mapping_table_ptr = nullptr; - uint8_t* vmap_table_ptr = nullptr; - uint8_t* gc_map_ptr = nullptr; - - if (!mapping_table.empty()) { - // Write out pre-header stuff. - mapping_table_ptr = code_cache->AddDataArray( - self, mapping_table.data(), mapping_table.data() + mapping_table.size()); - if (mapping_table_ptr == nullptr) { - return false; // Out of data cache. - } - } - - if (!vmap_table.empty()) { - vmap_table_ptr = code_cache->AddDataArray( - self, vmap_table.data(), vmap_table.data() + vmap_table.size()); - if (vmap_table_ptr == nullptr) { - return false; // Out of data cache. - } - } - - if (!gc_map.empty()) { - gc_map_ptr = code_cache->AddDataArray( - self, gc_map.data(), gc_map.data() + gc_map.size()); - if (gc_map_ptr == nullptr) { - return false; // Out of data cache. 
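The helper being deleted here is a two-phase commit: each side table (mapping table, vmap table, GC map) is reserved in the data half of the cache first, and only if all of them fit is the machine code itself committed; a nullptr from either half means the cache is full and the method simply keeps its current entry point. Reduced to its shape, with stand-in types rather than the real JitCodeCache interface:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Blob {
      const uint8_t* data;
      size_t size;
      bool empty() const { return size == 0u; }
    };

    // Illustrative stand-in for the two halves of a JIT cache.
    struct Cache {
      uint8_t* AddData(const Blob& b);  // returns nullptr when the data half is full
      uint8_t* CommitCode(const std::vector<uint8_t*>& tables, const Blob& code);
    };

    // Reserve side tables first, commit code last, and fail fast on either half.
    bool CommitToCache(Cache* cache, const std::vector<Blob>& tables, const Blob& code) {
      std::vector<uint8_t*> table_ptrs;
      for (const Blob& t : tables) {
        uint8_t* p = t.empty() ? nullptr : cache->AddData(t);
        if (!t.empty() && p == nullptr) {
          return false;  // out of data cache
        }
        table_ptrs.push_back(p);
      }
      return cache->CommitCode(table_ptrs, code) != nullptr;  // nullptr: out of code cache
    }

With JitCompile writing into the cache directly, this bookkeeping moves behind the Compiler interface and the driver-side copy disappears.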
- } - } - - uint8_t* const code = code_cache->CommitCode(self, - method, - mapping_table_ptr, - vmap_table_ptr, - gc_map_ptr, - compiled_method->GetFrameSizeInBytes(), - compiled_method->GetCoreSpillMask(), - compiled_method->GetFpSpillMask(), - compiled_method->GetQuickCode().data(), - compiled_method->GetQuickCode().size()); - - if (code == nullptr) { - return false; - } - - const size_t thumb_offset = compiled_method->CodeDelta(); - const uint32_t code_offset = sizeof(OatQuickMethodHeader) + thumb_offset; - VLOG(jit) - << "JIT added " - << PrettyMethod(method) << "@" << method - << " ccache_size=" << PrettySize(code_cache->CodeCacheSize()) << ": " - << reinterpret_cast<void*>(code + code_offset) - << "," << reinterpret_cast<void*>(code + code_offset + code_size); - return true; -} - } // namespace jit } // namespace art diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index ea3cb667e2..16f641ab56 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -16,6 +16,7 @@ #include "arch/instruction_set_features.h" #include "art_method-inl.h" +#include "base/unix_file/fd_file.h" #include "class_linker.h" #include "common_compiler_test.h" #include "compiled_method.h" @@ -37,6 +38,16 @@ namespace art { +NO_RETURN static void Usage(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + std::string error; + StringAppendV(&error, fmt, ap); + LOG(FATAL) << error; + va_end(ap); + UNREACHABLE(); +} + class OatTest : public CommonCompilerTest { protected: static const bool kCompile = false; // DISABLED_ due to the time to compile libcore @@ -71,6 +82,67 @@ class OatTest : public CommonCompilerTest { CHECK_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size)); } } + + void SetupCompiler(Compiler::Kind compiler_kind, + InstructionSet insn_set, + const std::vector<std::string>& compiler_options, + /*out*/std::string* error_msg) { + ASSERT_TRUE(error_msg != nullptr); + insn_features_.reset(InstructionSetFeatures::FromVariant(insn_set, "default", error_msg)); + ASSERT_TRUE(insn_features_ != nullptr) << error_msg; + compiler_options_.reset(new CompilerOptions); + for (const std::string& option : compiler_options) { + compiler_options_->ParseCompilerOption(option, Usage); + } + verification_results_.reset(new VerificationResults(compiler_options_.get())); + method_inliner_map_.reset(new DexFileToMethodInlinerMap); + callbacks_.reset(new QuickCompilerCallbacks(verification_results_.get(), + method_inliner_map_.get(), + CompilerCallbacks::CallbackMode::kCompileApp)); + Runtime::Current()->SetCompilerCallbacks(callbacks_.get()); + timer_.reset(new CumulativeLogger("Compilation times")); + compiler_driver_.reset(new CompilerDriver(compiler_options_.get(), + verification_results_.get(), + method_inliner_map_.get(), + compiler_kind, + insn_set, + insn_features_.get(), + false, + nullptr, + nullptr, + nullptr, + 2, + true, + true, + "", + false, + timer_.get(), + -1, + "")); + } + + bool WriteElf(File* file, + const std::vector<const DexFile*>& dex_files, + SafeMap<std::string, std::string>& key_value_store) { + TimingLogger timings("WriteElf", false, false); + OatWriter oat_writer(dex_files, + 42U, + 4096U, + 0, + compiler_driver_.get(), + nullptr, + /*compiling_boot_image*/false, + &timings, + &key_value_store); + return compiler_driver_->WriteElf(GetTestAndroidRoot(), + !kIsTargetBuild, + dex_files, + &oat_writer, + file); + } + + std::unique_ptr<const InstructionSetFeatures> insn_features_; + std::unique_ptr<QuickCompilerCallbacks> callbacks_; }; TEST_F(OatTest, WriteRead) { @@ -80,21 
+152,9 @@ TEST_F(OatTest, WriteRead) { // TODO: make selectable. Compiler::Kind compiler_kind = Compiler::kQuick; InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86; - std::string error_msg; - std::unique_ptr<const InstructionSetFeatures> insn_features( - InstructionSetFeatures::FromVariant(insn_set, "default", &error_msg)); - ASSERT_TRUE(insn_features.get() != nullptr) << error_msg; - compiler_options_.reset(new CompilerOptions); - verification_results_.reset(new VerificationResults(compiler_options_.get())); - method_inliner_map_.reset(new DexFileToMethodInlinerMap); - timer_.reset(new CumulativeLogger("Compilation times")); - compiler_driver_.reset(new CompilerDriver(compiler_options_.get(), - verification_results_.get(), - method_inliner_map_.get(), - compiler_kind, insn_set, - insn_features.get(), false, nullptr, nullptr, nullptr, - 2, true, true, "", false, timer_.get(), -1, "")); + SetupCompiler(compiler_kind, insn_set, std::vector<std::string>(), /*out*/ &error_msg); + jobject class_loader = nullptr; if (kCompile) { TimingLogger timings2("OatTest::WriteRead", false, false); @@ -105,19 +165,7 @@ TEST_F(OatTest, WriteRead) { ScratchFile tmp; SafeMap<std::string, std::string> key_value_store; key_value_store.Put(OatHeader::kImageLocationKey, "lue.art"); - OatWriter oat_writer(class_linker->GetBootClassPath(), - 42U, - 4096U, - 0, - compiler_driver_.get(), - nullptr, - &timings, - &key_value_store); - bool success = compiler_driver_->WriteElf(GetTestAndroidRoot(), - !kIsTargetBuild, - class_linker->GetBootClassPath(), - &oat_writer, - tmp.GetFile()); + bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store); ASSERT_TRUE(success); if (kCompile) { // OatWriter strips the code, regenerate to compare @@ -212,4 +260,53 @@ TEST_F(OatTest, OatHeaderIsValid) { ASSERT_FALSE(oat_header->IsValid()); } +TEST_F(OatTest, EmptyTextSection) { + TimingLogger timings("OatTest::EmptyTextSection", false, false); + + // TODO: make selectable. 
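Both oat tests carry a "make selectable" TODO for the backend. The conventional GTest answer is a value-parameterized fixture; a sketch follows (the fixture and instantiation are hypothetical, not part of this change, and assume the SetupCompiler helper introduced above):

    class OatBackendTest : public OatTest,
                           public ::testing::WithParamInterface<Compiler::Kind> {};

    TEST_P(OatBackendTest, EmptyTextSection) {
      std::string error_msg;
      std::vector<std::string> compiler_options;
      compiler_options.push_back("--compiler-filter=verify-at-runtime");
      SetupCompiler(GetParam(), kRuntimeISA, compiler_options, /*out*/ &error_msg);
      // ... rest of the test body as below ...
    }

    INSTANTIATE_TEST_CASE_P(Backends, OatBackendTest,
                            ::testing::Values(Compiler::kQuick, Compiler::kOptimizing));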
+ Compiler::Kind compiler_kind = Compiler::kQuick; + InstructionSet insn_set = kRuntimeISA; + if (insn_set == kArm) insn_set = kThumb2; + std::string error_msg; + std::vector<std::string> compiler_options; + compiler_options.push_back("--compiler-filter=verify-at-runtime"); + SetupCompiler(compiler_kind, insn_set, compiler_options, /*out*/ &error_msg); + + jobject class_loader; + { + ScopedObjectAccess soa(Thread::Current()); + class_loader = LoadDex("Main"); + } + ASSERT_TRUE(class_loader != nullptr); + std::vector<const DexFile*> dex_files = GetDexFiles(class_loader); + ASSERT_TRUE(!dex_files.empty()); + + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + for (const DexFile* dex_file : dex_files) { + ScopedObjectAccess soa(Thread::Current()); + class_linker->RegisterDexFile( + *dex_file, + class_linker->GetOrCreateAllocatorForClassLoader( + soa.Decode<mirror::ClassLoader*>(class_loader))); + } + compiler_driver_->SetDexFilesForOatFile(dex_files); + compiler_driver_->CompileAll(class_loader, dex_files, &timings); + + ScratchFile tmp; + SafeMap<std::string, std::string> key_value_store; + key_value_store.Put(OatHeader::kImageLocationKey, "test.art"); + bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store); + ASSERT_TRUE(success); + + std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(), + tmp.GetFilename(), + nullptr, + nullptr, + false, + nullptr, + &error_msg)); + ASSERT_TRUE(oat_file != nullptr); + EXPECT_LT(static_cast<size_t>(oat_file->Size()), static_cast<size_t>(tmp.GetFile()->GetLength())); +} + } // namespace art diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index c7b8884214..3f2271ef11 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -65,10 +65,12 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, int32_t image_patch_delta, const CompilerDriver* compiler, ImageWriter* image_writer, + bool compiling_boot_image, TimingLogger* timings, SafeMap<std::string, std::string>* key_value_store) : compiler_driver_(compiler), image_writer_(image_writer), + compiling_boot_image_(compiling_boot_image), dex_files_(&dex_files), size_(0u), bss_size_(0u), @@ -113,7 +115,9 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, size_oat_lookup_table_(0), method_offset_map_() { CHECK(key_value_store != nullptr); - + if (compiling_boot_image) { + CHECK(image_writer != nullptr); + } InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); relative_patcher_ = linker::RelativePatcher::Create(instruction_set, features, @@ -154,7 +158,7 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, } size_ = offset; - if (!HasImage()) { + if (!HasBootImage()) { // Allocate space for app dex cache arrays in the .bss section. 
size_t bss_start = RoundUp(size_, kPageSize); size_t pointer_size = GetInstructionSetPointerSize(instruction_set); @@ -167,9 +171,10 @@ OatWriter::OatWriter(const std::vector<const DexFile*>& dex_files, } CHECK_EQ(dex_files_->size(), oat_dex_files_.size()); - CHECK_EQ(compiler->IsImage(), image_writer_ != nullptr); - CHECK_EQ(compiler->IsImage(), - key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end()); + if (compiling_boot_image_) { + CHECK_EQ(image_writer_ != nullptr, + key_value_store_->find(OatHeader::kImageLocationKey) == key_value_store_->end()); + } CHECK_ALIGNED(image_patch_delta_, kPageSize); } @@ -672,7 +677,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { class_linker_(Runtime::Current()->GetClassLinker()), dex_cache_(nullptr) { patched_code_.reserve(16 * KB); - if (writer_->HasImage()) { + if (writer_->HasBootImage()) { // If we're creating the image, the address space must be ready so that we can apply patches. CHECK(writer_->image_writer_->IsImageAddressSpaceReady()); } @@ -855,7 +860,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } uint32_t GetDexCacheOffset(const LinkerPatch& patch) SHARED_REQUIRES(Locks::mutator_lock_) { - if (writer_->HasImage()) { + if (writer_->HasBootImage()) { auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>( patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_; @@ -868,7 +873,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { void PatchObjectAddress(std::vector<uint8_t>* code, uint32_t offset, mirror::Object* object) SHARED_REQUIRES(Locks::mutator_lock_) { - if (writer_->HasImage()) { + if (writer_->HasBootImage()) { object = writer_->image_writer_->GetImageAddress(object); } else { // NOTE: We're using linker patches for app->boot references when the image can @@ -888,7 +893,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) { - if (writer_->HasImage()) { + if (writer_->HasBootImage()) { method = writer_->image_writer_->GetImageMethodAddress(method); } else if (kIsDebugBuild) { // NOTE: We're using linker patches for app->boot references when the image can @@ -911,7 +916,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset) SHARED_REQUIRES(Locks::mutator_lock_) { uint32_t address = target_offset; - if (writer_->HasImage()) { + if (writer_->HasBootImage()) { address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + writer_->oat_data_offset_ + target_offset); } @@ -1123,7 +1128,7 @@ size_t OatWriter::InitOatCode(size_t offset) { offset = RoundUp(offset, kPageSize); oat_header_->SetExecutableOffset(offset); size_executable_offset_alignment_ = offset - old_offset; - if (compiler_driver_->IsImage()) { + if (compiler_driver_->IsBootImage()) { CHECK_EQ(image_patch_delta_, 0); InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); @@ -1164,7 +1169,7 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { } while (false) VISIT(InitCodeMethodVisitor); - if (compiler_driver_->IsImage()) { + if (compiler_driver_->IsBootImage()) { VISIT(InitImageMethodVisitor); } @@ -1408,7 +1413,7 @@ size_t 
OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t } size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) { - if (compiler_driver_->IsImage()) { + if (compiler_driver_->IsBootImage()) { InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); #define DO_TRAMPOLINE(field) \ diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index f2fe048174..7027434cca 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -93,6 +93,7 @@ class OatWriter { int32_t image_patch_delta, const CompilerDriver* compiler, ImageWriter* image_writer, + bool compiling_boot_image, TimingLogger* timings, SafeMap<std::string, std::string>* key_value_store); @@ -103,6 +104,10 @@ class OatWriter { return image_writer_ != nullptr; } + bool HasBootImage() const { + return compiling_boot_image_; + } + const OatHeader& GetOatHeader() const { return *oat_header_; } @@ -279,6 +284,7 @@ class OatWriter { const CompilerDriver* const compiler_driver_; ImageWriter* const image_writer_; + const bool compiling_boot_image_; // note OatFile does not take ownership of the DexFiles const std::vector<const DexFile*>* dex_files_; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index bcc32403d3..cca0baf274 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1169,8 +1169,10 @@ class BCEVisitor : public HGraphVisitor { // Return the range resulting from induction variable analysis of "instruction" when the value // is used from "context", for example, an index used from a bounds-check inside a loop body. ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) { - InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction); - InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction); + InductionVarRange::Value v1; + InductionVarRange::Value v2; + bool needs_finite_test = false; + induction_range_.GetInductionRange(context, instruction, &v1, &v2, &needs_finite_test); if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) && v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index ed193c7b61..167c35d075 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -359,18 +359,10 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item) // need a strategy for splitting exceptional edges. We split the block // after the move-exception (if present) and mark the first part not // throwing. The normal-flow edge between them will be split later. - HInstruction* first_insn = block->GetFirstInstruction(); - if (first_insn->IsLoadException()) { - // Catch block starts with a LoadException. Split the block after - // the StoreLocal and ClearException which must come after the load. - DCHECK(first_insn->GetNext()->IsStoreLocal()); - DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); - throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext()); - } else { - // Catch block does not load the exception. Split at the beginning - // to create an empty catch block. 
- throwing_block = block->SplitBefore(first_insn); - } + throwing_block = block->SplitCatchBlockAfterMoveException(); + // Move-exception does not throw and the block has throwing instructions + // so it must have been possible to split it. + DCHECK(throwing_block != nullptr); } try_block_info.Put(throwing_block->GetBlockId(), @@ -1006,7 +998,9 @@ bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke, return false; } - if (invoke->IsInvokeStaticOrDirect()) { + if (invoke->IsInvokeStaticOrDirect() && + HInvokeStaticOrDirect::NeedsCurrentMethodInput( + invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) { invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod()); (*argument_index)++; } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index a1bb5e0838..ce92470868 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -42,7 +42,7 @@ #include "compiled_method.h" #include "dex/verified_method.h" -#include "driver/dex_compilation_unit.h" +#include "driver/compiler_driver.h" #include "gc_map_builder.h" #include "graph_visualizer.h" #include "intrinsics.h" @@ -787,9 +787,10 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph, } void CodeGenerator::BuildNativeGCMap( - ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const { + ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const { const std::vector<uint8_t>& gc_map_raw = - dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap(); + compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx()) ->GetDexGcMap(); verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]); uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset(); @@ -911,19 +912,22 @@ void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const { vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); } -void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) { - uint32_t size = stack_map_stream_.PrepareForFillIn(); - data->resize(size); - MemoryRegion region(data->data(), size); +size_t CodeGenerator::ComputeStackMapsSize() { + return stack_map_stream_.PrepareForFillIn(); +} + +void CodeGenerator::BuildStackMaps(MemoryRegion region) { stack_map_stream_.FillIn(region); } void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc, uintptr_t native_pc_begin, uintptr_t native_pc_end) { - if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) { - src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), - static_cast<int32_t>(dex_pc)})); + if (compiler_options_.GetGenerateDebugInfo() && + dex_pc != kNoDexPc && + native_pc_begin != native_pc_end) { + src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin), + static_cast<int32_t>(dex_pc)})); } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 47b6f30450..a92014dc79 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -22,6 +22,7 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/bit_field.h" +#include "compiled_method.h" #include "driver/compiler_options.h" #include "globals.h" #include "graph_visualizer.h" @@ -51,13 +52,9 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); class Assembler; class CodeGenerator; -class DexCompilationUnit; +class CompilerDriver; class LinkerPatch; class ParallelMoveResolver; -class SrcMapElem; -template
<class Alloc> -class SrcMap; -using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>; class CodeAllocator { public: @@ -284,13 +281,12 @@ class CodeGenerator { slow_paths_.push_back(slow_path); } - void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; } - void BuildMappingTable(ArenaVector<uint8_t>* vector) const; void BuildVMapTable(ArenaVector<uint8_t>* vector) const; void BuildNativeGCMap( - ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const; - void BuildStackMaps(ArenaVector<uint8_t>* vector); + ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const; + void BuildStackMaps(MemoryRegion region); + size_t ComputeStackMapsSize(); bool IsBaseline() const { return is_baseline_; @@ -446,6 +442,10 @@ class CodeGenerator { // Copy the result of a call into the given target. virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; + const ArenaVector<SrcMapElem>& GetSrcMappingTable() const { + return src_map_; + } + protected: // Method patch info used for recording locations of required linker patches and // target methods. The target method can be used for various purposes, whether for @@ -488,7 +488,7 @@ class CodeGenerator { stats_(stats), graph_(graph), compiler_options_(compiler_options), - src_map_(nullptr), + src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_block_index_(0), is_leaf_(true), @@ -602,7 +602,7 @@ class CodeGenerator { const CompilerOptions& compiler_options_; // Native to dex_pc map used for native debugging/profiling tools. - DefaultSrcMap* src_map_; + ArenaVector<SrcMapElem> src_map_; ArenaVector<SlowPathCode*> slow_paths_; // The current block index in `block_order_` of the block diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 3dc3b7fba0..6d05293277 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1300,20 +1300,29 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio DCHECK_EQ(cond_value, 0); } } else { - if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) { - // Condition has been materialized, compare the output to 0 + // Can we optimize the jump if we know that the next block is the true case? + HCondition* condition = cond->AsCondition(); + bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); + if (condition == nullptr || condition->NeedsMaterialization()) { + // Condition has been materialized, compare the output to 0. DCHECK(instruction->GetLocations()->InAt(0).IsRegister()); + if (can_jump_to_false) { + __ CompareAndBranchIfZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), + false_target); + return; + } __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(), true_target); } else { // Condition has not been materialized, use its inputs as the // comparison and its condition as the branch condition. - Primitive::Type type = - cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; + Primitive::Type type = (condition != nullptr) + ? cond->InputAt(0)->GetType() + : Primitive::kPrimInt; // Is this a long or FP comparison that has been folded into the HCondition? if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. 
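A note on the can_jump_to_false path above: it is plain branch inversion. When the block laid out next is the true target, branching on the opposite condition straight to false_target saves the unconditional jump that would otherwise follow:

    // before: cmp; beq true_target; b false_target
    // after:  cmp; bne false_target   (fall through into the true block)

GetOppositeCondition supplies the inverted condition code, and the CompareAndBranchIfZero variant covers the materialized case.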
- GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), + GenerateCompareTestAndBranch(instruction->AsIf(), condition, true_target, false_target, always_true_target); return; } @@ -1328,7 +1337,12 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio DCHECK(right.IsConstant()); GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } - __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition())); + if (can_jump_to_false) { + __ b(false_target, ARMCondition(condition->GetOppositeCondition())); + return; + } + + __ b(true_target, ARMCondition(condition->GetCondition())); } } if (false_target != nullptr) { diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 8106499c02..959adb4238 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -40,12 +40,8 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; // We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr Register TMP = T8; static constexpr FRegister FTMP = F8; -// ART Thread Register. -static constexpr Register TR = S1; - Location MipsReturnLocation(Primitive::Type return_type) { switch (return_type) { case Primitive::kPrimBoolean: diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 55efd5f9de..5e81c5f648 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -16,13 +16,13 @@ #include "code_generator_mips64.h" +#include "art_method.h" +#include "code_generator_utils.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_mips64.h" -#include "art_method.h" -#include "code_generator_utils.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" @@ -666,9 +666,19 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, gpr = destination.AsRegister<GpuRegister>(); } if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) { - __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant())); + int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant()); + if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + gpr = ZERO; + } else { + __ LoadConst32(gpr, value); + } } else { - __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant())); + int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); + if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + gpr = ZERO; + } else { + __ LoadConst64(gpr, value); + } } if (dst_type == Primitive::kPrimFloat) { __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); @@ -734,12 +744,22 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, // Move to stack from constant HConstant* src_cst = source.GetConstant(); StoreOperandType store_type = destination.IsStackSlot() ? 
kStoreWord : kStoreDoubleword; + GpuRegister gpr = ZERO; if (destination.IsStackSlot()) { - __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant())); + int32_t value = GetInt32ValueOf(src_cst->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst32(gpr, value); + } } else { - __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant())); + DCHECK(destination.IsDoubleStackSlot()); + int64_t value = GetInt64ValueOf(src_cst->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst64(gpr, value); + } } - __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex()); + __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex()); } else { DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); @@ -755,9 +775,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, } } -void CodeGeneratorMIPS64::SwapLocations(Location loc1, - Location loc2, - Primitive::Type type ATTRIBUTE_UNUSED) { +void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) { DCHECK(!loc1.IsConstant()); DCHECK(!loc2.IsConstant()); @@ -781,12 +799,16 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, // Swap 2 FPRs FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); - // TODO: Can MOV.S/MOV.D be used here to save one instruction? - // Need to distinguish float from double, right? - __ Dmfc1(TMP, r2); - __ Dmfc1(AT, r1); - __ Dmtc1(TMP, r1); - __ Dmtc1(AT, r2); + if (type == Primitive::kPrimFloat) { + __ MovS(FTMP, r1); + __ MovS(r1, r2); + __ MovS(r2, FTMP); + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + __ MovD(FTMP, r1); + __ MovD(r1, r2); + __ MovD(r2, FTMP); + } } else if (is_slot1 != is_slot2) { // Swap GPR/FPR and stack slot Location reg_loc = is_slot1 ? 
loc2 : loc1; @@ -800,7 +822,6 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, reg_loc.AsFpuRegister<FpuRegister>(), SP, mem_loc.GetStackIndex()); - // TODO: review this MTC1/DMTC1 move if (mem_loc.IsStackSlot()) { __ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>()); } else { @@ -845,12 +866,22 @@ void CodeGeneratorMIPS64::Move(HInstruction* instruction, } else { DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot()); // Move to stack from constant + GpuRegister gpr = ZERO; if (location.IsStackSlot()) { - __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant())); - __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex()); + int32_t value = GetInt32ValueOf(instruction->AsConstant()); + if (value != 0) { + gpr = TMP; + __ LoadConst32(gpr, value); + } + __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex()); } else { - __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue()); - __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex()); + DCHECK(location.IsDoubleStackSlot()); + int64_t value = instruction->AsLongConstant()->GetValue(); + if (value != 0) { + gpr = TMP; + __ LoadConst64(gpr, value); + } + __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex()); } } } else if (instruction->IsTemporary()) { @@ -1198,7 +1229,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } default: @@ -1707,7 +1738,7 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { switch (in_type) { case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1736,8 +1767,18 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { case Primitive::kPrimLong: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - // TODO: more efficient (direct) comparison with a constant + Location rhs_location = locations->InAt(1); + bool use_imm = rhs_location.IsConstant(); + GpuRegister rhs = ZERO; + if (use_imm) { + int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); + if (value != 0) { + rhs = AT; + __ LoadConst64(rhs, value); + } + } else { + rhs = rhs_location.AsRegister<GpuRegister>(); + } __ Slt(TMP, lhs, rhs); __ Slt(dst, rhs, lhs); __ Subu(dst, dst, TMP); @@ -1902,6 +1943,252 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { } } +void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); 
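The +/-1 special case that follows rests on the identities x / 1 == x, x / -1 == -x and x % 1 == x % -1 == 0, so Rem is a single move of ZERO and Div is at most one subtraction. (The VisitCompare hunk above uses the same flavor of branch-free arithmetic: two Slt results subtracted give the -1/0/+1 three-way compare.) A scalar model, assuming two's-complement wrap-around for the kMin / -1 case, which is what Java requires and what Subu/Dsubu deliver:

    #include <cstdint>

    // Mirror of DivRemOneOrMinusOne: imm is known to be +1 or -1.
    int64_t DivRemOneOrMinusOne(int64_t dividend, int64_t imm, bool is_rem) {
      if (is_rem) {
        return 0;  // x % 1 == x % -1 == 0
      }
      // Negate through unsigned arithmetic so INT64_MIN / -1 wraps instead of
      // being undefined behavior in C++.
      return imm == -1 ? static_cast<int64_t>(0u - static_cast<uint64_t>(dividend))
                       : dividend;
    }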
+ + if (instruction->IsRem()) { + __ Move(out, ZERO); + } else { + if (imm == -1) { + if (type == Primitive::kPrimInt) { + __ Subu(out, ZERO, dividend); + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + __ Dsubu(out, ZERO, dividend); + } + } else if (out != dividend) { + __ Move(out, dividend); + } + } +} + +void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); + DCHECK(IsPowerOfTwo(abs_imm)); + int ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (type == Primitive::kPrimInt) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Dsrl32(TMP, dividend, 31); + } else { + __ Dsra32(TMP, dividend, 31); + if (ctz_imm > 32) { + __ Dsrl(TMP, TMP, 64 - ctz_imm); + } else { + __ Dsrl32(TMP, TMP, 32 - ctz_imm); + } + } + __ Daddu(out, dividend, TMP); + if (ctz_imm < 32) { + __ Dsra(out, out, ctz_imm); + } else { + __ Dsra32(out, out, ctz_imm - 32); + } + if (imm < 0) { + __ Dsubu(out, ZERO, out); + } + } + } else { + if (type == Primitive::kPrimInt) { + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); + } + __ Subu(out, out, TMP); + } + } else { + DCHECK_EQ(type, Primitive::kPrimLong); + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. 
+ __ Dsra32(TMP, dividend, 31); + __ Dsubu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Daddu(out, out, TMP); + } else { + __ Dsra32(TMP, dividend, 31); + if (ctz_imm > 32) { + __ Dsrl(TMP, TMP, 64 - ctz_imm); + } else { + __ Dsrl32(TMP, TMP, 32 - ctz_imm); + } + __ Daddu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + if (ctz_imm > 32) { + __ Dsll(out, out, 64 - ctz_imm); + __ Dsrl(out, out, 64 - ctz_imm); + } else { + __ Dsll32(out, out, 32 - ctz_imm); + __ Dsrl32(out, out, 32 - ctz_imm); + } + } + __ Dsubu(out, out, TMP); + } + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + + int64_t magic; + int shift; + CalculateMagicAndShiftForDivRem(imm, + (type == Primitive::kPrimLong), + &magic, + &shift); + + if (type == Primitive::kPrimInt) { + __ LoadConst32(TMP, magic); + __ MuhR6(TMP, dividend, TMP); + + if (imm > 0 && magic < 0) { + __ Addu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Subu(TMP, TMP, dividend); + } + + if (shift != 0) { + __ Sra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Sra(out, TMP, 31); + __ Subu(out, TMP, out); + } else { + __ Sra(AT, TMP, 31); + __ Subu(AT, TMP, AT); + __ LoadConst32(TMP, imm); + __ MulR6(TMP, AT, TMP); + __ Subu(out, dividend, TMP); + } + } else { + __ LoadConst64(TMP, magic); + __ Dmuh(TMP, dividend, TMP); + + if (imm > 0 && magic < 0) { + __ Daddu(TMP, TMP, dividend); + } else if (imm < 0 && magic > 0) { + __ Dsubu(TMP, TMP, dividend); + } + + if (shift >= 32) { + __ Dsra32(TMP, TMP, shift - 32); + } else if (shift > 0) { + __ Dsra(TMP, TMP, shift); + } + + if (instruction->IsDiv()) { + __ Dsra32(out, TMP, 31); + __ Dsubu(out, TMP, out); + } else { + __ Dsra32(AT, TMP, 31); + __ Dsubu(AT, TMP, AT); + __ LoadConst64(TMP, imm); + __ Dmul(TMP, AT, TMP); + __ Dsubu(out, dividend, TMP); + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + Primitive::Type type = instruction->GetResultType(); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + + LocationSummary* locations = instruction->GetLocations(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
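Both constant-divisor strategies above reduce division to shifts and multiplies. A C++ sketch of the 32-bit cases (the magic constant and shift shown for d == 7 are the standard precomputed values; this illustrates the arithmetic, not the emitted code):

    #include <cstdint>

    // DivRemByPowerOfTwo: round-toward-zero division by 2^k biases negative
    // dividends by (2^k - 1) before the arithmetic shift; the bias is built
    // from the sign bit, exactly like the Sra/Srl pair above.
    int32_t DivByPowerOfTwo(int32_t x, int k) {  // 0 < k < 31
      uint32_t sign = static_cast<uint32_t>(x >> 31);         // 0 or ~0
      int32_t bias = static_cast<int32_t>(sign >> (32 - k));  // 0 or 2^k - 1
      return (x + bias) >> k;
    }

    // GenerateDivRemWithAnyConstant: multiply by a fixed-point reciprocal,
    // keep the high 32 bits (MuhR6), correct, shift, and round toward zero
    // by adding the quotient's sign bit.
    int32_t DivBySeven(int32_t x) {
      int32_t magic = static_cast<int32_t>(0x92492493u);  // negative, d > 0
      int32_t q = static_cast<int32_t>((static_cast<int64_t>(x) * magic) >> 32);
      q += x;   // the (imm > 0 && magic < 0) correction above
      q >>= 2;  // shift == 2 for d == 7
      return q + (static_cast<uint32_t>(q) >> 31);
    }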
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(std::abs(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister divisor = second.AsRegister<GpuRegister>(); + if (instruction->IsDiv()) { + if (type == Primitive::kPrimInt) + __ DivR6(out, dividend, divisor); + else + __ Ddiv(out, dividend, divisor); + } else { + if (type == Primitive::kPrimInt) + __ ModR6(out, dividend, divisor); + else + __ Dmod(out, dividend, divisor); + } + } +} + void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); @@ -1909,7 +2196,7 @@ void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -1931,16 +2218,9 @@ void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) { switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) - __ DivR6(dst, lhs, rhs); - else - __ Ddiv(dst, lhs, rhs); + case Primitive::kPrimLong: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -2659,14 +2939,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - if (TryGenerateIntrinsicCode(invoke, codegen_)) { - return; - } - +void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); - GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp = temp_location.AsRegister<GpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kMips64PointerSize).SizeValue(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -2675,13 +2951,21 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) // temp = object->GetClass(); DCHECK(receiver.IsRegister()); __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); - codegen_->MaybeRecordImplicitNullCheck(invoke); + MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); __ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); +} + +void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 
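The newly factored GenerateVirtualCall is plain vtable dispatch: three dependent loads and an indirect call. The pointer chasing looks roughly like this (the struct and field names are illustrative, not the runtime's real layout):

    #include <cstddef>

    struct MethodSketch { void (*entry_point)(); };
    struct ClassSketch { MethodSketch** vtable; };
    struct ObjectSketch { ClassSketch* klass; };

    void DispatchVirtual(ObjectSketch* receiver, size_t vtable_index) {
      // The first load can fault on a null receiver, which is why
      // MaybeRecordImplicitNullCheck is recorded immediately after it.
      ClassSketch* klass = receiver->klass;           // temp = object->GetClass()
      MethodSketch* m = klass->vtable[vtable_index];  // temp->GetMethodAt(offset)
      m->entry_point();                               // load entry point; Jalr(T9)
    }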
DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -3108,7 +3392,7 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; @@ -3128,20 +3412,12 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { Primitive::Type type = instruction->GetType(); - LocationSummary* locations = instruction->GetLocations(); switch (type) { case Primitive::kPrimInt: - case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) - __ ModR6(dst, lhs, rhs); - else - __ Dmod(dst, lhs, rhs); + case Primitive::kPrimLong: + GenerateDivRemIntegral(instruction); break; - } case Primitive::kPrimFloat: case Primitive::kPrimDouble: { diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 9bbd02759a..58c6e0fa83 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -230,6 +230,10 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { Label* true_target, Label* false_target, Label* always_true_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; @@ -333,10 +337,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { MethodReference target_method) OVERRIDE; void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED, - Location temp ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(FATAL); - } + void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index 921c1d86c2..bf354e7ee2 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -15,6 +15,7 @@ */ #include "code_generator_utils.h" +#include "nodes.h" #include "base/logging.h" @@ -94,4 +95,19 @@ void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, *shift = is_long ? p - 64 : p - 32; } +// Is it valid to reverse the condition? Uses the values supplied to +// GenerateTestAndBranch() in instruction generators. +bool CanReverseCondition(Label* always_true_target, + Label* false_target, + HCondition* condition) { + // 'always_true_target' is null when the 'true' path is to the next + // block to be generated. Check the type of the condition to ensure that + // FP conditions are not swapped. This is for future fusing of HCompare and + // HCondition. + // Note: If the condition is nullptr, then it is always okay to reverse. 
+ return always_true_target == nullptr && false_target != nullptr && + (condition == nullptr || + !Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); +} + } // namespace art diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h index 59b495c2c9..628eee8885 100644 --- a/compiler/optimizing/code_generator_utils.h +++ b/compiler/optimizing/code_generator_utils.h @@ -21,10 +21,19 @@ namespace art { +class Label; +class HCondition; + // Computes the magic number and the shift needed in the div/rem by constant algorithm, as out // arguments `magic` and `shift` void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift); +// Is it valid to reverse the condition? Uses the values supplied to +// GenerateTestAndBranch() in instruction generators. +bool CanReverseCondition(Label* always_true_target, + Label* false_target, + HCondition* condition); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0df7e3b30a..8308d9ee20 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1216,16 +1216,21 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio DCHECK_EQ(cond_value, 0); } } else { + HCondition* condition = cond->AsCondition(); bool is_materialized = - !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); + condition == nullptr || condition->NeedsMaterialization(); // Moves do not affect the eflags register, so if the condition is // evaluated just before the if, we don't need to evaluate it // again. We can't use the eflags on long/FP conditions if they are // materialized due to the complex branching. - Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) + Primitive::Type type = (condition != nullptr) + ? cond->InputAt(0)->GetType() + : Primitive::kPrimInt; + bool eflags_set = condition != nullptr + && condition->IsBeforeWhenDisregardMoves(instruction) && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type)); + // Can we optimize the jump if we know that the next block is the true case? + bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); if (is_materialized) { if (!eflags_set) { // Materialized condition, compare against 0. @@ -1235,9 +1240,17 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } else { __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0)); } + if (can_jump_to_false) { + __ j(kEqual, false_target); + return; + } __ j(kNotEqual, true_target); } else { - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + if (can_jump_to_false) { + __ j(X86Condition(condition->GetOppositeCondition()), false_target); + return; + } + __ j(X86Condition(condition->GetCondition()), true_target); } } else { // Condition has not been materialized, use its inputs as the @@ -1247,7 +1260,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. 
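The floating-point restriction in CanReverseCondition is the usual NaN pitfall: for FP operands a condition and its opposite are not complements, so branching on the reversed condition is not equivalent to falling through on the original. A standalone check:

    #include <cmath>
    #include <cstdio>

    int main() {
      double nan = std::nan("");
      // Both lines print 0: with a NaN operand, (a < b) and (a >= b) are
      // both false, so reversing an FP branch would change behavior.
      std::printf("nan <  1.0 : %d\n", nan < 1.0 ? 1 : 0);
      std::printf("nan >= 1.0 : %d\n", nan >= 1.0 ? 1 : 0);
      return 0;
    }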
GenerateCompareTestAndBranch(instruction->AsIf(), - cond->AsCondition(), + condition, true_target, false_target, always_true_target); @@ -1270,7 +1283,13 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } else { __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex())); } - __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target); + + if (can_jump_to_false) { + __ j(X86Condition(condition->GetOppositeCondition()), false_target); + return; + } + + __ j(X86Condition(condition->GetCondition()), true_target); } } if (false_target != nullptr) { @@ -4043,16 +4062,16 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI // Ensure the value is in a byte register. locations->SetInAt(1, Location::RegisterLocation(EAX)); } else if (Primitive::IsFloatingPointType(field_type)) { - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { + if (is_volatile && field_type == Primitive::kPrimDouble) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); + } + } else if (is_volatile && field_type == Primitive::kPrimLong) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. locations->SetInAt(1, Location::RequiresRegister()); - } - if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { - // Temporary registers for the write barrier. - locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. - // Ensure the card is in a byte register. - locations->AddTemp(Location::RegisterLocation(ECX)); - } else if (is_volatile && (field_type == Primitive::kPrimLong)) { + // 64bits value can be atomically written to an address with movsd and an XMM register. // We need two XMM registers because there's no easier way to (bit) copy a register pair // into a single XMM register (we copy each pair part into the XMMs and then interleave them). @@ -4060,6 +4079,15 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI // isolated cases when we need this it isn't worth adding the extra complexity. locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + + if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { + // Temporary registers for the write barrier. + locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too. + // Ensure the card is in a byte register. 
+ locations->AddTemp(Location::RegisterLocation(ECX)); + } } } @@ -4081,6 +4109,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } + bool maybe_record_implicit_null_check_done = false; + switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { @@ -4090,7 +4120,12 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimShort: case Primitive::kPrimChar: { - __ movw(Address(base, offset), value.AsRegister<Register>()); + if (value.IsConstant()) { + int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movw(Address(base, offset), Immediate(v)); + } else { + __ movw(Address(base, offset), value.AsRegister<Register>()); + } break; } @@ -4105,6 +4140,9 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); __ movl(Address(base, offset), temp); + } else if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(v)); } else { __ movl(Address(base, offset), value.AsRegister<Register>()); } @@ -4120,21 +4158,40 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, __ punpckldq(temp1, temp2); __ movsd(Address(base, offset), temp1); codegen_->MaybeRecordImplicitNullCheck(instruction); + } else if (value.IsConstant()) { + int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); } else { __ movl(Address(base, offset), value.AsRegisterPairLow<Register>()); codegen_->MaybeRecordImplicitNullCheck(instruction); __ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>()); } + maybe_record_implicit_null_check_done = true; break; } case Primitive::kPrimFloat: { - __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(v)); + } else { + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } case Primitive::kPrimDouble: { - __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); + __ movl(Address(base, offset), Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v))); + maybe_record_implicit_null_check_done = true; + } else { + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } @@ -4143,8 +4200,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - // Longs are handled in the switch. - if (field_type != Primitive::kPrimLong) { + if (!maybe_record_implicit_null_check_done) { codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4481,7 +4537,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { // Ensure the value is in a byte register. 
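The float/double constant stores above lean on the fact that a memory slot only cares about the bit pattern, so an FP constant can be written through an integer immediate. A portable sketch of what bit_cast<int32_t, float> does (the helper name here is illustrative):

    #include <cstdint>
    #include <cstring>

    int32_t FloatBits(float f) {
      int32_t bits;
      static_assert(sizeof(bits) == sizeof(f), "width mismatch");
      std::memcpy(&bits, &f, sizeof(bits));  // reinterpret, no conversion
      return bits;
    }
    // A double constant is split with Low32Bits/High32Bits into two movl
    // immediates, since 32-bit x86 has no 64-bit immediate store.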
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); } else if (Primitive::IsFloatingPointType(value_type)) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } @@ -4667,8 +4723,14 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<Register>(), TIMES_4, offset); - DCHECK(value.IsFpuRegister()); - __ movss(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movss(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(address, Immediate(v)); + } + codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4677,8 +4739,19 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) : Address(array, index.AsRegister<Register>(), TIMES_8, offset); - DCHECK(value.IsFpuRegister()); - __ movsd(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movsd(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + Address address_hi = index.IsConstant() ? + Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + kX86WordSize) : + Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize); + int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + __ movl(address, Immediate(Low32Bits(v))); + codegen_->MaybeRecordImplicitNullCheck(instruction); + __ movl(address_hi, Immediate(High32Bits(v))); + } break; } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 5218d70995..ee8a299c5e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1183,16 +1183,20 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc DCHECK_EQ(cond_value, 0); } } else { - bool is_materialized = - !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization(); + HCondition* condition = cond->AsCondition(); + bool is_materialized = condition == nullptr || condition->NeedsMaterialization(); // Moves do not affect the eflags register, so if the condition is // evaluated just before the if, we don't need to evaluate it // again. We can't use the eflags on FP conditions if they are // materialized due to the complex branching. - Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt; - bool eflags_set = cond->IsCondition() - && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction) + Primitive::Type type = (condition != nullptr) + ? cond->InputAt(0)->GetType() + : Primitive::kPrimInt; + bool eflags_set = condition != nullptr + && condition->IsBeforeWhenDisregardMoves(instruction) && !Primitive::IsFloatingPointType(type); + // Can we optimize the jump if we know that the next block is the true case? 
+ bool can_jump_to_false = CanReverseCondition(always_true_target, false_target, condition); if (is_materialized) { if (!eflags_set) { @@ -1204,9 +1208,17 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); } + if (can_jump_to_false) { + __ j(kEqual, false_target); + return; + } __ j(kNotEqual, true_target); } else { - __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); + if (can_jump_to_false) { + __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); + return; + } + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); } } else { // Condition has not been materialized, use its inputs as the @@ -1215,7 +1227,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc // Is this a long or FP comparison that has been folded into the HCondition? if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { // Generate the comparison directly. - GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(), + GenerateCompareTestAndBranch(instruction->AsIf(), condition, true_target, false_target, always_true_target); return; } @@ -1235,7 +1247,13 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc __ cmpl(lhs.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), rhs.GetStackIndex())); } - __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); + + if (can_jump_to_false) { + __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); + return; + } + + __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); } } if (false_target != nullptr) { @@ -2562,7 +2580,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); // We can use a leaq or addq if the constant can fit in an immediate. - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } @@ -2682,7 +2700,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) { } case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } @@ -3755,14 +3773,25 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); Primitive::Type field_type = field_info.GetFieldType(); + bool is_volatile = field_info.IsVolatile(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { - locations->SetInAt(1, Location::RequiresFpuRegister()); + if (is_volatile) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. 
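The single-instruction requirement here is about tearing: if the two 32-bit halves of a volatile 64-bit field were written separately, a concurrent reader could observe half old and half new bits. In C++ terms, the guarantee that a lone movq/movsd preserves is:

    #include <atomic>
    #include <cstdint>

    std::atomic<int64_t> volatile_field{0};

    // One indivisible 64-bit write: a racing reader sees the old value or
    // the new one, never a mix of the two halves.
    void WriteVolatileField(int64_t v) {
      volatile_field.store(v, std::memory_order_seq_cst);
    }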
+ locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); + } else { + locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); + } } else { - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1))); + if (is_volatile) { + // In order to satisfy the semantics of volatile, this must be a single instruction store. + locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1))); + } else { + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + } } if (needs_write_barrier) { // Temporary registers for the write barrier. @@ -3790,11 +3819,13 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } + bool maybe_record_implicit_null_check_done = false; + switch (field_type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: { if (value.IsConstant()) { - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movb(Address(base, offset), Immediate(v)); } else { __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); @@ -3805,7 +3836,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimShort: case Primitive::kPrimChar: { if (value.IsConstant()) { - int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); + int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movw(Address(base, offset), Immediate(v)); } else { __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); @@ -3838,9 +3869,11 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimLong: { if (value.IsConstant()) { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(Address(base, offset), Immediate(v_32)); + codegen_->MoveInt64ToAddress(Address(base, offset), + Address(base, offset + sizeof(int32_t)), + v, + instruction); + maybe_record_implicit_null_check_done = true; } else { __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); } @@ -3848,12 +3881,28 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimFloat: { - __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int32_t v = + bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(Address(base, offset), Immediate(v)); + } else { + __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } case Primitive::kPrimDouble: { - __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + if (value.IsConstant()) { + int64_t v = + bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + codegen_->MoveInt64ToAddress(Address(base, offset), + Address(base, offset + sizeof(int32_t)), + v, + instruction); + maybe_record_implicit_null_check_done = true; + } else { + __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); + } break; } @@ -3862,7 +3911,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); + if (!maybe_record_implicit_null_check_done) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { CpuRegister 
temp = locations->GetTemp(0).AsRegister<CpuRegister>(); @@ -4170,13 +4221,9 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt( - 1, Location::RegisterOrConstant(instruction->InputAt(1))); - locations->SetInAt(2, Location::RequiresRegister()); - if (value_type == Primitive::kPrimLong) { - locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2))); - } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) { - locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(value_type)) { + locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); } @@ -4330,13 +4377,15 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); if (value.IsRegister()) { __ movq(address, value.AsRegister<CpuRegister>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); } else { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); - DCHECK(IsInt<32>(v)); - int32_t v_32 = v; - __ movq(address, Immediate(v_32)); + Address address_high = index.IsConstant() + ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + sizeof(int32_t)) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + codegen_->MoveInt64ToAddress(address, address_high, v, instruction); } - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4345,8 +4394,14 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset); - DCHECK(value.IsFpuRegister()); - __ movss(address, value.AsFpuRegister<XmmRegister>()); + if (value.IsFpuRegister()) { + __ movss(address, value.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(value.IsConstant()); + int32_t v = + bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); + __ movl(address, Immediate(v)); + } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4356,9 +4411,18 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { Address address = index.IsConstant() ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset) : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset); - DCHECK(value.IsFpuRegister()); - __ movsd(address, value.AsFpuRegister<XmmRegister>()); - codegen_->MaybeRecordImplicitNullCheck(instruction); + if (value.IsFpuRegister()) { + __ movsd(address, value.AsFpuRegister<XmmRegister>()); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } else { + int64_t v = + bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + Address address_high = index.IsConstant() + ? 
Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + + offset + sizeof(int32_t)) + : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t)); + codegen_->MoveInt64ToAddress(address, address_high, v, instruction); + } break; } @@ -5564,6 +5628,24 @@ Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { return Address::RIP(table_fixup); } +void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low, + const Address& addr_high, + int64_t v, + HInstruction* instruction) { + if (IsInt<32>(v)) { + int32_t v_32 = v; + __ movq(addr_low, Immediate(v_32)); + MaybeRecordImplicitNullCheck(instruction); + } else { + // Didn't fit in a register. Do it in pieces. + int32_t low_v = Low32Bits(v); + int32_t high_v = High32Bits(v); + __ movl(addr_low, Immediate(low_v)); + MaybeRecordImplicitNullCheck(instruction); + __ movl(addr_high, Immediate(high_v)); + } +} + #undef __ } // namespace x86_64 diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index fc485f5bb6..7a52473408 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -368,6 +368,12 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Store a 64 bit value into a DoubleStackSlot in the most efficient manner. void Store64BitValueToStack(Location dest, int64_t value); + // Assign a 64 bit constant to an address. + void MoveInt64ToAddress(const Address& addr_low, + const Address& addr_high, + int64_t v, + HInstruction* instruction); + private: struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 3de96b5d84..dd380c25cc 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -188,6 +188,21 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) { VisitInstruction(try_boundary); } +void GraphChecker::VisitLoadException(HLoadException* load) { + // Ensure that LoadException is the first instruction in a catch block. 
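MoveInt64ToAddress above chooses between one store and two: a movq immediate exists only for values that fit in 32 bits sign-extended, and when the value is split, the implicit null check must be recorded on the first store, the access that traps on a null base. The predicate and the split, in miniature (helper names illustrative):

    #include <cstdint>

    bool FitsInSignExtended32(int64_t v) {  // what IsInt<32>(v) tests
      return v == static_cast<int64_t>(static_cast<int32_t>(v));
    }
    int32_t Low32BitsOf(int64_t v) { return static_cast<int32_t>(v); }
    int32_t High32BitsOf(int64_t v) { return static_cast<int32_t>(v >> 32); }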
+ if (!load->GetBlock()->IsCatchBlock()) { + AddError(StringPrintf("%s:%d is in a non-catch block %d.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId())); + } else if (load->GetBlock()->GetFirstInstruction() != load) { + AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.", + load->DebugName(), + load->GetId(), + load->GetBlock()->GetBlockId())); + } +} + void GraphChecker::VisitInstruction(HInstruction* instruction) { if (seen_ids_.IsBitSet(instruction->GetId())) { AddError(StringPrintf("Instruction id %d is duplicate in graph.", @@ -242,10 +257,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } size_t use_index = use_it.Current()->GetIndex(); if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) { - AddError(StringPrintf("User %s:%d of instruction %d has a wrong " + AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong " "UseListNode index.", use->DebugName(), use->GetId(), + instruction->DebugName(), instruction->GetId())); } } @@ -531,10 +547,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { !use_it.Done(); use_it.Advance()) { HInstruction* use = use_it.Current()->GetUser(); if (!use->IsPhi() && !instruction->StrictlyDominates(use)) { - AddError(StringPrintf("Instruction %d in block %d does not dominate " - "use %d in block %d.", - instruction->GetId(), current_block_->GetBlockId(), - use->GetId(), use->GetBlock()->GetBlockId())); + AddError(StringPrintf("Instruction %s:%d in block %d does not dominate " + "use %s:%d in block %d.", + instruction->DebugName(), + instruction->GetId(), + current_block_->GetBlockId(), + use->DebugName(), + use->GetId(), + use->GetBlock()->GetBlockId())); } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index abf3659d91..d5ddbabc8c 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -50,6 +50,9 @@ class GraphChecker : public HGraphDelegateVisitor { // Check successors of blocks ending in TryBoundary. void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE; + // Check that LoadException is the first instruction in a catch block. + void VisitLoadException(HLoadException* load) OVERRIDE; + // Check that HCheckCast and HInstanceOf have HLoadClass as second input. void VisitCheckCast(HCheckCast* check) OVERRIDE; void VisitInstanceOf(HInstanceOf* check) OVERRIDE; diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 5530d261d2..b40ef5aa41 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -75,10 +75,12 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) { return v; } -static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) { - DCHECK(preheader != nullptr); +/** Helper method to insert an instruction. 
*/ +static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { + DCHECK(block != nullptr); + DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId(); DCHECK(instruction != nullptr); - preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction()); + block->InsertInstructionBefore(instruction, block->GetLastInstruction()); return instruction; } @@ -91,48 +93,98 @@ InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis) DCHECK(induction_analysis != nullptr); } -InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context, - HInstruction* instruction) { - return GetInduction(context, instruction, /* is_min */ true); -} - -InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context, - HInstruction* instruction) { - return SimplifyMax(GetInduction(context, instruction, /* is_min */ false)); +void InductionVarRange::GetInductionRange(HInstruction* context, + HInstruction* instruction, + /*out*/Value* min_val, + /*out*/Value* max_val, + /*out*/bool* needs_finite_test) { + HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop + if (loop != nullptr) { + // Set up loop information. + HBasicBlock* header = loop->GetHeader(); + bool in_body = context->GetBlock() != header; + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, instruction); + HInductionVarAnalysis::InductionInfo* trip = + induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); + // Find range. + *min_val = GetVal(info, trip, in_body, /* is_min */ true); + *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false)); + *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + } else { + // No loop to analyze. + *min_val = Value(); + *max_val = Value(); + *needs_finite_test = false; + } } bool InductionVarRange::CanGenerateCode(HInstruction* context, HInstruction* instruction, - /*out*/bool* top_test) { - return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test); + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { + return GenerateCode(context, + instruction, + nullptr, nullptr, nullptr, nullptr, nullptr, // nothing generated yet + needs_finite_test, + needs_taken_test); } -bool InductionVarRange::GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper) { - return GenerateCode(context, instruction, graph, block, lower, upper, nullptr); +void InductionVarRange::GenerateRangeCode(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper) { + bool b1, b2; // unused + if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) { + LOG(FATAL) << "Failed precondition: GenerateCode()"; + } +} + +void InductionVarRange::GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** taken_test) { + bool b1, b2; // unused + if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) { + LOG(FATAL) << "Failed precondition: GenerateCode()"; + } } // // Private class methods. 
// -InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context, - HInstruction* instruction, - bool is_min) { - HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop - if (loop != nullptr) { - HBasicBlock* header = loop->GetHeader(); - bool in_body = context->GetBlock() != header; - return GetVal(induction_analysis_->LookupInfo(loop, instruction), - induction_analysis_->LookupInfo(loop, header->GetLastInstruction()), - in_body, - is_min); +bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { + if (info != nullptr) { + if (info->induction_class == HInductionVarAnalysis::kLinear) { + return true; + } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) { + return NeedsTripCount(info->op_b); + } } - return Value(); + return false; +} + +bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { + if (trip != nullptr) { + if (trip->induction_class == HInductionVarAnalysis::kInvariant) { + return trip->operation == HInductionVarAnalysis::kTripCountInBody || + trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe; + } + } + return false; +} + +bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { + if (trip != nullptr) { + if (trip->induction_class == HInductionVarAnalysis::kInvariant) { + return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe || + trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe; + } + } + return false; } InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, @@ -184,11 +236,13 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct case HInductionVarAnalysis::kFetch: return GetFetch(info->fetch, trip, in_body, is_min); case HInductionVarAnalysis::kTripCountInLoop: + case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! return GetVal(info->op_a, trip, in_body, is_min); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: + case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { return Value(0); } else if (in_body) { @@ -356,25 +410,42 @@ bool InductionVarRange::GenerateCode(HInstruction* context, HBasicBlock* block, /*out*/HInstruction** lower, /*out*/HInstruction** upper, - /*out*/bool* top_test) { + /*out*/HInstruction** taken_test, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test) { HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop if (loop != nullptr) { + // Set up loop information. HBasicBlock* header = loop->GetHeader(); bool in_body = context->GetBlock() != header; - HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction); + HInductionVarAnalysis::InductionInfo* info = + induction_analysis_->LookupInfo(loop, instruction); + if (info == nullptr) { + return false; // nothing to analyze + } HInductionVarAnalysis::InductionInfo* trip = induction_analysis_->LookupInfo(loop, header->GetLastInstruction()); - if (info != nullptr && trip != nullptr) { - if (top_test != nullptr) { - *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop; + // Determine what tests are needed. 
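The two flags computed here guard different hazards: needs_finite_test covers trip counts that are only meaningful if the loop terminates, and needs_taken_test covers trip counts that are only valid once the loop is entered at all. By example (a hypothetical source loop, not code from this patch):

    // Hoisting work that depends on the trip count of
    //   for (int i = 0; i < n; ++i) { a[i] = 0; }
    // above the loop is only sound under the taken-test 0 < n:
    void HoistedSketch(int* a, int n) {
      if (0 < n) {  // taken-test generated by GenerateTakenTest
        // i ranges over [0, n - 1]; a hoisted check can cover a[0]..a[n-1].
      }
      for (int i = 0; i < n; ++i) {
        a[i] = 0;
      }
    }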
+ *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip); + *needs_taken_test = NeedsTripCount(info) && IsBodyTripCount(trip); + // Code generation for taken test: generate the code when requested or otherwise analyze + // if code generation is feasible when taken test is needed. + if (taken_test != nullptr) { + return GenerateCode( + trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false); + } else if (*needs_taken_test) { + if (!GenerateCode( + trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) { + return false; } - return + } + // Code generation for lower and upper. + return // Success on lower if invariant (not set), or code can be generated. ((info->induction_class == HInductionVarAnalysis::kInvariant) || GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) && // And success on upper. GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false); - } } return false; } @@ -387,19 +458,38 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, bool in_body, bool is_min) { if (info != nullptr) { + // Handle current operation. Primitive::Type type = Primitive::kPrimInt; HInstruction* opa = nullptr; HInstruction* opb = nullptr; - int32_t value = 0; switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: // Invariants. switch (info->operation) { case HInductionVarAnalysis::kAdd: + case HInductionVarAnalysis::kLT: + case HInductionVarAnalysis::kLE: + case HInductionVarAnalysis::kGT: + case HInductionVarAnalysis::kGE: if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) && GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb)); + HInstruction* operation = nullptr; + switch (info->operation) { + case HInductionVarAnalysis::kAdd: + operation = new (graph->GetArena()) HAdd(type, opa, opb); break; + case HInductionVarAnalysis::kLT: + operation = new (graph->GetArena()) HLessThan(opa, opb); break; + case HInductionVarAnalysis::kLE: + operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break; + case HInductionVarAnalysis::kGT: + operation = new (graph->GetArena()) HGreaterThan(opa, opb); break; + case HInductionVarAnalysis::kGE: + operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break; + default: + LOG(FATAL) << "unknown operation"; + } + *result = Insert(block, operation); } return true; } @@ -427,11 +517,13 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } return true; case HInductionVarAnalysis::kTripCountInLoop: + case HInductionVarAnalysis::kTripCountInLoopUnsafe: if (!in_body && !is_min) { // one extra! return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min); } FALLTHROUGH_INTENDED; case HInductionVarAnalysis::kTripCountInBody: + case HInductionVarAnalysis::kTripCountInBodyUnsafe: if (is_min) { if (graph != nullptr) { *result = graph->GetIntConstant(0); @@ -452,23 +544,31 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, break; } break; - case HInductionVarAnalysis::kLinear: - // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only - // to avoid arithmetic wrap-around situations that are hard to guard against. - if (GetConstant(info->op_a, &value)) { - if (value == 1 || value == -1) { - const bool is_min_a = value == 1 ? 
is_min : !is_min; - if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && - GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { - if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb)); + case HInductionVarAnalysis::kLinear: { + // Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only + // to avoid arithmetic wrap-around situations that are hard to guard against. + int32_t stride_value = 0; + if (GetConstant(info->op_a, &stride_value)) { + if (stride_value == 1 || stride_value == -1) { + const bool is_min_a = stride_value == 1 ? is_min : !is_min; + if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && + GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { + if (graph != nullptr) { + HInstruction* oper; + if (stride_value == 1) { + oper = new (graph->GetArena()) HAdd(type, opa, opb); + } else { + oper = new (graph->GetArena()) HSub(type, opb, opa); + } + *result = Insert(block, oper); + } + return true; } - return true; } } } break; - default: // TODO(ajcbik): add more cases + default: break; } } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 7fa5a26dce..7984871b08 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -57,29 +57,33 @@ class InductionVarRange { explicit InductionVarRange(HInductionVarAnalysis* induction); /** - * Given a context denoted by the first instruction, returns a, - * possibly conservative, lower bound on the instruction's value. + * Given a context denoted by the first instruction, returns a possibly conservative + * lower and upper bound on the instruction's value in the output parameters min_val + * and max_val, respectively. The need_finite_test flag denotes if an additional finite-test + * is needed to protect the range evaluation inside its loop. */ - Value GetMinInduction(HInstruction* context, HInstruction* instruction); + void GetInductionRange(HInstruction* context, + HInstruction* instruction, + /*out*/Value* min_val, + /*out*/Value* max_val, + /*out*/bool* needs_finite_test); /** - * Given a context denoted by the first instruction, returns a, - * possibly conservative, upper bound on the instruction's value. + * Returns true if range analysis is able to generate code for the lower and upper + * bound expressions on the instruction in the given context. The need_finite_test + * and need_taken test flags denote if an additional finite-test and/or taken-test + * are needed to protect the range evaluation inside its loop. */ - Value GetMaxInduction(HInstruction* context, HInstruction* instruction); - - /** - * Returns true if range analysis is able to generate code for the lower and upper bound - * expressions on the instruction in the given context. Output parameter top_test denotes - * whether a top test is needed to protect the trip-count expression evaluation. - */ - bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test); + bool CanGenerateCode(HInstruction* context, + HInstruction* instruction, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test); /** * Generates the actual code in the HIR for the lower and upper bound expressions on the * instruction in the given context. Code for the lower and upper bound expression are - * generated in given block and graph and are returned in lower and upper, respectively. 
- * For a loop invariant, lower is not set. + * generated in given block and graph and are returned in the output parameters lower and + * upper, respectively. For a loop invariant, lower is not set. * * For example, given expression x+i with range [0, 5] for i, calling this method * will generate the following sequence: @@ -87,20 +91,35 @@ class InductionVarRange { * block: * lower: add x, 0 * upper: add x, 5 + * + * Precondition: CanGenerateCode() returns true. */ - bool GenerateCode(HInstruction* context, - HInstruction* instruction, - HGraph* graph, - HBasicBlock* block, - /*out*/HInstruction** lower, - /*out*/HInstruction** upper); + void GenerateRangeCode(HInstruction* context, + HInstruction* instruction, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** lower, + /*out*/HInstruction** upper); + + /** + * Generates explicit taken-test for the loop in the given context. Code is generated in + * given block and graph. The taken-test is returned in parameter test. + * + * Precondition: CanGenerateCode() returns true and needs_taken_test is set. + */ + void GenerateTakenTest(HInstruction* context, + HGraph* graph, + HBasicBlock* block, + /*out*/HInstruction** taken_test); private: // // Private helper methods. // - Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min); + static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info); + static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip); + static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip); static Value GetFetch(HInstruction* instruction, HInductionVarAnalysis::InductionInfo* trip, @@ -130,8 +149,8 @@ class InductionVarRange { static Value MergeVal(Value v1, Value v2, bool is_min); /** - * Generates code for lower/upper expression in the HIR. Returns true on success. - * With graph == nullptr, the method can be used to determine if code generation + * Generates code for lower/upper/taken-test in the HIR. Returns true on success. + * With values nullptr, the method can be used to determine if code generation * would be successful without generating actual code yet. */ bool GenerateCode(HInstruction* context, @@ -140,7 +159,9 @@ class InductionVarRange { HBasicBlock* block, /*out*/HInstruction** lower, /*out*/HInstruction** upper, - bool* top_test); + /*out*/HInstruction** taken_test, + /*out*/bool* needs_finite_test, + /*out*/bool* needs_taken_test); static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* trip, diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index ce8926ad72..fda5153d43 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -46,6 +46,10 @@ class InductionVarRangeTest : public testing::Test { EXPECT_EQ(v1.is_known, v2.is_known); } + // + // Construction methods. + // + /** Constructs bare minimum graph. */ void BuildGraph() { graph_->SetNumberOfVRegs(1); @@ -58,7 +62,7 @@ class InductionVarRangeTest : public testing::Test { } /** Constructs loop with given upper bound. */ - void BuildLoop(HInstruction* upper) { + void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) { // Control flow. 
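The rewritten BuildLoop(lower, upper, stride) helper below constructs the HIR for this source-level shape, with the comparison direction following the sign of the stride (a sketch of the loop being modeled, not test code):

    // for (int i = lower; stride > 0 ? i < upper : i > upper; i += stride)
    int CountIterations(int lower, int upper, int stride) {
      int n = 0;
      for (int i = lower; (stride > 0) ? (i < upper) : (i > upper); i += stride) {
        ++n;  // loop body
      }
      return n;
    }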
loop_preheader_ = new (&allocator_) HBasicBlock(graph_); graph_->AddBlock(loop_preheader_); @@ -75,18 +79,22 @@ class InductionVarRangeTest : public testing::Test { HLocal* induc = new (&allocator_) HLocal(0); entry_block_->AddInstruction(induc); loop_preheader_->AddInstruction( - new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0))); // i = 0 + new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower))); // i = l loop_preheader_->AddInstruction(new (&allocator_) HGoto()); HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt); loop_header->AddInstruction(load); - condition_ = new (&allocator_) HLessThan(load, upper); + if (stride > 0) { + condition_ = new (&allocator_) HLessThan(load, upper); // i < u + } else { + condition_ = new (&allocator_) HGreaterThan(load, upper); // i > u + } loop_header->AddInstruction(condition_); - loop_header->AddInstruction(new (&allocator_) HIf(condition_)); // i < u + loop_header->AddInstruction(new (&allocator_) HIf(condition_)); load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt); loop_body->AddInstruction(load); - increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1)); + increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride)); loop_body->AddInstruction(increment_); - loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i++ + loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i += s loop_body->AddInstruction(new (&allocator_) HGoto()); exit_block_->AddInstruction(new (&allocator_) HReturnVoid()); } @@ -124,8 +132,20 @@ class InductionVarRangeTest : public testing::Test { } /** Constructs a trip-count. */ - HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) { - return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr); + HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) { + if (in_loop && safe) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr); + } else if (in_loop) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr); + } else if (safe) { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr); + } else { + return iva_->CreateTripCount( + HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr); + } } /** Constructs a linear a * i + b induction. */ @@ -139,16 +159,34 @@ class InductionVarRangeTest : public testing::Test { HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi)); } + /** Constructs a wrap-around induction consisting of a constant, followed info */ + HInductionVarAnalysis::InductionInfo* CreateWrapAround( + int32_t initial, + HInductionVarAnalysis::InductionInfo* info) { + return iva_->CreateInduction(HInductionVarAnalysis::kWrapAround, CreateConst(initial), info); + } + /** Constructs a wrap-around induction consisting of a constant, followed by a range. */ HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) { - return iva_->CreateInduction( - HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi)); + return CreateWrapAround(initial, CreateRange(lo, hi)); } // // Relay methods. 
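[Editorial aside; the relay-method banner continues below.] The CreateTripCount helper above folds two booleans into the four trip-count kinds of HInductionVarAnalysis. A standalone model of that mapping (enum names mirror the diff; the semantics are paraphrased from the IsBodyTripCount/IsUnsafeTripCount expectations further down):

#include <cassert>

// (in_loop, safe) -> trip-count kind, as in CreateTripCount above. "InLoop"
// kinds are valid throughout the loop, "InBody" kinds only inside the body;
// "Unsafe" kinds additionally need protection against arithmetic wrap-around.
enum TripCountKind {
  kTripCountInLoop,
  kTripCountInLoopUnsafe,
  kTripCountInBody,
  kTripCountInBodyUnsafe,
};

TripCountKind Classify(bool in_loop, bool safe) {
  if (in_loop) {
    return safe ? kTripCountInLoop : kTripCountInLoopUnsafe;
  }
  return safe ? kTripCountInBody : kTripCountInBodyUnsafe;
}

int main() {
  assert(Classify(true, true) == kTripCountInLoop);
  assert(Classify(true, false) == kTripCountInLoopUnsafe);
  assert(Classify(false, true) == kTripCountInBody);
  assert(Classify(false, false) == kTripCountInBodyUnsafe);
  return 0;
}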
// + bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) { + return InductionVarRange::NeedsTripCount(info); + } + + bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) { + return InductionVarRange::IsBodyTripCount(trip); + } + + bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) { + return InductionVarRange::IsUnsafeTripCount(trip); + } + Value GetMin(HInductionVarAnalysis::InductionInfo* info, HInductionVarAnalysis::InductionInfo* induc) { return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true); @@ -202,6 +240,26 @@ class InductionVarRangeTest : public testing::Test { // Tests on static methods. // +TEST_F(InductionVarRangeTest, TripCountProperties) { + EXPECT_FALSE(NeedsTripCount(nullptr)); + EXPECT_FALSE(NeedsTripCount(CreateConst(1))); + EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1))); + EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3))); + EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1)))); + + EXPECT_FALSE(IsBodyTripCount(nullptr)); + EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true))); + EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false))); + EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true))); + EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false))); + + EXPECT_FALSE(IsUnsafeTripCount(nullptr)); + EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true))); + EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false))); + EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true))); + EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false))); +} + TEST_F(InductionVarRangeTest, GetMinMaxNull) { ExpectEqual(Value(), GetMin(nullptr, nullptr)); ExpectEqual(Value(), GetMax(nullptr, nullptr)); @@ -279,10 +337,10 @@ TEST_F(InductionVarRangeTest, GetMinMaxFetch) { } TEST_F(InductionVarRangeTest, GetMinMaxLinear) { - ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100))); - ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100))); - ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100))); - ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100))); + ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true))); + ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true))); } TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) { @@ -398,61 +456,98 @@ TEST_F(InductionVarRangeTest, MaxValue) { // Tests on instance methods. // -TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) { - BuildLoop(graph_->GetIntConstant(1000)); +TEST_F(InductionVarRangeTest, ConstantTripCountUp) { + BuildLoop(0, graph_->GetIntConstant(1000), 1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); + Value v1, v2; + bool needs_finite_test = true; + // In context of header: known. - ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0))); - ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0))); + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(1000), v2); // In context of loop-body: known. 
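[Editorial aside; the test continues below.] The GetMinMaxLinear expectations in the hunk above follow directly from evaluating a * i + b at the endpoints of 0 <= i <= TC - 1: for positive a the minimum is b and the maximum is a * (TC - 1) + b, and vice versa for negative a. A quick standalone check of the four values used in the test:

#include <algorithm>
#include <cassert>

// Min/max of a * i + b over 0 <= i <= tc - 1, taken at the endpoints.
int LinearMin(int a, int b, int tc) { return std::min(b, a * (tc - 1) + b); }
int LinearMax(int a, int b, int tc) { return std::max(b, a * (tc - 1) + b); }

int main() {
  assert(LinearMin(10, 20, 100) == 20);     // GetMin(CreateLinear(10, 20), TC = 100)
  assert(LinearMax(10, 20, 100) == 1010);   // 10 * 99 + 20
  assert(LinearMin(-10, 20, 100) == -970);  // -10 * 99 + 20
  assert(LinearMax(-10, 20, 100) == 20);
  return 0;
}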
- ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_)); - ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_)); + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(999), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(1000), v2); } -TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) { - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); - entry_block_->AddInstruction(parameter); - BuildLoop(parameter); +TEST_F(InductionVarRangeTest, ConstantTripCountDown) { + BuildLoop(1000, graph_->GetIntConstant(0), -1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); - // In context of header: full range unknown. - ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0))); - ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0))); + Value v1, v2; + bool needs_finite_test = true; + + // In context of header: known. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(1000), v2); // In context of loop-body: known. - ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0))); - ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_)); - ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_)); + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(1000), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(999), v2); } -TEST_F(InductionVarRangeTest, CodeGeneration) { +TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { HInstruction* parameter = new (&allocator_) HParameterValue( graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); entry_block_->AddInstruction(parameter); - BuildLoop(parameter); + BuildLoop(0, parameter, 1); PerformInductionVarAnalysis(); InductionVarRange range(iva_); + Value v1, v2; + bool needs_finite_test = true; + bool needs_taken_test = true; + + // In context of header: upper unknown. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(), v2); + + // In context of loop-body: known. 
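[Editorial aside; the body-context checks continue below.] The Up/Down tests above hinge on the evaluation context: at the loop condition the induction variable still reaches its exit value, while inside the body it does not. A standalone illustration for the up-counting loop built by BuildLoop(0, 1000, 1):

#include <algorithm>
#include <cassert>
#include <limits>

int main() {
  int header_min = std::numeric_limits<int>::max();
  int header_max = std::numeric_limits<int>::min();
  int body_min = header_min, body_max = header_max;
  for (int i = 0; /* exit below */; ++i) {
    // The condition observes i in [0, 1000], including the exit value.
    header_min = std::min(header_min, i);
    header_max = std::max(header_max, i);
    if (!(i < 1000)) break;
    // The body only observes i in [0, 999].
    body_min = std::min(body_min, i);
    body_max = std::max(body_max, i);
  }
  assert(header_min == 0 && header_max == 1000);  // range at the condition
  assert(body_min == 0 && body_max == 999);       // range inside the body
  return 0;
}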
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(0), v1); + ExpectEqual(Value(parameter, 1, -1), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(1), v1); + ExpectEqual(Value(parameter, 1, 0), v2); + HInstruction* lower = nullptr; HInstruction* upper = nullptr; - bool top_test = false; + HInstruction* taken = nullptr; // Can generate code in context of loop-body only. - EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test)); - ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test)); - EXPECT_TRUE(top_test); + EXPECT_FALSE(range.CanGenerateCode( + condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range.CanGenerateCode( + increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(needs_finite_test); + EXPECT_TRUE(needs_taken_test); // Generates code. - EXPECT_TRUE(range.GenerateCode( - increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper)); + range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); // Verify lower is 0+0. ASSERT_TRUE(lower != nullptr); @@ -462,7 +557,7 @@ TEST_F(InductionVarRangeTest, CodeGeneration) { ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue()); - // Verify upper is (V-1)+0 + // Verify upper is (V-1)+0. ASSERT_TRUE(upper != nullptr); ASSERT_TRUE(upper->IsAdd()); ASSERT_TRUE(upper->InputAt(0)->IsSub()); @@ -471,6 +566,91 @@ TEST_F(InductionVarRangeTest, CodeGeneration) { EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue()); ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify taken-test is 0<V. + range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + ASSERT_TRUE(taken != nullptr); + ASSERT_TRUE(taken->IsLessThan()); + ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); + EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); +} + +TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { + HInstruction* parameter = new (&allocator_) HParameterValue( + graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(parameter); + BuildLoop(1000, parameter, -1); + PerformInductionVarAnalysis(); + InductionVarRange range(iva_); + + Value v1, v2; + bool needs_finite_test = true; + bool needs_taken_test = true; + + // In context of header: lower unknown. + range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(), v1); + ExpectEqual(Value(1000), v2); + + // In context of loop-body: known. + range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(parameter, 1, 1), v1); + ExpectEqual(Value(1000), v2); + range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test); + EXPECT_FALSE(needs_finite_test); + ExpectEqual(Value(parameter, 1, 0), v1); + ExpectEqual(Value(999), v2); + + HInstruction* lower = nullptr; + HInstruction* upper = nullptr; + HInstruction* taken = nullptr; + + // Can generate code in context of loop-body only. 
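[Editorial aside; the test continues below.] The generated expressions checked here, and in the down-counting test in the next hunk, can be validated by brute force: for the up-loop the bounds are 0 + 0 and (V - 1) + 0 under the taken-test 0 < V, and for the down-loop the unsimplified lower bound 1000 - (-(V - 1000) - 1) reduces algebraically to V + 1. A standalone check:

#include <cassert>

int main() {
  for (int V = -3; V <= 1003; ++V) {
    // Up-counting: for (i = 0; i < V; ++i), taken-test 0 < V.
    if (0 < V) {
      for (int i = 0; i < V; ++i) {
        assert(0 + 0 <= i && i <= (V - 1) + 0);
      }
    }
    // Down-counting: for (i = 1000; i > V; --i), taken-test 1000 > V.
    assert(1000 - (-(V - 1000) - 1) == V + 1);  // the generated lower bound
    if (1000 > V) {
      for (int i = 1000; i > V; --i) {
        assert(V + 1 <= i && i <= 1000 - 0);
      }
    }
  }
  return 0;
}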
+ EXPECT_FALSE(range.CanGenerateCode( + condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + ASSERT_TRUE(range.CanGenerateCode( + increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test)); + EXPECT_FALSE(needs_finite_test); + EXPECT_TRUE(needs_taken_test); + + // Generates code. + range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper); + + // Verify lower is 1000-(-(V-1000)-1). + ASSERT_TRUE(lower != nullptr); + ASSERT_TRUE(lower->IsSub()); + ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue()); + lower = lower->InputAt(1); + ASSERT_TRUE(lower->IsSub()); + ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue()); + lower = lower->InputAt(0); + ASSERT_TRUE(lower->IsNeg()); + lower = lower->InputAt(0); + ASSERT_TRUE(lower->IsSub()); + EXPECT_TRUE(lower->InputAt(0)->IsParameterValue()); + ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); + EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify upper is 1000-0. + ASSERT_TRUE(upper != nullptr); + ASSERT_TRUE(upper->IsSub()); + ASSERT_TRUE(upper->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue()); + ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); + EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + + // Verify taken-test is 1000>V. + range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken); + ASSERT_TRUE(taken != nullptr); + ASSERT_TRUE(taken->IsGreaterThan()); + ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); + EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue()); + EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); } } // namespace art diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index dbe75249be..b01324ec3b 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -89,10 +89,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } } -static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { - if (instruction_set == kMips) { - return Intrinsics::kNone; - } +static Intrinsics GetIntrinsic(InlineMethod method) { switch (method.opcode) { // Floating-point conversions. 
case kIntrinsicDoubleCvt: @@ -431,7 +428,7 @@ void IntrinsicsRecognizer::Run() { DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file); DCHECK(inliner != nullptr); if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) { - Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet()); + Intrinsics intrinsic = GetIntrinsic(method); if (intrinsic != Intrinsics::kNone) { if (!CheckInvokeType(intrinsic, invoke, dex_file)) { diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 5efcf4eadf..a94e3a8c23 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -138,6 +138,108 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> +// boolean java.lang.String.equals(Object anObject) +void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); + + // Temporary registers to store lengths of strings and for calculations. + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { + MipsAssembler* assembler = GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + + Register str = locations->InAt(0).AsRegister<Register>(); + Register arg = locations->InAt(1).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = locations->GetTemp(1).AsRegister<Register>(); + Register temp3 = locations->GetTemp(2).AsRegister<Register>(); + + MipsLabel loop; + MipsLabel end; + MipsLabel return_true; + MipsLabel return_false; + + // Get offsets of count, value, and class fields within a string object. + const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); + const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value(); + + // Note that the null check must have been done earlier. + DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); + + // If the register containing the pointer to "this", and the register + // containing the pointer to "anObject" are the same register then + // "this", and "anObject" are the same object and we can + // short-circuit the logic to a true result. + if (str == arg) { + __ LoadConst32(out, 1); + return; + } + + // Check if input is null, return false if it is. + __ Beqz(arg, &return_false); + + // Reference equality check, return true if same reference. + __ Beq(str, arg, &return_true); + + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bne(temp1, temp2, &return_false); + + // Load lengths of this and argument strings. 
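[Editorial aside; the length loads and character loop continue in the next hunk.] Before comparing any characters, the StringEquals intrinsic above runs a chain of cheap early-outs: same reference, null argument, class mismatch, then (just below) length mismatch and the both-empty case. A standalone model of that chain; FakeString is a hypothetical stand-in for the two fields the intrinsic reads, not ART's mirror::String:

#include <cassert>
#include <cstdint>

struct FakeString {
  const void* klass;  // stand-in for the class field
  int32_t count;      // stand-in for the count field
};

// Returns +1 for "definitely equal", 0 for "definitely not equal", and -1
// for "fall through to the character comparison loop" (next hunk).
int EqualsEarlyOuts(const FakeString* str, const FakeString* arg) {
  if (str == arg) return 1;                 // same reference
  if (arg == nullptr) return 0;             // null argument
  if (str->klass != arg->klass) return 0;   // argument is not a String
  if (str->count != arg->count) return 0;   // different lengths
  if (str->count == 0) return 1;            // both empty
  return -1;                                // must compare characters
}

int main() {
  FakeString a{&a, 3}, b{&a, 3}, e1{&a, 0}, e2{&a, 0};
  assert(EqualsEarlyOuts(&a, &a) == 1);
  assert(EqualsEarlyOuts(&a, nullptr) == 0);
  assert(EqualsEarlyOuts(&e1, &e2) == 1);
  assert(EqualsEarlyOuts(&a, &b) == -1);
  return 0;
}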
+ __ Lw(temp1, str, count_offset); + __ Lw(temp2, arg, count_offset); + // Check if lengths are equal, return false if they're not. + __ Bne(temp1, temp2, &return_false); + // Return true if both strings are empty. + __ Beqz(temp1, &return_true); + + // Don't overwrite input registers + __ Move(TMP, str); + __ Move(temp3, arg); + + // Assertions that must hold in order to compare strings 2 characters at a time. + DCHECK_ALIGNED(value_offset, 4); + static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded"); + + // Loop to compare strings 2 characters at a time starting at the beginning of the string. + // Ok to do this because strings are zero-padded. + __ Bind(&loop); + __ Lw(out, TMP, value_offset); + __ Lw(temp2, temp3, value_offset); + __ Bne(out, temp2, &return_false); + __ Addiu(TMP, TMP, 4); + __ Addiu(temp3, temp3, 4); + __ Addiu(temp1, temp1, -2); + __ Bgtz(temp1, &loop); + + // Return true and exit the function. + // If loop does not result in returning false, we return true. + __ Bind(&return_true); + __ LoadConst32(out, 1); + __ B(&end); + + // Return false and exit the function. + __ Bind(&return_false); + __ LoadConst32(out, 0); + __ Bind(&end); +} + // Unimplemented intrinsics. #define UNIMPLEMENTED_INTRINSIC(Name) \ @@ -204,7 +306,6 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) UNIMPLEMENTED_INTRINSIC(StringCharAt) UNIMPLEMENTED_INTRINSIC(StringCompareTo) -UNIMPLEMENTED_INTRINSIC(StringEquals) UNIMPLEMENTED_INTRINSIC(StringIndexOf) UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 05c7eb02d9..ff843ebb1e 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -101,11 +101,10 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { if (invoke_->IsInvokeStaticOrDirect()) { codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0)); - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); } else { - UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented"; - UNREACHABLE(); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); } + codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
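[Editorial aside.] Stepping back to the MIPS StringEquals comparison loop above: it walks both character arrays one 32-bit word (two UTF-16 code units) at a time and decrements the remaining length by 2, which is safe for odd lengths only because kObjectAlignment zero-pads the final word, exactly as the static_assert states. A standalone model, with memcpy standing in for the aligned Lw loads:

#include <cassert>
#include <cstdint>
#include <cstring>

// Compare UTF-16 data two code units (one 32-bit word) at a time. Zero
// padding of the trailing word is an assumption here, mirroring what object
// alignment guarantees for string payloads.
bool EqualsTwoCharsAtATime(const uint16_t* a, const uint16_t* b, int32_t len) {
  for (int32_t remaining = len; remaining > 0; remaining -= 2) {
    uint32_t wa, wb;
    std::memcpy(&wa, a, sizeof(wa));
    std::memcpy(&wb, b, sizeof(wb));
    if (wa != wb) return false;
    a += 2;
    b += 2;
  }
  return true;
}

int main() {
  uint16_t s1[4] = {'a', 'b', 'c', 0};  // odd length 3, zero padded
  uint16_t s2[4] = {'a', 'b', 'c', 0};
  uint16_t s3[4] = {'a', 'b', 'd', 0};
  assert(EqualsTwoCharsAtATime(s1, s2, 3));
  assert(!EqualsTwoCharsAtATime(s1, s3, 3));
  return 0;
}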
Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 14c65c9aaf..a29f3ef1d1 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1605,7 +1605,7 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1))); + locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); } static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index ebdf7a2f65..1ab206f69e 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -17,6 +17,7 @@ #include "locations.h" #include "nodes.h" +#include "code_generator.h" namespace art { @@ -47,18 +48,26 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { : Location::RequiresRegister(); } -Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) { - if (instruction->IsIntConstant() || instruction->IsNullConstant()) { - return Location::ConstantLocation(instruction->AsConstant()); - } else if (instruction->IsLongConstant()) { - // Does the long constant fit in a 32 bit int? - int64_t value = instruction->AsLongConstant()->GetValue(); - return IsInt<32>(value) - ? Location::ConstantLocation(instruction->AsConstant()) - : Location::RequiresRegister(); - } else { - return Location::RequiresRegister(); +Location Location::RegisterOrInt32Constant(HInstruction* instruction) { + HConstant* constant = instruction->AsConstant(); + if (constant != nullptr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (IsInt<32>(value)) { + return Location::ConstantLocation(constant); + } } + return Location::RequiresRegister(); +} + +Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { + HConstant* constant = instruction->AsConstant(); + if (constant != nullptr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + if (IsInt<32>(value)) { + return Location::ConstantLocation(constant); + } + } + return Location::RequiresFpuRegister(); } Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { @@ -67,6 +76,12 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) { : Location::RegisterLocation(reg); } +Location Location::FpuRegisterOrConstant(HInstruction* instruction) { + return instruction->IsConstant() + ? 
Location::ConstantLocation(instruction->AsConstant()) + : Location::RequiresFpuRegister(); +} + std::ostream& operator<<(std::ostream& os, const Location& location) { os << location.DebugString(); if (location.IsRegister() || location.IsFpuRegister()) { diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index d014379bca..1181007666 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -354,8 +354,10 @@ class Location : public ValueObject { } static Location RegisterOrConstant(HInstruction* instruction); - static Location RegisterOrInt32LongConstant(HInstruction* instruction); + static Location RegisterOrInt32Constant(HInstruction* instruction); static Location ByteRegisterOrConstant(int reg, HInstruction* instruction); + static Location FpuRegisterOrConstant(HInstruction* instruction); + static Location FpuRegisterOrInt32Constant(HInstruction* instruction); // The location of the first input to the instruction will be // used to replace this unallocated location. diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 68fb0acf7f..2d3dcf746b 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -335,14 +335,24 @@ void HGraph::SimplifyCatchBlocks() { // instructions into `normal_block` and links the two blocks with a Goto. // Afterwards, incoming normal-flow edges are re-linked to `normal_block`, // leaving `catch_block` with the exceptional edges only. + // // Note that catch blocks with normal-flow predecessors cannot begin with - // a MOVE_EXCEPTION instruction, as guaranteed by the verifier. - DCHECK(!catch_block->GetFirstInstruction()->IsLoadException()); - HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction()); - for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) { - if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { - catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block); - --j; + // a move-exception instruction, as guaranteed by the verifier. However, + // trivially dead predecessors are ignored by the verifier and such code + // has not been removed at this stage. We therefore ignore the assumption + // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628). + HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException(); + if (normal_block == nullptr) { + // Catch block is either empty or only contains a move-exception. It must + // therefore be dead and will be removed during initial DCE. Do nothing. + DCHECK(!catch_block->EndsWithControlFlowInstruction()); + } else { + // Catch block was split. Re-link normal-flow edges to the new block. 
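[Editorial aside; the re-linking loop continues below.] The helper used here, SplitCatchBlockAfterMoveException (defined in a later hunk), picks the split point as follows: after the LoadException/StoreLocal/ClearException triple when the block starts with a load of the exception, at the first instruction otherwise, and it signals a dead catch block by returning nullptr when nothing would remain after the split point. A standalone model of that decision over a toy instruction stream:

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

using Block = std::vector<std::string>;  // toy stand-in for an HBasicBlock

// Returns the index of the first instruction of the new normal block, or -1
// when the catch block would be left empty (i.e. it must be dead).
int SplitPoint(const Block& block) {
  size_t split = 0;
  if (!block.empty() && block[0] == "LoadException") {
    // Mirrors the DCHECKs: StoreLocal and ClearException follow the load.
    assert(block.size() >= 3 && block[1] == "StoreLocal" && block[2] == "ClearException");
    split = 3;
  }
  return split < block.size() ? static_cast<int>(split) : -1;
}

int main() {
  assert(SplitPoint({"LoadException", "StoreLocal", "ClearException", "Goto"}) == 3);
  assert(SplitPoint({"Add", "Goto"}) == 0);  // no move-exception sequence
  assert(SplitPoint({"LoadException", "StoreLocal", "ClearException"}) == -1);  // dead
  assert(SplitPoint({}) == -1);  // dead
  return 0;
}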
+ for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) { + if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) { + catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block); + --j; + } } } } @@ -1163,7 +1173,7 @@ void HInstruction::MoveBefore(HInstruction* cursor) { } HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), @@ -1193,7 +1203,7 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { } HBasicBlock* HBasicBlock::CreateImmediateDominator() { - DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented"; + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented."; HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); @@ -1209,6 +1219,34 @@ HBasicBlock* HBasicBlock::CreateImmediateDominator() { return new_block; } +HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() { + DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; + DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only."; + + HInstruction* first_insn = GetFirstInstruction(); + HInstruction* split_before = nullptr; + + if (first_insn != nullptr && first_insn->IsLoadException()) { + // Catch block starts with a LoadException. Split the block after + // the StoreLocal and ClearException which must come after the load. + DCHECK(first_insn->GetNext()->IsStoreLocal()); + DCHECK(first_insn->GetNext()->GetNext()->IsClearException()); + split_before = first_insn->GetNext()->GetNext()->GetNext(); + } else { + // Catch block does not load the exception. Split at the beginning + // to create an empty catch block. + split_before = first_insn; + } + + if (split_before == nullptr) { + // Catch block has no instructions after the split point (must be dead). + // Do not split it but rather signal error by returning nullptr. + return nullptr; + } else { + return SplitBefore(split_before); + } +} + HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) { DCHECK(!cursor->IsControlFlow()); DCHECK_NE(instructions_.last_instruction_, cursor); @@ -1940,6 +1978,16 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { return !opt.GetDoesNotNeedDexCache(); } +void HInvokeStaticOrDirect::RemoveInputAt(size_t index) { + RemoveAsUserOfInput(index); + inputs_.erase(inputs_.begin() + index); + // Update indexes in use nodes of inputs that have been pulled forward by the erase(). + for (size_t i = index, e = InputCount(); i < e; ++i) { + DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u); + InputRecordAt(i).GetUseNode()->SetIndex(i); + } +} + void HInstruction::RemoveEnvironmentUsers() { for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) { HUseListNode<HEnvironment*>* user_node = use_it.Current(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 0f2c1cffee..4e8124894a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -837,6 +837,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // blocks are consistent (for example ending with a control flow instruction). 
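[Editorial aside.] Back in nodes.cc above, HInvokeStaticOrDirect::RemoveInputAt has a subtlety worth spelling out: after erasing input `index`, every later input's use node still records its old position and must be shifted down by one, which is exactly what the DCHECK in the loop verifies. A standalone model of that fix-up:

#include <cassert>
#include <cstddef>
#include <vector>

struct UseNode { size_t index; };  // stand-in for HUseListNode

void RemoveInputAt(std::vector<UseNode>& inputs, size_t index) {
  inputs.erase(inputs.begin() + index);
  // Update indexes of inputs pulled forward by the erase().
  for (size_t i = index; i < inputs.size(); ++i) {
    assert(inputs[i].index == i + 1);  // still records the old position
    inputs[i].index = i;
  }
}

int main() {
  std::vector<UseNode> inputs = {{0}, {1}, {2}, {3}};
  RemoveInputAt(inputs, 1);
  for (size_t i = 0; i < inputs.size(); ++i) {
    assert(inputs[i].index == i);  // indices consistent again
  }
  return 0;
}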
HBasicBlock* SplitAfter(HInstruction* cursor); + // Split catch block into two blocks after the original move-exception bytecode + // instruction, or at the beginning if not present. Returns the newly created, + // latter block, or nullptr if such block could not be created (must be dead + // in that case). Note that this method just updates raw block information, + // like predecessors, successors, dominators, and instruction list. It does not + // update the graph, reverse post order, loop information, nor make sure the + // blocks are consistent (for example ending with a control flow instruction). + HBasicBlock* SplitCatchBlockAfterMoveException(); + // Merge `other` at the end of `this`. Successors and dominated blocks of // `other` are changed to be successors and dominated blocks of `this`. Note // that this method does not update the graph, reverse post order, loop @@ -3399,11 +3408,12 @@ class HInvokeStaticOrDirect : public HInvoke { ClinitCheckRequirement clinit_check_requirement) : HInvoke(arena, number_of_arguments, - // There is one extra argument for the HCurrentMethod node, and - // potentially one other if the clinit check is explicit, and one other - // if the method is a string factory. - 1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) - + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u), + // There is potentially one extra argument for the HCurrentMethod node, and + // potentially one other if the clinit check is explicit, and potentially + // one other if the method is a string factory. + (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) + + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) + + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u), return_type, dex_pc, method_index, @@ -3411,12 +3421,25 @@ class HInvokeStaticOrDirect : public HInvoke { invoke_type_(invoke_type), clinit_check_requirement_(clinit_check_requirement), target_method_(target_method), - dispatch_info_(dispatch_info) {} + dispatch_info_(dispatch_info) { } void SetDispatchInfo(const DispatchInfo& dispatch_info) { + bool had_current_method_input = HasCurrentMethodInput(); + bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind); + + // Using the current method is the default and once we find a better + // method load kind, we should not go back to using the current method. + DCHECK(had_current_method_input || !needs_current_method_input); + + if (had_current_method_input && !needs_current_method_input) { + DCHECK_EQ(InputAt(GetCurrentMethodInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod()); + RemoveInputAt(GetCurrentMethodInputIndex()); + } dispatch_info_ = dispatch_info; } + void RemoveInputAt(size_t index); + bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // We access the method via the dex cache so we can't do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. @@ -3438,6 +3461,17 @@ class HInvokeStaticOrDirect : public HInvoke { bool HasPcRelDexCache() const { return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative; } + bool HasCurrentMethodInput() const { + // This function can be called only after the invoke has been fully initialized by the builder. 
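[Editorial aside.] The constructor hunk above changes the extra-input arithmetic so that the current method is only counted when the load kind needs it (NeedsCurrentMethodInput, defined a little further down). A standalone model of that count; kDirectAddress here is merely a stand-in for "any other load kind" and is not taken from this diff:

#include <cassert>
#include <cstdint>

enum MethodLoadKind { kStringInit, kRecursive, kDirectAddress, kDexCacheViaMethod };

bool NeedsCurrentMethodInput(MethodLoadKind kind) {
  return kind == kRecursive || kind == kDexCacheViaMethod;
}

uint32_t InputCount(uint32_t number_of_arguments,
                    MethodLoadKind load_kind,
                    bool explicit_clinit_check) {
  return number_of_arguments
      + (NeedsCurrentMethodInput(load_kind) ? 1u : 0u)   // current method
      + (explicit_clinit_check ? 1u : 0u)                // explicit clinit check
      + (load_kind == kStringInit ? 1u : 0u);            // string factory
}

int main() {
  assert(InputCount(2, kDexCacheViaMethod, false) == 3);  // args + current method
  assert(InputCount(2, kDirectAddress, false) == 2);      // no extra input
  assert(InputCount(2, kStringInit, false) == 3);         // args + string-init extra
  assert(InputCount(2, kRecursive, true) == 4);           // both extras
  return 0;
}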
+ if (NeedsCurrentMethodInput(GetMethodLoadKind())) { + DCHECK(InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + return true; + } else { + DCHECK(InputCount() == GetCurrentMethodInputIndex() || + !InputAt(GetCurrentMethodInputIndex())->IsCurrentMethod()); + return false; + } + } bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; } MethodReference GetTargetMethod() const { return target_method_; } @@ -3486,8 +3520,8 @@ class HInvokeStaticOrDirect : public HInvoke { bool IsStringFactoryFor(HFakeString* str) const { if (!IsStringInit()) return false; - // +1 for the current method. - if (InputCount() == (number_of_arguments_ + 1)) return false; + DCHECK(!HasCurrentMethodInput()); + if (InputCount() == (number_of_arguments_)) return false; return InputAt(InputCount() - 1)->AsFakeString() == str; } @@ -3513,6 +3547,11 @@ class HInvokeStaticOrDirect : public HInvoke { return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit); } + // Does this method load kind need the current method as an input? + static bool NeedsCurrentMethodInput(MethodLoadKind kind) { + return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod; + } + DECLARE_INSTRUCTION(InvokeStaticOrDirect); protected: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 8cb2cfc816..7e3c5e602e 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -56,6 +56,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" #include "load_store_elimination.h" @@ -258,15 +259,6 @@ class OptimizingCompiler FINAL : public Compiler { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const OVERRIDE; - CompiledMethod* TryCompile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const; - CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file) const OVERRIDE { @@ -291,23 +283,45 @@ class OptimizingCompiler FINAL : public Compiler { } } + bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method) + OVERRIDE + SHARED_REQUIRES(Locks::mutator_lock_); + private: // Whether we should run any optimization or register allocation. If false, will // just run the code generation after the graph was built. const bool run_optimizations_; - // Optimize and compile `graph`. - CompiledMethod* CompileOptimized(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const; - - // Just compile without doing optimizations. - CompiledMethod* CompileBaseline(CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const; + // Create a 'CompiledMethod' for an optimized graph. + CompiledMethod* EmitOptimized(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; + + // Create a 'CompiledMethod' for a non-optimized graph. 
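[Editorial aside.] The JitCompile override declared above (implemented at the end of this file's diff) follows a reserve/build/commit protocol against the JIT code cache: stack-map space is reserved first, sized exactly via ComputeStackMapsSize(), filled in place by BuildStackMaps(), and released again with ClearData() if the final CommitCode() fails. A toy standalone model of that protocol; FakeCodeCache is hypothetical, and the real calls take a Thread* and more arguments:

#include <cassert>
#include <cstdint>
#include <vector>

struct FakeCodeCache {  // toy stand-in for jit::JitCodeCache
  std::vector<std::vector<uint8_t>> reservations;
  bool commit_should_fail = false;

  uint8_t* ReserveData(size_t size) {
    reservations.emplace_back(size);
    return reservations.back().data();
  }
  void ClearData(uint8_t* data) {
    // Toy rollback: only the most recent reservation can be released here.
    assert(!reservations.empty() && reservations.back().data() == data);
    reservations.pop_back();
  }
  const void* CommitCode() { return commit_should_fail ? nullptr : this; }
};

bool JitCompileModel(FakeCodeCache* cache, size_t stack_map_size) {
  uint8_t* stack_map_data = cache->ReserveData(stack_map_size);
  if (stack_map_data == nullptr) {
    return false;  // cache full
  }
  // ... BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size)) ...
  if (cache->CommitCode() == nullptr) {
    cache->ClearData(stack_map_data);  // do not leak the reservation
    return false;
  }
  return true;
}

int main() {
  FakeCodeCache cache;
  assert(JitCompileModel(&cache, 16));
  assert(cache.reservations.size() == 1);   // committed data stays live
  cache.commit_should_fail = true;
  assert(!JitCompileModel(&cache, 16));
  assert(cache.reservations.size() == 1);   // failed attempt rolled back
  return 0;
}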
+ CompiledMethod* EmitBaseline(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* driver) const; + + // Try compiling a method and return the code generator used for + // compiling it. + // This method: + // 1) Builds the graph. Returns null if it failed to build it. + // 2) If `run_optimizations_` is set: + // 2.1) Transform the graph to SSA. Returns null if it failed. + // 2.2) Run optimizations on the graph, including register allocator. + // 3) Generate code with the `code_allocator` provided. + CodeGenerator* TryCompile(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const; std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -446,13 +460,32 @@ static void RunArchOptimizations(InstructionSet instruction_set, } } +NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. +static void AllocateRegisters(HGraph* graph, + CodeGenerator* codegen, + PassObserver* pass_observer) { + PrepareForRegisterAllocation(graph).Run(); + SsaLivenessAnalysis liveness(graph, codegen); + { + PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); + liveness.Analyze(); + } + { + PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); + RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); + } +} + static void RunOptimizations(HGraph* graph, CodeGenerator* codegen, CompilerDriver* driver, OptimizingCompilerStats* stats, const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer, - StackHandleScopeCollection* handles) { + PassObserver* pass_observer) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + ArenaAllocator* arena = graph->GetArena(); HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); @@ -469,7 +502,7 @@ static void RunOptimizations(HGraph* graph, HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction); ReferenceTypePropagation* type_propagation = - new (arena) ReferenceTypePropagation(graph, handles); + new (arena) ReferenceTypePropagation(graph, &handles); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_after_types"); @@ -492,7 +525,7 @@ static void RunOptimizations(HGraph* graph, RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); - MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); + MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); // TODO: Update passes incompatible with try/catch so we have the same // pipeline for all methods. @@ -532,6 +565,7 @@ static void RunOptimizations(HGraph* graph, } RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer); + AllocateRegisters(graph, codegen, pass_observer); } // The stack map we generate must be 4-byte aligned on ARM. 
Since existing @@ -545,22 +579,6 @@ static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) { return ArrayRef<const uint8_t>(vector); } -NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects. -static void AllocateRegisters(HGraph* graph, - CodeGenerator* codegen, - PassObserver* pass_observer) { - PrepareForRegisterAllocation(graph).Run(); - SsaLivenessAnalysis liveness(graph, codegen); - { - PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); - liveness.Analyze(); - } - { - PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); - RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters(); - } -} - static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); @@ -574,74 +592,42 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) return linker_patches; } -CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - soa.Self()->TransitionFromRunnableToSuspended(kNative); - RunOptimizations(graph, - codegen, - compiler_driver, - compilation_stats_.get(), - dex_compilation_unit, - pass_observer, - &handles); - - AllocateRegisters(graph, codegen, pass_observer); - - ArenaAllocator* arena = graph->GetArena(); - CodeVectorAllocator allocator(arena); - DefaultSrcMap src_mapping_table; - codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo() - ? &src_mapping_table - : nullptr); - codegen->CompileOptimized(&allocator); - +CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + CodeGenerator* codegen, + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); - codegen->BuildStackMaps(&stack_map); + stack_map.resize(codegen->ComputeStackMapsSize()); + codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size())); MaybeRecordStat(MethodCompilationStat::kCompiledOptimized); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), + ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(src_mapping_table), + ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), ArrayRef<const uint8_t>(), // mapping_table. ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(), // native_gc_map. 
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>(linker_patches)); - pass_observer->DumpDisassembly(); - soa.Self()->TransitionFromSuspendedToRunnable(); return compiled_method; } -CompiledMethod* OptimizingCompiler::CompileBaseline( +CompiledMethod* OptimizingCompiler::EmitBaseline( + ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) const { - ArenaAllocator* arena = codegen->GetGraph()->GetArena(); - CodeVectorAllocator allocator(arena); - DefaultSrcMap src_mapping_table; - codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo() - ? &src_mapping_table - : nullptr); - codegen->CompileBaseline(&allocator); - + CompilerDriver* compiler_driver) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps)); @@ -649,37 +635,38 @@ CompiledMethod* OptimizingCompiler::CompileBaseline( ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps)); codegen->BuildVMapTable(&vmap_table); ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps)); - codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); + codegen->BuildNativeGCMap(&gc_map, *compiler_driver); MaybeRecordStat(MethodCompilationStat::kCompiledBaseline); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, codegen->GetInstructionSet(), - ArrayRef<const uint8_t>(allocator.GetMemory()), + ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is // considered "empty" (see the definition of // art::CodeGenerator::HasEmptyFrame). codegen->HasEmptyFrame() ? 
0 : codegen->GetFrameSize(), codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), - ArrayRef<const SrcMapElem>(src_mapping_table), + ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()), AlignVectorSize(mapping_table), AlignVectorSize(vmap_table), AlignVectorSize(gc_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const LinkerPatch>(linker_patches)); - pass_observer->DumpDisassembly(); return compiled_method; } -CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - jobject class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const { +CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, + CodeVectorAllocator* code_allocator, + const DexFile::CodeItem* code_item, + uint32_t access_flags, + InvokeType invoke_type, + uint16_t class_def_idx, + uint32_t method_idx, + jobject class_loader, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const { std::string method_name = PrettyMethod(method_idx, dex_file); MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); @@ -721,13 +708,10 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite && compiler_driver->RequiresConstructorBarrier(Thread::Current(), dex_compilation_unit.GetDexFile(), dex_compilation_unit.GetClassDefIndex()); - ArenaAllocator arena(Runtime::Current()->GetArenaPool()); - HGraph* graph = new (&arena) HGraph( - &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), + HGraph* graph = new (arena) HGraph( + arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable()); - bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_; - std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, instruction_set, @@ -779,16 +763,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } } - bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set); - - // `run_optimizations_` is set explicitly (either through a compiler filter - // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back - // to Quick. 
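[Editorial aside.] The net effect of the rewrite that follows: TryCompile now only builds the graph, optionally runs SSA conversion, optimizations and register allocation, and generates code into the caller's CodeVectorAllocator, returning the CodeGenerator; the caller then picks the emitter. A hedged fragment of that shape, assuming the ART tree and mirroring the rewritten Compile() in the hunks below:

// Fragment only; parameters are those of Compile() below.
ArenaAllocator arena(Runtime::Current()->GetArenaPool());
CodeVectorAllocator code_allocator(&arena);
std::unique_ptr<CodeGenerator> codegen(
    TryCompile(&arena, &code_allocator, code_item, access_flags, invoke_type,
               class_def_idx, method_idx, jclass_loader, dex_file, dex_cache));
CompiledMethod* method = nullptr;
if (codegen != nullptr) {
  method = run_optimizations_
      ? EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver)
      : EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
}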
- bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit(); - CompiledMethod* compiled_method = nullptr; - if (run_optimizations_ && can_allocate_registers) { - VLOG(compiler) << "Optimizing " << method_name; - + VLOG(compiler) << "Optimizing " << method_name; + if (run_optimizations_) { { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); if (!graph->TryBuildingSsa()) { @@ -800,37 +776,26 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite } } - compiled_method = CompileOptimized(graph, - codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); - } else if (shouldOptimize && can_allocate_registers) { - LOG(FATAL) << "Could not allocate registers in optimizing compiler"; - UNREACHABLE(); - } else if (can_use_baseline) { - VLOG(compiler) << "Compile baseline " << method_name; - - if (!run_optimizations_) { - MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled); - } else if (!can_allocate_registers) { - MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator); - } - - compiled_method = CompileBaseline(codegen.get(), - compiler_driver, - dex_compilation_unit, - &pass_observer); + RunOptimizations(graph, + codegen.get(), + compiler_driver, + compilation_stats_.get(), + dex_compilation_unit, + &pass_observer); + codegen->CompileOptimized(code_allocator); + } else { + codegen->CompileBaseline(code_allocator); } + pass_observer.DumpDisassembly(); if (kArenaAllocatorCountAllocations) { - if (arena.BytesAllocated() > 4 * MB) { - MemStats mem_stats(arena.GetMemStats()); + if (arena->BytesAllocated() > 4 * MB) { + MemStats mem_stats(arena->GetMemStats()); LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats); } } - return compiled_method; + return codegen.release(); } static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) { @@ -852,26 +817,37 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, Handle<mirror::DexCache> dex_cache) const { CompilerDriver* compiler_driver = GetCompilerDriver(); CompiledMethod* method = nullptr; - if (Runtime::Current()->IsAotCompiler()) { - const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); - DCHECK(!verified_method->HasRuntimeThrow()); - if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) - || CanHandleVerificationFailure(verified_method)) { - method = TryCompile(code_item, access_flags, invoke_type, class_def_idx, - method_idx, jclass_loader, dex_file, dex_cache); - } else { - if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + DCHECK(Runtime::Current()->IsAotCompiler()); + const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); + DCHECK(!verified_method->HasRuntimeThrow()); + if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) + || CanHandleVerificationFailure(verified_method)) { + ArenaAllocator arena(Runtime::Current()->GetArenaPool()); + CodeVectorAllocator code_allocator(&arena); + std::unique_ptr<CodeGenerator> codegen( + TryCompile(&arena, + &code_allocator, + code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + jclass_loader, + dex_file, + dex_cache)); + if (codegen.get() != nullptr) { + if (run_optimizations_) { + method = EmitOptimized(&arena, &code_allocator, codegen.get(), 
                                          compiler_driver);
         } else {
-          MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+          method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
         }
       }
     } else {
-      // This is for the JIT compiler, which has already ensured the class is verified.
-      // We can go straight to compiling.
-      DCHECK(Runtime::Current()->UseJit());
-      method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
-                          method_idx, jclass_loader, dex_file, dex_cache);
+      if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+        MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+      } else {
+        MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+      }
     }
 
     if (kIsDebugBuild &&
@@ -896,4 +872,70 @@ bool IsCompilingWithCoreImage() {
   return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art");
 }
 
+bool OptimizingCompiler::JitCompile(Thread* self,
+                                    jit::JitCodeCache* code_cache,
+                                    ArtMethod* method) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      method->GetDeclaringClass()->GetClassLoader()));
+  Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+
+  jobject jclass_loader = class_loader.ToJObject();
+  const DexFile* dex_file = method->GetDexFile();
+  const uint16_t class_def_idx = method->GetClassDefIndex();
+  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+  const uint32_t method_idx = method->GetDexMethodIndex();
+  const uint32_t access_flags = method->GetAccessFlags();
+  const InvokeType invoke_type = method->GetInvokeType();
+
+  ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+  CodeVectorAllocator code_allocator(&arena);
+  std::unique_ptr<CodeGenerator> codegen;
+  {
+    // Go to native so that we don't block GC during compilation.
+    ScopedThreadSuspension sts(self, kNative);
+
+    DCHECK(run_optimizations_);
+    codegen.reset(
+        TryCompile(&arena,
+                   &code_allocator,
+                   code_item,
+                   access_flags,
+                   invoke_type,
+                   class_def_idx,
+                   method_idx,
+                   jclass_loader,
+                   *dex_file,
+                   dex_cache));
+    if (codegen.get() == nullptr) {
+      return false;
+    }
+  }
+
+  size_t stack_map_size = codegen->ComputeStackMapsSize();
+  uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size);
+  if (stack_map_data == nullptr) {
+    return false;
+  }
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+  const void* code = code_cache->CommitCode(
+      self,
+      method,
+      nullptr,
+      stack_map_data,
+      nullptr,
+      codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+      codegen->GetCoreSpillMask(),
+      codegen->GetFpuSpillMask(),
+      code_allocator.GetMemory().data(),
+      code_allocator.GetSize());
+
+  if (code == nullptr) {
+    code_cache->ClearData(self, stack_map_data);
+    return false;
+  }
+
+  return true;
+}
+
 } // namespace art
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 00e8995bff..ba2525e555 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -117,14 +117,6 @@ void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm)
   Emit(encoding);
 }
 
-void Mips64Assembler::Add(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x20);
-}
-
-void Mips64Assembler::Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
-  EmitI(0x8, rs, rt, imm16);
-}
-
 void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
@@ -141,10 +133,6 @@ void Mips64Assembler::Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x19, rs, rt, imm16);
 }
 
-void Mips64Assembler::Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, rd, 0, 0x22);
-}
-
 void Mips64Assembler::Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x23);
 }
@@ -153,50 +141,14 @@ void Mips64Assembler::Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x2f);
 }
 
-void Mips64Assembler::MultR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x18);
-}
-
-void Mips64Assembler::MultuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x19);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1a);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rs, GpuRegister rt) {
-  EmitR(0, rs, rt, static_cast<GpuRegister>(0), 0, 0x1b);
-}
-
-void Mips64Assembler::MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  EmitR(0x1c, rs, rt, rd, 0, 2);
-}
-
-void Mips64Assembler::DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivR2(rs, rt);
-  Mfhi(rd);
-}
-
-void Mips64Assembler::DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mflo(rd);
-}
-
-void Mips64Assembler::ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
-  DivuR2(rs, rt);
-  Mfhi(rd);
-}
-
 void Mips64Assembler::MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x18);
 }
 
+void Mips64Assembler::MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x18);
+}
+
 void Mips64Assembler::DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1a);
 }
@@ -217,6 +169,10 @@ void Mips64Assembler::Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1c);
 }
 
+void Mips64Assembler::Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
+  EmitR(0, rs, rt, rd, 3, 0x1c);
+}
+
 void Mips64Assembler::Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 2, 0x1e);
 }
@@ -440,14 +396,6 @@ void Mips64Assembler::Sync(uint32_t stype) {
         static_cast<GpuRegister>(0), stype & 0x1f, 0xf);
 }
 
-void Mips64Assembler::Mfhi(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x10);
-}
-
-void Mips64Assembler::Mflo(GpuRegister rd) {
-  EmitR(0, static_cast<GpuRegister>(0), static_cast<GpuRegister>(0), rd, 0, 0x12);
-}
-
 void Mips64Assembler::Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
   EmitI(0x28, rs, rt, imm16);
 }
@@ -892,45 +840,58 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) {
   } else if ((value & 0xFFFF) == 0 && ((value >> 31) & 0x1FFFF) == ((0x20000 - bit31) & 0x1FFFF)) {
     Lui(rd, value >> 16);
     Dati(rd, (value >> 48) + bit31);
+  } else if (IsPowerOfTwo(value + UINT64_C(1))) {
+    int shift_cnt = 64 - CTZ(value + UINT64_C(1));
+    Daddiu(rd, ZERO, -1);
+    if (shift_cnt < 32) {
+      Dsrl(rd, rd, shift_cnt);
+    } else {
+      Dsrl32(rd, rd, shift_cnt & 31);
+    }
   } else {
     int shift_cnt = CTZ(value);
     int64_t tmp = value >> shift_cnt;
     if (IsUint<16>(tmp)) {
       Ori(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<16>(tmp)) {
       Daddiu(rd, ZERO, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else if (IsInt<32>(tmp)) {
       // Loads with 3 instructions.
       Lui(rd, tmp >> 16);
       Ori(rd, rd, tmp);
-      if (shift_cnt < 32)
+      if (shift_cnt < 32) {
         Dsll(rd, rd, shift_cnt);
-      else
+      } else {
         Dsll32(rd, rd, shift_cnt & 31);
+      }
     } else {
       shift_cnt = 16 + CTZ(value >> 16);
       tmp = value >> shift_cnt;
       if (IsUint<16>(tmp)) {
         Ori(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else if (IsInt<16>(tmp)) {
         Daddiu(rd, ZERO, tmp);
-        if (shift_cnt < 32)
+        if (shift_cnt < 32) {
           Dsll(rd, rd, shift_cnt);
-        else
+        } else {
           Dsll32(rd, rd, shift_cnt & 31);
+        }
         Ori(rd, rd, value);
       } else {
         // Loads with 3-4 instructions.
@@ -941,10 +902,11 @@ void Mips64Assembler::LoadConst64(GpuRegister rd, int64_t value) {
           used_lui = true;
         }
         if ((tmp2 & 0xFFFF) != 0) {
-          if (used_lui)
+          if (used_lui) {
             Ori(rd, rd, tmp2);
-          else
+          } else {
             Ori(rd, ZERO, tmp2);
+          }
         }
         if (bit31) {
           tmp2 += UINT64_C(0x100000000);
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 33f22d2c2d..42962bca20 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -66,35 +66,25 @@ class Mips64Assembler FINAL : public Assembler {
   virtual ~Mips64Assembler() {}
 
   // Emit Machine Instructions.
-  void Add(GpuRegister rd, GpuRegister rs, GpuRegister rt);
-  void Addi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Addiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Daddu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
   void Daddiu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
-  void Sub(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Subu(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Dsubu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
-  void MultR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MultuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rs, GpuRegister rt);  // R2
-  void MulR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void DivuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void ModuR2(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R2
-  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // R6
-  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
-  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64 R6
+  void MulR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void MuhR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void DivuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void ModuR6(GpuRegister rd, GpuRegister rs, GpuRegister rt);
+  void Dmul(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmuh(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddiv(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmod(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Ddivu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
+  void Dmodu(GpuRegister rd, GpuRegister rs, GpuRegister rt);  // MIPS64
 
   void And(GpuRegister rd, GpuRegister rs, GpuRegister rt);
   void Andi(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -104,12 +94,12 @@ class Mips64Assembler FINAL : public Assembler {
   void Xori(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Nor(GpuRegister rd, GpuRegister rs, GpuRegister rt);
 
-  void Bitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Dbitswap(GpuRegister rd, GpuRegister rt);  // R6
-  void Seb(GpuRegister rd, GpuRegister rt);  // R2+
-  void Seh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dsbh(GpuRegister rd, GpuRegister rt);  // R2+
-  void Dshd(GpuRegister rd, GpuRegister rt);  // R2+
+  void Bitswap(GpuRegister rd, GpuRegister rt);
+  void Dbitswap(GpuRegister rd, GpuRegister rt);
+  void Seb(GpuRegister rd, GpuRegister rt);
+  void Seh(GpuRegister rd, GpuRegister rt);
+  void Dsbh(GpuRegister rd, GpuRegister rt);
+  void Dshd(GpuRegister rd, GpuRegister rt);
   void Dext(GpuRegister rs, GpuRegister rt, int pos, int size_less_one);  // MIPS64
   void Wsbh(GpuRegister rd, GpuRegister rt);
   void Sc(GpuRegister rt, GpuRegister base, int16_t imm9 = 0);
@@ -146,11 +136,9 @@ class Mips64Assembler FINAL : public Assembler {
   void Lhu(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Lwu(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
-  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
-  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64 R6
+  void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
+  void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
-  void Mfhi(GpuRegister rd);  // R2
-  void Mflo(GpuRegister rd);  // R2
 
   void Sb(GpuRegister rt, GpuRegister rs, uint16_t imm16);
   void Sh(GpuRegister rt, GpuRegister rs, uint16_t imm16);
@@ -175,21 +163,21 @@ class Mips64Assembler FINAL : public Assembler {
   void Jalr(GpuRegister rd, GpuRegister rs);
   void Jalr(GpuRegister rs);
   void Jr(GpuRegister rs);
-  void Auipc(GpuRegister rs, uint16_t imm16);  // R6
-  void Jic(GpuRegister rt, uint16_t imm16);  // R6
-  void Jialc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bltzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgtzc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Blezc(GpuRegister rt, uint16_t imm16);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);  // R6
-  void Beqzc(GpuRegister rs, uint32_t imm21);  // R6
-  void Bnezc(GpuRegister rs, uint32_t imm21);  // R6
+  void Auipc(GpuRegister rs, uint16_t imm16);
+  void Jic(GpuRegister rt, uint16_t imm16);
+  void Jialc(GpuRegister rt, uint16_t imm16);
+  void Bltc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bltzc(GpuRegister rt, uint16_t imm16);
+  void Bgtzc(GpuRegister rt, uint16_t imm16);
+  void Bgec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgezc(GpuRegister rt, uint16_t imm16);
+  void Blezc(GpuRegister rt, uint16_t imm16);
+  void Bltuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqc(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16);
+  void Beqzc(GpuRegister rs, uint32_t imm21);
+  void Bnezc(GpuRegister rs, uint32_t imm21);
 
   void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
   void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -259,25 +247,25 @@ class Mips64Assembler FINAL : public Assembler {
   void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value, GpuRegister rtmp = AT);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
-  void Bind(Label* label) OVERRIDE;  // R6
+  void Bind(Label* label) OVERRIDE;
   void Jump(Label* label) OVERRIDE {
     B(label);
   }
-  void B(Label* label);  // R6
-  void Jalr(Label* label, GpuRegister indirect_reg = RA);  // R6
+  void B(Label* label);
+  void Jalr(Label* label, GpuRegister indirect_reg = RA);
   // TODO: implement common for R6 and non-R6 interface for conditional branches?
-  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bltzc(GpuRegister rt, Label* label);  // R6
-  void Bgtzc(GpuRegister rt, Label* label);  // R6
-  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgezc(GpuRegister rt, Label* label);  // R6
-  void Blezc(GpuRegister rt, Label* label);  // R6
-  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);  // R6
-  void Beqzc(GpuRegister rs, Label* label);  // R6
-  void Bnezc(GpuRegister rs, Label* label);  // R6
+  void Bltc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bltzc(GpuRegister rt, Label* label);
+  void Bgtzc(GpuRegister rt, Label* label);
+  void Bgec(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bgezc(GpuRegister rt, Label* label);
+  void Blezc(GpuRegister rt, Label* label);
+  void Bltuc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bgeuc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Beqc(GpuRegister rs, GpuRegister rt, Label* label);
+  void Bnec(GpuRegister rs, GpuRegister rt, Label* label);
+  void Beqzc(GpuRegister rs, Label* label);
+  void Bnezc(GpuRegister rs, Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
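Note on the LoadConst64 change above: the new IsPowerOfTwo(value + 1) branch
handles constants that are a contiguous run of n low one-bits (value = 2^n - 1,
e.g. 0xFF or 0xFFFFFFFF). Daddiu rd, ZERO, -1 sign-extends the immediate to all
64 ones, and a logical right shift by 64 - n (Dsrl, or Dsrl32 for shifts of 32
or more) discards the surplus bits, giving a two-instruction load. Below is a
minimal standalone C++ sketch of that arithmetic, written for this summary;
IsPowerOfTwo and CTZ are re-implemented here with compiler builtins and are not
the ART utilities of the same name.

// Demonstrates the 2^n - 1 special case added to LoadConst64.
#include <cstdint>
#include <cstdio>

static bool IsPowerOfTwo(uint64_t x) { return x != 0 && (x & (x - 1)) == 0; }
static int CTZ(uint64_t x) { return __builtin_ctzll(x); }  // x must be non-zero

// Simulates: Daddiu(rd, ZERO, -1); then Dsrl/Dsrl32(rd, rd, shift_cnt);
static uint64_t LoadOnesRun(uint64_t value) {
  int shift_cnt = 64 - CTZ(value + UINT64_C(1));  // value = 2^n - 1 => shift by 64 - n
  uint64_t rd = ~UINT64_C(0);                     // Daddiu rd, ZERO, -1: all ones
  return rd >> shift_cnt;                         // logical right shift
}

int main() {
  const uint64_t tests[] = {0xFF, 0xFFFF, 0xFFFFFFFF, 0x7FFFFFFFFFFFFFFF};
  for (uint64_t v : tests) {
    if (IsPowerOfTwo(v + 1)) {
      // Each value round-trips to itself in two simulated instructions.
      std::printf("%#llx -> %#llx\n",
                  (unsigned long long) v, (unsigned long long) LoadOnesRun(v));
    }
  }
  return 0;
}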
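Note on the new MuhR6/Dmuh emitters: on MIPS64R6 the high half of a product
comes from a separate muh/dmuh instruction that writes a general-purpose
register directly, which is what lets the R2-era Mult/Mfhi/Mflo helpers be
removed. Judging from the EmitR(...) calls in the diff, the operands follow the
standard MIPS R-type layout, with mul vs. muh distinguished only by the value
in the shamt position (2 vs. 3) under the same funct (0x18, or 0x1c for the
64-bit forms). A hedged sketch of that packing follows; EncodeR is illustrative
and is not the actual Mips64Assembler::EmitR.

#include <cstdint>

// Standard MIPS R-type word: opcode(6) rs(5) rt(5) rd(5) shamt(5) funct(6).
static uint32_t EncodeR(int opcode, int rs, int rt, int rd, int shamt, int funct) {
  return (uint32_t)(opcode & 0x3F) << 26 |
         (uint32_t)(rs & 0x1F) << 21 |
         (uint32_t)(rt & 0x1F) << 16 |
         (uint32_t)(rd & 0x1F) << 11 |
         (uint32_t)(shamt & 0x1F) << 6 |
         (uint32_t)(funct & 0x3F);
}

// With $v0 = 2, $a0 = 4, $a1 = 5:
//   mul  $v0, $a0, $a1  ->  EncodeR(0, 4, 5, 2, 2, 0x18)  (low 32 bits)
//   muh  $v0, $a0, $a1  ->  EncodeR(0, 4, 5, 2, 3, 0x18)  (high 32 bits)
//   dmuh $v0, $a0, $a1  ->  EncodeR(0, 4, 5, 2, 3, 0x1C)  (high 64 bits)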