Diffstat (limited to 'compiler')
117 files changed, 6446 insertions, 3236 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index bdd9a84433..458973684e 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -92,7 +92,6 @@ LIBART_COMPILER_SRC_FILES := \
 	optimizing/parallel_move_resolver.cc \
 	optimizing/pc_relative_fixups_x86.cc \
 	optimizing/prepare_for_register_allocation.cc \
-	optimizing/primitive_type_propagation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
 	optimizing/sharpening.cc \
@@ -109,7 +108,8 @@ LIBART_COMPILER_SRC_FILES := \
 	elf_writer_debug.cc \
 	elf_writer_quick.cc \
 	image_writer.cc \
-	oat_writer.cc
+	oat_writer.cc \
+	profile_assistant.cc
 
 LIBART_COMPILER_SRC_FILES_arm := \
 	dex/quick/arm/assemble_arm.cc \
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index c7c190793c..b5fd1e074f 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -208,7 +208,8 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSe
                                             false,
                                             timer_.get(),
                                             -1,
-                                            ""));
+                                            /* dex_to_oat_map */ nullptr,
+                                            /* profile_compilation_info */ nullptr));
   // We typically don't generate an image in unit tests, disable this optimization by default.
   compiler_driver_->SetSupportBootImageFixup(false);
 }
diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index 3b88021361..0130ef481a 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc
@@ -448,6 +448,10 @@ bool Arm64Mir2Lir::GenInlinedRint(CallInfo* info) {
 }
 
 bool Arm64Mir2Lir::GenInlinedRound(CallInfo* info, bool is_double) {
+  // b/26327751.
+  if ((true)) {
+    return false;
+  }
   int32_t encoded_imm = EncodeImmSingle(bit_cast<uint32_t, float>(0.5f));
   A64Opcode wide = (is_double) ? WIDE(0) : UNWIDE(0);
   RegLocation rl_src = info->args[0];
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index f48947d537..32d751861a 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -22,6 +22,7 @@
 #include "base/macros.h"
 #include "base/mutex-inl.h"
 #include "dex/compiler_ir.h"
+#include "driver/compiler_driver.h"
 #include "thread-inl.h"
 #include "dex/mir_graph.h"
 #include "dex/quick/mir_to_lir.h"
@@ -777,6 +778,17 @@ bool DexFileMethodInliner::GenSpecial(Mir2Lir* backend, uint32_t method_idx) {
 
 bool DexFileMethodInliner::GenInline(MIRGraph* mir_graph, BasicBlock* bb, MIR* invoke,
                                      uint32_t method_idx) {
+  // Check that we're allowed to inline.
+  {
+    CompilationUnit* cu = mir_graph->GetCurrentDexCompilationUnit()->GetCompilationUnit();
+    if (!cu->compiler_driver->MayInline(dex_file_, cu->dex_file)) {
+      VLOG(compiler) << "Won't inline " << method_idx << " in "
+                     << cu->dex_file->GetLocation() << " from "
+                     << dex_file_->GetLocation();
+      return false;
+    }
+  }
+
   InlineMethod method;
   {
     ReaderMutexLock mu(Thread::Current(), lock_);
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index 24daf2f15f..12568a4ad4 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -58,6 +58,7 @@ class QuickCFITest : public CFITest {
       CompilerOptions::kDefaultNumDexMethodsThreshold,
       CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
+      nullptr,
       false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       false,
@@ -74,9 +75,25 @@ class QuickCFITest : public CFITest {
     std::unique_ptr<const InstructionSetFeatures> isa_features;
     std::string error;
     isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
-    CompilerDriver driver(&compiler_options, &verification_results, &method_inliner_map,
-                          Compiler::kQuick, isa, isa_features.get(),
-                          false, nullptr, nullptr, nullptr, 0, false, false, "", false, 0, -1, "");
+    CompilerDriver driver(&compiler_options,
+                          &verification_results,
+                          &method_inliner_map,
+                          Compiler::kQuick,
+                          isa,
+                          isa_features.get(),
+                          false,
+                          nullptr,
+                          nullptr,
+                          nullptr,
+                          0,
+                          false,
+                          false,
+                          "",
+                          false,
+                          0,
+                          -1,
+                          nullptr,
+                          nullptr);
     ClassLinker* linker = nullptr;
     CompilationUnit cu(&pool, isa, &driver, linker);
     DexFile::CodeItem code_item { 0, 0, 0, 0, 0, 0, { 0 } };  // NOLINT
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e5d3841b14..1c2a619020 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,6 +508,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
   { kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
   { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
   { kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
+  { kX86LockAdd32MI8, kMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
 
   EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index e977ebf722..b39fe4da4f 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -41,6 +41,7 @@ class QuickAssembleX86TestBase : public testing::Test {
       CompilerOptions::kDefaultNumDexMethodsThreshold,
       CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
+      nullptr,
       false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       false,
@@ -72,7 +73,8 @@ class QuickAssembleX86TestBase : public testing::Test {
         false,
         0,
         -1,
-        ""));
+        nullptr,
+        nullptr));
     cu_.reset(new CompilationUnit(pool_.get(), isa_, compiler_driver_.get(), nullptr));
     DexFile::CodeItem* code_item = static_cast<DexFile::CodeItem*>(
         cu_->arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 75f3fef599..4ff79935d7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
 #include <inttypes.h>
 #include <string>
 
-#include "arch/instruction_set_features.h"
+#include "arch/x86/instruction_set_features_x86.h"
 #include "art_method.h"
 #include "backend_x86.h"
 #include "base/logging.h"
@@ -585,6 +585,8 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
     case kX86LockCmpxchgAR:
     case kX86LockCmpxchg64M:
     case kX86LockCmpxchg64A:
+    case kX86LockCmpxchg64AR:
+    case kX86LockAdd32MI8:
     case kX86XchgMR:
     case kX86Mfence:
       // Atomic memory instructions provide full barrier.
@@ -598,7 +600,9 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
 }
 
 bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
+  const X86InstructionSetFeatures* features =
+      cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+  if (!features->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -610,20 +614,34 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
+  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
+  bool use_locked_add = features->PrefersLockedAddSynchronization();
   if (barrier_kind == kAnyAny) {
-    // If no LIR exists already that can be used a barrier, then generate an mfence.
+    // If no LIR exists already that can be used a barrier, then generate a barrier.
     if (mem_barrier == nullptr) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
-    // If last instruction does not provide full barrier, then insert an mfence.
+    // If last instruction does not provide full barrier, then insert a barrier.
     if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
   } else if (barrier_kind == kNTStoreStore) {
-    mem_barrier = NewLIR0(kX86Sfence);
+    if (use_locked_add) {
+      mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+    } else {
+      mem_barrier = NewLIR0(kX86Sfence);
+    }
     ret = true;
   }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d6a6a60d3d..8cd6574443 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,6 +606,7 @@ enum X86OpCode {
                      // load-from-memory and store-to-memory instructions
   kX86Sfence,        // memory barrier to serialize all previous
                      // store-to-memory instructions
+  kX86LockAdd32MI8,  // locked add used to serialize memory instructions
   Binary0fOpCode(kX86Imul16),   // 16bit multiply
   Binary0fOpCode(kX86Imul32),   // 32bit multiply
   Binary0fOpCode(kX86Imul64),   // 64bit multiply
diff --git a/compiler/dex/type_inference_test.cc b/compiler/dex/type_inference_test.cc
index 528a18cc99..e2c0d32f97 100644
--- a/compiler/dex/type_inference_test.cc
+++ b/compiler/dex/type_inference_test.cc
@@ -253,7 +253,7 @@ class TypeInferenceTest : public testing::Test {
         &cu_, cu_.class_loader, cu_.class_linker, *cu_.dex_file, nullptr /* code_item not used */,
         0u /* class_def_idx not used */, 0u /* method_index not used */, cu_.access_flags,
         nullptr /* verified_method not used */,
-        NullHandle<mirror::DexCache>()));
+        ScopedNullHandle<mirror::DexCache>()));
     cu_.mir_graph->current_method_ = 0u;
     code_item_ = static_cast<DexFile::CodeItem*>(
         cu_.arena.Alloc(sizeof(DexFile::CodeItem), kArenaAllocMisc));
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index c6dbd24bf8..f18fa67ea5 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -45,7 +45,8 @@ TEST(CompiledMethodStorage, Deduplicate) {
                         false,
                         nullptr,
                         -1,
-                        "");
+                        nullptr,
+                        nullptr);
   CompiledMethodStorage* storage = driver.GetCompiledMethodStorage();
 
   ASSERT_TRUE(storage->DedupeEnabled());  // The default.
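The x86 changes above let GenMemBarrier emit a locked add of zero to the top of the stack instead of mfence/sfence when the CPU reports PrefersLockedAddSynchronization(): a locked read-modify-write of a dummy stack slot is fully serializing on x86 and is cheaper than mfence on many microarchitectures. A minimal standalone sketch of the same technique, assuming GCC-style inline assembly (the function name is illustrative, not part of ART):

```cpp
// Full memory barrier via "lock add $0" to the top of the stack.
// The LOCK prefix makes the read-modify-write globally ordered, which on
// x86 also orders all earlier loads and stores -- the same guarantee that
// mfence provides, usually at lower cost.
inline void LockedAddBarrier() {
#if defined(__x86_64__)
  __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");
#elif defined(__i386__)
  __asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory", "cc");
#else
  __sync_synchronize();  // Fallback full barrier on other architectures.
#endif
}
```

The stack pointer is used as the dummy location because its cache line is almost certainly owned by the current core, so the locked access stays cheap.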
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 56839f85f9..043bd93bd7 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -334,19 +334,21 @@ class CompilerDriver::AOTCompilationStats {
   DISALLOW_COPY_AND_ASSIGN(AOTCompilationStats);
 };
 
-CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
-                               VerificationResults* verification_results,
-                               DexFileToMethodInlinerMap* method_inliner_map,
-                               Compiler::Kind compiler_kind,
-                               InstructionSet instruction_set,
-                               const InstructionSetFeatures* instruction_set_features,
-                               bool boot_image, std::unordered_set<std::string>* image_classes,
-                               std::unordered_set<std::string>* compiled_classes,
-                               std::unordered_set<std::string>* compiled_methods,
-                               size_t thread_count, bool dump_stats, bool dump_passes,
-                               const std::string& dump_cfg_file_name, bool dump_cfg_append,
-                               CumulativeLogger* timer, int swap_fd,
-                               const std::string& profile_file)
+CompilerDriver::CompilerDriver(
+    const CompilerOptions* compiler_options,
+    VerificationResults* verification_results,
+    DexFileToMethodInlinerMap* method_inliner_map,
+    Compiler::Kind compiler_kind,
+    InstructionSet instruction_set,
+    const InstructionSetFeatures* instruction_set_features,
+    bool boot_image, std::unordered_set<std::string>* image_classes,
+    std::unordered_set<std::string>* compiled_classes,
+    std::unordered_set<std::string>* compiled_methods,
+    size_t thread_count, bool dump_stats, bool dump_passes,
+    const std::string& dump_cfg_file_name, bool dump_cfg_append,
+    CumulativeLogger* timer, int swap_fd,
+    const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+    const ProfileCompilationInfo* profile_compilation_info)
     : compiler_options_(compiler_options),
       verification_results_(verification_results),
       method_inliner_map_(method_inliner_map),
@@ -374,7 +376,9 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
       compiler_context_(nullptr),
       support_boot_image_fixup_(instruction_set != kMips && instruction_set != kMips64),
       dex_files_for_oat_file_(nullptr),
-      compiled_method_storage_(swap_fd) {
+      dex_file_oat_filename_map_(dex_to_oat_map),
+      compiled_method_storage_(swap_fd),
+      profile_compilation_info_(profile_compilation_info) {
   DCHECK(compiler_options_ != nullptr);
   DCHECK(verification_results_ != nullptr);
   DCHECK(method_inliner_map_ != nullptr);
@@ -382,12 +386,6 @@ CompilerDriver::CompilerDriver(const CompilerOptions* compiler_options,
   compiler_->Init();
 
   CHECK_EQ(boot_image_, image_classes_.get() != nullptr);
-
-  // Read the profile file if one is provided.
-  if (!profile_file.empty()) {
-    profile_compilation_info_.reset(new ProfileCompilationInfo(profile_file));
-    LOG(INFO) << "Using profile data from file " << profile_file;
-  }
 }
 
 CompilerDriver::~CompilerDriver() {
@@ -898,8 +896,10 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) {
                                                          *dex_file,
                                                          Runtime::Current()->GetLinearAlloc())));
       Handle<mirror::Class> klass(hs2.NewHandle(
-          class_linker->ResolveType(*dex_file, exception_type_idx, dex_cache,
-                                    NullHandle<mirror::ClassLoader>())));
+          class_linker->ResolveType(*dex_file,
+                                    exception_type_idx,
+                                    dex_cache,
+                                    ScopedNullHandle<mirror::ClassLoader>())));
       if (klass.Get() == nullptr) {
         const DexFile::TypeId& type_id = dex_file->GetTypeId(exception_type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
@@ -1536,6 +1536,12 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType
       use_dex_cache = true;
     }
   }
+  if (!use_dex_cache && IsBootImage()) {
+    if (!AreInSameOatFile(&(const_cast<mirror::Class*>(referrer_class)->GetDexFile()),
+                          &declaring_class->GetDexFile())) {
+      use_dex_cache = true;
+    }
+  }
   // The method is defined not within this dex file. We need a dex cache slot within the current
   // dex file or direct pointers.
   bool must_use_direct_pointers = false;
@@ -1569,12 +1575,14 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType
         *type = sharp_type;
       }
     } else {
-      auto* image_space = heap->GetBootImageSpace();
       bool method_in_image = false;
-      if (image_space != nullptr) {
+      const std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+      for (gc::space::ImageSpace* image_space : image_spaces) {
         const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
-        method_in_image = method_section.Contains(
-            reinterpret_cast<uint8_t*>(method) - image_space->Begin());
+        if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
+          method_in_image = true;
+          break;
+        }
       }
       if (method_in_image || compiling_boot || runtime->UseJit()) {
         // We know we must be able to get to the method in the image, so use that pointer.
@@ -2293,15 +2301,11 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
                              ThreadPool* thread_pool, TimingLogger* timings) {
-  if (profile_compilation_info_ != nullptr) {
-    if (!profile_compilation_info_->Load(dex_files)) {
-      LOG(WARNING) << "Failed to load offline profile info from "
-                   << profile_compilation_info_->GetFilename()
-                   << ". No methods will be compiled";
-    } else if (kDebugProfileGuidedCompilation) {
-      LOG(INFO) << "[ProfileGuidedCompilation] "
-                << profile_compilation_info_->DumpInfo();
-    }
+  if (kDebugProfileGuidedCompilation) {
+    LOG(INFO) << "[ProfileGuidedCompilation] " <<
+        ((profile_compilation_info_ == nullptr)
+            ? "null"
+            : profile_compilation_info_->DumpInfo(&dex_files));
   }
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
@@ -2570,4 +2574,15 @@ bool CompilerDriver::IsStringInit(uint32_t method_index, const DexFile* dex_file
   return inliner->IsStringInitMethodIndex(method_index);
 }
 
+bool CompilerDriver::MayInlineInternal(const DexFile* inlined_from,
+                                       const DexFile* inlined_into) const {
+  // We're not allowed to inline across dex files if we're the no-inline-from dex file.
+  if (inlined_from != inlined_into &&
+      compiler_options_->GetNoInlineFromDexFile() == inlined_from) {
+    return false;
+  }
+
+  return true;
+}
+
 }  // namespace art
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index f0360ceffb..3847c8183e 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -97,7 +97,8 @@ class CompilerDriver {
                  size_t thread_count, bool dump_stats, bool dump_passes,
                  const std::string& dump_cfg_file_name, bool dump_cfg_append,
                  CumulativeLogger* timer, int swap_fd,
-                 const std::string& profile_file);
+                 const std::unordered_map<const DexFile*, const char*>* dex_to_oat_map,
+                 const ProfileCompilationInfo* profile_compilation_info);
 
   ~CompilerDriver();
 
@@ -113,6 +114,18 @@ class CompilerDriver {
         : ArrayRef<const DexFile* const>();
   }
 
+  // Are the given dex files compiled into the same oat file? Should only be called after
+  // GetDexFilesForOatFile, as the conservative answer (when we don't have a map) is true.
+  bool AreInSameOatFile(const DexFile* d1, const DexFile* d2) {
+    if (dex_file_oat_filename_map_ == nullptr) {
+      // TODO: Check for this wrt/ apps and boot image calls.
+      return true;
+    }
+    auto it1 = dex_file_oat_filename_map_->find(d1);
+    auto it2 = dex_file_oat_filename_map_->find(d2);
+    return it1 == it2;
+  }
+
   void CompileAll(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
                   TimingLogger* timings)
@@ -471,6 +484,13 @@ class CompilerDriver {
   bool CanAssumeClassIsLoaded(mirror::Class* klass)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
+  bool MayInline(const DexFile* inlined_from, const DexFile* inlined_into) const {
+    if (!kIsTargetBuild) {
+      return MayInlineInternal(inlined_from, inlined_into);
+    }
+    return true;
+  }
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -587,6 +607,8 @@ class CompilerDriver {
                ThreadPool* thread_pool, TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
 
+  bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -621,9 +643,8 @@ class CompilerDriver {
 
   const bool boot_image_;
 
-  // If image_ is true, specifies the classes that will be included in
-  // the image. Note if image_classes_ is null, all classes are
-  // included in the image.
+  // If image_ is true, specifies the classes that will be included in the image.
+  // Note if image_classes_ is null, all classes are included in the image.
   std::unique_ptr<std::unordered_set<std::string>> image_classes_;
 
   // Specifies the classes that will be compiled. Note that if classes_to_compile_ is null,
@@ -636,9 +657,6 @@ class CompilerDriver {
   // This option may be restricted to the boot image, depending on a flag in the implementation.
   std::unique_ptr<std::unordered_set<std::string>> methods_to_compile_;
 
-  // Info for profile guided compilation.
-  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info_;
-
   bool had_hard_verifier_failure_;
 
   size_t thread_count_;
@@ -663,8 +681,14 @@ class CompilerDriver {
   // List of dex files that will be stored in the oat file.
   const std::vector<const DexFile*>* dex_files_for_oat_file_;
 
+  // Map from dex files to the oat file (name) they will be compiled into.
+  const std::unordered_map<const DexFile*, const char*>* dex_file_oat_filename_map_;
+
   CompiledMethodStorage compiled_method_storage_;
 
+  // Info for profile guided compilation.
+  const ProfileCompilationInfo* const profile_compilation_info_;
+
   friend class CompileClassVisitor;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 86f8b823cc..82c0e86b25 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -142,16 +142,21 @@ TEST_F(CompilerDriverTest, DISABLED_LARGE_CompileDexLibCore) {
   // TODO: check that all Method::GetCode() values are non-null
 }
 
-TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
+TEST_F(CompilerDriverTest, DISABLED_AbstractMethodErrorStub) {
   TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_QUICK();
   TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
-    CompileVirtualMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Class", "isFinalizable",
+    CompileVirtualMethod(ScopedNullHandle<mirror::ClassLoader>(),
+                         "java.lang.Class",
+                         "isFinalizable",
                          "()Z");
-    CompileDirectMethod(NullHandle<mirror::ClassLoader>(), "java.lang.Object", "<init>", "()V");
+    CompileDirectMethod(ScopedNullHandle<mirror::ClassLoader>(),
+                        "java.lang.Object",
+                        "<init>",
+                        "()V");
     class_loader = LoadDex("AbstractMethod");
   }
   ASSERT_TRUE(class_loader != nullptr);
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 4d2d9246df..385f34a9f9 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -31,9 +31,11 @@ CompilerOptions::CompilerOptions()
       num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
       inline_depth_limit_(kUnsetInlineDepthLimit),
       inline_max_code_units_(kUnsetInlineMaxCodeUnits),
+      no_inline_from_(nullptr),
       include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
+      native_debuggable_(kDefaultNativeDebuggable),
       generate_debug_info_(kDefaultGenerateDebugInfo),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
@@ -58,6 +60,7 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
                                  size_t num_dex_methods_threshold,
                                  size_t inline_depth_limit,
                                  size_t inline_max_code_units,
+                                 const DexFile* no_inline_from,
                                  bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
@@ -78,9 +81,11 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
       num_dex_methods_threshold_(num_dex_methods_threshold),
       inline_depth_limit_(inline_depth_limit),
       inline_max_code_units_(inline_max_code_units),
+      no_inline_from_(no_inline_from),
       include_patch_information_(include_patch_information),
       top_k_profile_threshold_(top_k_profile_threshold),
       debuggable_(debuggable),
+      native_debuggable_(kDefaultNativeDebuggable),
       generate_debug_info_(generate_debug_info),
       implicit_null_checks_(implicit_null_checks),
       implicit_so_checks_(implicit_so_checks),
@@ -206,7 +211,9 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
     generate_debug_info_ = false;
   } else if (option == "--debuggable") {
     debuggable_ = true;
-    generate_debug_info_ = true;
+  } else if (option == "--native-debuggable") {
+    native_debuggable_ = true;
+    debuggable_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
   } else if (option == "--include-patch-information") {
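The `no_inline_from` option added above feeds `CompilerDriver::MayInlineInternal`, which vetoes inlining whenever the callee comes from the designated no-inline-from dex file and the caller does not. A minimal self-contained sketch of that check, using simplified stand-in types (illustrative only, not ART's real classes):

```cpp
#include <iostream>

// Simplified stand-ins for DexFile / CompilerOptions; illustration only.
struct DexFile { const char* location; };

struct Options {
  const DexFile* no_inline_from = nullptr;  // Populated from a compiler flag.
};

// Mirrors the shape of CompilerDriver::MayInlineInternal: cross-dex inlining
// is rejected when the callee's dex file is the no-inline-from file.
bool MayInline(const Options& opts, const DexFile* from, const DexFile* into) {
  if (from != into && opts.no_inline_from == from) {
    return false;
  }
  return true;
}

int main() {
  DexFile core{"core-oj.jar"};
  DexFile app{"app.apk"};
  Options opts;
  opts.no_inline_from = &core;
  std::cout << MayInline(opts, &core, &app) << "\n";  // 0: vetoed.
  std::cout << MayInline(opts, &app, &app) << "\n";   // 1: same dex file.
}
```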
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index e6acab42f2..f14bdc4a2f 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -49,10 +49,11 @@ class CompilerOptions FINAL {
   static const size_t kDefaultTinyMethodThreshold = 20;
   static const size_t kDefaultNumDexMethodsThreshold = 900;
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
-  static const bool kDefaultGenerateDebugInfo = kIsDebugBuild;
+  static const bool kDefaultNativeDebuggable = false;
+  static const bool kDefaultGenerateDebugInfo = false;
   static const bool kDefaultIncludePatchInformation = false;
   static const size_t kDefaultInlineDepthLimit = 3;
-  static const size_t kDefaultInlineMaxCodeUnits = 20;
+  static const size_t kDefaultInlineMaxCodeUnits = 32;
   static constexpr size_t kUnsetInlineDepthLimit = -1;
   static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
 
@@ -71,6 +72,7 @@ class CompilerOptions FINAL {
                   size_t num_dex_methods_threshold,
                   size_t inline_depth_limit,
                   size_t inline_max_code_units,
+                  const DexFile* no_inline_from,
                   bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
@@ -162,6 +164,10 @@ class CompilerOptions FINAL {
     return debuggable_;
   }
 
+  bool GetNativeDebuggable() const {
+    return native_debuggable_;
+  }
+
   bool GetGenerateDebugInfo() const {
     return generate_debug_info_;
   }
@@ -212,6 +218,10 @@ class CompilerOptions FINAL {
     return abort_on_hard_verifier_failure_;
   }
 
+  const DexFile* GetNoInlineFromDexFile() const {
+    return no_inline_from_;
+  }
+
   bool ParseCompilerOption(const StringPiece& option, UsageFn Usage);
 
  private:
@@ -236,10 +246,15 @@ class CompilerOptions FINAL {
   size_t num_dex_methods_threshold_;
   size_t inline_depth_limit_;
   size_t inline_max_code_units_;
+
+  // A dex file from which we should not inline code.
+  const DexFile* no_inline_from_;
+
   bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool debuggable_;
+  bool native_debuggable_;
   bool generate_debug_info_;
   bool implicit_null_checks_;
   bool implicit_so_checks_;
diff --git a/compiler/dwarf/method_debug_info.h b/compiler/dwarf/method_debug_info.h
index a391e4d08a..e8ba9148e8 100644
--- a/compiler/dwarf/method_debug_info.h
+++ b/compiler/dwarf/method_debug_info.h
@@ -30,8 +30,8 @@ struct MethodDebugInfo {
   uint32_t access_flags_;
   const DexFile::CodeItem* code_item_;
   bool deduped_;
-  uint32_t low_pc_;
-  uint32_t high_pc_;
+  uintptr_t low_pc_;
+  uintptr_t high_pc_;
   CompiledMethod* compiled_method_;
 };
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index bb07cc2913..a7461a5525 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -148,6 +148,12 @@ class ElfBuilder FINAL {
       }
     }
 
+    // Returns true if the section was written to disk.
+    // (Used to check whether we have .text when writing JIT debug info)
+    bool Exists() const {
+      return finished_;
+    }
+
     // Get the location of this section in virtual memory.
     Elf_Addr GetAddress() const {
      CHECK(started_);
@@ -247,16 +253,18 @@ class ElfBuilder FINAL {
     }
 
     // Buffer symbol for this section. It will be written later.
+    // If the symbol's section is null, it will be considered absolute (SHN_ABS).
+    // (we use this in JIT to reference code which is stored outside the debug ELF file)
     void Add(Elf_Word name, const Section* section, Elf_Addr addr,
              bool is_relative, Elf_Word size, uint8_t binding, uint8_t type,
              uint8_t other = 0) {
-      CHECK(section != nullptr);
       Elf_Sym sym = Elf_Sym();
       sym.st_name = name;
       sym.st_value = addr + (is_relative ? section->GetAddress() : 0);
       sym.st_size = size;
       sym.st_other = other;
-      sym.st_shndx = section->GetSectionIndex();
+      sym.st_shndx = (section != nullptr ? section->GetSectionIndex()
+                                         : static_cast<Elf_Word>(SHN_ABS));
       sym.st_info = (binding << 4) + (type & 0xf);
 
       symbols_.push_back(sym);
     }
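Dropping the `CHECK(section != nullptr)` lets callers emit absolute (`SHN_ABS`) symbols, which the JIT needs when the debug ELF file describes code that lives outside the file itself. A small sketch of the distinction using the standard `<elf.h>` definitions (an illustrative helper, not ART's builder API):

```cpp
#include <elf.h>
#include <cstring>

// Fill a symbol either relative to a section or as an absolute address.
// With SHN_ABS the debugger takes st_value verbatim, so the debug ELF can
// name JIT-compiled code mapped elsewhere in the process.
Elf32_Sym MakeSymbol(Elf32_Word name_offset, Elf32_Addr addr, Elf32_Word size,
                     Elf32_Half section_index /* a real index, or SHN_ABS */) {
  Elf32_Sym sym;
  std::memset(&sym, 0, sizeof(sym));
  sym.st_name = name_offset;
  sym.st_value = addr;
  sym.st_size = size;
  sym.st_shndx = section_index;
  sym.st_info = ELF32_ST_INFO(STB_GLOBAL, STT_FUNC);
  return sym;
}
```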
diff --git a/compiler/elf_writer_debug.cc b/compiler/elf_writer_debug.cc
index 06553a6d62..dd50f69b71 100644
--- a/compiler/elf_writer_debug.cc
+++ b/compiler/elf_writer_debug.cc
@@ -22,20 +22,33 @@
 #include "base/casts.h"
 #include "base/stl_util.h"
 #include "compiled_method.h"
-#include "driver/compiler_driver.h"
 #include "dex_file-inl.h"
+#include "driver/compiler_driver.h"
 #include "dwarf/dedup_vector.h"
 #include "dwarf/headers.h"
 #include "dwarf/method_debug_info.h"
 #include "dwarf/register.h"
 #include "elf_builder.h"
+#include "linker/vector_output_stream.h"
+#include "mirror/array.h"
+#include "mirror/class-inl.h"
+#include "mirror/class.h"
 #include "oat_writer.h"
-#include "utils.h"
 #include "stack_map.h"
+#include "utils.h"
 
 namespace art {
 namespace dwarf {
 
+// The ARM specification defines three special mapping symbols
+// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
+// These symbols can be used by tools, for example, to pretty
+// print instructions correctly. Objdump will use them if they
+// exist, but it will still work well without them.
+// However, these extra symbols take space, so let's just generate
+// one symbol which marks the whole .text section as code.
+constexpr bool kGenerateSingleArmMappingSymbol = true;
+
 static Reg GetDwarfCoreReg(InstructionSet isa, int machine_reg) {
   switch (isa) {
     case kArm:
@@ -207,10 +220,13 @@ template<typename ElfTypes>
 void WriteCFISection(ElfBuilder<ElfTypes>* builder,
                      const ArrayRef<const MethodDebugInfo>& method_infos,
                      CFIFormat format) {
-  CHECK(format == dwarf::DW_DEBUG_FRAME_FORMAT ||
-        format == dwarf::DW_EH_FRAME_FORMAT);
+  CHECK(format == DW_DEBUG_FRAME_FORMAT || format == DW_EH_FRAME_FORMAT);
   typedef typename ElfTypes::Addr Elf_Addr;
 
+  if (method_infos.empty()) {
+    return;
+  }
+
   std::vector<uint32_t> binary_search_table;
   std::vector<uintptr_t> patch_locations;
   if (format == DW_EH_FRAME_FORMAT) {
@@ -220,13 +236,15 @@ void WriteCFISection(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .eh_frame/.debug_frame section.
-  auto* cfi_section = (format == dwarf::DW_DEBUG_FRAME_FORMAT
+  auto* cfi_section = (format == DW_DEBUG_FRAME_FORMAT
                        ? builder->GetDebugFrame()
                        : builder->GetEhFrame());
   {
     cfi_section->Start();
     const bool is64bit = Is64BitInstructionSet(builder->GetIsa());
-    const Elf_Addr text_address = builder->GetText()->GetAddress();
+    const Elf_Addr text_address = builder->GetText()->Exists()
+        ? builder->GetText()->GetAddress()
+        : 0;
     const Elf_Addr cfi_address = cfi_section->GetAddress();
     const Elf_Addr cie_address = cfi_address;
     Elf_Addr buffer_address = cfi_address;
@@ -297,8 +315,8 @@ namespace {
 struct CompilationUnit {
   std::vector<const MethodDebugInfo*> methods_;
   size_t debug_line_offset_ = 0;
-  uint32_t low_pc_ = 0xFFFFFFFFU;
-  uint32_t high_pc_ = 0;
+  uintptr_t low_pc_ = std::numeric_limits<uintptr_t>::max();
+  uintptr_t high_pc_ = 0;
 };
 
 typedef std::vector<DexFile::LocalInfo> LocalInfos;
@@ -431,14 +449,17 @@ class DebugInfoWriter {
 
   void Write(const CompilationUnit& compilation_unit) {
     CHECK(!compilation_unit.methods_.empty());
-    const Elf_Addr text_address = owner_->builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = owner_->builder_->GetText()->Exists()
+        ? owner_->builder_->GetText()->GetAddress()
+        : 0;
+    const uintptr_t cu_size = compilation_unit.high_pc_ - compilation_unit.low_pc_;
 
     info_.StartTag(DW_TAG_compile_unit);
     info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
     info_.WriteData1(DW_AT_language, DW_LANG_Java);
     info_.WriteStrp(DW_AT_comp_dir, owner_->WriteString("$JAVA_SRC_ROOT"));
     info_.WriteAddr(DW_AT_low_pc, text_address + compilation_unit.low_pc_);
-    info_.WriteUdata(DW_AT_high_pc, compilation_unit.high_pc_ - compilation_unit.low_pc_);
+    info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(cu_size));
     info_.WriteSecOffset(DW_AT_stmt_list, compilation_unit.debug_line_offset_);
 
     const char* last_dex_class_desc = nullptr;
@@ -456,8 +477,16 @@ class DebugInfoWriter {
         if (last_dex_class_desc != nullptr) {
           EndClassTag(last_dex_class_desc);
         }
-        size_t offset = StartClassTag(dex_class_desc);
-        type_cache_.emplace(dex_class_desc, offset);
+        // Write reference tag for the class we are about to declare.
+        size_t reference_tag_offset = info_.StartTag(DW_TAG_reference_type);
+        type_cache_.emplace(std::string(dex_class_desc), reference_tag_offset);
+        size_t type_attrib_offset = info_.size();
+        info_.WriteRef4(DW_AT_type, 0);
+        info_.EndTag();
+        // Declare the class that owns this method.
+        size_t class_offset = StartClassTag(dex_class_desc);
+        info_.UpdateUint32(type_attrib_offset, class_offset);
+        info_.WriteFlag(DW_AT_declaration, true);
         // Check that each class is defined only once.
         bool unique = owner_->defined_dex_classes_.insert(dex_class_desc).second;
         CHECK(unique) << "Redefinition of " << dex_class_desc;
@@ -468,7 +497,7 @@ class DebugInfoWriter {
       info_.StartTag(DW_TAG_subprogram);
       WriteName(dex->GetMethodName(dex_method));
       info_.WriteAddr(DW_AT_low_pc, text_address + mi->low_pc_);
-      info_.WriteUdata(DW_AT_high_pc, mi->high_pc_ - mi->low_pc_);
+      info_.WriteUdata(DW_AT_high_pc, dchecked_integral_cast<uint32_t>(mi->high_pc_-mi->low_pc_));
       uint8_t frame_base[] = { DW_OP_call_frame_cfa };
       info_.WriteExprLoc(DW_AT_frame_base, &frame_base, sizeof(frame_base));
       WriteLazyType(dex->GetReturnTypeDescriptor(dex_proto));
@@ -554,6 +583,92 @@ class DebugInfoWriter {
     owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
   }
 
+  void Write(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    info_.StartTag(DW_TAG_compile_unit);
+    info_.WriteStrp(DW_AT_producer, owner_->WriteString("Android dex2oat"));
+    info_.WriteData1(DW_AT_language, DW_LANG_Java);
+
+    for (mirror::Class* type : types) {
+      if (type->IsPrimitive()) {
+        // For primitive types the definition and the declaration is the same.
+        if (type->GetPrimitiveType() != Primitive::kPrimVoid) {
+          WriteTypeDeclaration(type->GetDescriptor(nullptr));
+        }
+      } else if (type->IsArrayClass()) {
+        mirror::Class* element_type = type->GetComponentType();
+        uint32_t component_size = type->GetComponentSize();
+        uint32_t data_offset = mirror::Array::DataOffset(component_size).Uint32Value();
+        uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+        info_.StartTag(DW_TAG_array_type);
+        std::string descriptor_string;
+        WriteLazyType(element_type->GetDescriptor(&descriptor_string));
+        info_.WriteUdata(DW_AT_data_member_location, data_offset);
+        info_.StartTag(DW_TAG_subrange_type);
+        DCHECK_LT(length_offset, 32u);
+        uint8_t count[] = {
+          DW_OP_push_object_address,
+          static_cast<uint8_t>(DW_OP_lit0 + length_offset),
+          DW_OP_plus,
+          DW_OP_deref_size,
+          4  // Array length is always 32-bit wide.
+        };
+        info_.WriteExprLoc(DW_AT_count, &count, sizeof(count));
+        info_.EndTag();  // DW_TAG_subrange_type.
+        info_.EndTag();  // DW_TAG_array_type.
+      } else {
+        std::string descriptor_string;
+        const char* desc = type->GetDescriptor(&descriptor_string);
+        StartClassTag(desc);
+
+        if (!type->IsVariableSize()) {
+          info_.WriteUdata(DW_AT_byte_size, type->GetObjectSize());
+        }
+
+        // Base class.
+        mirror::Class* base_class = type->GetSuperClass();
+        if (base_class != nullptr) {
+          info_.StartTag(DW_TAG_inheritance);
+          WriteLazyType(base_class->GetDescriptor(&descriptor_string));
+          info_.WriteUdata(DW_AT_data_member_location, 0);
+          info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          info_.EndTag();  // DW_TAG_inheritance.
+        }
+
+        // Member variables.
+        for (uint32_t i = 0, count = type->NumInstanceFields(); i < count; ++i) {
+          ArtField* field = type->GetInstanceField(i);
+          info_.StartTag(DW_TAG_member);
+          WriteName(field->GetName());
+          WriteLazyType(field->GetTypeDescriptor());
+          info_.WriteUdata(DW_AT_data_member_location, field->GetOffset().Uint32Value());
+          uint32_t access_flags = field->GetAccessFlags();
+          if (access_flags & kAccPublic) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_public);
+          } else if (access_flags & kAccProtected) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_protected);
+          } else if (access_flags & kAccPrivate) {
+            info_.WriteSdata(DW_AT_accessibility, DW_ACCESS_private);
+          }
+          info_.EndTag();  // DW_TAG_member.
+        }
+
+        EndClassTag(desc);
+      }
+    }
+
+    CHECK_EQ(info_.Depth(), 1);
+    FinishLazyTypes();
+    info_.EndTag();  // DW_TAG_compile_unit.
+    std::vector<uint8_t> buffer;
+    buffer.reserve(info_.data()->size() + KB);
+    const size_t offset = owner_->builder_->GetDebugInfo()->GetSize();
+    const size_t debug_abbrev_offset =
+        owner_->debug_abbrev_.Insert(debug_abbrev_.data(), debug_abbrev_.size());
+    WriteDebugInfoCU(debug_abbrev_offset, info_, offset, &buffer, &owner_->debug_info_patches_);
+    owner_->builder_->GetDebugInfo()->WriteFully(buffer.data(), buffer.size());
+  }
+
   // Write table into .debug_loc which describes location of dex register.
   // The dex register might be valid only at some points and it might
   // move between machine registers and stack.
@@ -707,14 +822,14 @@ class DebugInfoWriter {
   // just define all types lazily at the end of compilation unit.
   void WriteLazyType(const char* type_descriptor) {
     if (type_descriptor != nullptr && type_descriptor[0] != 'V') {
-      lazy_types_.emplace(type_descriptor, info_.size());
+      lazy_types_.emplace(std::string(type_descriptor), info_.size());
       info_.WriteRef4(DW_AT_type, 0);
     }
   }
 
   void FinishLazyTypes() {
     for (const auto& lazy_type : lazy_types_) {
-      info_.UpdateUint32(lazy_type.second, WriteType(lazy_type.first));
+      info_.UpdateUint32(lazy_type.second, WriteTypeDeclaration(lazy_type.first));
     }
     lazy_types_.clear();
   }
@@ -739,30 +854,39 @@ class DebugInfoWriter {
 
   // Convert dex type descriptor to DWARF.
   // Returns offset in the compilation unit.
-  size_t WriteType(const char* desc) {
+  size_t WriteTypeDeclaration(const std::string& desc) {
+    DCHECK(!desc.empty());
     const auto& it = type_cache_.find(desc);
     if (it != type_cache_.end()) {
       return it->second;
     }
 
     size_t offset;
-    if (*desc == 'L') {
+    if (desc[0] == 'L') {
       // Class type. For example: Lpackage/name;
-      offset = StartClassTag(desc);
+      size_t class_offset = StartClassTag(desc.c_str());
       info_.WriteFlag(DW_AT_declaration, true);
-      EndClassTag(desc);
-    } else if (*desc == '[') {
+      EndClassTag(desc.c_str());
+      // Reference to the class type.
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef(DW_AT_type, class_offset);
+      info_.EndTag();
+    } else if (desc[0] == '[') {
       // Array type.
-      size_t element_type = WriteType(desc + 1);
-      offset = info_.StartTag(DW_TAG_array_type);
+      size_t element_type = WriteTypeDeclaration(desc.substr(1));
+      size_t array_type = info_.StartTag(DW_TAG_array_type);
+      info_.WriteFlag(DW_AT_declaration, true);
       info_.WriteRef(DW_AT_type, element_type);
       info_.EndTag();
+      offset = info_.StartTag(DW_TAG_reference_type);
+      info_.WriteRef4(DW_AT_type, array_type);
+      info_.EndTag();
     } else {
       // Primitive types.
       const char* name;
       uint32_t encoding;
       uint32_t byte_size;
-      switch (*desc) {
+      switch (desc[0]) {
         case 'B':
           name = "byte";
           encoding = DW_ATE_signed;
@@ -807,7 +931,7 @@ class DebugInfoWriter {
           LOG(FATAL) << "Void type should not be encoded";
           UNREACHABLE();
         default:
-          LOG(FATAL) << "Unknown dex type descriptor: " << desc;
+          LOG(FATAL) << "Unknown dex type descriptor: \"" << desc << "\"";
           UNREACHABLE();
       }
       offset = info_.StartTag(DW_TAG_base_type);
@@ -857,9 +981,10 @@ class DebugInfoWriter {
   // Temporary buffer to create and store the entries.
   DebugInfoEntryWriter<> info_;
   // Cache of already translated type descriptors.
-  std::map<const char*, size_t, CStringLess> type_cache_;  // type_desc -> definition_offset.
+  std::map<std::string, size_t> type_cache_;  // type_desc -> definition_offset.
   // 32-bit references which need to be resolved to a type later.
-  std::multimap<const char*, size_t, CStringLess> lazy_types_;  // type_desc -> patch_offset.
+  // Given type may be used multiple times. Therefore we need a multimap.
+  std::multimap<std::string, size_t> lazy_types_;  // type_desc -> patch_offset.
 };
 
  public:
@@ -875,6 +1000,11 @@ class DebugInfoWriter {
     writer.Write(compilation_unit);
   }
 
+  void WriteTypes(const ArrayRef<mirror::Class*>& types) SHARED_REQUIRES(Locks::mutator_lock_) {
+    CompilationUnitWriter writer(this);
+    writer.Write(types);
+  }
+
   void End() {
     builder_->GetDebugInfo()->End();
     builder_->WritePatches(".debug_info.oat_patches",
@@ -916,7 +1046,9 @@ class DebugLineWriter {
 
   // Returns the number of bytes written.
   size_t WriteCompilationUnit(CompilationUnit& compilation_unit) {
     const bool is64bit = Is64BitInstructionSet(builder_->GetIsa());
-    const Elf_Addr text_address = builder_->GetText()->GetAddress();
+    const Elf_Addr text_address = builder_->GetText()->Exists()
+        ? builder_->GetText()->GetAddress()
+        : 0;
 
     compilation_unit.debug_line_offset_ = builder_->GetDebugLine()->GetSize();
 
@@ -1094,9 +1226,27 @@ class DebugLineWriter {
   std::vector<uintptr_t> debug_line_patches;
 };
 
+// Get all types loaded by the runtime.
+static std::vector<mirror::Class*> GetLoadedRuntimeTypes() SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<mirror::Class*> result;
+  class CollectClasses : public ClassVisitor {
+   public:
+    virtual bool Visit(mirror::Class* klass) {
+      classes_->push_back(klass);
+      return true;
+    }
+    std::vector<mirror::Class*>* classes_;
+  };
+  CollectClasses visitor;
+  visitor.classes_ = &result;
+  Runtime::Current()->GetClassLinker()->VisitClasses(&visitor);
+  return result;
+}
+
 template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos) {
+static void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
+                               bool write_loaded_runtime_types,
+                               const ArrayRef<const MethodDebugInfo>& method_infos) {
   // Group the methods into compilation units based on source file.
   std::vector<CompilationUnit> compilation_units;
   const char* last_source_file = nullptr;
@@ -1114,7 +1264,7 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .debug_line section.
-  {
+  if (!compilation_units.empty()) {
     DebugLineWriter<ElfTypes> line_writer(builder);
     line_writer.Start();
     for (auto& compilation_unit : compilation_units) {
@@ -1124,31 +1274,170 @@ void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
   }
 
   // Write .debug_info section.
-  {
+  if (!compilation_units.empty() || write_loaded_runtime_types) {
     DebugInfoWriter<ElfTypes> info_writer(builder);
     info_writer.Start();
     for (const auto& compilation_unit : compilation_units) {
       info_writer.WriteCompilationUnit(compilation_unit);
     }
+    if (write_loaded_runtime_types) {
+      Thread* self = Thread::Current();
+      // The lock prevents the classes being moved by the GC.
+      ReaderMutexLock mu(self, *Locks::mutator_lock_);
+      std::vector<mirror::Class*> types = GetLoadedRuntimeTypes();
+      info_writer.WriteTypes(ArrayRef<mirror::Class*>(types.data(), types.size()));
+    }
     info_writer.End();
   }
 }
 
+template <typename ElfTypes>
+void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
+                       const ArrayRef<const MethodDebugInfo>& method_infos) {
+  bool generated_mapping_symbol = false;
+  auto* strtab = builder->GetStrTab();
+  auto* symtab = builder->GetSymTab();
+
+  if (method_infos.empty()) {
+    return;
+  }
+
+  // Find all addresses (low_pc) which contain deduped methods.
+  // The first instance of method is not marked deduped_, but the rest is.
+  std::unordered_set<uint32_t> deduped_addresses;
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      deduped_addresses.insert(info.low_pc_);
+    }
+  }
+
+  strtab->Start();
+  strtab->Write("");  // strtab should start with empty string.
+  for (const MethodDebugInfo& info : method_infos) {
+    if (info.deduped_) {
+      continue;  // Add symbol only for the first instance.
+    }
+    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
+    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
+      name += " [DEDUPED]";
+    }
+
+    const auto* text = builder->GetText()->Exists() ? builder->GetText() : nullptr;
+    const bool is_relative = (text != nullptr);
+    uint32_t low_pc = info.low_pc_;
+    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
+    low_pc += info.compiled_method_->CodeDelta();
+    symtab->Add(strtab->Write(name), text, low_pc,
+                is_relative, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
+
+    // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
+    // instructions, so that disassembler tools can correctly disassemble.
+    // Note that even if we generate just a single mapping symbol, ARM's Streamline
+    // requires it to match function symbol. Just address 0 does not work.
+    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
+      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
+        symtab->Add(strtab->Write("$t"), text, info.low_pc_ & ~1,
+                    is_relative, 0, STB_LOCAL, STT_NOTYPE);
+        generated_mapping_symbol = true;
+      }
+    }
+  }
+  strtab->End();
+
+  // Symbols are buffered and written after names (because they are smaller).
+  // We could also do two passes in this function to avoid the buffering.
+  symtab->Start();
+  symtab->Write();
+  symtab->End();
+}
+
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format) {
+  // Add methods to .symtab.
+  WriteDebugSymbols(builder, method_infos);
+  // Generate CFI (stack unwinding information).
+  WriteCFISection(builder, method_infos, cfi_format);
+  // Write DWARF .debug_* sections.
+  WriteDebugSections(builder, write_loaded_runtime_types, method_infos);
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForMethodInternal(
+    const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+  WriteDebugInfo(builder.get(),
+                 false,
+                 ArrayRef<const MethodDebugInfo>(&method_info, 1),
+                 DW_DEBUG_FRAME_FORMAT);
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info) {
+  const InstructionSet isa = method_info.compiled_method_->GetInstructionSet();
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForMethodInternal<ElfTypes64>(method_info);
+  } else {
+    return WriteDebugElfFileForMethodInternal<ElfTypes32>(method_info);
+  }
+}
+
+template <typename ElfTypes>
+static ArrayRef<const uint8_t> WriteDebugElfFileForClassInternal(const InstructionSet isa,
+                                                                 mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_) {
+  std::vector<uint8_t> buffer;
+  buffer.reserve(KB);
+  VectorOutputStream out("Debug ELF file", &buffer);
+  std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &out));
+  builder->Start();
+
+  DebugInfoWriter<ElfTypes> info_writer(builder.get());
+  info_writer.Start();
+  info_writer.WriteTypes(ArrayRef<mirror::Class*>(&type, 1));
+  info_writer.End();
+
+  builder->End();
+  CHECK(builder->Good());
+  // Make a copy of the buffer. We want to shrink it anyway.
+  uint8_t* result = new uint8_t[buffer.size()];
+  CHECK(result != nullptr);
+  memcpy(result, buffer.data(), buffer.size());
+  return ArrayRef<const uint8_t>(result, buffer.size());
+}
+
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type) {
+  if (Is64BitInstructionSet(isa)) {
+    return WriteDebugElfFileForClassInternal<ElfTypes64>(isa, type);
+  } else {
+    return WriteDebugElfFileForClassInternal<ElfTypes32>(isa, type);
+  }
+}
+
 // Explicit instantiations
-template void WriteCFISection<ElfTypes32>(
+template void WriteDebugInfo<ElfTypes32>(
     ElfBuilder<ElfTypes32>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteCFISection<ElfTypes64>(
+    CFIFormat cfi_format);
+template void WriteDebugInfo<ElfTypes64>(
     ElfBuilder<ElfTypes64>* builder,
+    bool write_loaded_runtime_types,
     const ArrayRef<const MethodDebugInfo>& method_infos,
-    CFIFormat format);
-template void WriteDebugSections<ElfTypes32>(
-    ElfBuilder<ElfTypes32>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
-template void WriteDebugSections<ElfTypes64>(
-    ElfBuilder<ElfTypes64>* builder,
-    const ArrayRef<const MethodDebugInfo>& method_infos);
+    CFIFormat cfi_format);
 
 }  // namespace dwarf
 }  // namespace art
diff --git a/compiler/elf_writer_debug.h b/compiler/elf_writer_debug.h
index 9ed102f91b..91da00f97a 100644
--- a/compiler/elf_writer_debug.h
+++ b/compiler/elf_writer_debug.h
@@ -17,22 +17,29 @@
 #ifndef ART_COMPILER_ELF_WRITER_DEBUG_H_
 #define ART_COMPILER_ELF_WRITER_DEBUG_H_
 
-#include "elf_builder.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "dwarf/dwarf_constants.h"
-#include "oat_writer.h"
+#include "elf_builder.h"
 #include "utils/array_ref.h"
 
 namespace art {
+namespace mirror {
+class Class;
+}
 namespace dwarf {
+struct MethodDebugInfo;
+
+template <typename ElfTypes>
+void WriteDebugInfo(ElfBuilder<ElfTypes>* builder,
+                    bool write_loaded_runtime_types,
+                    const ArrayRef<const MethodDebugInfo>& method_infos,
+                    CFIFormat cfi_format);
 
-template<typename ElfTypes>
-void WriteCFISection(ElfBuilder<ElfTypes>* builder,
-                     const ArrayRef<const MethodDebugInfo>& method_infos,
-                     CFIFormat format);
+ArrayRef<const uint8_t> WriteDebugElfFileForMethod(const dwarf::MethodDebugInfo& method_info);
 
-template<typename ElfTypes>
-void WriteDebugSections(ElfBuilder<ElfTypes>* builder,
-                        const ArrayRef<const MethodDebugInfo>& method_infos);
+ArrayRef<const uint8_t> WriteDebugElfFileForClass(const InstructionSet isa, mirror::Class* type)
+    SHARED_REQUIRES(Locks::mutator_lock_);
 
 }  // namespace dwarf
 }  // namespace art
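The new `WriteDebugElfFileForMethod`/`WriteDebugElfFileForClass` entry points declared above return a freshly allocated in-memory ELF image describing a single JIT-compiled method or class. A hypothetical caller might look like the sketch below; `RegisterWithDebugger` is a stand-in for whatever registration hook (e.g., the GDB JIT interface) consumes the blob, and is not part of this patch:

```cpp
#include "dwarf/method_debug_info.h"  // art::dwarf::MethodDebugInfo.
#include "elf_writer_debug.h"         // art::dwarf::WriteDebugElfFileForMethod.

// Hypothetical registration hook; assumed, not ART API.
void RegisterWithDebugger(const uint8_t* data, size_t size);

// Sketch: build a one-method debug ELF in memory and hand it off.
void OnMethodJitCompiled(const art::dwarf::MethodDebugInfo& info) {
  art::ArrayRef<const uint8_t> elf = art::dwarf::WriteDebugElfFileForMethod(info);
  RegisterWithDebugger(elf.data(), elf.size());
  // Note: the writer heap-allocates the buffer; ownership passes to the caller.
}
```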
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index e411496980..a67f3bd1a9 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -46,15 +46,6 @@ namespace art {
 // Let's use .debug_frame because it is easier to strip or compress.
 constexpr dwarf::CFIFormat kCFIFormat = dwarf::DW_DEBUG_FRAME_FORMAT;
 
-// The ARM specification defines three special mapping symbols
-// $a, $t and $d which mark ARM, Thumb and data ranges respectively.
-// These symbols can be used by tools, for example, to pretty
-// print instructions correctly. Objdump will use them if they
-// exist, but it will still work well without them.
-// However, these extra symbols take space, so let's just generate
-// one symbol which marks the whole .text section as code.
-constexpr bool kGenerateSingleArmMappingSymbol = true;
-
 template <typename ElfTypes>
 class ElfWriterQuick FINAL : public ElfWriter {
  public:
@@ -99,10 +90,6 @@ std::unique_ptr<ElfWriter> CreateElfWriterQuick(InstructionSet instruction_set,
 }
 
 template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos);
-
-template <typename ElfTypes>
 ElfWriterQuick<ElfTypes>::ElfWriterQuick(InstructionSet instruction_set,
                                          const CompilerOptions* compiler_options,
                                          File* elf_file)
@@ -165,14 +152,7 @@ template <typename ElfTypes>
 void ElfWriterQuick<ElfTypes>::WriteDebugInfo(
     const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
   if (compiler_options_->GetGenerateDebugInfo()) {
-    if (!method_infos.empty()) {
-      // Add methods to .symtab.
-      WriteDebugSymbols(builder_.get(), method_infos);
-      // Generate CFI (stack unwinding information).
-      dwarf::WriteCFISection(builder_.get(), method_infos, kCFIFormat);
-      // Write DWARF .debug_* sections.
-      dwarf::WriteDebugSections(builder_.get(), method_infos);
-    }
+    dwarf::WriteDebugInfo(builder_.get(), /* write_types */ true, method_infos, kCFIFormat);
   }
 }
 
@@ -199,64 +179,6 @@ OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
   return builder_->GetStream();
 }
 
-template <typename ElfTypes>
-static void WriteDebugSymbols(ElfBuilder<ElfTypes>* builder,
-                              const ArrayRef<const dwarf::MethodDebugInfo>& method_infos) {
-  bool generated_mapping_symbol = false;
-  auto* strtab = builder->GetStrTab();
-  auto* symtab = builder->GetSymTab();
-
-  if (method_infos.empty()) {
-    return;
-  }
-
-  // Find all addresses (low_pc) which contain deduped methods.
-  // The first instance of method is not marked deduped_, but the rest is.
-  std::unordered_set<uint32_t> deduped_addresses;
-  for (const dwarf::MethodDebugInfo& info : method_infos) {
-    if (info.deduped_) {
-      deduped_addresses.insert(info.low_pc_);
-    }
-  }
-
-  strtab->Start();
-  strtab->Write("");  // strtab should start with empty string.
-  for (const dwarf::MethodDebugInfo& info : method_infos) {
-    if (info.deduped_) {
-      continue;  // Add symbol only for the first instance.
-    }
-    std::string name = PrettyMethod(info.dex_method_index_, *info.dex_file_, true);
-    if (deduped_addresses.find(info.low_pc_) != deduped_addresses.end()) {
-      name += " [DEDUPED]";
-    }
-
-    uint32_t low_pc = info.low_pc_;
-    // Add in code delta, e.g., thumb bit 0 for Thumb2 code.
-    low_pc += info.compiled_method_->CodeDelta();
-    symtab->Add(strtab->Write(name), builder->GetText(), low_pc,
-                true, info.high_pc_ - info.low_pc_, STB_GLOBAL, STT_FUNC);
-
-    // Conforming to aaelf, add $t mapping symbol to indicate start of a sequence of thumb2
-    // instructions, so that disassembler tools can correctly disassemble.
-    // Note that even if we generate just a single mapping symbol, ARM's Streamline
-    // requires it to match function symbol. Just address 0 does not work.
-    if (info.compiled_method_->GetInstructionSet() == kThumb2) {
-      if (!generated_mapping_symbol || !kGenerateSingleArmMappingSymbol) {
-        symtab->Add(strtab->Write("$t"), builder->GetText(), info.low_pc_ & ~1,
-                    true, 0, STB_LOCAL, STT_NOTYPE);
-        generated_mapping_symbol = true;
-      }
-    }
-  }
-  strtab->End();
-
-  // Symbols are buffered and written after names (because they are smaller).
-  // We could also do two passes in this function to avoid the buffering.
-  symtab->Start();
-  symtab->Write();
-  symtab->End();
-}
-
 // Explicit instantiations
 template class ElfWriterQuick<ElfTypes32>;
 template class ElfWriterQuick<ElfTypes64>;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 15812dc3f3..6859605095 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -72,11 +72,18 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
   ScratchFile oat_file(OS::CreateEmptyFile(oat_filename.c_str()));
 
   const uintptr_t requested_image_base = ART_BASE_ADDRESS;
+  std::unordered_map<const DexFile*, const char*> dex_file_to_oat_filename_map;
+  std::vector<const char*> oat_filename_vector(1, oat_filename.c_str());
+  for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
+    dex_file_to_oat_filename_map.emplace(dex_file, oat_filename.c_str());
+  }
   std::unique_ptr<ImageWriter> writer(new ImageWriter(*compiler_driver_,
                                                       requested_image_base,
                                                       /*compile_pic*/false,
                                                       /*compile_app_image*/false,
-                                                      storage_mode));
+                                                      storage_mode,
+                                                      oat_filename_vector,
+                                                      dex_file_to_oat_filename_map));
   // TODO: compile_pic should be a test argument.
   {
     {
@@ -131,12 +138,12 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
     ASSERT_TRUE(dup_oat.get() != nullptr);
 
     {
-      bool success_image = writer->Write(kInvalidImageFd,
-                                         image_file.GetFilename(),
-                                         dup_oat->GetPath(),
-                                         dup_oat->GetPath());
+      std::vector<const char*> dup_oat_filename(1, dup_oat->GetPath().c_str());
+      std::vector<const char*> dup_image_filename(1, image_file.GetFilename().c_str());
+      bool success_image = writer->Write(kInvalidImageFd, dup_image_filename, dup_oat_filename);
       ASSERT_TRUE(success_image);
-      bool success_fixup = ElfWriter::Fixup(dup_oat.get(), writer->GetOatDataBegin());
+      bool success_fixup = ElfWriter::Fixup(dup_oat.get(),
+                                            writer->GetOatDataBegin(dup_oat_filename[0]));
       ASSERT_TRUE(success_fixup);
 
       ASSERT_EQ(dup_oat->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
@@ -181,7 +188,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
   java_lang_dex_file_ = nullptr;
   MemMap::Init();
 
-  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileName().c_str()));
+  std::unique_ptr<const DexFile> dex(LoadExpectSingleDexFile(GetLibCoreDexFileNames()[0].c_str()));
 
   RuntimeOptions options;
   std::string image("-Ximage:");
@@ -203,10 +210,11 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
   class_linker_ = runtime_->GetClassLinker();
 
   gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_TRUE(heap->HasImageSpace());
+  ASSERT_TRUE(heap->HasBootImageSpace());
   ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
 
-  gc::space::ImageSpace* image_space = heap->GetBootImageSpace();
+  // We loaded the runtime with an explicit image, so it must exist.
+  gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[0];
   ASSERT_TRUE(image_space != nullptr);
   if (storage_mode == ImageHeader::kStorageModeUncompressed) {
     // Uncompressed, image should be smaller than file.
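The image_writer.cc changes below replace the single cached `boot_image_space_` with loops over all boot image spaces, so membership becomes a per-space interval test. A reduced sketch of that pattern with simplified stand-in types (illustration only):

```cpp
#include <cstdint>
#include <vector>

// Simplified stand-in for an image space: a [begin, begin + size) byte range.
struct ImageSpace {
  const uint8_t* begin;
  size_t image_size;  // Includes the ArtMethod/ArtField sections.
};

// Mirrors the shape of ImageWriter::IsInBootImage after the change: an object
// is in the boot image iff it falls inside any of the mapped image spaces.
bool IsInBootImage(const std::vector<ImageSpace>& spaces, const void* obj) {
  const uint8_t* p = static_cast<const uint8_t*>(obj);
  for (const ImageSpace& space : spaces) {
    const uint8_t* end = space.begin + space.image_size;
    if (space.begin <= p && p < end) {
      return true;
    }
  }
  return false;
}
```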
bool ImageWriter::IsInBootImage(const void* obj) const { + gc::Heap* const heap = Runtime::Current()->GetHeap(); if (!compile_app_image_) { - DCHECK(boot_image_space_ == nullptr); + DCHECK(heap->GetBootImageSpaces().empty()); return false; } - const uint8_t* image_begin = boot_image_space_->Begin(); - // Real image end including ArtMethods and ArtField sections. - const uint8_t* image_end = image_begin + boot_image_space_->GetImageHeader().GetImageSize(); - return image_begin <= obj && obj < image_end; + for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) { + const uint8_t* image_begin = boot_image_space->Begin(); + // Real image end including ArtMethods and ArtField sections. + const uint8_t* image_end = image_begin + boot_image_space->GetImageHeader().GetImageSize(); + if (image_begin <= obj && obj < image_end) { + return true; + } + } + return false; } bool ImageWriter::IsInBootOatFile(const void* ptr) const { + gc::Heap* const heap = Runtime::Current()->GetHeap(); if (!compile_app_image_) { - DCHECK(boot_image_space_ == nullptr); + DCHECK(heap->GetBootImageSpaces().empty()); return false; } - const ImageHeader& image_header = boot_image_space_->GetImageHeader(); - return image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd(); + for (gc::space::ImageSpace* boot_image_space : heap->GetBootImageSpaces()) { + const ImageHeader& image_header = boot_image_space->GetImageHeader(); + if (image_header.GetOatFileBegin() <= ptr && ptr < image_header.GetOatFileEnd()) { + return true; + } + } + return false; } static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED) @@ -109,14 +121,6 @@ static void CheckNoDexObjects() { bool ImageWriter::PrepareImageAddressSpace() { target_ptr_size_ = InstructionSetPointerSize(compiler_driver_.GetInstructionSet()); gc::Heap* const heap = Runtime::Current()->GetHeap(); - // Cache boot image space. 
- for (gc::space::ContinuousSpace* space : heap->GetContinuousSpaces()) { - if (space->IsImageSpace()) { - CHECK(compile_app_image_); - CHECK(boot_image_space_ == nullptr) << "Multiple image spaces"; - boot_image_space_ = space->AsImageSpace(); - } - } { ScopedObjectAccess soa(Thread::Current()); PruneNonImageClasses(); // Remove junk @@ -154,147 +158,171 @@ bool ImageWriter::PrepareImageAddressSpace() { } bool ImageWriter::Write(int image_fd, - const std::string& image_filename, - const std::string& oat_filename, - const std::string& oat_location) { - CHECK(!image_filename.empty()); - - std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename.c_str())); - if (oat_file.get() == nullptr) { - PLOG(ERROR) << "Failed to open oat file " << oat_filename << " for " << oat_location; - return false; - } - std::string error_msg; - oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_location, nullptr, &error_msg); - if (oat_file_ == nullptr) { - PLOG(ERROR) << "Failed to open writable oat file " << oat_filename << " for " << oat_location - << ": " << error_msg; - oat_file->Erase(); - return false; - } - Runtime::Current()->GetOatFileManager().RegisterOatFile( + const std::vector<const char*>& image_filenames, + const std::vector<const char*>& oat_filenames) { + CHECK(!image_filenames.empty()); + CHECK(!oat_filenames.empty()); + CHECK_EQ(image_filenames.size(), oat_filenames.size()); + + size_t oat_file_offset = 0; + + for (size_t i = 0; i < oat_filenames.size(); ++i) { + const char* oat_filename = oat_filenames[i]; + std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename)); + if (oat_file.get() == nullptr) { + PLOG(ERROR) << "Failed to open oat file " << oat_filename; + return false; + } + std::string error_msg; + oat_file_ = OatFile::OpenReadable(oat_file.get(), oat_filename, nullptr, &error_msg); + if (oat_file_ == nullptr) { + PLOG(ERROR) << "Failed to open writable oat file " << oat_filename; + oat_file->Erase(); + return false; + } + Runtime::Current()->GetOatFileManager().RegisterOatFile( std::unique_ptr<const OatFile>(oat_file_)); - const OatHeader& oat_header = oat_file_->GetOatHeader(); - oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] = - oat_header.GetInterpreterToInterpreterBridgeOffset(); - oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] = - oat_header.GetInterpreterToCompiledCodeBridgeOffset(); - oat_address_offsets_[kOatAddressJNIDlsymLookup] = - oat_header.GetJniDlsymLookupOffset(); - oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] = - oat_header.GetQuickGenericJniTrampolineOffset(); - oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] = - oat_header.GetQuickImtConflictTrampolineOffset(); - oat_address_offsets_[kOatAddressQuickResolutionTrampoline] = - oat_header.GetQuickResolutionTrampolineOffset(); - oat_address_offsets_[kOatAddressQuickToInterpreterBridge] = - oat_header.GetQuickToInterpreterBridgeOffset(); + const OatHeader& oat_header = oat_file_->GetOatHeader(); + ImageInfo& image_info = GetImageInfo(oat_filename); + + size_t oat_loaded_size = 0; + size_t oat_data_offset = 0; + ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset); + + DCHECK_EQ(image_info.oat_offset_, oat_file_offset); + oat_file_offset += oat_loaded_size; + + if (i == 0) { + // Primary oat file, read the trampolines. 
+ image_info.oat_address_offsets_[kOatAddressInterpreterToInterpreterBridge] = + oat_header.GetInterpreterToInterpreterBridgeOffset(); + image_info.oat_address_offsets_[kOatAddressInterpreterToCompiledCodeBridge] = + oat_header.GetInterpreterToCompiledCodeBridgeOffset(); + image_info.oat_address_offsets_[kOatAddressJNIDlsymLookup] = + oat_header.GetJniDlsymLookupOffset(); + image_info.oat_address_offsets_[kOatAddressQuickGenericJNITrampoline] = + oat_header.GetQuickGenericJniTrampolineOffset(); + image_info.oat_address_offsets_[kOatAddressQuickIMTConflictTrampoline] = + oat_header.GetQuickImtConflictTrampolineOffset(); + image_info.oat_address_offsets_[kOatAddressQuickResolutionTrampoline] = + oat_header.GetQuickResolutionTrampolineOffset(); + image_info.oat_address_offsets_[kOatAddressQuickToInterpreterBridge] = + oat_header.GetQuickToInterpreterBridgeOffset(); + } + - size_t oat_loaded_size = 0; - size_t oat_data_offset = 0; - ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset); + { + ScopedObjectAccess soa(Thread::Current()); + CreateHeader(oat_loaded_size, oat_data_offset); + CopyAndFixupNativeData(); + } + + SetOatChecksumFromElfFile(oat_file.get()); + + if (oat_file->FlushCloseOrErase() != 0) { + LOG(ERROR) << "Failed to flush and close oat file " << oat_filename; + return false; + } + } { - ScopedObjectAccess soa(Thread::Current()); - CreateHeader(oat_loaded_size, oat_data_offset); - CopyAndFixupNativeData(); // TODO: heap validation can't handle these fix up passes. + ScopedObjectAccess soa(Thread::Current()); Runtime::Current()->GetHeap()->DisableObjectValidation(); CopyAndFixupObjects(); } - SetOatChecksumFromElfFile(oat_file.get()); - - if (oat_file->FlushCloseOrErase() != 0) { - LOG(ERROR) << "Failed to flush and close oat file " << oat_filename << " for " << oat_location; - return false; - } - std::unique_ptr<File> image_file; - if (image_fd != kInvalidImageFd) { - image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage)); - } else { - image_file.reset(OS::CreateEmptyFile(image_filename.c_str())); - } - if (image_file == nullptr) { - LOG(ERROR) << "Failed to open image file " << image_filename; - return false; - } - if (fchmod(image_file->Fd(), 0644) != 0) { - PLOG(ERROR) << "Failed to make image file world readable: " << image_filename; - image_file->Erase(); - return EXIT_FAILURE; - } - - std::unique_ptr<char[]> compressed_data; - // Image data size excludes the bitmap and the header. 
- ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); - const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader); - char* image_data = reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader); - size_t data_size; - const char* image_data_to_write; - - CHECK_EQ(image_header->storage_mode_, image_storage_mode_); - switch (image_storage_mode_) { - case ImageHeader::kStorageModeLZ4: { - size_t compressed_max_size = LZ4_compressBound(image_data_size); - compressed_data.reset(new char[compressed_max_size]); - data_size = LZ4_compress( - reinterpret_cast<char*>(image_->Begin()) + sizeof(ImageHeader), - &compressed_data[0], - image_data_size); - image_data_to_write = &compressed_data[0]; - VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size; - break; + for (size_t i = 0; i < image_filenames.size(); ++i) { + const char* image_filename = image_filenames[i]; + const char* oat_filename = oat_filenames[i]; + ImageInfo& image_info = GetImageInfo(oat_filename); + std::unique_ptr<File> image_file; + if (image_fd != kInvalidImageFd) { + image_file.reset(new File(image_fd, image_filename, unix_file::kCheckSafeUsage)); + } else { + image_file.reset(OS::CreateEmptyFile(image_filename)); } - case ImageHeader::kStorageModeUncompressed: { - data_size = image_data_size; - image_data_to_write = image_data; - break; + if (image_file == nullptr) { + LOG(ERROR) << "Failed to open image file " << image_filename; + return false; } - default: { - LOG(FATAL) << "Unsupported"; - UNREACHABLE(); + if (fchmod(image_file->Fd(), 0644) != 0) { + PLOG(ERROR) << "Failed to make image file world readable: " << image_filename; + image_file->Erase(); + return EXIT_FAILURE; } - } - // Write header first, as uncompressed. - image_header->data_size_ = data_size; - if (!image_file->WriteFully(image_->Begin(), sizeof(ImageHeader))) { - PLOG(ERROR) << "Failed to write image file header " << image_filename; - image_file->Erase(); - return false; - } + std::unique_ptr<char[]> compressed_data; + // Image data size excludes the bitmap and the header. + ImageHeader* const image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin()); + const size_t image_data_size = image_header->GetImageSize() - sizeof(ImageHeader); + char* image_data = reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader); + size_t data_size; + const char* image_data_to_write; + + CHECK_EQ(image_header->storage_mode_, image_storage_mode_); + switch (image_storage_mode_) { + case ImageHeader::kStorageModeLZ4: { + size_t compressed_max_size = LZ4_compressBound(image_data_size); + compressed_data.reset(new char[compressed_max_size]); + data_size = LZ4_compress( + reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader), + &compressed_data[0], + image_data_size); + image_data_to_write = &compressed_data[0]; + VLOG(compiler) << "Compressed from " << image_data_size << " to " << data_size; + break; + } + case ImageHeader::kStorageModeUncompressed: { + data_size = image_data_size; + image_data_to_write = image_data; + break; + } + default: { + LOG(FATAL) << "Unsupported"; + UNREACHABLE(); + } + } - // Write out the image + fields + methods. - const bool is_compressed = compressed_data != nullptr; - if (!image_file->WriteFully(image_data_to_write, data_size)) { - PLOG(ERROR) << "Failed to write image file data " << image_filename; - image_file->Erase(); - return false; - } + // Write header first, as uncompressed. 
+ image_header->data_size_ = data_size; + if (!image_file->WriteFully(image_info.image_->Begin(), sizeof(ImageHeader))) { + PLOG(ERROR) << "Failed to write image file header " << image_filename; + image_file->Erase(); + return false; + } - // Write out the image bitmap at the page aligned start of the image end, also uncompressed for - // convenience. - const ImageSection& bitmap_section = image_header->GetImageSection( - ImageHeader::kSectionImageBitmap); - // Align up since data size may be unaligned if the image is compressed. - size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize); - if (!is_compressed) { - CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset()); - } - if (!image_file->Write(reinterpret_cast<char*>(image_bitmap_->Begin()), - bitmap_section.Size(), - bitmap_position_in_file)) { - PLOG(ERROR) << "Failed to write image file " << image_filename; - image_file->Erase(); - return false; - } - CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(), - static_cast<size_t>(image_file->GetLength())); - if (image_file->FlushCloseOrErase() != 0) { - PLOG(ERROR) << "Failed to flush and close image file " << image_filename; - return false; + // Write out the image + fields + methods. + const bool is_compressed = compressed_data != nullptr; + if (!image_file->WriteFully(image_data_to_write, data_size)) { + PLOG(ERROR) << "Failed to write image file data " << image_filename; + image_file->Erase(); + return false; + } + + // Write out the image bitmap at the page aligned start of the image end, also uncompressed for + // convenience. + const ImageSection& bitmap_section = image_header->GetImageSection( + ImageHeader::kSectionImageBitmap); + // Align up since data size may be unaligned if the image is compressed. 
+ size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize); + if (!is_compressed) { + CHECK_EQ(bitmap_position_in_file, bitmap_section.Offset()); + } + if (!image_file->Write(reinterpret_cast<char*>(image_info.image_bitmap_->Begin()), + bitmap_section.Size(), + bitmap_position_in_file)) { + PLOG(ERROR) << "Failed to write image file " << image_filename; + image_file->Erase(); + return false; + } + CHECK_EQ(bitmap_position_in_file + bitmap_section.Size(), + static_cast<size_t>(image_file->GetLength())); + if (image_file->FlushCloseOrErase() != 0) { + PLOG(ERROR) << "Failed to flush and close image file " << image_filename; + return false; + } } return true; } @@ -319,12 +347,14 @@ void ImageWriter::AssignImageOffset(mirror::Object* object, ImageWriter::BinSlot DCHECK(object != nullptr); DCHECK_NE(image_objects_offset_begin_, 0u); - size_t bin_slot_offset = bin_slot_offsets_[bin_slot.GetBin()]; + const char* oat_filename = GetOatFilename(object); + ImageInfo& image_info = GetImageInfo(oat_filename); + size_t bin_slot_offset = image_info.bin_slot_offsets_[bin_slot.GetBin()]; size_t new_offset = bin_slot_offset + bin_slot.GetIndex(); DCHECK_ALIGNED(new_offset, kObjectAlignment); SetImageOffset(object, new_offset); - DCHECK_LT(new_offset, image_end_); + DCHECK_LT(new_offset, image_info.image_end_); } bool ImageWriter::IsImageOffsetAssigned(mirror::Object* object) const { @@ -338,7 +368,9 @@ size_t ImageWriter::GetImageOffset(mirror::Object* object) const { DCHECK(IsImageOffsetAssigned(object)); LockWord lock_word = object->GetLockWord(false); size_t offset = lock_word.ForwardingAddress(); - DCHECK_LT(offset, image_end_); + const char* oat_filename = GetOatFilename(object); + const ImageInfo& image_info = GetConstImageInfo(oat_filename); + DCHECK_LT(offset, image_info.image_end_); return offset; } @@ -377,15 +409,16 @@ void ImageWriter::SetImageBinSlot(mirror::Object* object, BinSlot bin_slot) { void ImageWriter::PrepareDexCacheArraySlots() { // Prepare dex cache array starts based on the ordering specified in the CompilerDriver. - uint32_t size = 0u; + // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() + // when AssignImageBinSlot() assigns their indexes out of order. for (const DexFile* dex_file : compiler_driver_.GetDexFilesForOatFile()) { - dex_cache_array_starts_.Put(dex_file, size); + auto it = dex_file_oat_filename_map_.find(dex_file); + DCHECK(it != dex_file_oat_filename_map_.end()) << dex_file->GetLocation(); + ImageInfo& image_info = GetImageInfo(it->second); + image_info.dex_cache_array_starts_.Put(dex_file, image_info.bin_slot_sizes_[kBinDexCacheArray]); DexCacheArraysLayout layout(target_ptr_size_, dex_file); - size += layout.Size(); + image_info.bin_slot_sizes_[kBinDexCacheArray] += layout.Size(); } - // Set the slot size early to avoid DCHECK() failures in IsImageBinSlotAssigned() - // when AssignImageBinSlot() assigns their indexes out or order. 
- bin_slot_sizes_[kBinDexCacheArray] = size; ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); Thread* const self = Thread::Current(); @@ -399,24 +432,32 @@ void ImageWriter::PrepareDexCacheArraySlots() { const DexFile* dex_file = dex_cache->GetDexFile(); DexCacheArraysLayout layout(target_ptr_size_, dex_file); DCHECK(layout.Valid()); - uint32_t start = dex_cache_array_starts_.Get(dex_file); + const char* oat_filename = GetOatFilenameForDexCache(dex_cache); + ImageInfo& image_info = GetImageInfo(oat_filename); + uint32_t start = image_info.dex_cache_array_starts_.Get(dex_file); DCHECK_EQ(dex_file->NumTypeIds() != 0u, dex_cache->GetResolvedTypes() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), start + layout.TypesOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedTypes(), + start + layout.TypesOffset(), + dex_cache); DCHECK_EQ(dex_file->NumMethodIds() != 0u, dex_cache->GetResolvedMethods() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), start + layout.MethodsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedMethods(), + start + layout.MethodsOffset(), + dex_cache); DCHECK_EQ(dex_file->NumFieldIds() != 0u, dex_cache->GetResolvedFields() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), start + layout.FieldsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetResolvedFields(), + start + layout.FieldsOffset(), + dex_cache); DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr); - AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset()); + AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset(), dex_cache); } } -void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset) { +void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset, DexCache* dex_cache) { if (array != nullptr) { DCHECK(!IsInBootImage(array)); - native_object_relocations_.emplace( - array, - NativeObjectRelocation { offset, kNativeObjectRelocationTypeDexCacheArray }); + const char* oat_filename = GetOatFilenameForDexCache(dex_cache); + native_object_relocations_.emplace(array, + NativeObjectRelocation { oat_filename, offset, kNativeObjectRelocationTypeDexCacheArray }); } } @@ -531,18 +572,21 @@ void ImageWriter::AssignImageBinSlot(mirror::Object* object) { } // else bin = kBinRegular } + const char* oat_filename = GetOatFilename(object); + ImageInfo& image_info = GetImageInfo(oat_filename); + size_t offset_delta = RoundUp(object_size, kObjectAlignment); // 64-bit alignment - current_offset = bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). - // Move the current bin size up to accomodate the object we just assigned a bin slot. - bin_slot_sizes_[bin] += offset_delta; + current_offset = image_info.bin_slot_sizes_[bin]; // How many bytes the current bin is at (aligned). + // Move the current bin size up to accommodate the object we just assigned a bin slot. + image_info.bin_slot_sizes_[bin] += offset_delta; BinSlot new_bin_slot(bin, current_offset); SetImageBinSlot(object, new_bin_slot); - ++bin_slot_count_[bin]; + ++image_info.bin_slot_count_[bin]; // Grow the image closer to the end by the object we just assigned. 
- image_end_ += offset_delta; + image_info.image_end_ += offset_delta; } bool ImageWriter::WillMethodBeDirty(ArtMethod* m) const { @@ -565,7 +609,9 @@ bool ImageWriter::IsImageBinSlotAssigned(mirror::Object* object) const { LockWord lock_word = object->GetLockWord(false); size_t offset = lock_word.ForwardingAddress(); BinSlot bin_slot(offset); - DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()]) + const char* oat_filename = GetOatFilename(object); + const ImageInfo& image_info = GetConstImageInfo(oat_filename); + DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]) << "bin slot offset should not exceed the size of that bin"; } return true; @@ -580,39 +626,42 @@ ImageWriter::BinSlot ImageWriter::GetImageBinSlot(mirror::Object* object) const DCHECK_LE(offset, std::numeric_limits<uint32_t>::max()); BinSlot bin_slot(static_cast<uint32_t>(offset)); - DCHECK_LT(bin_slot.GetIndex(), bin_slot_sizes_[bin_slot.GetBin()]); + const char* oat_filename = GetOatFilename(object); + const ImageInfo& image_info = GetConstImageInfo(oat_filename); + DCHECK_LT(bin_slot.GetIndex(), image_info.bin_slot_sizes_[bin_slot.GetBin()]); return bin_slot; } bool ImageWriter::AllocMemory() { - const size_t length = RoundUp(image_objects_offset_begin_ + - GetBinSizeSum() + - intern_table_bytes_ + - class_table_bytes_, - kPageSize); - std::string error_msg; - image_.reset(MemMap::MapAnonymous("image writer image", - nullptr, - length, - PROT_READ | PROT_WRITE, - false, - false, - &error_msg)); - if (UNLIKELY(image_.get() == nullptr)) { - LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg; - return false; - } + for (const char* oat_filename : oat_filenames_) { + ImageInfo& image_info = GetImageInfo(oat_filename); + ImageSection unused_sections[ImageHeader::kSectionCount]; + const size_t length = RoundUp( + image_info.CreateImageSections(target_ptr_size_, unused_sections), + kPageSize); + + std::string error_msg; + image_info.image_.reset(MemMap::MapAnonymous("image writer image", + nullptr, + length, + PROT_READ | PROT_WRITE, + false, + false, + &error_msg)); + if (UNLIKELY(image_info.image_.get() == nullptr)) { + LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg; + return false; + } - // Create the image bitmap, only needs to cover mirror object section which is up to image_end_. - CHECK_LE(image_end_, length); - image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create( - "image bitmap", - image_->Begin(), - RoundUp(image_end_, kPageSize))); - if (image_bitmap_.get() == nullptr) { - LOG(ERROR) << "Failed to allocate memory for image bitmap"; - return false; + // Create the image bitmap, only needs to cover mirror object section which is up to image_end_. + CHECK_LE(image_info.image_end_, length); + image_info.image_bitmap_.reset(gc::accounting::ContinuousSpaceBitmap::Create( + "image bitmap", image_info.image_->Begin(), RoundUp(image_info.image_end_, kPageSize))); + if (image_info.image_bitmap_.get() == nullptr) { + LOG(ERROR) << "Failed to allocate memory for image bitmap"; + return false; + } } return true; } @@ -861,14 +910,17 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { DCHECK(obj != nullptr); // if it is a string, we want to intern it if it's not interned. 
if (obj->GetClass()->IsStringClass()) { + const char* oat_filename = GetOatFilename(obj); + ImageInfo& image_info = GetImageInfo(oat_filename); + // we must be an interned string that was forward referenced and already assigned if (IsImageBinSlotAssigned(obj)) { - DCHECK_EQ(obj, obj->AsString()->Intern()); + DCHECK_EQ(obj, image_info.intern_table_->InternStrongImageString(obj->AsString())); return; } // InternImageString allows us to intern while holding the heap bitmap lock. This is safe since // we are guaranteed to not have GC during image writing. - mirror::String* const interned = Runtime::Current()->GetInternTable()->InternStrongImageString( + mirror::String* const interned = image_info.intern_table_->InternStrongImageString( obj->AsString()); if (obj != interned) { if (!IsImageBinSlotAssigned(interned)) { @@ -885,7 +937,7 @@ void ImageWriter::CalculateObjectBinSlots(Object* obj) { AssignImageBinSlot(obj); } -ObjectArray<Object>* ImageWriter::CreateImageRoots() const { +ObjectArray<Object>* ImageWriter::CreateImageRoots(const char* oat_filename) const { Runtime* runtime = Runtime::Current(); ClassLinker* class_linker = runtime->GetClassLinker(); Thread* self = Thread::Current(); @@ -893,6 +945,15 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots() const { Handle<Class> object_array_class(hs.NewHandle( class_linker->FindSystemClass(self, "[Ljava/lang/Object;"))); + std::unordered_set<const DexFile*> image_dex_files; + for (auto& pair : dex_file_oat_filename_map_) { + const DexFile* image_dex_file = pair.first; + const char* image_oat_filename = pair.second; + if (strcmp(oat_filename, image_oat_filename) == 0) { + image_dex_files.insert(image_dex_file); + } + } + // build an Object[] of all the DexCaches used in the source_space_. // Since we can't hold the dex lock when allocating the dex_caches // ObjectArray, we lock the dex lock twice, first to get the number @@ -905,7 +966,10 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots() const { for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root)); - dex_cache_count += IsInBootImage(dex_cache) ? 0u : 1u; + const DexFile* dex_file = dex_cache->GetDexFile(); + if (!IsInBootImage(dex_cache)) { + dex_cache_count += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u; + } } } Handle<ObjectArray<Object>> dex_caches( @@ -918,7 +982,10 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots() const { for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root)); - non_image_dex_caches += IsInBootImage(dex_cache) ? 0u : 1u; + const DexFile* dex_file = dex_cache->GetDexFile(); + if (!IsInBootImage(dex_cache)) { + non_image_dex_caches += image_dex_files.find(dex_file) != image_dex_files.end() ? 
1u : 0u; + } } CHECK_EQ(dex_cache_count, non_image_dex_caches) << "The number of non-image dex caches changed."; @@ -926,7 +993,8 @@ for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root)); - if (!IsInBootImage(dex_cache)) { + const DexFile* dex_file = dex_cache->GetDexFile(); + if (!IsInBootImage(dex_cache) && image_dex_files.find(dex_file) != image_dex_files.end()) { dex_caches->Set<false>(i, dex_cache); ++i; } @@ -997,9 +1065,19 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } // Visit and assign offsets for fields and field arrays. auto* as_klass = h_obj->AsClass(); + mirror::DexCache* dex_cache = as_klass->GetDexCache(); LengthPrefixedArray<ArtField>* fields[] = { as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(), }; + const char* oat_file = GetOatFilenameForDexCache(dex_cache); + ImageInfo& image_info = GetImageInfo(oat_file); + { + // Note: This table is only accessed from the image writer, so the lock is technically + // unnecessary. + WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); + // Insert in the class table for this image. + image_info.class_table_->Insert(as_klass); + } for (LengthPrefixedArray<ArtField>* cur_fields : fields) { // Total array length including header. if (cur_fields != nullptr) { @@ -1008,11 +1086,10 @@ auto it = native_object_relocations_.find(cur_fields); CHECK(it == native_object_relocations_.end()) << "Field array " << cur_fields << " already forwarded"; - size_t& offset = bin_slot_sizes_[kBinArtField]; + size_t& offset = image_info.bin_slot_sizes_[kBinArtField]; DCHECK(!IsInBootImage(cur_fields)); - native_object_relocations_.emplace( - cur_fields, - NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtFieldArray }); + native_object_relocations_.emplace(cur_fields, + NativeObjectRelocation {oat_file, offset, kNativeObjectRelocationTypeArtFieldArray }); offset += header_size; // Forward individual fields so that we can quickly find where they belong. for (size_t i = 0, count = cur_fields->size(); i < count; ++i) { @@ -1022,9 +1099,8 @@ CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i << " already assigned " << PrettyField(field) << " static=" << field->IsStatic(); DCHECK(!IsInBootImage(field)); - native_object_relocations_.emplace( - field, - NativeObjectRelocation {offset, kNativeObjectRelocationTypeArtField }); + native_object_relocations_.emplace(field, + NativeObjectRelocation {oat_file, offset, kNativeObjectRelocationTypeArtField }); offset += sizeof(ArtField); } } @@ -1053,17 +1129,17 @@ auto it = native_object_relocations_.find(array); CHECK(it == native_object_relocations_.end()) << "Method array " << array << " already forwarded"; - size_t& offset = bin_slot_sizes_[bin_type]; + size_t& offset = image_info.bin_slot_sizes_[bin_type]; DCHECK(!IsInBootImage(array)); - native_object_relocations_.emplace( - array, NativeObjectRelocation { - offset, - any_dirty ? kNativeObjectRelocationTypeArtMethodArrayDirty - : kNativeObjectRelocationTypeArtMethodArrayClean - }); + native_object_relocations_.emplace(array, + NativeObjectRelocation { + oat_file, + offset, + any_dirty ? 
kNativeObjectRelocationTypeArtMethodArrayDirty + : kNativeObjectRelocationTypeArtMethodArrayClean }); offset += header_size; for (auto& m : as_klass->GetMethods(target_ptr_size_)) { - AssignMethodOffset(&m, type); + AssignMethodOffset(&m, type, oat_file); } (any_dirty ? dirty_methods_ : clean_methods_) += num_methods; } @@ -1089,13 +1165,16 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } } -void ImageWriter::AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) { +void ImageWriter::AssignMethodOffset(ArtMethod* method, + NativeObjectRelocationType type, + const char* oat_filename) { DCHECK(!IsInBootImage(method)); auto it = native_object_relocations_.find(method); CHECK(it == native_object_relocations_.end()) << "Method " << method << " already assigned " << PrettyMethod(method); - size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(type)]; - native_object_relocations_.emplace(method, NativeObjectRelocation { offset, type }); + ImageInfo& image_info = GetImageInfo(oat_filename); + size_t& offset = image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(type)]; + native_object_relocations_.emplace(method, NativeObjectRelocation { oat_filename, offset, type }); offset += ArtMethod::Size(target_ptr_size_); } @@ -1128,18 +1207,20 @@ void ImageWriter::UnbinObjectsIntoOffset(mirror::Object* obj) { void ImageWriter::CalculateNewObjectOffsets() { Thread* const self = Thread::Current(); - StackHandleScope<1> hs(self); - Handle<ObjectArray<Object>> image_roots(hs.NewHandle(CreateImageRoots())); + StackHandleScopeCollection handles(self); + std::vector<Handle<ObjectArray<Object>>> image_roots; + for (const char* oat_filename : oat_filenames_) { + std::string image_filename = oat_filename; + image_roots.push_back(handles.NewHandle(CreateImageRoots(image_filename.c_str()))); + } auto* runtime = Runtime::Current(); auto* heap = runtime->GetHeap(); - DCHECK_EQ(0U, image_end_); // Leave space for the header, but do not write it yet, we need to // know where image_roots is going to end up - image_end_ += RoundUp(sizeof(ImageHeader), kObjectAlignment); // 64-bit-alignment + image_objects_offset_begin_ = RoundUp(sizeof(ImageHeader), kObjectAlignment); // 64-bit-alignment - image_objects_offset_begin_ = image_end_; // Clear any pre-existing monitors which may have been in the monitor words, assign bin slots. heap->VisitObjects(WalkFieldsCallback, this); // Write the image runtime methods. 
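The hunk above reserves space for the image header before any bin slots are handed out: image_objects_offset_begin_ becomes sizeof(ImageHeader) rounded up to kObjectAlignment, so the first object starts at the next aligned offset past the header. A minimal standalone sketch of that rounding idiom; the constants here are illustrative stand-ins, not ART's actual values:

#include <cstddef>

// Round x up to the next multiple of a power-of-two alignment.
constexpr std::size_t RoundUp(std::size_t x, std::size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

int main() {
  constexpr std::size_t kObjectAlignment = 8;  // assumed 8-byte object alignment
  constexpr std::size_t kHeaderSize = 300;     // stand-in for sizeof(ImageHeader)
  // Objects would begin at offset 304, the first aligned slot past the header.
  return RoundUp(kHeaderSize, kObjectAlignment) == 304 ? 0 : 1;
}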
@@ -1156,10 +1237,12 @@ void ImageWriter::CalculateNewObjectOffsets() { const auto image_method_type = kNativeObjectRelocationTypeArtMethodArrayClean; auto it = native_object_relocations_.find(&image_method_array_); CHECK(it == native_object_relocations_.end()); - size_t& offset = bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)]; + ImageInfo& default_image_info = GetImageInfo(default_oat_filename_); + size_t& offset = + default_image_info.bin_slot_sizes_[BinTypeForNativeRelocationType(image_method_type)]; if (!compile_app_image_) { native_object_relocations_.emplace(&image_method_array_, - NativeObjectRelocation { offset, image_method_type }); + NativeObjectRelocation { default_oat_filename_, offset, image_method_type }); } size_t method_alignment = ArtMethod::Alignment(target_ptr_size_); const size_t array_size = LengthPrefixedArray<ArtMethod>::ComputeSize( @@ -1171,153 +1254,182 @@ void ImageWriter::CalculateNewObjectOffsets() { CHECK(m->IsRuntimeMethod()); DCHECK_EQ(compile_app_image_, IsInBootImage(m)) << "Trampolines should be in boot image"; if (!IsInBootImage(m)) { - AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean); + AssignMethodOffset(m, kNativeObjectRelocationTypeArtMethodClean, default_oat_filename_); } } // Calculate size of the dex cache arrays slot and prepare offsets. PrepareDexCacheArraySlots(); + // Calculate the sizes of the intern tables and class tables. + for (const char* oat_filename : oat_filenames_) { + ImageInfo& image_info = GetImageInfo(oat_filename); + // Calculate how big the intern table will be after being serialized. + InternTable* const intern_table = image_info.intern_table_.get(); + CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; + image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr); + // Calculate the size of the class table. + ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); + image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr); + } + // Calculate bin slot offsets. - size_t bin_offset = image_objects_offset_begin_; - for (size_t i = 0; i != kBinSize; ++i) { - bin_slot_offsets_[i] = bin_offset; - bin_offset += bin_slot_sizes_[i]; - if (i == kBinArtField) { - static_assert(kBinArtField + 1 == kBinArtMethodClean, "Methods follow fields."); - static_assert(alignof(ArtField) == 4u, "ArtField alignment is 4."); - DCHECK_ALIGNED(bin_offset, 4u); - DCHECK(method_alignment == 4u || method_alignment == 8u); - bin_offset = RoundUp(bin_offset, method_alignment); + for (const char* oat_filename : oat_filenames_) { + ImageInfo& image_info = GetImageInfo(oat_filename); + size_t bin_offset = image_objects_offset_begin_; + for (size_t i = 0; i != kBinSize; ++i) { + image_info.bin_slot_offsets_[i] = bin_offset; + bin_offset += image_info.bin_slot_sizes_[i]; + if (i == kBinArtField) { + static_assert(kBinArtField + 1 == kBinArtMethodClean, "Methods follow fields."); + static_assert(alignof(ArtField) == 4u, "ArtField alignment is 4."); + DCHECK_ALIGNED(bin_offset, 4u); + DCHECK(method_alignment == 4u || method_alignment == 8u); + bin_offset = RoundUp(bin_offset, method_alignment); + } } + // NOTE: There may be additional padding between the bin slots and the intern table. + DCHECK_EQ(image_info.image_end_, + GetBinSizeSum(image_info, kBinMirrorCount) + image_objects_offset_begin_); } - // NOTE: There may be additional padding between the bin slots and the intern table. 
- DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); + // Calculate image offsets. + size_t image_offset = 0; + for (const char* oat_filename : oat_filenames_) { + ImageInfo& image_info = GetImageInfo(oat_filename); + image_info.image_begin_ = global_image_begin_ + image_offset; + image_info.image_offset_ = image_offset; + ImageSection unused_sections[ImageHeader::kSectionCount]; + image_info.image_size_ = RoundUp( + image_info.CreateImageSections(target_ptr_size_, unused_sections), + kPageSize); + // There should be no gaps until the next image. + image_offset += image_info.image_size_; + } // Transform each object's bin slot into an offset which will be used to do the final copy. heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); - DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); + // DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); - image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots.Get())); + size_t i = 0; + for (const char* oat_filename : oat_filenames_) { + ImageInfo& image_info = GetImageInfo(oat_filename); + image_info.image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get())); + i++; + } // Update the native relocations by adding their bin sums. for (auto& pair : native_object_relocations_) { NativeObjectRelocation& relocation = pair.second; Bin bin_type = BinTypeForNativeRelocationType(relocation.type); - relocation.offset += bin_slot_offsets_[bin_type]; + ImageInfo& image_info = GetImageInfo(relocation.oat_filename); + relocation.offset += image_info.bin_slot_offsets_[bin_type]; } - // Calculate how big the intern table will be after being serialized. - InternTable* const intern_table = runtime->GetInternTable(); - CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings"; - intern_table_bytes_ = intern_table->WriteToMemory(nullptr); - - // Write out the class table. - ClassLinker* class_linker = runtime->GetClassLinker(); - if (boot_image_space_ == nullptr) { - // Compiling the boot image, add null class loader. - class_loaders_.insert(nullptr); - } - // class_loaders_ usually will not be empty, but may be empty if we attempt to create an image - // with no classes. - if (class_loaders_.size() == 1u) { - // Only write the class table if we have exactly one class loader. There may be cases where - // there are multiple class loaders if a class path is passed to dex2oat. - ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); - for (mirror::ClassLoader* loader : class_loaders_) { - ClassTable* table = class_linker->ClassTableForClassLoader(loader); - CHECK(table != nullptr); - class_table_bytes_ += table->WriteToMemory(nullptr); - } - } - - // Note that image_end_ is left at end of used mirror object section. + // Note that image_info.image_end_ is left at end of used mirror object section. } -void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { - CHECK_NE(0U, oat_loaded_size); - const uint8_t* oat_file_begin = GetOatFileBegin(); - const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size; - oat_data_begin_ = oat_file_begin + oat_data_offset; - const uint8_t* oat_data_end = oat_data_begin_ + oat_file_->Size(); - - // Create the image sections. 
- ImageSection sections[ImageHeader::kSectionCount]; +size_t ImageWriter::ImageInfo::CreateImageSections(size_t target_ptr_size, + ImageSection* out_sections) const { + DCHECK(out_sections != nullptr); // Objects section - auto* objects_section = &sections[ImageHeader::kSectionObjects]; + auto* objects_section = &out_sections[ImageHeader::kSectionObjects]; *objects_section = ImageSection(0u, image_end_); size_t cur_pos = objects_section->End(); // Add field section. - auto* field_section = &sections[ImageHeader::kSectionArtFields]; + auto* field_section = &out_sections[ImageHeader::kSectionArtFields]; *field_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtField]); CHECK_EQ(bin_slot_offsets_[kBinArtField], field_section->Offset()); cur_pos = field_section->End(); // Round up to the alignment required by the method section. - cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size_)); + cur_pos = RoundUp(cur_pos, ArtMethod::Alignment(target_ptr_size)); // Add method section. - auto* methods_section = &sections[ImageHeader::kSectionArtMethods]; + auto* methods_section = &out_sections[ImageHeader::kSectionArtMethods]; *methods_section = ImageSection(cur_pos, bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]); CHECK_EQ(bin_slot_offsets_[kBinArtMethodClean], methods_section->Offset()); cur_pos = methods_section->End(); // Add dex cache arrays section. - auto* dex_cache_arrays_section = &sections[ImageHeader::kSectionDexCacheArrays]; + auto* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays]; *dex_cache_arrays_section = ImageSection(cur_pos, bin_slot_sizes_[kBinDexCacheArray]); CHECK_EQ(bin_slot_offsets_[kBinDexCacheArray], dex_cache_arrays_section->Offset()); cur_pos = dex_cache_arrays_section->End(); // Round up to the alignment the string table expects. See HashSet::WriteToMemory. cur_pos = RoundUp(cur_pos, sizeof(uint64_t)); // Calculate the size of the interned strings. - auto* interned_strings_section = &sections[ImageHeader::kSectionInternedStrings]; + auto* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings]; *interned_strings_section = ImageSection(cur_pos, intern_table_bytes_); cur_pos = interned_strings_section->End(); // Round up to the alignment the class table expects. See HashSet::WriteToMemory. cur_pos = RoundUp(cur_pos, sizeof(uint64_t)); // Calculate the size of the class table section. - auto* class_table_section = &sections[ImageHeader::kSectionClassTable]; + auto* class_table_section = &out_sections[ImageHeader::kSectionClassTable]; *class_table_section = ImageSection(cur_pos, class_table_bytes_); cur_pos = class_table_section->End(); // Image end goes right before the start of the image bitmap. - const size_t image_end = static_cast<uint32_t>(cur_pos); + return cur_pos; +} + +void ImageWriter::CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) { + CHECK_NE(0U, oat_loaded_size); + const char* oat_filename = oat_file_->GetLocation().c_str(); + ImageInfo& image_info = GetImageInfo(oat_filename); + const uint8_t* oat_file_begin = GetOatFileBegin(oat_filename); + const uint8_t* oat_file_end = oat_file_begin + oat_loaded_size; + image_info.oat_data_begin_ = const_cast<uint8_t*>(oat_file_begin) + oat_data_offset; + const uint8_t* oat_data_end = image_info.oat_data_begin_ + oat_file_->Size(); + image_info.oat_size_ = oat_file_->Size(); + + // Create the image sections. 
+ ImageSection sections[ImageHeader::kSectionCount]; + const size_t image_end = image_info.CreateImageSections(target_ptr_size_, sections); + // Finally bitmap section. - const size_t bitmap_bytes = image_bitmap_->Size(); + const size_t bitmap_bytes = image_info.image_bitmap_->Size(); auto* bitmap_section = &sections[ImageHeader::kSectionImageBitmap]; - *bitmap_section = ImageSection(RoundUp(cur_pos, kPageSize), RoundUp(bitmap_bytes, kPageSize)); - cur_pos = bitmap_section->End(); - if (kIsDebugBuild) { + *bitmap_section = ImageSection(RoundUp(image_end, kPageSize), RoundUp(bitmap_bytes, kPageSize)); + if (VLOG_IS_ON(compiler)) { + LOG(INFO) << "Creating header for " << oat_filename; size_t idx = 0; for (const ImageSection& section : sections) { LOG(INFO) << static_cast<ImageHeader::ImageSections>(idx) << " " << section; ++idx; } LOG(INFO) << "Methods: clean=" << clean_methods_ << " dirty=" << dirty_methods_; + LOG(INFO) << "Image roots address=" << std::hex << image_info.image_roots_address_ << std::dec; + LOG(INFO) << "Image begin=" << std::hex << reinterpret_cast<uintptr_t>(global_image_begin_) + << " Image offset=" << image_info.image_offset_ << std::dec; + LOG(INFO) << "Oat file begin=" << std::hex << reinterpret_cast<uintptr_t>(oat_file_begin) + << " Oat data begin=" << reinterpret_cast<uintptr_t>(image_info.oat_data_begin_) + << " Oat data end=" << reinterpret_cast<uintptr_t>(oat_data_end) + << " Oat file end=" << reinterpret_cast<uintptr_t>(oat_file_end); } - CHECK_EQ(AlignUp(image_begin_ + image_end, kPageSize), oat_file_begin) << - "Oat file should be right after the image."; + // Create the header, leave 0 for data size since we will fill this in as we are writing the // image. - new (image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_begin_), - image_end, - sections, - image_roots_address_, - oat_file_->GetOatHeader().GetChecksum(), - PointerToLowMemUInt32(oat_file_begin), - PointerToLowMemUInt32(oat_data_begin_), - PointerToLowMemUInt32(oat_data_end), - PointerToLowMemUInt32(oat_file_end), - target_ptr_size_, - compile_pic_, - image_storage_mode_, - /*data_size*/0u); + new (image_info.image_->Begin()) ImageHeader(PointerToLowMemUInt32(image_info.image_begin_), + image_end, + sections, + image_info.image_roots_address_, + oat_file_->GetOatHeader().GetChecksum(), + PointerToLowMemUInt32(oat_file_begin), + PointerToLowMemUInt32(image_info.oat_data_begin_), + PointerToLowMemUInt32(oat_data_end), + PointerToLowMemUInt32(oat_file_end), + target_ptr_size_, + compile_pic_, + image_storage_mode_, + /*data_size*/0u); } ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) { auto it = native_object_relocations_.find(method); CHECK(it != native_object_relocations_.end()) << PrettyMethod(method) << " @ " << method; - CHECK_GE(it->second.offset, image_end_) << "ArtMethods should be after Objects"; - return reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); + const char* oat_filename = GetOatFilename(method->GetDexCache()); + ImageInfo& image_info = GetImageInfo(oat_filename); + CHECK_GE(it->second.offset, image_info.image_end_) << "ArtMethods should be after Objects"; + return reinterpret_cast<ArtMethod*>(image_info.image_begin_ + it->second.offset); } class FixupRootVisitor : public RootVisitor { @@ -1328,7 +1440,7 @@ class FixupRootVisitor : public RootVisitor { void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { for (size_t i = 0; i < count; ++i) { - *roots[i] = 
ImageAddress(*roots[i]); + *roots[i] = image_writer_->GetImageAddress(*roots[i]); } } @@ -1336,27 +1448,26 @@ class FixupRootVisitor : public RootVisitor { const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_) { for (size_t i = 0; i < count; ++i) { - roots[i]->Assign(ImageAddress(roots[i]->AsMirrorPtr())); + roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr())); } } private: ImageWriter* const image_writer_; - - mirror::Object* ImageAddress(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_) { - const size_t offset = image_writer_->GetImageOffset(obj); - auto* const dest = reinterpret_cast<Object*>(image_writer_->image_begin_ + offset); - VLOG(compiler) << "Update root from " << obj << " to " << dest; - return dest; - } }; void ImageWriter::CopyAndFixupNativeData() { + const char* oat_filename = oat_file_->GetLocation().c_str(); + ImageInfo& image_info = GetImageInfo(oat_filename); // Copy ArtFields and methods to their locations and update the array for convenience. for (auto& pair : native_object_relocations_) { NativeObjectRelocation& relocation = pair.second; - auto* dest = image_->Begin() + relocation.offset; - DCHECK_GE(dest, image_->Begin() + image_end_); + // Only work with fields and methods that are in the current oat file. + if (strcmp(relocation.oat_filename, oat_filename) != 0) { + continue; + } + auto* dest = image_info.image_->Begin() + relocation.offset; + DCHECK_GE(dest, image_info.image_->Begin() + image_info.image_end_); DCHECK(!IsInBootImage(pair.first)); switch (relocation.type) { case kNativeObjectRelocationTypeArtField: { @@ -1368,7 +1479,8 @@ void ImageWriter::CopyAndFixupNativeData() { case kNativeObjectRelocationTypeArtMethodClean: case kNativeObjectRelocationTypeArtMethodDirty: { CopyAndFixupMethod(reinterpret_cast<ArtMethod*>(pair.first), - reinterpret_cast<ArtMethod*>(dest)); + reinterpret_cast<ArtMethod*>(dest), + image_info); break; } // For arrays, copy just the header since the elements will get copied by their corresponding @@ -1391,67 +1503,69 @@ void ImageWriter::CopyAndFixupNativeData() { } } // Fixup the image method roots. - auto* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); + auto* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin()); const ImageSection& methods_section = image_header->GetMethodsSection(); for (size_t i = 0; i < ImageHeader::kImageMethodsCount; ++i) { ArtMethod* method = image_methods_[i]; CHECK(method != nullptr); + // Only place runtime methods in the image of the default oat file. + if (method->IsRuntimeMethod() && strcmp(default_oat_filename_, oat_filename) != 0) { + continue; + } if (!IsInBootImage(method)) { auto it = native_object_relocations_.find(method); - CHECK(it != native_object_relocations_.end()) << "No fowarding for " << PrettyMethod(method); + CHECK(it != native_object_relocations_.end()) << "No forwarding for " << PrettyMethod(method); NativeObjectRelocation& relocation = it->second; CHECK(methods_section.Contains(relocation.offset)) << relocation.offset << " not in " << methods_section; CHECK(relocation.IsArtMethodRelocation()) << relocation.type; - method = reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset); + method = reinterpret_cast<ArtMethod*>(global_image_begin_ + it->second.offset); } image_header->SetImageMethod(static_cast<ImageHeader::ImageMethod>(i), method); } FixupRootVisitor root_visitor(this); // Write the intern table into the image. 
- const ImageSection& intern_table_section = image_header->GetImageSection( - ImageHeader::kSectionInternedStrings); - Runtime* const runtime = Runtime::Current(); - InternTable* const intern_table = runtime->GetInternTable(); - uint8_t* const intern_table_memory_ptr = image_->Begin() + intern_table_section.Offset(); - const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr); - CHECK_EQ(intern_table_bytes, intern_table_bytes_); - // Fixup the pointers in the newly written intern table to contain image addresses. - InternTable temp_intern_table; - // Note that we require that ReadFromMemory does not make an internal copy of the elements so that - // the VisitRoots() will update the memory directly rather than the copies. - // This also relies on visit roots not doing any verification which could fail after we update - // the roots to be the image addresses. - temp_intern_table.ReadFromMemory(intern_table_memory_ptr); - CHECK_EQ(temp_intern_table.Size(), intern_table->Size()); - temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots); - + if (image_info.intern_table_bytes_ > 0) { + const ImageSection& intern_table_section = image_header->GetImageSection( + ImageHeader::kSectionInternedStrings); + InternTable* const intern_table = image_info.intern_table_.get(); + uint8_t* const intern_table_memory_ptr = + image_info.image_->Begin() + intern_table_section.Offset(); + const size_t intern_table_bytes = intern_table->WriteToMemory(intern_table_memory_ptr); + CHECK_EQ(intern_table_bytes, image_info.intern_table_bytes_); + // Fixup the pointers in the newly written intern table to contain image addresses. + InternTable temp_intern_table; + // Note that we require that ReadFromMemory does not make an internal copy of the elements so that + // the VisitRoots() will update the memory directly rather than the copies. + // This also relies on visit roots not doing any verification which could fail after we update + // the roots to be the image addresses. + temp_intern_table.AddTableFromMemory(intern_table_memory_ptr); + CHECK_EQ(temp_intern_table.Size(), intern_table->Size()); + temp_intern_table.VisitRoots(&root_visitor, kVisitRootFlagAllRoots); + } // Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple // class loaders. Writing multiple class tables into the image is currently unsupported. - if (class_table_bytes_ > 0u) { - ClassLinker* const class_linker = runtime->GetClassLinker(); + if (image_info.class_table_bytes_ > 0u) { const ImageSection& class_table_section = image_header->GetImageSection( ImageHeader::kSectionClassTable); - uint8_t* const class_table_memory_ptr = image_->Begin() + class_table_section.Offset(); + uint8_t* const class_table_memory_ptr = + image_info.image_->Begin() + class_table_section.Offset(); ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); - size_t class_table_bytes = 0; - for (mirror::ClassLoader* loader : class_loaders_) { - ClassTable* table = class_linker->ClassTableForClassLoader(loader); - CHECK(table != nullptr); - uint8_t* memory_ptr = class_table_memory_ptr + class_table_bytes; - class_table_bytes += table->WriteToMemory(memory_ptr); - // Fixup the pointers in the newly written class table to contain image addresses. See - // above comment for intern tables. 
- ClassTable temp_class_table; - temp_class_table.ReadFromMemory(memory_ptr); - CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() + - table->NumZygoteClasses()); - BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor, - RootInfo(kRootUnknown)); - temp_class_table.VisitRoots(buffered_visitor); - } - CHECK_EQ(class_table_bytes, class_table_bytes_); + + ClassTable* table = image_info.class_table_.get(); + CHECK(table != nullptr); + const size_t class_table_bytes = table->WriteToMemory(class_table_memory_ptr); + CHECK_EQ(class_table_bytes, image_info.class_table_bytes_); + // Fixup the pointers in the newly written class table to contain image addresses. See + // above comment for intern tables. + ClassTable temp_class_table; + temp_class_table.ReadFromMemory(class_table_memory_ptr); + CHECK_EQ(temp_class_table.NumZygoteClasses(), table->NumNonZygoteClasses() + + table->NumZygoteClasses()); + BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(&root_visitor, + RootInfo(kRootUnknown)); + temp_class_table.VisitRoots(buffered_visitor); } } @@ -1500,7 +1614,8 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a } UNREACHABLE(); } else { - elem = image_begin_ + it->second.offset; + ImageInfo& image_info = GetImageInfo(it->second.oat_filename); + elem = image_info.image_begin_ + it->second.offset; } } dest_array->SetElementPtrSize<false, true>(i, elem, target_ptr_size_); @@ -1512,14 +1627,16 @@ void ImageWriter::CopyAndFixupObject(Object* obj) { return; } size_t offset = GetImageOffset(obj); - auto* dst = reinterpret_cast<Object*>(image_->Begin() + offset); - DCHECK_LT(offset, image_end_); + const char* oat_filename = GetOatFilename(obj); + ImageInfo& image_info = GetImageInfo(oat_filename); + auto* dst = reinterpret_cast<Object*>(image_info.image_->Begin() + offset); + DCHECK_LT(offset, image_info.image_end_); const auto* src = reinterpret_cast<const uint8_t*>(obj); - image_bitmap_->Set(dst); // Mark the obj as live. + image_info.image_bitmap_->Set(dst); // Mark the obj as live. const size_t n = obj->SizeOf(); - DCHECK_LE(offset + n, image_->Size()); + DCHECK_LE(offset + n, image_info.image_->Size()); memcpy(dst, src, n); // Write in a hash code of objects which have inflated monitors or a hash code in their monitor @@ -1595,34 +1712,55 @@ uintptr_t ImageWriter::NativeOffsetInImage(void* obj) { } template <typename T> -T* ImageWriter::NativeLocationInImage(T* obj) { - return (obj == nullptr || IsInBootImage(obj)) - ? obj - : reinterpret_cast<T*>(image_begin_ + NativeOffsetInImage(obj)); +T* ImageWriter::NativeLocationInImage(T* obj, const char* oat_filename) { + if (obj == nullptr || IsInBootImage(obj)) { + return obj; + } else { + ImageInfo& image_info = GetImageInfo(oat_filename); + return reinterpret_cast<T*>(image_info.image_begin_ + NativeOffsetInImage(obj)); + } } template <typename T> -T* ImageWriter::NativeCopyLocation(T* obj) { - return (obj == nullptr || IsInBootImage(obj)) - ? 
obj - : reinterpret_cast<T*>(image_->Begin() + NativeOffsetInImage(obj)); +T* ImageWriter::NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) { + if (obj == nullptr || IsInBootImage(obj)) { + return obj; + } else { + const char* oat_filename = GetOatFilenameForDexCache(dex_cache); + ImageInfo& image_info = GetImageInfo(oat_filename); + return reinterpret_cast<T*>(image_info.image_->Begin() + NativeOffsetInImage(obj)); + } } class NativeLocationVisitor { public: - explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {} + explicit NativeLocationVisitor(ImageWriter* image_writer, const char* oat_filename) + : image_writer_(image_writer), oat_filename_(oat_filename) {} template <typename T> - T* operator()(T* ptr) const { - return image_writer_->NativeLocationInImage(ptr); + T* operator()(T* ptr) const SHARED_REQUIRES(Locks::mutator_lock_) { + return image_writer_->NativeLocationInImage(ptr, oat_filename_); + } + + ArtMethod* operator()(ArtMethod* method) const SHARED_REQUIRES(Locks::mutator_lock_) { + const char* oat_filename = method->IsRuntimeMethod() ? image_writer_->GetDefaultOatFilename() : + image_writer_->GetOatFilenameForDexCache(method->GetDexCache()); + return image_writer_->NativeLocationInImage(method, oat_filename); + } + + ArtField* operator()(ArtField* field) const SHARED_REQUIRES(Locks::mutator_lock_) { + const char* oat_filename = image_writer_->GetOatFilenameForDexCache(field->GetDexCache()); + return image_writer_->NativeLocationInImage(field, oat_filename); } private: ImageWriter* const image_writer_; + const char* oat_filename_; }; void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) { - orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this)); + const char* oat_filename = GetOatFilename(orig); + orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename)); FixupClassVisitor visitor(this, copy); static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor); } @@ -1661,7 +1799,7 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) { CHECK(it != native_object_relocations_.end()) << "Missing relocation for AbstractMethod.artMethod " << PrettyMethod(src_method); dest->SetArtMethod( - reinterpret_cast<ArtMethod*>(image_begin_ + it->second.offset)); + reinterpret_cast<ArtMethod*>(global_image_begin_ + it->second.offset)); } else if (!klass->IsArrayClass()) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); if (klass == class_linker->GetClassRoot(ClassLinker::kJavaLangDexCache)) { @@ -1702,41 +1840,52 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e. // static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin_ + offset))). 
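// A minimal sketch (not from the patch itself) of the zero-extension the
// comment above describes, assuming a 32-bit build where uintptr_t is 32 bits:
//   uint8_t* image_begin = ...;  size_t offset = ...;
//   int64_t slot = static_cast<int64_t>(reinterpret_cast<uintptr_t>(image_begin + offset));
// Widening an unsigned uintptr_t to int64_t is value-preserving, so the top 32
// bits of the 64-bit dex cache field stay clear; a signed intermediate type
// could sign-extend and set them instead.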
+ const char* oat_filename = GetOatFilenameForDexCache(orig_dex_cache); GcRoot<mirror::String>* orig_strings = orig_dex_cache->GetStrings(); if (orig_strings != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(), - NativeLocationInImage(orig_strings), + NativeLocationInImage(orig_strings, oat_filename), /*pointer size*/8u); - orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings), ImageAddressVisitor(this)); + orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache), + ImageAddressVisitor(this)); } GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes(); if (orig_types != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(), - NativeLocationInImage(orig_types), + NativeLocationInImage(orig_types, oat_filename), /*pointer size*/8u); - orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types), ImageAddressVisitor(this)); + orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache), + ImageAddressVisitor(this)); } ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods(); if (orig_methods != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodsOffset(), - NativeLocationInImage(orig_methods), + NativeLocationInImage(orig_methods, oat_filename), /*pointer size*/8u); - ArtMethod** copy_methods = NativeCopyLocation(orig_methods); + ArtMethod** copy_methods = NativeCopyLocation(orig_methods, orig_dex_cache); for (size_t i = 0, num = orig_dex_cache->NumResolvedMethods(); i != num; ++i) { ArtMethod* orig = mirror::DexCache::GetElementPtrSize(orig_methods, i, target_ptr_size_); - ArtMethod* copy = NativeLocationInImage(orig); + const char* method_oat_filename; + if (orig == nullptr || orig->IsRuntimeMethod()) { + method_oat_filename = default_oat_filename_; + } else { + method_oat_filename = GetOatFilenameForDexCache(orig->GetDexCache()); + } + ArtMethod* copy = NativeLocationInImage(orig, method_oat_filename); mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_); } } ArtField** orig_fields = orig_dex_cache->GetResolvedFields(); if (orig_fields != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(), - NativeLocationInImage(orig_fields), + NativeLocationInImage(orig_fields, oat_filename), /*pointer size*/8u); - ArtField** copy_fields = NativeCopyLocation(orig_fields); + ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache); for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) { ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_); - ArtField* copy = NativeLocationInImage(orig); + const char* field_oat_filename = + orig == nullptr ? default_oat_filename_ : GetOatFilenameForDexCache(orig->GetDexCache()); + ArtField* copy = NativeLocationInImage(orig, field_oat_filename); mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_); } } @@ -1747,9 +1896,10 @@ const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const { // If we are compiling an app image, we need to use the stubs of the boot image. if (compile_app_image_) { // Use the current image pointers. 
- gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace(); - DCHECK(image_space != nullptr); - const OatFile* oat_file = image_space->GetOatFile(); + std::vector<gc::space::ImageSpace*> image_spaces = + Runtime::Current()->GetHeap()->GetBootImageSpaces(); + DCHECK(!image_spaces.empty()); + const OatFile* oat_file = image_spaces[0]->GetOatFile(); CHECK(oat_file != nullptr); const OatHeader& header = oat_file->GetOatHeader(); switch (type) { @@ -1772,10 +1922,13 @@ const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const { UNREACHABLE(); } } - return GetOatAddressForOffset(oat_address_offsets_[type]); + const ImageInfo& primary_image_info = GetImageInfo(0); + return GetOatAddressForOffset(primary_image_info.oat_address_offsets_[type], primary_image_info); } -const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) { +const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, + const ImageInfo& image_info, + bool* quick_is_interpreted) { DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method); DCHECK(!method->IsImtConflictMethod()) << PrettyMethod(method); DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method); @@ -1788,7 +1941,7 @@ const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_inter // Quick entrypoint: uint32_t quick_oat_code_offset = PointerToLowMemUInt32( method->GetEntryPointFromQuickCompiledCodePtrSize(target_ptr_size_)); - const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset); + const uint8_t* quick_code = GetOatAddressForOffset(quick_oat_code_offset, image_info); *quick_is_interpreted = false; if (quick_code != nullptr && (!method->IsStatic() || method->IsConstructor() || method->GetDeclaringClass()->IsInitialized())) { @@ -1808,42 +1961,32 @@ const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method, bool* quick_is_inter quick_code = GetOatAddress(kOatAddressQuickResolutionTrampoline); } if (!IsInBootOatFile(quick_code)) { - DCHECK_GE(quick_code, oat_data_begin_); + // DCHECK_GE(quick_code, oat_data_begin_); } return quick_code; } -const uint8_t* ImageWriter::GetQuickEntryPoint(ArtMethod* method) { - // Calculate the quick entry point following the same logic as FixupMethod() below. - // The resolution method has a special trampoline to call. - Runtime* runtime = Runtime::Current(); - if (UNLIKELY(method == runtime->GetResolutionMethod())) { - return GetOatAddress(kOatAddressQuickResolutionTrampoline); - } else if (UNLIKELY(method == runtime->GetImtConflictMethod() || - method == runtime->GetImtUnimplementedMethod())) { - return GetOatAddress(kOatAddressQuickIMTConflictTrampoline); - } else { - // We assume all methods have code. If they don't currently then we set them to the use the - // resolution trampoline. Abstract methods never have code and so we need to make sure their - // use results in an AbstractMethodError. We use the interpreter to achieve this. 
- if (UNLIKELY(!method->IsInvokable())) { - return GetOatAddress(kOatAddressQuickToInterpreterBridge); - } else { - bool quick_is_interpreted; - return GetQuickCode(method, &quick_is_interpreted); - } - } -} - -void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) { +void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, + ArtMethod* copy, + const ImageInfo& image_info) { memcpy(copy, orig, ArtMethod::Size(target_ptr_size_)); copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked())); + const char* oat_filename; + if (orig->IsRuntimeMethod() || compile_app_image_) { + oat_filename = default_oat_filename_; + } else { + auto it = dex_file_oat_filename_map_.find(orig->GetDexFile()); + DCHECK(it != dex_file_oat_filename_map_.end()) << orig->GetDexFile()->GetLocation(); + oat_filename = it->second; + } ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_); - copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_); + copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods, oat_filename), + target_ptr_size_); GcRoot<mirror::Class>* orig_resolved_types = orig->GetDexCacheResolvedTypes(target_ptr_size_); - copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types), target_ptr_size_); + copy->SetDexCacheResolvedTypes(NativeLocationInImage(orig_resolved_types, oat_filename), + target_ptr_size_); // OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to // oat_begin_ @@ -1877,7 +2020,7 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) { GetOatAddress(kOatAddressQuickToInterpreterBridge), target_ptr_size_); } else { bool quick_is_interpreted; - const uint8_t* quick_code = GetQuickCode(orig, &quick_is_interpreted); + const uint8_t* quick_code = GetQuickCode(orig, image_info, &quick_is_interpreted); copy->SetEntryPointFromQuickCompiledCodePtrSize(quick_code, target_ptr_size_); // JNI entrypoint: @@ -1914,13 +2057,16 @@ void ImageWriter::SetOatChecksumFromElfFile(File* elf_file) { CHECK(oat_header != nullptr); CHECK(oat_header->IsValid()); - ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_->Begin()); + ImageInfo& image_info = GetImageInfo(oat_file_->GetLocation().c_str()); + ImageHeader* image_header = reinterpret_cast<ImageHeader*>(image_info.image_->Begin()); image_header->SetOatChecksum(oat_header->GetChecksum()); } -size_t ImageWriter::GetBinSizeSum(ImageWriter::Bin up_to) const { +size_t ImageWriter::GetBinSizeSum(ImageWriter::ImageInfo& image_info, ImageWriter::Bin up_to) const { DCHECK_LE(up_to, kBinSize); - return std::accumulate(&bin_slot_sizes_[0], &bin_slot_sizes_[up_to], /*init*/0); + return std::accumulate(&image_info.bin_slot_sizes_[0], + &image_info.bin_slot_sizes_[up_to], + /*init*/0); } ImageWriter::BinSlot::BinSlot(uint32_t lockword) : lockword_(lockword) { @@ -1946,15 +2092,17 @@ uint32_t ImageWriter::BinSlot::GetIndex() const { return lockword_ & ~kBinMask; } -uint8_t* ImageWriter::GetOatFileBegin() const { - DCHECK_GT(intern_table_bytes_, 0u); - size_t native_sections_size = bin_slot_sizes_[kBinArtField] + - bin_slot_sizes_[kBinArtMethodDirty] + - bin_slot_sizes_[kBinArtMethodClean] + - bin_slot_sizes_[kBinDexCacheArray] + - intern_table_bytes_ + - class_table_bytes_; - return image_begin_ + RoundUp(image_end_ + native_sections_size, kPageSize); +uint8_t* ImageWriter::GetOatFileBegin(const char* oat_filename) const { + uintptr_t last_image_end = 0; + for (const 
char* oat_fn : oat_filenames_) { + const ImageInfo& image_info = GetConstImageInfo(oat_fn); + DCHECK(image_info.image_begin_ != nullptr); + uintptr_t this_end = reinterpret_cast<uintptr_t>(image_info.image_begin_) + + image_info.image_size_; + last_image_end = std::max(this_end, last_image_end); + } + const ImageInfo& image_info = GetConstImageInfo(oat_filename); + return reinterpret_cast<uint8_t*>(last_image_end) + image_info.oat_offset_; } ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocationType type) { @@ -1974,4 +2122,94 @@ ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocat UNREACHABLE(); } +const char* ImageWriter::GetOatFilename(mirror::Object* obj) const { + if (compile_app_image_) { + return default_oat_filename_; + } else { + return GetOatFilenameForDexCache(obj->IsDexCache() ? obj->AsDexCache() : + obj->IsClass() ? obj->AsClass()->GetDexCache() : obj->GetClass()->GetDexCache()); + } +} + +const char* ImageWriter::GetOatFilenameForDexCache(mirror::DexCache* dex_cache) const { + if (compile_app_image_ || dex_cache == nullptr) { + return default_oat_filename_; + } else { + auto it = dex_file_oat_filename_map_.find(dex_cache->GetDexFile()); + DCHECK(it != dex_file_oat_filename_map_.end()) << dex_cache->GetDexFile()->GetLocation(); + return it->second; + } +} + +ImageWriter::ImageInfo& ImageWriter::GetImageInfo(const char* oat_filename) { + auto it = image_info_map_.find(oat_filename); + DCHECK(it != image_info_map_.end()); + return it->second; +} + +const ImageWriter::ImageInfo& ImageWriter::GetConstImageInfo(const char* oat_filename) const { + auto it = image_info_map_.find(oat_filename); + DCHECK(it != image_info_map_.end()); + return it->second; +} + +const ImageWriter::ImageInfo& ImageWriter::GetImageInfo(size_t index) const { + DCHECK_LT(index, oat_filenames_.size()); + return GetConstImageInfo(oat_filenames_[index]); +} + +void ImageWriter::UpdateOatFile(const char* oat_filename) { + std::unique_ptr<File> oat_file(OS::OpenFileForReading(oat_filename)); + DCHECK(oat_file != nullptr); + size_t oat_loaded_size = 0; + size_t oat_data_offset = 0; + ElfWriter::GetOatElfInformation(oat_file.get(), &oat_loaded_size, &oat_data_offset); + + ImageInfo& cur_image_info = GetImageInfo(oat_filename); + + // Update the oat_offset of the next image info. + auto it = std::find(oat_filenames_.begin(), oat_filenames_.end(), oat_filename); + DCHECK(it != oat_filenames_.end()); + + it++; + if (it != oat_filenames_.end()) { + // There is a following one. 
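The offset bookkeeping here is easier to see in isolation: images are laid out back to back first, all oat files follow the end of the last image, and each oat file's oat_offset_ becomes valid only once the previous oat file's loaded size is known. A simplified standalone model (names and sizes invented; the real code also rounds sections up to page size):

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct Img { uintptr_t image_begin; size_t image_size; size_t oat_offset; };

    // Mirrors GetOatFileBegin(): all oat files start past the last image end.
    uintptr_t OatFileBegin(const std::vector<Img>& imgs, size_t i) {
      uintptr_t last_image_end = 0;
      for (const Img& img : imgs) {
        last_image_end = std::max(last_image_end, img.image_begin + img.image_size);
      }
      return last_image_end + imgs[i].oat_offset;
    }

    int main() {
      std::vector<Img> imgs = {{0x70000000, 0x5000, 0}, {0x70005000, 0x3000, 0}};
      // Mirrors UpdateOatFile(): once an oat file's loaded size is known, the
      // *next* image's oat offset becomes valid.
      size_t oat0_loaded_size = 0x8000;
      imgs[1].oat_offset = imgs[0].oat_offset + oat0_loaded_size;
      std::printf("oat0 begins at %#lx\n", (unsigned long) OatFileBegin(imgs, 0));
      std::printf("oat1 begins at %#lx\n", (unsigned long) OatFileBegin(imgs, 1));
      return 0;
    }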
+ ImageInfo& next_image_info = GetImageInfo(*it); + next_image_info.oat_offset_ = cur_image_info.oat_offset_ + oat_loaded_size; + } +} + +ImageWriter::ImageWriter( + const CompilerDriver& compiler_driver, + uintptr_t image_begin, + bool compile_pic, + bool compile_app_image, + ImageHeader::StorageMode image_storage_mode, + const std::vector<const char*> oat_filenames, + const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map) + : compiler_driver_(compiler_driver), + global_image_begin_(reinterpret_cast<uint8_t*>(image_begin)), + image_objects_offset_begin_(0), + oat_file_(nullptr), + compile_pic_(compile_pic), + compile_app_image_(compile_app_image), + target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), + image_method_array_(ImageHeader::kImageMethodsCount), + dirty_methods_(0u), + clean_methods_(0u), + image_storage_mode_(image_storage_mode), + dex_file_oat_filename_map_(dex_file_oat_filename_map), + oat_filenames_(oat_filenames), + default_oat_filename_(oat_filenames[0]) { + CHECK_NE(image_begin, 0U); + for (const char* oat_filename : oat_filenames) { + image_info_map_.emplace(oat_filename, ImageInfo()); + } + std::fill_n(image_methods_, arraysize(image_methods_), nullptr); +} + +ImageWriter::ImageInfo::ImageInfo() + : intern_table_(new InternTable), + class_table_(new ClassTable) {} + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index f1b2965a12..ad690389e9 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -47,6 +47,8 @@ class ImageSpace; } // namespace space } // namespace gc +class ClassTable; + static constexpr int kInvalidImageFd = -1; // Write a Space built during compilation for use during execution. @@ -56,46 +58,32 @@ class ImageWriter FINAL { uintptr_t image_begin, bool compile_pic, bool compile_app_image, - ImageHeader::StorageMode image_storage_mode) - : compiler_driver_(compiler_driver), - image_begin_(reinterpret_cast<uint8_t*>(image_begin)), - image_end_(0), - image_objects_offset_begin_(0), - image_roots_address_(0), - oat_file_(nullptr), - oat_data_begin_(nullptr), - compile_pic_(compile_pic), - compile_app_image_(compile_app_image), - boot_image_space_(nullptr), - target_ptr_size_(InstructionSetPointerSize(compiler_driver_.GetInstructionSet())), - bin_slot_sizes_(), - bin_slot_offsets_(), - bin_slot_count_(), - intern_table_bytes_(0u), - image_method_array_(ImageHeader::kImageMethodsCount), - dirty_methods_(0u), - clean_methods_(0u), - class_table_bytes_(0u), - image_storage_mode_(image_storage_mode) { - CHECK_NE(image_begin, 0U); - std::fill_n(image_methods_, arraysize(image_methods_), nullptr); - std::fill_n(oat_address_offsets_, arraysize(oat_address_offsets_), 0); - } - - ~ImageWriter() { - } + ImageHeader::StorageMode image_storage_mode, + const std::vector<const char*> oat_filenames, + const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map); bool PrepareImageAddressSpace(); bool IsImageAddressSpaceReady() const { - return image_roots_address_ != 0u; + bool ready = !image_info_map_.empty(); + for (auto& pair : image_info_map_) { + const ImageInfo& image_info = pair.second; + if (image_info.image_roots_address_ == 0u) { + return false; + } + } + return ready; } template <typename T> T* GetImageAddress(T* object) const SHARED_REQUIRES(Locks::mutator_lock_) { - return (object == nullptr || IsInBootImage(object)) - ? 
object - : reinterpret_cast<T*>(image_begin_ + GetImageOffset(object)); + if (object == nullptr || IsInBootImage(object)) { + return object; + } else { + const char* oat_filename = GetOatFilename(object); + const ImageInfo& image_info = GetConstImageInfo(oat_filename); + return reinterpret_cast<T*>(image_info.image_begin_ + GetImageOffset(object)); + } } ArtMethod* GetImageMethodAddress(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_); @@ -103,26 +91,36 @@ class ImageWriter FINAL { template <typename PtrType> PtrType GetDexCacheArrayElementImageAddress(const DexFile* dex_file, uint32_t offset) const SHARED_REQUIRES(Locks::mutator_lock_) { - auto it = dex_cache_array_starts_.find(dex_file); - DCHECK(it != dex_cache_array_starts_.end()); + auto oat_it = dex_file_oat_filename_map_.find(dex_file); + DCHECK(oat_it != dex_file_oat_filename_map_.end()); + const ImageInfo& image_info = GetConstImageInfo(oat_it->second); + auto it = image_info.dex_cache_array_starts_.find(dex_file); + DCHECK(it != image_info.dex_cache_array_starts_.end()); return reinterpret_cast<PtrType>( - image_begin_ + bin_slot_offsets_[kBinDexCacheArray] + it->second + offset); + image_info.image_begin_ + image_info.bin_slot_offsets_[kBinDexCacheArray] + + it->second + offset); } - uint8_t* GetOatFileBegin() const; + uint8_t* GetOatFileBegin(const char* oat_filename) const; // If image_fd is not kInvalidImageFd, then we use that for the file. Otherwise we open - // image_filename. + // the names in image_filenames. bool Write(int image_fd, - const std::string& image_filename, - const std::string& oat_filename, - const std::string& oat_location) + const std::vector<const char*>& image_filenames, + const std::vector<const char*>& oat_filenames) REQUIRES(!Locks::mutator_lock_); - uintptr_t GetOatDataBegin() { - return reinterpret_cast<uintptr_t>(oat_data_begin_); + uintptr_t GetOatDataBegin(const char* oat_filename) { + return reinterpret_cast<uintptr_t>(GetImageInfo(oat_filename).oat_data_begin_); } + const char* GetOatFilenameForDexCache(mirror::DexCache* dex_cache) const + SHARED_REQUIRES(Locks::mutator_lock_); + + // Update the oat size for the given oat file. This will make the oat_offset for the next oat + // file valid. + void UpdateOatFile(const char* oat_filename); + private: bool AllocMemory(); @@ -214,6 +212,69 @@ class ImageWriter FINAL { const uint32_t lockword_; }; + struct ImageInfo { + ImageInfo(); + ImageInfo(ImageInfo&&) = default; + + // Creates the image sections into the out sections variable and returns the size of the image + // excluding the bitmap. + size_t CreateImageSections(size_t target_ptr_size, ImageSection* out_sections) const; + + std::unique_ptr<MemMap> image_; // Memory mapped for generating the image. + + // Target begin of this image. Note: it is not valid to write here, this is the address + // of the target image, not necessarily where image_ is mapped. The address is only valid + // after layout (otherwise null). + uint8_t* image_begin_ = nullptr; + + // Offset to the free space in image_, initially size of image header. + size_t image_end_ = RoundUp(sizeof(ImageHeader), kObjectAlignment); + uint32_t image_roots_address_ = 0; // The image roots address in the image. + size_t image_offset_ = 0; // Offset of this image from the start of the first image. + + // Image size is the *address space* covered by this image. As the live bitmap is aligned + // to the page size, the live bitmap will cover more address space than necessary. 
But live + // bitmaps may not overlap, so an image has a "shadow," which is accounted for in the size. + // The next image may only start at image_begin_ + image_size_ (which is guaranteed to be + // page-aligned). + size_t image_size_ = 0; + + // Oat data. + // Offset of the oat file for this image from start of oat files. This is + // valid when the previous oat file has been written. + size_t oat_offset_ = 0; + // Start of oatdata in the corresponding oat file. This is + // valid when the images have been laid out. + uint8_t* oat_data_begin_ = nullptr; + size_t oat_size_ = 0; // Size of the corresponding oat data. + + // Image bitmap which lets us know where the objects inside of the image reside. + std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_; + + // The start offsets of the dex cache arrays. + SafeMap<const DexFile*, size_t> dex_cache_array_starts_; + + // Offset from oat_data_begin_ to the stubs. + uint32_t oat_address_offsets_[kOatAddressCount] = {}; + + // Bin slot tracking for dirty object packing. + size_t bin_slot_sizes_[kBinSize] = {}; // Number of bytes in a bin. + size_t bin_slot_offsets_[kBinSize] = {}; // Number of bytes in previous bins. + size_t bin_slot_count_[kBinSize] = {}; // Number of objects in a bin. + + // Cached size of the intern table for when we allocate memory. + size_t intern_table_bytes_ = 0; + + // Number of image class table bytes. + size_t class_table_bytes_ = 0; + + // Intern table associated with this image for serialization. + std::unique_ptr<InternTable> intern_table_; + + // Class table associated with this image for serialization. + std::unique_ptr<ClassTable> class_table_; + }; + // We use the lock word to store the offset of the object in the image. void AssignImageOffset(mirror::Object* object, BinSlot bin_slot) SHARED_REQUIRES(Locks::mutator_lock_); @@ -233,7 +294,8 @@ class ImageWriter FINAL { SHARED_REQUIRES(Locks::mutator_lock_); BinSlot GetImageBinSlot(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_); - void AddDexCacheArrayRelocation(void* array, size_t offset) SHARED_REQUIRES(Locks::mutator_lock_); + void AddDexCacheArrayRelocation(void* array, size_t offset, mirror::DexCache* dex_cache) + SHARED_REQUIRES(Locks::mutator_lock_); void AddMethodPointerArray(mirror::PointerArray* arr) SHARED_REQUIRES(Locks::mutator_lock_); static void* GetImageAddressCallback(void* writer, mirror::Object* obj) @@ -244,19 +306,21 @@ class ImageWriter FINAL { mirror::Object* GetLocalAddress(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_) { size_t offset = GetImageOffset(object); - uint8_t* dst = image_->Begin() + offset; + const char* oat_filename = GetOatFilename(object); + const ImageInfo& image_info = GetConstImageInfo(oat_filename); + uint8_t* dst = image_info.image_->Begin() + offset; return reinterpret_cast<mirror::Object*>(dst); } // Returns the address in the boot image if we are compiling the app image. const uint8_t* GetOatAddress(OatAddress type) const; - const uint8_t* GetOatAddressForOffset(uint32_t offset) const { + const uint8_t* GetOatAddressForOffset(uint32_t offset, const ImageInfo& image_info) const { // With Quick, code is within the OatFile, as they are all in one - // .o ELF object. - DCHECK_LE(offset, oat_file_->Size()); - DCHECK(oat_data_begin_ != nullptr); - return offset == 0u ? nullptr : oat_data_begin_ + offset; + // .o ELF object. But interpret it as signed. 
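The "interpret it as signed" note above changes the arithmetic that follows: the stored 32-bit offset is reinterpreted as int32_t, so a large unsigned value acts as a negative displacement from oat_data_begin_. A tiny standalone illustration of the mechanics (buffer and values invented; this is not the patch's code):

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned char buffer[64] = {};
      unsigned char* oat_data_begin = buffer + 32;
      // A stored field of 0xFFFFFFF0 reads back as -16 once treated as signed.
      uint32_t stored = 0xFFFFFFF0u;
      unsigned char* addr = oat_data_begin + static_cast<int32_t>(stored);
      std::printf("displacement: %d\n", static_cast<int>(addr - oat_data_begin));
      return 0;
    }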
+ DCHECK_LE(static_cast<int32_t>(offset), static_cast<int32_t>(image_info.oat_size_)); + DCHECK(image_info.oat_data_begin_ != nullptr); + return offset == 0u ? nullptr : image_info.oat_data_begin_ + static_cast<int32_t>(offset); } // Returns true if the class was in the original requested image classes list. @@ -282,7 +346,7 @@ class ImageWriter FINAL { SHARED_REQUIRES(Locks::mutator_lock_); void CreateHeader(size_t oat_loaded_size, size_t oat_data_offset) SHARED_REQUIRES(Locks::mutator_lock_); - mirror::ObjectArray<mirror::Object>* CreateImageRoots() const + mirror::ObjectArray<mirror::Object>* CreateImageRoots(const char* oat_filename) const SHARED_REQUIRES(Locks::mutator_lock_); void CalculateObjectBinSlots(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_); @@ -304,7 +368,7 @@ class ImageWriter FINAL { static void CopyAndFixupObjectsCallback(mirror::Object* obj, void* arg) SHARED_REQUIRES(Locks::mutator_lock_); void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_); - void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy) + void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info) SHARED_REQUIRES(Locks::mutator_lock_); void FixupClass(mirror::Class* orig, mirror::Class* copy) SHARED_REQUIRES(Locks::mutator_lock_); @@ -319,23 +383,24 @@ class ImageWriter FINAL { SHARED_REQUIRES(Locks::mutator_lock_); // Get quick code for non-resolution/imt_conflict/abstract method. - const uint8_t* GetQuickCode(ArtMethod* method, bool* quick_is_interpreted) - SHARED_REQUIRES(Locks::mutator_lock_); - - const uint8_t* GetQuickEntryPoint(ArtMethod* method) + const uint8_t* GetQuickCode(ArtMethod* method, + const ImageInfo& image_info, + bool* quick_is_interpreted) SHARED_REQUIRES(Locks::mutator_lock_); // Patches references in OatFile to expect runtime addresses. void SetOatChecksumFromElfFile(File* elf_file); // Calculate the sum total of the bin slot sizes in [0, up_to). Defaults to all bins. - size_t GetBinSizeSum(Bin up_to = kBinSize) const; + size_t GetBinSizeSum(ImageInfo& image_info, Bin up_to = kBinSize) const; // Return true if a method is likely to be dirtied at runtime. bool WillMethodBeDirty(ArtMethod* m) const SHARED_REQUIRES(Locks::mutator_lock_); // Assign the offset for an ArtMethod. - void AssignMethodOffset(ArtMethod* method, NativeObjectRelocationType type) + void AssignMethodOffset(ArtMethod* method, + NativeObjectRelocationType type, + const char* oat_filename) SHARED_REQUIRES(Locks::mutator_lock_); // Return true if klass is loaded by the boot class loader but not in the boot image. @@ -359,11 +424,11 @@ class ImageWriter FINAL { // Location of where the object will be when the image is loaded at runtime. template <typename T> - T* NativeLocationInImage(T* obj); + T* NativeLocationInImage(T* obj, const char* oat_filename) SHARED_REQUIRES(Locks::mutator_lock_); // Location of where the temporary copy of the object currently is. template <typename T> - T* NativeCopyLocation(T* obj); + T* NativeCopyLocation(T* obj, mirror::DexCache* dex_cache) SHARED_REQUIRES(Locks::mutator_lock_); // Return true if obj is inside of the boot image space. This may only return true if we are // compiling an app image. @@ -372,68 +437,50 @@ class ImageWriter FINAL { // Return true if ptr is within the boot oat file. 
bool IsInBootOatFile(const void* ptr) const; - const CompilerDriver& compiler_driver_; + const char* GetOatFilename(mirror::Object* object) const SHARED_REQUIRES(Locks::mutator_lock_); + + const char* GetDefaultOatFilename() const { + return default_oat_filename_; + } - // Beginning target image address for the output image. - uint8_t* image_begin_; + ImageInfo& GetImageInfo(const char* oat_filename); + const ImageInfo& GetConstImageInfo(const char* oat_filename) const; + const ImageInfo& GetImageInfo(size_t index) const; + + const CompilerDriver& compiler_driver_; - // Offset to the free space in image_. - size_t image_end_; + // Beginning target image address for the first image. + uint8_t* global_image_begin_; // Offset from image_begin_ to where the first object is in image_. size_t image_objects_offset_begin_; - // The image roots address in the image. - uint32_t image_roots_address_; - // oat file with code for this image OatFile* oat_file_; - // Memory mapped for generating the image. - std::unique_ptr<MemMap> image_; - // Pointer arrays that need to be updated. Since these are only some int and long arrays, we need // to keep track. These include vtable arrays, iftable arrays, and dex caches. std::unordered_map<mirror::PointerArray*, Bin> pointer_arrays_; - // The start offsets of the dex cache arrays. - SafeMap<const DexFile*, size_t> dex_cache_array_starts_; - // Saved hash codes. We use these to restore lockwords which were temporarily used to have // forwarding addresses as well as copying over hash codes. std::unordered_map<mirror::Object*, uint32_t> saved_hashcode_map_; - // Beginning target oat address for the pointers from the output image to its oat file. - const uint8_t* oat_data_begin_; - - // Image bitmap which lets us know where the objects inside of the image reside. - std::unique_ptr<gc::accounting::ContinuousSpaceBitmap> image_bitmap_; - - // Offset from oat_data_begin_ to the stubs. - uint32_t oat_address_offsets_[kOatAddressCount]; - // Boolean flags. const bool compile_pic_; const bool compile_app_image_; - // Cache the boot image space in this class for faster lookups. - gc::space::ImageSpace* boot_image_space_; - // Size of pointers on the target architecture. size_t target_ptr_size_; - // Bin slot tracking for dirty object packing - size_t bin_slot_sizes_[kBinSize]; // Number of bytes in a bin - size_t bin_slot_offsets_[kBinSize]; // Number of bytes in previous bins. - size_t bin_slot_count_[kBinSize]; // Number of objects in a bin - - // Cached size of the intern table for when we allocate memory. - size_t intern_table_bytes_; + // Mapping of oat filename to image data. + std::unordered_map<std::string, ImageInfo> image_info_map_; // ArtField, ArtMethod relocating map. These are allocated as array of structs but we want to // have one entry per art field for convenience. ArtFields are placed right after the end of the // image objects (aka sum of bin_slot_sizes_). ArtMethods are placed right after the ArtFields. struct NativeObjectRelocation { + const char* oat_filename; uintptr_t offset; NativeObjectRelocationType type; @@ -462,12 +509,14 @@ class ImageWriter FINAL { // null is a valid entry. std::unordered_set<mirror::ClassLoader*> class_loaders_; - // Number of image class table bytes. - size_t class_table_bytes_; - // Which mode the image is stored as, see image.h const ImageHeader::StorageMode image_storage_mode_; + // Map of dex files to the oat filenames that they were compiled into. 
+ const std::unordered_map<const DexFile*, const char*>& dex_file_oat_filename_map_; + const std::vector<const char*> oat_filenames_; + const char* default_oat_filename_; + friend class ContainsBootClassLoaderNonImageClassVisitor; friend class FixupClassVisitor; friend class FixupRootVisitor; diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index d001495442..bc51ed6e6a 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -22,6 +22,7 @@ #include "base/stringpiece.h" #include "base/time_utils.h" #include "base/timing_logger.h" +#include "base/unix_file/fd_file.h" #include "compiler_callbacks.h" #include "dex/pass_manager.h" #include "dex/quick_compiler_callbacks.h" @@ -42,11 +43,12 @@ JitCompiler* JitCompiler::Create() { return new JitCompiler(); } -extern "C" void* jit_load(CompilerCallbacks** callbacks) { +extern "C" void* jit_load(CompilerCallbacks** callbacks, bool* generate_debug_info) { VLOG(jit) << "loading jit compiler"; auto* const jit_compiler = JitCompiler::Create(); CHECK(jit_compiler != nullptr); *callbacks = jit_compiler->GetCompilerCallbacks(); + *generate_debug_info = jit_compiler->GetCompilerOptions()->GetGenerateDebugInfo(); VLOG(jit) << "Done loading jit compiler"; return jit_compiler; } @@ -84,6 +86,7 @@ JitCompiler::JitCompiler() : total_time_(0) { CompilerOptions::kDefaultNumDexMethodsThreshold, CompilerOptions::kDefaultInlineDepthLimit, CompilerOptions::kDefaultInlineMaxCodeUnits, + /* no_inline_from */ nullptr, /* include_patch_information */ false, CompilerOptions::kDefaultTopKProfileThreshold, Runtime::Current()->IsDebuggable(), @@ -154,13 +157,33 @@ JitCompiler::JitCompiler() : total_time_(0) { /* dump_cfg_append */ false, cumulative_logger_.get(), /* swap_fd */ -1, - /* profile_file */ "")); + /* dex to oat map */ nullptr, + /* profile_compilation_info */ nullptr)); // Disable dedupe so we can remove compiled methods. 
compiler_driver_->SetDedupeEnabled(false); compiler_driver_->SetSupportBootImageFixup(false); + + if (compiler_options_->GetGenerateDebugInfo()) { +#ifdef __ANDROID__ + const char* prefix = GetAndroidData(); +#else + const char* prefix = "/tmp"; +#endif + DCHECK_EQ(compiler_driver_->GetThreadCount(), 1u) + << "Generating debug info only works with one compiler thread"; + std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map"; + perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str())); + if (perf_file_ == nullptr) { + LOG(FATAL) << "Could not create perf file at " << perf_filename; + } + } } JitCompiler::~JitCompiler() { + if (perf_file_ != nullptr) { + UNUSED(perf_file_->Flush()); + UNUSED(perf_file_->Close()); + } } bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { @@ -186,6 +209,20 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); + if (success && compiler_options_->GetGenerateDebugInfo()) { + const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode(); + std::ostringstream stream; + stream << std::hex + << reinterpret_cast<uintptr_t>(ptr) + << " " + << code_cache->GetMemorySizeOfCodePointer(ptr) + << " " + << PrettyMethod(method_to_compile) + << std::endl; + std::string str = stream.str(); + bool res = perf_file_->WriteFully(str.c_str(), str.size()); + CHECK(res); + } } // Trim maps to reduce memory usage. diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 913a6d00ae..037a18ac7a 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -43,6 +43,9 @@ class JitCompiler { size_t GetTotalCompileTime() const { return total_time_; } + CompilerOptions* GetCompilerOptions() const { + return compiler_options_.get(); + } private: uint64_t total_time_; @@ -53,6 +56,7 @@ class JitCompiler { std::unique_ptr<CompilerCallbacks> callbacks_; std::unique_ptr<CompilerDriver> compiler_driver_; std::unique_ptr<const InstructionSetFeatures> instruction_set_features_; + std::unique_ptr<File> perf_file_; JitCompiler(); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 5ab55e0614..8d60be20ee 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -220,7 +220,8 @@ void JniCompilerTest::CompileAndRunIntMethodThroughStubImpl() { std::string reason; ASSERT_TRUE(Runtime::Current()->GetJavaVM()-> - LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason)) + LoadNativeLibrary(env_, "", class_loader_, /* is_shared_namespace */ false, + nullptr, nullptr, &reason)) << reason; jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 24); @@ -235,7 +236,8 @@ void JniCompilerTest::CompileAndRunStaticIntMethodThroughStubImpl() { std::string reason; ASSERT_TRUE(Runtime::Current()->GetJavaVM()-> - LoadNativeLibrary(env_, "", class_loader_, nullptr, nullptr, &reason)) + LoadNativeLibrary(env_, "", class_loader_, /* is_shared_namespace */ false, + nullptr, nullptr, &reason)) << reason; jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 42); diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index 92cf8ca7ff..b10cc3534c 100644 --- a/compiler/linker/relative_patcher_test.h +++ 
b/compiler/linker/relative_patcher_test.h @@ -47,7 +47,7 @@ class RelativePatcherTest : public testing::Test { driver_(&compiler_options_, &verification_results_, &inliner_map_, Compiler::kQuick, instruction_set, nullptr, false, nullptr, nullptr, nullptr, 1u, - false, false, "", false, nullptr, -1, ""), + false, false, "", false, nullptr, -1, nullptr, nullptr), error_msg_(), instruction_set_(instruction_set), features_(InstructionSetFeatures::FromVariant(instruction_set, variant, &error_msg_)), diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 451aa682d6..9f7ffa5ace 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -121,7 +121,8 @@ class OatTest : public CommonCompilerTest { false, timer_.get(), -1, - "")); + nullptr, + nullptr)); } bool WriteElf(File* file, @@ -199,7 +200,7 @@ TEST_F(OatTest, WriteRead) { ASSERT_TRUE(oat_file.get() != nullptr) << error_msg; const OatHeader& oat_header = oat_file->GetOatHeader(); ASSERT_TRUE(oat_header.IsValid()); - ASSERT_EQ(1U, oat_header.GetDexFileCount()); // core + ASSERT_EQ(class_linker->GetBootClassPath().size(), oat_header.GetDexFileCount()); // core ASSERT_EQ(42U, oat_header.GetImageFileLocationOatChecksum()); ASSERT_EQ(4096U, oat_header.GetImageFileLocationOatDataBegin()); ASSERT_EQ("lue.art", std::string(oat_header.GetStoreValueByKey(OatHeader::kImageLocationKey))); @@ -224,8 +225,9 @@ TEST_F(OatTest, WriteRead) { } const char* descriptor = dex_file.GetClassDescriptor(class_def); - mirror::Class* klass = class_linker->FindClass(soa.Self(), descriptor, - NullHandle<mirror::ClassLoader>()); + mirror::Class* klass = class_linker->FindClass(soa.Self(), + descriptor, + ScopedNullHandle<mirror::ClassLoader>()); const OatFile::OatClass oat_class = oat_dex_file->GetOatClass(i); CHECK_EQ(mirror::Class::Status::kStatusNotReady, oat_class.GetStatus()) << descriptor; diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 2b2f0e8c26..025e35e178 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -716,6 +716,14 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) SHARED_REQUIRES(Locks::mutator_lock_) { + const DexFile::TypeId& type_id = + dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); + const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); + // Skip methods that are not in the image. 
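One detail worth spelling out from the jit_compiler.cc hunk above: the file it creates is a standard perf map, the plain-text format perf(1) uses to resolve JIT-compiled frames, one "start size name" line in hex per compiled method, conventionally at /tmp/perf-<pid>.map. A minimal sketch of a writer for that line format (simplified; not the patch's code):

    #include <cstdint>
    #include <cstdio>

    // One perf map entry: "<hex start> <hex size> <symbol name>\n".
    void WritePerfMapLine(std::FILE* f, uintptr_t code_begin, size_t code_size,
                          const char* pretty_name) {
      std::fprintf(f, "%zx %zx %s\n",
                   static_cast<size_t>(code_begin), code_size, pretty_name);
    }

    int main() {
      // Invented address, size, and method name, for illustration only.
      WritePerfMapLine(stdout, 0x7f3a00001000u, 0x80u,
                       "void com.example.Foo.bar(int)");
      return 0;
    }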
+ if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) { + return true; + } + OatClass* oat_class = &writer_->oat_classes_[oat_class_index_]; CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index); @@ -737,7 +745,7 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { *dex_file_, it.GetMemberIndex(), dex_cache, - NullHandle<mirror::ClassLoader>(), + ScopedNullHandle<mirror::ClassLoader>(), nullptr, invoke_type); if (method == nullptr) { @@ -958,7 +966,9 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { if (writer_->HasBootImage()) { auto* element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<const uint8_t*>( patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset()); - const uint8_t* oat_data = writer_->image_writer_->GetOatFileBegin() + file_offset_; + const char* oat_filename = writer_->image_writer_->GetOatFilenameForDexCache(dex_cache_); + const uint8_t* oat_data = + writer_->image_writer_->GetOatFileBegin(oat_filename) + file_offset_; return element - oat_data; } else { size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile()); @@ -994,9 +1004,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { // NOTE: We're using linker patches for app->boot references when the image can // be relocated and therefore we need to emit .oat_patches. We're not using this // for app->app references, so check that the method is an image method. - gc::space::ImageSpace* image_space = Runtime::Current()->GetHeap()->GetBootImageSpace(); - size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin(); - CHECK(image_space->GetImageHeader().GetMethodsSection().Contains(method_offset)); + std::vector<gc::space::ImageSpace*> image_spaces = + Runtime::Current()->GetHeap()->GetBootImageSpaces(); + bool contains_method = false; + for (gc::space::ImageSpace* image_space : image_spaces) { + size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin(); + contains_method |= + image_space->GetImageHeader().GetMethodsSection().Contains(method_offset); + } + CHECK(contains_method); } // Note: We only patch targeting ArtMethods in image which is in the low 4gb. 
uint32_t address = PointerToLowMemUInt32(method); @@ -1012,7 +1028,8 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { SHARED_REQUIRES(Locks::mutator_lock_) { uint32_t address = target_offset; if (writer_->HasBootImage()) { - address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin() + + const char* oat_filename = writer_->image_writer_->GetOatFilenameForDexCache(dex_cache_); + address = PointerToLowMemUInt32(writer_->image_writer_->GetOatFileBegin(oat_filename) + writer_->oat_data_offset_ + target_offset); } DCHECK_LE(offset + 4, code->size()); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 4c3f66aa4f..d710747e76 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1142,7 +1142,7 @@ class BCEVisitor : public HGraphVisitor { loop->IsDefinedOutOfTheLoop(array_get->InputAt(1))) { SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader()); if (!array_get->GetSideEffects().MayDependOn(loop_effects)) { - HoistToPreheaderOrDeoptBlock(loop, array_get); + HoistToPreHeaderOrDeoptBlock(loop, array_get); } } } @@ -1280,7 +1280,8 @@ class BCEVisitor : public HGraphVisitor { // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests // correctly guard against any possible OOB (including arithmetic wrap-around cases). - HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, instruction); induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper); if (lower != nullptr) { InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper)); @@ -1353,7 +1354,7 @@ class BCEVisitor : public HGraphVisitor { return true; } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) { if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) { - HoistToPreheaderOrDeoptBlock(loop, length); + HoistToPreHeaderOrDeoptBlock(loop, length); return true; } } @@ -1371,7 +1372,8 @@ class BCEVisitor : public HGraphVisitor { HInstruction* array = check->InputAt(0); if (loop->IsDefinedOutOfTheLoop(array)) { // Generate: if (array == null) deoptimize; - HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); + HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); InsertDeopt(loop, block, cond); @@ -1418,6 +1420,28 @@ class BCEVisitor : public HGraphVisitor { return true; } + /** + * Returns the appropriate preheader for the loop, depending on whether the + * instruction appears in the loop header or proper loop-body. + */ + HBasicBlock* GetPreHeader(HLoopInformation* loop, HInstruction* instruction) { + // Use preheader unless there is an earlier generated deoptimization block since + // hoisted expressions may depend on and/or be used by the deoptimization tests. 
+ HBasicBlock* header = loop->GetHeader(); + const uint32_t loop_id = header->GetBlockId(); + auto it = taken_test_loop_.find(loop_id); + if (it != taken_test_loop_.end()) { + HBasicBlock* block = it->second; + // If always taken, keep it that way by returning the original preheader, + // which can be found by following the predecessor of the true-block twice. + if (instruction->GetBlock() == header) { + return block->GetSinglePredecessor()->GetSinglePredecessor(); + } + return block; + } + return loop->GetPreHeader(); + } + /** Inserts a deoptimization test. */ void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); @@ -1432,28 +1456,17 @@ class BCEVisitor : public HGraphVisitor { } /** Hoists instruction out of the loop to preheader or deoptimization block. */ - void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { - // Use preheader unless there is an earlier generated deoptimization block since - // hoisted expressions may depend on and/or used by the deoptimization tests. - const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - HBasicBlock* preheader = loop->GetPreHeader(); - HBasicBlock* block = preheader; - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - block = it->second; - } - // Hoist the instruction. + void HoistToPreHeaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) { + HBasicBlock* block = GetPreHeader(loop, instruction); DCHECK(!instruction->HasEnvironment()); instruction->MoveBefore(block->GetLastInstruction()); } /** - * Adds a new taken-test structure to a loop if needed (and not already done). + * Adds a new taken-test structure to a loop if needed and not already done. * The taken-test protects range analysis evaluation code to avoid any * deoptimization caused by incorrect trip-count evaluation in non-taken loops. * - * Returns block in which deoptimizations/invariants can be put. - * * old_preheader * | * if_block <- taken-test protects deoptimization block @@ -1485,16 +1498,11 @@ class BCEVisitor : public HGraphVisitor { * array[i] = 0; * } */ - HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { - // Not needed (can use preheader), or already done (can reuse)? + void TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) { + // Not needed (can use preheader) or already done (can reuse)? const uint32_t loop_id = loop->GetHeader()->GetBlockId(); - if (!needs_taken_test) { - return loop->GetPreHeader(); - } else { - auto it = taken_test_loop_.find(loop_id); - if (it != taken_test_loop_.end()) { - return it->second; - } + if (!needs_taken_test || taken_test_loop_.find(loop_id) != taken_test_loop_.end()) { + return; } // Generate top test structure. @@ -1523,7 +1531,6 @@ class BCEVisitor : public HGraphVisitor { if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); taken_test_loop_.Put(loop_id, true_block); - return true_block; } /** @@ -1538,7 +1545,7 @@ class BCEVisitor : public HGraphVisitor { * \ / * x_1 = phi(x_0, null) <- synthetic phi * | - * header + * new_preheader */ void InsertPhiNodes() { // Scan all new deoptimization blocks. 
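The control-flow diagram above is the whole story; the only subtlety is where hoisted code may go once a taken-test exists. A toy standalone model of that choice (the block and loop structs are invented for illustration; the real code walks HBasicBlock predecessors exactly as the comment describes):

    #include <cstdio>

    struct Block { const char* name; Block* single_pred; };

    struct Loop {
      Block* header;
      Block* preheader;    // Original preheader.
      Block* deopt_block;  // true-block of the taken-test, or null.
    };

    Block* HoistTarget(const Loop& loop, bool instruction_in_header) {
      if (loop.deopt_block != nullptr) {
        if (instruction_in_header) {
          // Keep always-taken header instructions out of the guarded block:
          // walk back through if_block to the original preheader.
          return loop.deopt_block->single_pred->single_pred;
        }
        return loop.deopt_block;
      }
      return loop.preheader;
    }

    int main() {
      Block old_preheader{"old_preheader", nullptr};
      Block if_block{"if_block", &old_preheader};
      Block true_block{"true_block (deopt)", &if_block};
      Block header{"header", &true_block};
      Loop loop{&header, &old_preheader, &true_block};
      std::printf("body instruction -> %s\n", HoistTarget(loop, false)->name);
      std::printf("header instruction -> %s\n", HoistTarget(loop, true)->name);
      return 0;
    }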
@@ -1590,15 +1597,18 @@ HGraph* graph = GetGraph(); HInstruction* zero; switch (type) { - case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break; - case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break; - case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break; + case Primitive::kPrimNot: zero = graph->GetNullConstant(); break; + case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break; + case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break; default: zero = graph->GetConstant(type, 0); break; } HPhi* phi = new (graph->GetArena()) HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type)); phi->SetRawInputAt(0, instruction); phi->SetRawInputAt(1, zero); + if (type == Primitive::kPrimNot) { + phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo()); + } new_preheader->AddPhi(phi); return phi; } diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 1178d0fb25..1af684683b 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -17,6 +17,8 @@ #include "builder.h" #include "art_field-inl.h" +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" #include "base/logging.h" #include "class_linker.h" #include "dex/verified_method.h" @@ -458,6 +460,19 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { return false; } + // Find locations where we want to generate extra stackmaps for native debugging. + // This allows us to generate the info only at interesting points (for example, + // at the start of a Java statement) rather than before every dex instruction. + const bool native_debuggable = compiler_driver_ != nullptr && + compiler_driver_->GetCompilerOptions().GetNativeDebuggable(); + ArenaBitVector* native_debug_info_locations; + if (native_debuggable) { + const uint32_t num_instructions = code_item.insns_size_in_code_units_; + native_debug_info_locations = new (arena_) ArenaBitVector(arena_, num_instructions, false); + native_debug_info_locations->ClearAllBits(); + FindNativeDebugInfoLocations(code_item, native_debug_info_locations); + } + CreateBlocksForTryCatch(code_item); InitializeParameters(code_item.ins_size_); @@ -467,6 +482,11 @@ bool HGraphBuilder::BuildGraph(const DexFile::CodeItem& code_item) { // Update the current block if dex_pc starts a new block. MaybeUpdateCurrentBlock(dex_pc); const Instruction& instruction = *Instruction::At(code_ptr); + if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) { + if (current_block_ != nullptr) { + current_block_->AddInstruction(new (arena_) HNativeDebugInfo(dex_pc)); + } + } if (!AnalyzeDexInstruction(instruction, dex_pc)) { return false; } @@ -507,6 +527,47 @@ void HGraphBuilder::MaybeUpdateCurrentBlock(size_t dex_pc) { current_block_ = block; } +void HGraphBuilder::FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, + ArenaBitVector* locations) { + // The callback gets called when the line number changes. + // In other words, it marks the start of a new Java statement. + struct Callback { + static bool Position(void* ctx, const DexFile::PositionInfo& entry) { + static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_); + return false; + } + }; + dex_file_->DecodeDebugPositionInfo(&code_item, Callback::Position, locations); + // Add native debug info at the start of every basic block. 
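Read as a recipe, this location-picking has three steps: set a bit at every source-line start from the debug position table, set a bit at every basic-block start, then shift bits off MOVE_RESULT*/MOVE_EXCEPTION instructions onto their successors so those stay glued to the preceding invoke or handler entry. A toy rendering with a plain std::set standing in for the ArenaBitVector (all pcs invented):

    #include <cstdio>
    #include <set>

    int main() {
      std::set<unsigned> locations;
      // 1) Start of each source line, as the position-info callback reports it.
      for (unsigned pc : {0u, 3u, 7u}) locations.insert(pc);
      // 2) Start of each basic block.
      for (unsigned pc : {0u, 5u}) locations.insert(pc);
      // 3) Suppose a MOVE_RESULT sits at pc 3: move its bit to the next
      //    instruction (say pc 4) so the invoke/result pair stays intact.
      locations.erase(3u);
      locations.insert(4u);
      for (unsigned pc : locations) std::printf("stack map at dex pc %u\n", pc);
      return 0;
    }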
+ for (uint32_t pc = 0; pc < code_item.insns_size_in_code_units_; pc++) { + if (FindBlockStartingAt(pc) != nullptr) { + locations->SetBit(pc); + } + } + // Instruction-specific tweaks. + const Instruction* const begin = Instruction::At(code_item.insns_); + const Instruction* const end = begin->RelativeAt(code_item.insns_size_in_code_units_); + for (const Instruction* inst = begin; inst < end; inst = inst->Next()) { + switch (inst->Opcode()) { + case Instruction::MOVE_EXCEPTION: + case Instruction::MOVE_RESULT: + case Instruction::MOVE_RESULT_WIDE: + case Instruction::MOVE_RESULT_OBJECT: { + // The compiler checks that there are no instructions before those. + // So generate HNativeDebugInfo after them instead. + locations->ClearBit(inst->GetDexPc(code_item.insns_)); + const Instruction* next = inst->Next(); + if (next < end) { + locations->SetBit(next->GetDexPc(code_item.insns_)); + } + break; + } + default: + break; + } + } +} + bool HGraphBuilder::ComputeBranchTargets(const uint16_t* code_ptr, const uint16_t* code_end, size_t* number_of_branches) { @@ -1756,7 +1817,12 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction, UpdateLocal(destination, current_block_->GetLastInstruction(), dex_pc); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); + // We emit a CheckCast followed by a BoundType. CheckCast is a statement + // which may throw. If it succeeds, BoundType sets the new type of `object` + // for all subsequent uses. current_block_->AddInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc)); + current_block_->AddInstruction(new (arena_) HBoundType(object, dex_pc)); + UpdateLocal(reference, current_block_->GetLastInstruction(), dex_pc); } } diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index ca71c32802..26bf1cbc75 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -80,7 +80,8 @@ class HGraphBuilder : public ValueObject { can_use_baseline_for_string_init_(true), compilation_stats_(nullptr), interpreter_metadata_(nullptr), - dex_cache_(NullHandle<mirror::DexCache>()) {} + null_dex_cache_(), + dex_cache_(null_dex_cache_) {} bool BuildGraph(const DexFile::CodeItem& code); @@ -111,6 +112,7 @@ class HGraphBuilder : public ValueObject { const uint16_t* end, size_t* number_of_branches); void MaybeUpdateCurrentBlock(size_t dex_pc); + void FindNativeDebugInfoLocations(const DexFile::CodeItem& code_item, ArenaBitVector* locations); HBasicBlock* FindBlockStartingAt(int32_t dex_pc) const; HBasicBlock* FindOrCreateBlockStartingAt(int32_t dex_pc); @@ -371,6 +373,7 @@ class HGraphBuilder : public ValueObject { const uint8_t* interpreter_metadata_; // Dex cache for dex_file_. 
+ ScopedNullHandle<mirror::DexCache> null_dex_cache_; Handle<mirror::DexCache> dex_cache_; DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 53d3615a41..ea0b9eca9a 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -997,6 +997,12 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, stack_map_stream_.EndStackMapEntry(); } +bool CodeGenerator::HasStackMapAtCurrentPc() { + uint32_t pc = GetAssembler()->CodeSize(); + size_t count = stack_map_stream_.GetNumberOfStackMaps(); + return count > 0 && stack_map_stream_.GetStackMap(count - 1).native_pc_offset == pc; +} + void CodeGenerator::RecordCatchBlockInfo() { ArenaAllocator* arena = graph_->GetArena(); @@ -1320,12 +1326,6 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod << "instruction->DebugName()=" << instruction->DebugName() << " slow_path->GetDescription()=" << slow_path->GetDescription(); DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) || - // Control flow would not come back into the code if a fatal slow - // path is taken, so we do not care if it triggers GC. - slow_path->IsFatal() || - // HDeoptimize is a special case: we know we are not coming back from - // it into the code. - instruction->IsDeoptimize() || // When read barriers are enabled, some instructions use a // slow path to emit a read barrier, which does not trigger // GC, is not fatal, nor is emitted by HDeoptimize diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index eade05d7b6..5958cd89bc 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -269,6 +269,8 @@ class CodeGenerator { // Record native to dex mapping for a suspend point. Required by runtime. void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); + // Check whether we have already recorded a mapping at this PC. + bool HasStackMapAtCurrentPc(); bool CanMoveNullCheckToUser(HNullCheck* null_check); void MaybeRecordImplicitNullCheck(HInstruction* instruction); @@ -611,7 +613,7 @@ class CodeGenerator { ArenaVector<SlowPathCode*> slow_paths_; - // The current slow path that we're generating code for. + // The current slow-path that we're generating code for. SlowPathCode* current_slow_path_; // The current block index in `block_order_` of the block @@ -672,6 +674,122 @@ class CallingConvention { DISALLOW_COPY_AND_ASSIGN(CallingConvention); }; +/** + * A templated class SlowPathGenerator with a templated method NewSlowPath() + * that can be used by any code generator to share equivalent slow-paths with + * the objective of reducing generated code size. + * + * InstructionType: instruction that requires SlowPathCodeType + * SlowPathCodeType: subclass of SlowPathCode, with constructor SlowPathCodeType(InstructionType *) + */ +template <typename InstructionType> +class SlowPathGenerator { + static_assert(std::is_base_of<HInstruction, InstructionType>::value, + "InstructionType is not a subclass of art::HInstruction"); + + public: + SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) + : graph_(graph), + codegen_(codegen), + slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {} + + // Creates and adds a new slow-path, if needed, or returns an existing one otherwise. 
+ // Templating the method (rather than the whole class) on the slow-path type enables + // keeping this code at a generic, non-architecture-specific place. + // + // NOTE: This approach assumes each InstructionType only generates one SlowPathCodeType. + // To relax this requirement, we would need some RTTI on the stored slow-paths, + // or template the class as a whole on SlowPathType. + template <typename SlowPathCodeType> + SlowPathCodeType* NewSlowPath(InstructionType* instruction) { + static_assert(std::is_base_of<SlowPathCode, SlowPathCodeType>::value, + "SlowPathCodeType is not a subclass of art::SlowPathCode"); + static_assert(std::is_constructible<SlowPathCodeType, InstructionType*>::value, + "SlowPathCodeType is not constructible from InstructionType*"); + // Iterate over potential candidates for sharing. Currently, only same-typed + // slow-paths with exactly the same dex-pc are viable candidates. + // TODO: pass dex-pc/slow-path-type to run-time to allow even more sharing? + const uint32_t dex_pc = instruction->GetDexPc(); + auto iter = slow_path_map_.find(dex_pc); + if (iter != slow_path_map_.end()) { + auto candidates = iter->second; + for (const auto& it : candidates) { + InstructionType* other_instruction = it.first; + SlowPathCodeType* other_slow_path = down_cast<SlowPathCodeType*>(it.second); + // Determine if the instructions allow for slow-path sharing. + if (HaveSameLiveRegisters(instruction, other_instruction) && + HaveSameStackMap(instruction, other_instruction)) { + // Can share: reuse the existing one. + return other_slow_path; + } + } + } else { + // First time this dex-pc is seen. + iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}}); + } + // Cannot share: create and add a new slow-path for this particular dex-pc. + SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction); + iter->second.emplace_back(std::make_pair(instruction, slow_path)); + codegen_->AddSlowPath(slow_path); + return slow_path; + } + + private: + // Tests if both instructions have the same set of live physical registers. This ensures + // the slow-path has exactly the same preamble on saving these registers to the stack. + bool HaveSameLiveRegisters(const InstructionType* i1, const InstructionType* i2) const { + const uint32_t core_spill = ~codegen_->GetCoreSpillMask(); + const uint32_t fpu_spill = ~codegen_->GetFpuSpillMask(); + RegisterSet* live1 = i1->GetLocations()->GetLiveRegisters(); + RegisterSet* live2 = i2->GetLocations()->GetLiveRegisters(); + return (((live1->GetCoreRegisters() & core_spill) == + (live2->GetCoreRegisters() & core_spill)) && + ((live1->GetFloatingPointRegisters() & fpu_spill) == + (live2->GetFloatingPointRegisters() & fpu_spill))); + } + + // Tests if both instructions have the same stack map. This ensures the interpreter + // will find exactly the same dex-registers at the same entries. + bool HaveSameStackMap(const InstructionType* i1, const InstructionType* i2) const { + DCHECK(i1->HasEnvironment()); + DCHECK(i2->HasEnvironment()); + // We conservatively test if the two instructions find exactly the same instructions + // and location in each dex-register. This guarantees they will have the same stack map. 
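So the sharing criterion is: same dex-pc, same live physical registers, same environment. A toy standalone model of the lookup-or-create shape (containers simplified; the real generator arena-allocates and compares the two properties above rather than taking a flag):

    #include <cstdio>
    #include <deque>
    #include <map>
    #include <vector>

    struct SlowPath { unsigned dex_pc; };

    struct Generator {
      std::deque<SlowPath> storage;                      // Owns the slow paths.
      std::map<unsigned, std::vector<SlowPath*>> by_pc;  // dex-pc -> candidates.

      SlowPath* NewSlowPath(unsigned dex_pc, bool state_matches_existing) {
        auto it = by_pc.find(dex_pc);
        if (it != by_pc.end() && state_matches_existing && !it->second.empty()) {
          return it->second.front();  // Share a previously emitted slow path.
        }
        storage.push_back(SlowPath{dex_pc});
        by_pc[dex_pc].push_back(&storage.back());
        return &storage.back();
      }
    };

    int main() {
      Generator gen;
      SlowPath* a = gen.NewSlowPath(42, /*state_matches_existing=*/true);
      SlowPath* b = gen.NewSlowPath(42, /*state_matches_existing=*/true);
      std::printf("shared: %s, created: %zu\n", a == b ? "yes" : "no",
                  gen.storage.size());
      return 0;
    }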
+ HEnvironment* e1 = i1->GetEnvironment(); + HEnvironment* e2 = i2->GetEnvironment(); + if (e1->GetParent() != e2->GetParent() || e1->Size() != e2->Size()) { + return false; + } + for (size_t i = 0, sz = e1->Size(); i < sz; ++i) { + if (e1->GetInstructionAt(i) != e2->GetInstructionAt(i) || + !e1->GetLocationAt(i).Equals(e2->GetLocationAt(i))) { + return false; + } + } + return true; + } + + HGraph* const graph_; + CodeGenerator* const codegen_; + + // Map from dex-pc to vector of already existing instruction/slow-path pairs. + ArenaSafeMap<uint32_t, ArenaVector<std::pair<InstructionType*, SlowPathCode*>>> slow_path_map_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathGenerator); +}; + +class InstructionCodeGenerator : public HGraphVisitor { + public: + InstructionCodeGenerator(HGraph* graph, CodeGenerator* codegen) + : HGraphVisitor(graph), + deopt_slow_paths_(graph, codegen) {} + + protected: + // Add slow-path generator for each instruction/slow-path combination that desires sharing. + // TODO: under current regime, only deopt sharing make sense; extend later. + SlowPathGenerator<HDeoptimize> deopt_slow_paths_; +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 9fda83840c..45520b45bf 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -350,24 +350,24 @@ class TypeCheckSlowPathARM : public SlowPathCode { class DeoptimizationSlowPathARM : public SlowPathCode { public: - explicit DeoptimizationSlowPathARM(HInstruction* instruction) + explicit DeoptimizationSlowPathARM(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); - uint32_t dex_pc = deoptimize->GetDexPc(); - CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); - arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), + instruction_, + instruction_->GetDexPc(), + this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM); }; @@ -417,6 +417,56 @@ class ArraySetSlowPathARM : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM); }; +// Slow path marking an object during a read barrier. 
+class ReadBarrierMarkSlowPathARM : public SlowPathCode { + public: + ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location out, Location obj) + : instruction_(instruction), out_(out), obj_(obj) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast()) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), obj_); + arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierMark), + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierMark, mirror::Object*, mirror::Object*>(); + arm_codegen->Move32(out_, Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + __ b(GetExitLabel()); + } + + private: + HInstruction* const instruction_; + const Location out_; + const Location obj_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM); +}; + // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { public: @@ -438,7 +488,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { // to be instrumented, e.g.: // // __ LoadFromOffset(kLoadWord, out, out, offset); - // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. 
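As a gloss on the ReadBarrierMarkSlowPathARM introduced a few hunks above: its emitted code reduces to the following C-like pseudocode (a sketch for exposition only; pReadBarrierMark is the entrypoint named in the diff, and the register choices follow the invoke-runtime calling convention used there):

    // Sketch of ReadBarrierMarkSlowPathARM::EmitNativeCode:
    //   entry:
    //     SaveLiveRegisters();
    //     r0  = obj;                    // first calling-convention register
    //     r0  = pReadBarrierMark(r0);   // mirror::Object* -> mirror::Object*
    //     out = r0;                     // result moved back via Move32
    //     RestoreLiveRegisters();
    //     goto exit;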
@@ -454,7 +504,9 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); DCHECK(!instruction_->IsInvoke() || (instruction_->IsInvokeStaticOrDirect() && - instruction_->GetLocations()->Intrinsified())); + instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -596,14 +648,18 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { class ReadBarrierForRootSlowPathARM : public SlowPathCode { public: ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root) - : instruction_(instruction), out_(out), root_(root) {} + : instruction_(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); @@ -857,7 +913,7 @@ void CodeGeneratorARM::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1358,17 +1414,6 @@ void LocationsBuilderARM::VisitExit(HExit* exit) { void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } -void InstructionCodeGeneratorARM::GenerateCompareWithImmediate(Register left, int32_t right) { - ShifterOperand operand; - if (GetAssembler()->ShifterOperandCanHold(R0, left, CMP, right, &operand)) { - __ cmp(left, operand); - } else { - Register temp = IP; - __ LoadImmediate(temp, right); - __ cmp(left, ShifterOperand(temp)); - } -} - void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label) { @@ -1434,7 +1479,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, int32_t val_low = Low32Bits(value); int32_t val_high = High32Bits(value); - GenerateCompareWithImmediate(left_high, val_high); + __ CmpConstant(left_high, val_high); if (if_cond == kCondNE) { __ b(true_label, ARMCondition(true_high_cond)); } else if (if_cond == kCondEQ) { @@ -1444,7 +1489,7 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond, __ b(false_label, ARMCondition(false_high_cond)); } // Must be equal high, so compare the lows. 
- GenerateCompareWithImmediate(left_low, val_low); + __ CmpConstant(left_low, val_low); } else { Register right_high = right.AsRegisterPairHigh<Register>(); Register right_low = right.AsRegisterPairLow<Register>(); @@ -1568,7 +1613,7 @@ void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instructio __ cmp(left, ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant())); } if (true_target == nullptr) { __ b(false_target, ARMCondition(condition->GetOppositeCondition())); @@ -1610,15 +1655,26 @@ void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), /* false_target */ nullptr); } -void LocationsBuilderARM::VisitCondition(HCondition* cond) { +void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorARM::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + +void LocationsBuilderARM::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. 
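A note on the `__ nop()` in VisitNativeDebugInfo above: it pairs with the new CodeGenerator::HasStackMapAtCurrentPc(), since the runtime maps native PCs back to dex PCs through the stack map table, and two maps sharing one native_pc_offset would be ambiguous. A minimal sketch of the invariant, using only the accessors visible in this diff (the helper name is hypothetical):

    // Hypothetical predicate: may a new stack map be recorded at `pc`?
    static bool CanRecordStackMapAt(const StackMapStream& stream, uint32_t pc) {
      size_t count = stream.GetNumberOfStackMaps();
      // Safe iff no previously recorded map already claims this native PC.
      return count == 0 || stream.GetStackMap(count - 1).native_pc_offset != pc;
    }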
@@ -1649,7 +1705,7 @@ void LocationsBuilderARM::VisitCondition(HCondition* cond) { } } -void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) { +void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { if (!cond->NeedsMaterialization()) { return; } @@ -1667,8 +1723,8 @@ void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) { __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>())); } else { DCHECK(right.IsConstant()); - GenerateCompareWithImmediate(left.AsRegister<Register>(), - CodeGenerator::GetInt32ValueOf(right.GetConstant())); + __ CmpConstant(left.AsRegister<Register>(), + CodeGenerator::GetInt32ValueOf(right.GetConstant())); } __ it(ARMCondition(cond->GetCondition()), kItElse); __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1), @@ -1706,83 +1762,83 @@ void InstructionCodeGeneratorARM::VisitCondition(HCondition* cond) { } void LocationsBuilderARM::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorARM::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderARM::VisitLocal(HLocal* local) { @@ -1883,7 +1939,7 @@ void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { } void InstructionCodeGeneratorARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { - 
GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); + codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); } void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) { @@ -2838,8 +2894,7 @@ void InstructionCodeGeneratorARM::DivRemByPowerOfTwo(HBinaryOperation* instructi Register dividend = locations->InAt(0).AsRegister<Register>(); Register temp = locations->GetTemp(0).AsRegister<Register>(); int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (ctz_imm == 1) { @@ -2915,7 +2970,7 @@ void InstructionCodeGeneratorARM::GenerateDivRemConstantIntegral(HBinaryOperatio // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2944,12 +2999,12 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = div->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3070,12 +3125,12 @@ void LocationsBuilderARM::VisitRem(HRem* rem) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue()); - if (abs_imm <= 1) { + int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue(); + if (value == 1 || value == 0 || value == -1) { // No temp register required. } else { locations->AddTemp(Location::RequiresRegister()); - if (!IsPowerOfTwo(abs_imm)) { + if (!IsPowerOfTwo(AbsOrMin(value))) { locations->AddTemp(Location::RequiresRegister()); } } @@ -3429,7 +3484,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); - // Arm doesn't mask the shift count so we need to do it ourselves. + // ARM doesn't mask the shift count so we need to do it ourselves. __ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue)); if (op->IsShl()) { __ Lsl(out_reg, first_reg, out_reg); @@ -3441,7 +3496,7 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) { } else { int32_t cst = second.GetConstant()->AsIntConstant()->GetValue(); uint32_t shift_value = static_cast<uint32_t>(cst & kMaxIntShiftValue); - if (shift_value == 0) { // arm does not support shifting with 0 immediate. + if (shift_value == 0) { // ARM does not support shifting with 0 immediate. 
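// (Background, for exposition: the ARM immediate-shift encodings repurpose
// #0 -- LSR/ASR #0 encode a shift by 32 and ROR #0 encodes RRX -- so a
// zero shift amount cannot be encoded directly and a plain move is used.)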
__ Mov(out_reg, first_reg); } else if (op->IsShl()) { __ Lsl(out_reg, first_reg, shift_value); @@ -3788,9 +3843,9 @@ void InstructionCodeGeneratorARM::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { - // TODO (ported from quick): revisit Arm barrier kinds - DmbOptions flavor = DmbOptions::ISH; // quiet c++ warnings +void CodeGeneratorARM::GenerateMemoryBarrier(MemBarrierKind kind) { + // TODO (ported from quick): revisit ARM barrier kinds. + DmbOptions flavor = DmbOptions::ISH; // Quiet C++ warnings. switch (kind) { case MemBarrierKind::kAnyStore: case MemBarrierKind::kLoadAny: @@ -3871,11 +3926,11 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); } else if (generate_volatile) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); @@ -3905,7 +3960,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); } switch (field_type) { @@ -3997,7 +4052,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); } } @@ -4031,14 +4086,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap)); } if (volatile_for_double) { - // Arm encoding have some additional constraints for ldrexd/strexd: + // ARM encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. - // We don't test for Arm yet, and the assertion makes sure that we revisit this if we ever - // enable Arm encoding. + // We don't test for ARM yet, and the assertion makes sure that we + // revisit this if we ever enable ARM encoding. DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); + } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. 
+ locations->AddTemp(Location::RequiresRegister()); } } @@ -4097,33 +4156,52 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, uint32_t offset = field_info.GetFieldOffset().Uint32Value(); switch (field_type) { - case Primitive::kPrimBoolean: { + case Primitive::kPrimBoolean: __ LoadFromOffset(kLoadUnsignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimByte: { + case Primitive::kPrimByte: __ LoadFromOffset(kLoadSignedByte, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimShort: { + case Primitive::kPrimShort: __ LoadFromOffset(kLoadSignedHalfword, out.AsRegister<Register>(), base, offset); break; - } - case Primitive::kPrimChar: { + case Primitive::kPrimChar: __ LoadFromOffset(kLoadUnsignedHalfword, out.AsRegister<Register>(), base, offset); break; - } case Primitive::kPrimInt: - case Primitive::kPrimNot: { __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); break; + + case Primitive::kPrimNot: { + // /* HeapReference<Object> */ out = *(base + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, base, offset, temp_loc, /* needs_null_check */ true); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadWord, out.AsRegister<Register>(), base, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); + } + break; } - case Primitive::kPrimLong: { + case Primitive::kPrimLong: if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), @@ -4132,12 +4210,10 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), base, offset); } break; - } - case Primitive::kPrimFloat: { + case Primitive::kPrimFloat: __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), base, offset); break; - } case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); @@ -4159,17 +4235,20 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } - // Doubles are handled in the switch. - if (field_type != Primitive::kPrimDouble) { + if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + // Potential implicit null checks, in the case of reference or + // double fields, are handled in the previous switch statement. + } else { codegen_->MaybeRecordImplicitNullCheck(instruction); } if (is_volatile) { - GenerateMemoryBarrier(MemBarrierKind::kLoadAny); - } - - if (field_type == Primitive::kPrimNot) { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset); + if (field_type == Primitive::kPrimNot) { + // Memory barriers, in the case of references, are also handled + // in the previous switch statement. 
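// (Exposition: the barrier must directly follow whichever instruction
// sequence produced the value, and for kPrimNot that may be a
// multi-instruction Baker read barrier load, so the kLoadAny barrier is
// emitted inside the switch next to that sequence rather than here.)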
+ } else { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } } } @@ -4332,6 +4411,11 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); + } } void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { @@ -4339,12 +4423,13 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Location index = locations->InAt(1); - Primitive::Type type = instruction->GetType(); + Location out_loc = locations->Out(); + Primitive::Type type = instruction->GetType(); switch (type) { case Primitive::kPrimBoolean: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4358,7 +4443,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimByte: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int8_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -4372,7 +4457,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimShort: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4386,7 +4471,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { case Primitive::kPrimChar: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -4398,13 +4483,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes."); + case Primitive::kPrimInt: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -4416,44 +4497,79 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + 
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + __ LoadFromOffset(kLoadWord, out, IP, data_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + break; + } + case Primitive::kPrimLong: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); - Location out = locations->Out(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), obj, offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadFromOffset(kLoadWordPair, out.AsRegisterPairLow<Register>(), IP, data_offset); + __ LoadFromOffset(kLoadWordPair, out_loc.AsRegisterPairLow<Register>(), IP, data_offset); } break; } case Primitive::kPrimFloat: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegister()); + SRegister out = out_loc.AsFpuRegister<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), obj, offset); + __ LoadSFromOffset(out, obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); - __ LoadSFromOffset(out.AsFpuRegister<SRegister>(), IP, data_offset); + __ LoadSFromOffset(out, IP, data_offset); } break; } case Primitive::kPrimDouble: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); - Location out = locations->Out(); - DCHECK(out.IsFpuRegisterPair()); + SRegister out = out_loc.AsFpuRegisterPairLow<SRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), obj, offset); + __ 
LoadDFromOffset(FromLowSToD(out), obj, offset); } else { __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_8)); - __ LoadDFromOffset(FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()), IP, data_offset); + __ LoadDFromOffset(FromLowSToD(out), IP, data_offset); } break; } @@ -4462,20 +4578,12 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (type == Primitive::kPrimNot) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); - Location out = locations->Out(); - if (index.IsConstant()) { - uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset); - } else { - codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index); - } + // Potential implicit null checks, in the case of reference + // arrays, are handled in the previous switch statement. + } else { + codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4566,6 +4674,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, source, IP, data_offset); } + codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); DCHECK(!may_need_runtime_call_for_type_check); break; @@ -4607,12 +4716,12 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { // __ Mov(temp2, temp1); // // /* HeapReference<Class> */ temp1 = temp1->component_type_ // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset); // // // /* HeapReference<Class> */ temp2 = value->klass_ // __ LoadFromOffset(kLoadWord, temp2, value, class_offset); - // codegen_->GenerateReadBarrier( + // codegen_->GenerateReadBarrierSlow( // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc); // // __ cmp(temp1, ShifterOperand(temp2)); @@ -4709,8 +4818,6 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { __ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); __ StoreToOffset(kStoreWord, value, IP, data_offset); } - - codegen_->MaybeRecordImplicitNullCheck(instruction); break; } @@ -4762,8 +4869,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { UNREACHABLE(); } - // Ints and objects are handled in the switch. - if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { + // Objects are handled in the switch. 
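// (Exposition: reference stores record their implicit null checks at the
// exact store or runtime-call site inside the switch above, so only the
// remaining types fall through to this shared record point.)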
+ if (value_type != Primitive::kPrimNot) { codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -5132,16 +5239,9 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { if (cls->IsReferrersClass()) { DCHECK(!cls->CanCallRuntime()); DCHECK(!cls->MustGenerateClinitCheck()); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + cls, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); } else { // /* GcRoot<mirror::Class>[] */ out = // current_method.ptr_sized_fields_->dex_cache_resolved_types_ @@ -5149,17 +5249,8 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) { out, current_method, ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &out[type_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = out[type_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::Class> */ out = out[type_index] + GenerateGcRootFieldLoad(cls, out_loc, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); @@ -5222,30 +5313,14 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { Register out = out_loc.AsRegister<Register>(); Register current_method = locations->InAt(0).AsRegister<Register>(); - uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value(); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_) - __ AddConstant(out, current_method, declaring_class_offset); - // /* mirror::Class* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ - __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset); - } - + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad( + load, out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - - size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex()); - if (kEmitCompilerReadBarrier) { - // /* GcRoot<mirror::String>* */ out = &out[string_index] - __ AddConstant(out, out, cache_offset); - // /* mirror::String* */ out = out->Read() - codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc); - } else { - // /* GcRoot<mirror::String> */ out = out[string_index] - __ LoadFromOffset(kLoadWord, out, out, cache_offset); - } + // /* GcRoot<mirror::String> 
*/ out = out[string_index] + GenerateGcRootFieldLoad( + load, out_loc, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); if (!load->IsInDexCache()) { SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); @@ -5292,6 +5367,14 @@ void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) { CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); } +static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { + return kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck); +} + void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5318,21 +5401,22 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); // When read barriers are enabled, we need a temporary register for // some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location out_loc = locations->Out(); Register out = out_loc.AsRegister<Register>(); + Location temp_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(0) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5347,10 +5431,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { } // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, temp_loc); - switch (instruction->GetTypeCheckKind()) { + switch (type_check_kind) { case TypeCheckKind::kExactCheck: { __ cmp(out, ShifterOperand(cls)); // Classes must be equal for the instanceof to succeed. @@ -5365,17 +5448,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // object to avoid doing a comparison we know will fail. Label loop; __ Bind(&loop); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. 
- Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ cmp(out, ShifterOperand(cls)); @@ -5393,17 +5467,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ Bind(&loop); __ cmp(out, ShifterOperand(cls)); __ b(&success, EQ); - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, temp_loc); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ b(&done); @@ -5421,17 +5486,8 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ cmp(out, ShifterOperand(cls)); __ b(&exact_check, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `out` into `temp` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp = temp_loc.AsRegister<Register>(); - __ Mov(temp, out); - } // /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadWord, out, out, component_offset); - codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, temp_loc); // If `out` is null, we use it for the result, and jump to `done`. __ CompareAndBranchIfZero(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5470,6 +5526,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // HInstanceOf instruction (following the runtime calling // convention), which might be cluttered by the potential first // read barrier emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. 
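//
// For reference, the kAbstractClassCheck loop above amounts to the
// following C-like pseudocode (each ->super_class_ load goes through
// GenerateReferenceLoadOneRegister, i.e. through a read barrier if enabled):
//
//   Class* k = obj->klass_;
//   do { k = k->super_class_; } while (k != null && k != cls);
//   result = (k != null);
//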
DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction, /* is_fatal */ false); @@ -5524,27 +5587,27 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) { locations->AddTemp(Location::RequiresRegister()); // When read barriers are enabled, we need an additional temporary // register for some cases. - if (kEmitCompilerReadBarrier && - (type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + if (TypeCheckNeedsATemporary(type_check_kind)) { locations->AddTemp(Location::RequiresRegister()); } } void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); + Location temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ? + locations->GetTemp(1) : + Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); bool is_type_check_slow_path_fatal = (type_check_kind == TypeCheckKind::kExactCheck || type_check_kind == TypeCheckKind::kAbstractClassCheck || @@ -5563,8 +5626,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { } // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); switch (type_check_kind) { case TypeCheckKind::kExactCheck: @@ -5581,18 +5643,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // object to avoid doing a comparison we know will fail. Label loop, compare_classes; __ Bind(&loop); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // to the `compare_classes` label to compare it with the checked @@ -5604,8 +5656,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&compare_classes); @@ -5621,18 +5672,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ cmp(temp, ShifterOperand(cls)); __ b(&done, EQ); - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, temp2_loc); // If the class reference currently in `temp` is not null, jump // back at the beginning of the loop. @@ -5643,8 +5684,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5656,19 +5696,8 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ b(&done, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. - Location temp2_loc = - kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation(); - if (kEmitCompilerReadBarrier) { - // Save the value of `temp` into `temp2` before overwriting it - // in the following move operation, as we will need it for the - // read barrier below. - Register temp2 = temp2_loc.AsRegister<Register>(); - __ Mov(temp2, temp); - } // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - codegen_->MaybeGenerateReadBarrier( - instruction, temp_loc, temp_loc, temp2_loc, component_offset); + GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, temp2_loc); // If the component type is not null (i.e. the object is indeed // an array), jump to label `check_non_primitive_component_type` @@ -5681,8 +5710,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // going into the slow path, as it has been overwritten in the // meantime. 
// /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); __ Bind(&check_non_primitive_component_type); @@ -5691,8 +5719,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { __ CompareAndBranchIfZero(temp, &done); // Same comment as above regarding `temp` and the slow path. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset); + GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, temp2_loc); __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5709,6 +5736,13 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // instruction (following the runtime calling convention), which // might be cluttered by the potential first read barrier // emission at the beginning of this method. + // + // TODO: Introduce a new runtime entry point taking the object + // to test (instead of its class) as argument, and let it deal + // with the read barrier issues. This will let us refactor this + // case of the `switch` code as it was previously (with a direct + // call to the runtime not using a type checking slow path). + // This should also be beneficial for the other cases above. __ b(type_check_slow_path->GetEntryLabel()); break; } @@ -5893,14 +5927,249 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr } } -void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction, - Location out, - Location ref, - Location obj, - uint32_t offset, - Location index) { +void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, out_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mov(temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location temp) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. 
+ // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier( + instruction, out, obj_reg, offset, temp, /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // if (Thread::Current()->GetIsGcMarking()) { + // root = ReadBarrier::Mark(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path used to mark the GC root `root`. + SlowPathCode* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root, root); + codegen_->AddSlowPath(slow_path); + + __ LoadFromOffset( + kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmWordSize>().Int32Value()); + __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ AddConstant(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. 
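//
// (Summary, for exposition -- the three root-load strategies above:
//   no read barrier:      root = *(obj + offset)
//   Baker read barrier:   root = *(obj + offset);
//                         if (gc_is_marking) root = Mark(root)   // slow path
//   other read barriers:  root = (obj + offset)->Read()          // slow path)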
+ } +} + +void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, no_index, temp, needs_null_check); +} +void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::gray_ptr_); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as: + // - it implements the load-load fence using a data dependency on + // the high-bits of rb_state, which are expected to be all zeroes; + // - it performs additional checks that we do not do here for + // performance reasons. + + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + // /* uint32_t */ rb_state = lock_word.ReadBarrierState() + __ Lsr(temp_reg, temp_reg, LockWord::kReadBarrierStateShift); + __ and_(temp_reg, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask)); + static_assert( + LockWord::kReadBarrierStateMask == ReadBarrier::rb_ptr_mask_, + "art::LockWord::kReadBarrierStateMask is not equal to art::ReadBarrier::rb_ptr_mask_."); + + // Introduce a dependency on the high bits of rb_state, which shall + // be all zeroes, to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). 
+  // IP = rb_state & ~LockWord::kReadBarrierStateMask = 0
+  __ bic(IP, temp_reg, ShifterOperand(LockWord::kReadBarrierStateMask));
+  // obj is unchanged by this operation, but its value now depends on
+  // IP, which depends on temp_reg.
+  __ add(obj, obj, ShifterOperand(IP));
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    static_assert(
+        sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+        "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+    // /* HeapReference<Object> */ ref =
+    //     *(obj + offset + index * sizeof(HeapReference<Object>))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset;
+      __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+    } else {
+      __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
+      __ LoadFromOffset(kLoadWord, ref_reg, IP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path used to mark the object `ref` when it is gray.
+  SlowPathCode* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref, ref);
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::gray_ptr_)
+  //   ref = ReadBarrier::Mark(ref);
+  __ cmp(temp_reg, ShifterOperand(ReadBarrier::gray_ptr_));
+  __ b(slow_path->GetEntryLabel(), EQ);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::GenerateReadBarrierSlow(HInstruction* instruction,
+                                               Location out,
+                                               Location ref,
+                                               Location obj,
+                                               uint32_t offset,
+                                               Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
   // If heap poisoning is enabled, the unpoisoning of the loaded
   // reference will be carried out by the runtime within the slow
   // path.
@@ -5914,57 +6183,41 @@ void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
       ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
   AddSlowPath(slow_path);
 
-  // TODO: When read barrier has a fast path, add it here.
-  /* Currently the read barrier call is inserted after the original load.
-   * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
-   * original load. This load-load ordering is required by the read barrier.
-   * The fast path/slow path (for Baker's algorithm) should look like:
-   *
-   * bool isGray = obj.LockWord & kReadBarrierMask;
-   * lfence;  // load fence or artificial data dependence to prevent load-load reordering
-   * ref = obj.field;  // this is the original load
-   * if (isGray) {
-   *   ref = Mark(ref);  // ideally the slow path just does Mark(ref)
-   * }
-   */
-
   __ b(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
-void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
-                                                Location out,
-                                                Location ref,
-                                                Location obj,
-                                                uint32_t offset,
-                                                Location index) {
+void CodeGeneratorARM::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                    Location out,
+                                                    Location ref,
+                                                    Location obj,
+                                                    uint32_t offset,
+                                                    Location index) {
   if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier).
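// For contrast with the Baker fast path, the slow path emitted by
// GenerateReadBarrierSlow amounts to an unconditional runtime call placed
// after the original load. A rough model of that call shape, with
// artReadBarrierSlow stubbed out since only the structure matters here:
#include <cstdint>

namespace slow_path_sketch {

struct Object {};
// Stub standing in for the artReadBarrierSlow entrypoint, which receives
// the loaded reference plus the holder and (index-adjusted) field offset.
Object* artReadBarrierSlow(Object* ref, Object* /* obj */, uint32_t /* offset */) {
  return ref;
}

Object* FieldLoadWithSlowPathReadBarrier(Object* obj, Object** field_addr, uint32_t offset) {
  Object* ref = *field_addr;                    // Original reference load.
  return artReadBarrierSlow(ref, obj, offset);  // Barrier inserted *after* it.
}

}  // namespace slow_path_sketch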
+    DCHECK(!kUseBakerReadBarrier);
     // If heap poisoning is enabled, unpoisoning will be taken care of
     // by the runtime within the slow path.
-    GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
   } else if (kPoisonHeapReferences) {
     __ UnpoisonHeapReference(out.AsRegister<Register>());
   }
 }
 
-void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
-                                                  Location out,
-                                                  Location root) {
+void CodeGeneratorARM::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                      Location out,
+                                                      Location root) {
   DCHECK(kEmitCompilerReadBarrier);
 
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
   // Note that GC roots are not affected by heap poisoning, so we do
   // not need to do anything special for this here.
   SlowPathCode* slow_path =
       new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
   AddSlowPath(slow_path);
 
-  // TODO: Implement a fast path for ReadBarrierForRoot, performing
-  // the following operation (for Baker's algorithm):
-  //
-  //   if (thread.tls32_.is_gc_marking) {
-  //     root = Mark(root);
-  //   }
-
   __ b(slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -6288,7 +6541,7 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
   }
   if (num_entries - last_index == 2) {
     // The last missing case_value.
-    GenerateCompareWithImmediate(temp_reg, 1);
+    __ CmpConstant(temp_reg, 1);
     __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
   }
 
@@ -6348,7 +6601,7 @@ void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysB
 
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
-    DCHECK(type == Primitive::kPrimVoid);
+    DCHECK_EQ(type, Primitive::kPrimVoid);
     return;
   }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 8193c2808c..26d6d63b31 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -170,6 +170,7 @@ class LocationsBuilderARM : public HGraphVisitor {
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
+  void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
   void HandleLongRotate(LocationSummary* locations);
   void HandleRotate(HRor* ror);
@@ -187,7 +188,7 @@ class LocationsBuilderARM : public HGraphVisitor {
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
 
-class InstructionCodeGeneratorARM : public HGraphVisitor {
+class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
 
@@ -216,28 +217,62 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
   void GenerateOrrConst(Register out, Register first, uint32_t value);
   void GenerateEorConst(Register out, Register first, uint32_t value);
   void HandleBitwiseOperation(HBinaryOperation* operation);
+  void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
   void HandleLongRotate(LocationSummary* locations);
   void HandleRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
-  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
                                Register value_lo, Register value_hi,
                                Register temp1, Register temp2,
                                HInstruction* instruction);
   void GenerateWideAtomicLoad(Register addr, uint32_t offset,
                               Register out_lo, Register out_hi);
+
   void HandleFieldSet(HInstruction* instruction,
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a read barrier.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  // Register `temp` is used when generating a Baker's read barrier.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location temp);
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers (if any).
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               Register obj,
+                               uint32_t offset);
+
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              Label* true_target,
                              Label* false_target);
-  void GenerateCompareWithImmediate(Register left, int32_t right);
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
@@ -344,6 +379,8 @@ class CodeGeneratorARM : public CodeGenerator {
   // Emit a write barrier.
   void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null);
 
+  void GenerateMemoryBarrier(MemBarrierKind kind);
+
   Label* GetLabelOf(HBasicBlock* block) const {
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
@@ -404,7 +441,26 @@ class CodeGeneratorARM : public CodeGenerator {
     return &it->second;
   }
 
-  // Generate a read barrier for a heap reference within `instruction`.
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location out,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
   //
   // A read barrier for an object reference read from the heap is
   // implemented as a call to the artReadBarrierSlow runtime entry
@@ -421,23 +477,25 @@ class CodeGeneratorARM : public CodeGenerator {
   // When `index` is provided (i.e. for array accesses), the offset
   // value passed to artReadBarrierSlow is adjusted to take `index`
   // into account.
-  void GenerateReadBarrier(HInstruction* instruction,
-                           Location out,
-                           Location ref,
-                           Location obj,
-                           uint32_t offset,
-                           Location index = Location::NoLocation());
-
-  // If read barriers are enabled, generate a read barrier for a heap reference.
-  // If heap poisoning is enabled, also unpoison the reference in `out`.
-  void MaybeGenerateReadBarrier(HInstruction* instruction,
-                                Location out,
-                                Location ref,
-                                Location obj,
-                                uint32_t offset,
-                                Location index = Location::NoLocation());
-
-  // Generate a read barrier for a GC root within `instruction`.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
   //
   // A read barrier for an object reference GC root is implemented as
   // a call to the artReadBarrierForRootSlow runtime entry point,
@@ -447,9 +505,19 @@ class CodeGeneratorARM : public CodeGenerator {
   //
   // The `out` location contains the value returned by
   // artReadBarrierForRootSlow.
-  void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
  private:
+  // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
+  // and GenerateArrayLoadWithBakerReadBarrier.
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 Location temp,
+                                                 bool needs_null_check);
+
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
   using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 52058302be..a3150d3d22 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -477,24 +477,24 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 
 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit DeoptimizationSlowPathARM64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
       : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE {
InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -2427,7 +2427,7 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { } } -void LocationsBuilderARM64::VisitCondition(HCondition* instruction) { +void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { @@ -2447,7 +2447,7 @@ void LocationsBuilderARM64::VisitCondition(HCondition* instruction) { } } -void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) { +void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { if (!instruction->NeedsMaterialization()) { return; } @@ -2495,8 +2495,8 @@ void InstructionCodeGeneratorARM64::VisitCondition(HCondition* instruction) { M(Above) \ M(AboveOrEqual) #define DEFINE_CONDITION_VISITORS(Name) \ -void LocationsBuilderARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); } \ -void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { VisitCondition(comp); } +void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \ +void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) #undef DEFINE_CONDITION_VISITORS #undef FOR_EACH_CONDITION_INSTRUCTION @@ -2534,8 +2534,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc Register out = OutputRegister(instruction); Register dividend = InputRegisterAt(instruction, 0); int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); UseScratchRegisterScope temps(GetVIXLAssembler()); @@ -2627,7 +2626,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2940,15 +2939,26 @@ void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathARM64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeARM64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), /* false_target */ nullptr); } +void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. 
+    __ Nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
   HandleFieldGet(instruction);
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7950f078ad..f2ff89488e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -186,7 +186,7 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
   DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARM64);
 };
 
-class InstructionCodeGeneratorARM64 : public HGraphVisitor {
+class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
 
@@ -215,6 +215,7 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
   void GenerateImplicitNullCheck(HNullCheck* instruction);
   void GenerateExplicitNullCheck(HNullCheck* instruction);
@@ -257,6 +258,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
   void HandleFieldSet(HInstruction* instruction);
   void HandleFieldGet(HInstruction* instruction);
   void HandleInvoke(HInvoke* instr);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* instr);
 
   CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index ae0f2c8935..322912976e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -444,19 +444,16 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
 
 class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit DeoptimizationSlowPathMIPS(HInstruction* instruction)
+  explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
       : instruction_(instruction) {}
 
  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
     mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
                                 instruction_,
-                                dex_pc,
+                                instruction_->GetDexPc(),
                                 this,
                                 IsDirectEntrypoint(kQuickDeoptimize));
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
@@ -465,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
   const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
 
  private:
-  HInstruction* const instruction_;
+  HDeoptimize* const instruction_;
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
 };
 
@@ -608,9 +605,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
     // then swap the high 32 bits of the same FPR. mtc1 makes the high 32 bits of an FPR
     // unpredictable and the following mfhc1 will fail.
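// The MoveFromFpuHigh/MoveToFpuHigh helpers introduced below presumably
// hide the difference between 64-bit FPRs (FR=1, where mfhc1/mthc1 access
// the high word) and 32-bit FPRs (FR=0, where the high word lives in the
// odd register of an even/odd pair). A sketch of that dispatch under this
// assumption; the Assembler interface here is cut down to stubs:
#include <cstdint>

namespace fpu_high_sketch {

enum FRegister : uint8_t { F0, F1 /* ... */ };
enum Register : uint8_t { R0, R1 /* ... */ };

struct Assembler {
  bool fpu_32bit;  // True when FR=0 (32-bit FPU register mode).
  void Mfc1(Register, FRegister) {}   // Move word from FPR (stubbed).
  void Mfhc1(Register, FRegister) {}  // Move word from high half of FPR.

  void MoveFromFpuHigh(Register rt, FRegister fs) {
    if (fpu_32bit) {
      Mfc1(rt, static_cast<FRegister>(fs + 1));  // Odd register of the pair.
    } else {
      Mfhc1(rt, fs);  // High 32 bits of the 64-bit FPR.
    }
  }
};

}  // namespace fpu_high_sketch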
     __ Mfc1(TMP, f1);
-    __ Mfhc1(AT, f1);
+    __ MoveFromFpuHigh(AT, f1);
     __ Mtc1(r2_l, f1);
-    __ Mthc1(r2_h, f1);
+    __ MoveToFpuHigh(r2_h, f1);
     __ Move(r2_l, TMP);
     __ Move(r2_h, AT);
   } else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
@@ -862,7 +859,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) {
       Register dst_low = destination.AsRegisterPairLow<Register>();
       FRegister src = source.AsFpuRegister<FRegister>();
       __ Mfc1(dst_low, src);
-      __ Mfhc1(dst_high, src);
+      __ MoveFromFpuHigh(dst_high, src);
     } else {
       DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination;
       int32_t off = source.GetStackIndex();
@@ -875,7 +872,7 @@ void CodeGeneratorMIPS::Move64(Location destination, Location source) {
       Register src_high = source.AsRegisterPairHigh<Register>();
       Register src_low = source.AsRegisterPairLow<Register>();
       __ Mtc1(src_low, dst);
-      __ Mthc1(src_high, dst);
+      __ MoveToFpuHigh(src_high, dst);
     } else if (source.IsFpuRegister()) {
       __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>());
     } else {
@@ -1191,17 +1188,16 @@ void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset,
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path,
                                       bool is_direct_entrypoint) {
+  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
+  __ Jalr(T9);
   if (is_direct_entrypoint) {
     // Reserve argument space on stack (for $a0-$a3) for
     // entrypoints that directly reference native implementations.
     // Called function may use this space to store $a0-$a3 regs.
-    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
-  }
-  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
-  __ Jalr(T9);
-  __ Nop();
-  if (is_direct_entrypoint) {
+    __ IncreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);  // Single instruction in delay slot.
     __ DecreaseFrameSize(kMipsDirectEntrypointRuntimeOffset);
+  } else {
+    __ Nop();  // In delay slot.
   }
   RecordPcInfo(instruction, dex_pc, slow_path);
 }
@@ -1242,7 +1238,7 @@ void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instructi
 
 InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph,
                                                            CodeGeneratorMIPS* codegen)
-    : HGraphVisitor(graph),
+    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}
 
@@ -1275,15 +1271,9 @@ void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) {
     }
 
     case Primitive::kPrimLong: {
-      // TODO: can 2nd param be const?
       locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RequiresRegister());
-      if (instruction->IsAdd() || instruction->IsSub()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-      } else {
-        DCHECK(instruction->IsAnd() || instruction->IsOr() || instruction->IsXor());
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
 
@@ -1350,34 +1340,142 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction)
     }
 
     case Primitive::kPrimLong: {
-      // TODO: can 2nd param be const?
       Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
       Register dst_low = locations->Out().AsRegisterPairLow<Register>();
       Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>();
       Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>();
-      Register rhs_high = locations->InAt(1).AsRegisterPairHigh<Register>();
-      Register rhs_low = locations->InAt(1).AsRegisterPairLow<Register>();
-
-      if (instruction->IsAnd()) {
-        __ And(dst_low, lhs_low, rhs_low);
-        __ And(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsOr()) {
-        __ Or(dst_low, lhs_low, rhs_low);
-        __ Or(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsXor()) {
-        __ Xor(dst_low, lhs_low, rhs_low);
-        __ Xor(dst_high, lhs_high, rhs_high);
-      } else if (instruction->IsAdd()) {
-        __ Addu(dst_low, lhs_low, rhs_low);
-        __ Sltu(TMP, dst_low, lhs_low);
-        __ Addu(dst_high, lhs_high, rhs_high);
-        __ Addu(dst_high, dst_high, TMP);
+      Location rhs_location = locations->InAt(1);
+      bool use_imm = rhs_location.IsConstant();
+      if (!use_imm) {
+        Register rhs_high = rhs_location.AsRegisterPairHigh<Register>();
+        Register rhs_low = rhs_location.AsRegisterPairLow<Register>();
+        if (instruction->IsAnd()) {
+          __ And(dst_low, lhs_low, rhs_low);
+          __ And(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsOr()) {
+          __ Or(dst_low, lhs_low, rhs_low);
+          __ Or(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsXor()) {
+          __ Xor(dst_low, lhs_low, rhs_low);
+          __ Xor(dst_high, lhs_high, rhs_high);
+        } else if (instruction->IsAdd()) {
+          if (lhs_low == rhs_low) {
+            // Special case for lhs = rhs and the sum potentially overwriting both lhs and rhs.
+            __ Slt(TMP, lhs_low, ZERO);
+            __ Addu(dst_low, lhs_low, rhs_low);
+          } else {
+            __ Addu(dst_low, lhs_low, rhs_low);
+            // If the sum overwrites rhs, lhs remains unchanged, otherwise rhs remains unchanged.
+            __ Sltu(TMP, dst_low, (dst_low == rhs_low) ? lhs_low : rhs_low);
+          }
+          __ Addu(dst_high, lhs_high, rhs_high);
+          __ Addu(dst_high, dst_high, TMP);
+        } else {
+          DCHECK(instruction->IsSub());
+          __ Sltu(TMP, lhs_low, rhs_low);
+          __ Subu(dst_low, lhs_low, rhs_low);
+          __ Subu(dst_high, lhs_high, rhs_high);
+          __ Subu(dst_high, dst_high, TMP);
+        }
       } else {
-        DCHECK(instruction->IsSub());
-        __ Subu(dst_low, lhs_low, rhs_low);
-        __ Sltu(TMP, lhs_low, dst_low);
-        __ Subu(dst_high, lhs_high, rhs_high);
-        __ Subu(dst_high, dst_high, TMP);
+        int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+        if (instruction->IsOr()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Ori(dst_low, lhs_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Or(dst_low, lhs_low, TMP);
+          }
+          if (IsUint<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Ori(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Or(dst_high, lhs_high, TMP);
+          }
+        } else if (instruction->IsXor()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Xori(dst_low, lhs_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Xor(dst_low, lhs_low, TMP);
+          }
+          if (IsUint<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Xori(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Xor(dst_high, lhs_high, TMP);
+          }
+        } else if (instruction->IsAnd()) {
+          uint32_t low = Low32Bits(value);
+          uint32_t high = High32Bits(value);
+          if (IsUint<16>(low)) {
+            __ Andi(dst_low, lhs_low, low);
+          } else if (low != 0xFFFFFFFF) {
+            __ LoadConst32(TMP, low);
+            __ And(dst_low, lhs_low, TMP);
+          } else if (dst_low != lhs_low) {
+            __ Move(dst_low, lhs_low);
+          }
+          if (IsUint<16>(high)) {
+            __ Andi(dst_high, lhs_high, high);
+          } else if (high != 0xFFFFFFFF) {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ And(dst_high, lhs_high, TMP);
+          } else if (dst_high != lhs_high) {
+            __ Move(dst_high, lhs_high);
+          }
+        } else {
+          if (instruction->IsSub()) {
+            value = -value;
+          } else {
+            DCHECK(instruction->IsAdd());
+          }
+          int32_t low = Low32Bits(value);
+          int32_t high = High32Bits(value);
+          if (IsInt<16>(low)) {
+            if (dst_low != lhs_low || low != 0) {
+              __ Addiu(dst_low, lhs_low, low);
+            }
+            if (low != 0) {
+              __ Sltiu(AT, dst_low, low);
+            }
+          } else {
+            __ LoadConst32(TMP, low);
+            __ Addu(dst_low, lhs_low, TMP);
+            __ Sltu(AT, dst_low, TMP);
+          }
+          if (IsInt<16>(high)) {
+            if (dst_high != lhs_high || high != 0) {
+              __ Addiu(dst_high, lhs_high, high);
+            }
+          } else {
+            if (high != low) {
+              __ LoadConst32(TMP, high);
+            }
+            __ Addu(dst_high, lhs_high, TMP);
+          }
+          if (low != 0) {
+            __ Addu(dst_high, dst_high, AT);
+          }
+        }
       }
       break;
     }
@@ -1410,18 +1508,21 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction)
 }
 
 void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   Primitive::Type type = instr->GetResultType();
   switch (type) {
     case Primitive::kPrimInt:
-    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
       locations->SetOut(Location::RequiresRegister());
       break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected shift type " << type;
   }
 }
 
@@ -1430,7 +1531,7 @@ void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) {
 static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte;
 
 void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
-  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
+  DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor());
   LocationSummary* locations = instr->GetLocations();
   Primitive::Type type = instr->GetType();
 
@@ -1438,28 +1539,58 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
   bool use_imm = rhs_location.IsConstant();
   Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>();
   int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0;
-  uint32_t shift_mask = (type == Primitive::kPrimInt) ? kMaxIntShiftValue : kMaxLongShiftValue;
-  uint32_t shift_value = rhs_imm & shift_mask;
+  const uint32_t shift_mask = (type == Primitive::kPrimInt)
+      ? kMaxIntShiftValue
+      : kMaxLongShiftValue;
+  const uint32_t shift_value = rhs_imm & shift_mask;
+  // Are the INS (Insert Bit Field) and ROTR instructions supported?
+  bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
 
   switch (type) {
     case Primitive::kPrimInt: {
       Register dst = locations->Out().AsRegister<Register>();
       Register lhs = locations->InAt(0).AsRegister<Register>();
       if (use_imm) {
-        if (instr->IsShl()) {
+        if (shift_value == 0) {
+          if (dst != lhs) {
+            __ Move(dst, lhs);
+          }
+        } else if (instr->IsShl()) {
           __ Sll(dst, lhs, shift_value);
         } else if (instr->IsShr()) {
           __ Sra(dst, lhs, shift_value);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srl(dst, lhs, shift_value);
+        } else {
+          if (has_ins_rotr) {
+            __ Rotr(dst, lhs, shift_value);
+          } else {
+            __ Sll(TMP, lhs, (kMipsBitsPerWord - shift_value) & shift_mask);
+            __ Srl(dst, lhs, shift_value);
+            __ Or(dst, dst, TMP);
+          }
         }
       } else {
         if (instr->IsShl()) {
           __ Sllv(dst, lhs, rhs_reg);
         } else if (instr->IsShr()) {
           __ Srav(dst, lhs, rhs_reg);
-        } else {
+        } else if (instr->IsUShr()) {
           __ Srlv(dst, lhs, rhs_reg);
+        } else {
+          if (has_ins_rotr) {
+            __ Rotrv(dst, lhs, rhs_reg);
+          } else {
+            __ Subu(TMP, ZERO, rhs_reg);
+            // 32-bit shift instructions use the 5 least significant bits of the shift count, so
+            // shifting by `-rhs_reg` is equivalent to shifting by `(32 - rhs_reg) & 31`. The case
+            // when `rhs_reg & 31 == 0` is OK even though we don't shift `lhs` left all the way out
+            // by 32, because the result in this case is computed as `(lhs >> 0) | (lhs << 0)`,
+            // IOW, the OR'd values are equal.
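// The three-instruction rotate emulation used above, restated in plain C++.
// For the variable case the emitted code shifts left by (0 - n), relying on
// the hardware masking the shift amount to its low 5 bits, so `-n` acts as
// `(32 - n) & 31`; the same masking makes the n == 0 case degenerate to
// (x >> 0) | (x << 0) == x, just as the comment above explains.
#include <cstdint>

namespace rotr_sketch {

uint32_t RotateRight(uint32_t x, uint32_t n) {
  n &= 31;  // Only the 5 least significant bits of the amount are used.
  // The extra `& 31` keeps the C++ shift defined when n == 0 (x << 32 is UB).
  return (x >> n) | (x << ((32u - n) & 31));
}

}  // namespace rotr_sketch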
+            __ Sllv(TMP, lhs, TMP);
+            __ Srlv(dst, lhs, rhs_reg);
+            __ Or(dst, dst, TMP);
+          }
         }
       }
       break;
@@ -1474,33 +1605,81 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
       if (shift_value == 0) {
         codegen_->Move64(locations->Out(), locations->InAt(0));
       } else if (shift_value < kMipsBitsPerWord) {
-        if (instr->IsShl()) {
-          __ Sll(dst_low, lhs_low, shift_value);
-          __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
-          __ Sll(dst_high, lhs_high, shift_value);
-          __ Or(dst_high, dst_high, TMP);
-        } else if (instr->IsShr()) {
-          __ Sra(dst_high, lhs_high, shift_value);
-          __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-          __ Srl(dst_low, lhs_low, shift_value);
-          __ Or(dst_low, dst_low, TMP);
+        if (has_ins_rotr) {
+          if (instr->IsShl()) {
+            __ Srl(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+            __ Ins(dst_high, lhs_high, shift_value, kMipsBitsPerWord - shift_value);
+            __ Sll(dst_low, lhs_low, shift_value);
+          } else if (instr->IsShr()) {
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+            __ Sra(dst_high, lhs_high, shift_value);
+          } else if (instr->IsUShr()) {
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+            __ Srl(dst_high, lhs_high, shift_value);
+          } else {
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Ins(dst_low, lhs_high, kMipsBitsPerWord - shift_value, shift_value);
+            __ Srl(dst_high, lhs_high, shift_value);
+            __ Ins(dst_high, lhs_low, kMipsBitsPerWord - shift_value, shift_value);
+          }
         } else {
-          __ Srl(dst_high, lhs_high, shift_value);
-          __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
-          __ Srl(dst_low, lhs_low, shift_value);
-          __ Or(dst_low, dst_low, TMP);
+          if (instr->IsShl()) {
+            __ Sll(dst_low, lhs_low, shift_value);
+            __ Srl(TMP, lhs_low, kMipsBitsPerWord - shift_value);
+            __ Sll(dst_high, lhs_high, shift_value);
+            __ Or(dst_high, dst_high, TMP);
+          } else if (instr->IsShr()) {
+            __ Sra(dst_high, lhs_high, shift_value);
+            __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Or(dst_low, dst_low, TMP);
+          } else if (instr->IsUShr()) {
+            __ Srl(dst_high, lhs_high, shift_value);
+            __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value);
+            __ Srl(dst_low, lhs_low, shift_value);
+            __ Or(dst_low, dst_low, TMP);
+          } else {
+            __ Srl(TMP, lhs_low, shift_value);
+            __ Sll(dst_low, lhs_high, kMipsBitsPerWord - shift_value);
+            __ Or(dst_low, dst_low, TMP);
+            __ Srl(TMP, lhs_high, shift_value);
+            __ Sll(dst_high, lhs_low, kMipsBitsPerWord - shift_value);
+            __ Or(dst_high, dst_high, TMP);
+          }
         }
       } else {
-        shift_value -= kMipsBitsPerWord;
+        const uint32_t shift_value_high = shift_value - kMipsBitsPerWord;
         if (instr->IsShl()) {
-          __ Sll(dst_high, lhs_low, shift_value);
+          __ Sll(dst_high, lhs_low, shift_value_high);
           __ Move(dst_low, ZERO);
         } else if (instr->IsShr()) {
-          __ Sra(dst_low, lhs_high, shift_value);
+          __ Sra(dst_low, lhs_high, shift_value_high);
           __ Sra(dst_high, dst_low, kMipsBitsPerWord - 1);
-        } else {
-          __ Srl(dst_low, lhs_high, shift_value);
+        } else if (instr->IsUShr()) {
+          __ Srl(dst_low, lhs_high, shift_value_high);
           __ Move(dst_high, ZERO);
+        } else {
+          if (shift_value == kMipsBitsPerWord) {
+            // 64-bit rotation by 32 is just a swap.
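// A quick check of the identity used above: rotating a 64-bit value held
// in a (high, low) register pair by exactly 32 bits just swaps the halves.
#include <cassert>
#include <cstdint>

int main() {
  uint64_t v = 0x1122334455667788ULL;
  uint64_t rotated = (v >> 32) | (v << 32);
  assert(rotated == 0x5566778811223344ULL);  // High and low words exchanged.
  return 0;
}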
+            __ Move(dst_low, lhs_high);
+            __ Move(dst_high, lhs_low);
+          } else {
+            if (has_ins_rotr) {
+              __ Srl(dst_low, lhs_high, shift_value_high);
+              __ Ins(dst_low, lhs_low, kMipsBitsPerWord - shift_value_high, shift_value_high);
+              __ Srl(dst_high, lhs_low, shift_value_high);
+              __ Ins(dst_high, lhs_high, kMipsBitsPerWord - shift_value_high, shift_value_high);
+            } else {
+              __ Sll(TMP, lhs_low, kMipsBitsPerWord - shift_value_high);
+              __ Srl(dst_low, lhs_high, shift_value_high);
+              __ Or(dst_low, dst_low, TMP);
+              __ Sll(TMP, lhs_high, kMipsBitsPerWord - shift_value_high);
+              __ Srl(dst_high, lhs_low, shift_value_high);
+              __ Or(dst_high, dst_high, TMP);
+            }
+          }
         }
       }
     } else {
@@ -1527,7 +1706,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
         __ Beqz(TMP, &done);
         __ Move(dst_low, dst_high);
         __ Sra(dst_high, dst_high, 31);
-      } else {
+      } else if (instr->IsUShr()) {
         __ Srlv(dst_high, lhs_high, rhs_reg);
         __ Nor(AT, ZERO, rhs_reg);
         __ Sll(TMP, lhs_high, 1);
@@ -1538,6 +1717,21 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
         __ Beqz(TMP, &done);
         __ Move(dst_low, dst_high);
         __ Move(dst_high, ZERO);
+      } else {
+        __ Nor(AT, ZERO, rhs_reg);
+        __ Srlv(TMP, lhs_low, rhs_reg);
+        __ Sll(dst_low, lhs_high, 1);
+        __ Sllv(dst_low, dst_low, AT);
+        __ Or(dst_low, dst_low, TMP);
+        __ Srlv(TMP, lhs_high, rhs_reg);
+        __ Sll(dst_high, lhs_low, 1);
+        __ Sllv(dst_high, dst_high, AT);
+        __ Or(dst_high, dst_high, TMP);
+        __ Andi(TMP, rhs_reg, kMipsBitsPerWord);
+        __ Beqz(TMP, &done);
+        __ Move(TMP, dst_high);
+        __ Move(dst_high, dst_low);
+        __ Move(dst_low, TMP);
       }
       __ Bind(&done);
     }
@@ -2092,7 +2286,7 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) {
   }
 }
 
-void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
+void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
   switch (instruction->InputAt(0)->GetType()) {
     default:
@@ -2112,7 +2306,7 @@ void LocationsBuilderMIPS::VisitCondition(HCondition* instruction) {
   }
 }
 
-void InstructionCodeGeneratorMIPS::VisitCondition(HCondition* instruction) {
+void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) {
   if (!instruction->NeedsMaterialization()) {
     return;
   }
@@ -2192,8 +2386,7 @@ void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruct
   Register out = locations->Out().AsRegister<Register>();
   Register dividend = locations->InAt(0).AsRegister<Register>();
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
-  uint32_t abs_imm = static_cast<uint32_t>(std::abs(imm));
-  DCHECK(IsPowerOfTwo(abs_imm));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
 
   if (instruction->IsDiv()) {
@@ -2296,7 +2489,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst
     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
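// The std::abs -> AbsOrMin change in DivRemByPowerOfTwo above is likely
// motivated by std::abs(min_int) being undefined behavior, while min_int is
// itself a valid power-of-two divisor (its magnitude is 2^31 or 2^63).
// A sketch of such a helper, assuming this is the intended semantics:
#include <cstdint>
#include <cstdlib>
#include <limits>

namespace abs_or_min_sketch {

int32_t AbsOrMin(int32_t value) {
  return (value == std::numeric_limits<int32_t>::min())
      ? value              // Leave min alone: std::abs would overflow (UB).
      : std::abs(value);
}

// static_cast<uint32_t>(AbsOrMin(INT32_MIN)) == 0x80000000u, which is a
// power of two, so IsPowerOfTwo/CTZ still work on the result.

}  // namespace abs_or_min_sketch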
   } else if (imm == 1 || imm == -1) {
     DivRemOneOrMinusOne(instruction);
-  } else if (IsPowerOfTwo(std::abs(imm))) {
+  } else if (IsPowerOfTwo(AbsOrMin(imm))) {
     DivRemByPowerOfTwo(instruction);
   } else {
     DCHECK(imm <= -2 || imm >= 2);
@@ -3236,14 +3429,26 @@ void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
 }
 
 void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCodeMIPS* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
   GenerateTestAndBranch(deoptimize,
                         /* condition_input_index */ 0,
                         slow_path->GetEntryLabel(),
                         /* false_target */ nullptr);
 }
 
+void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  if (codegen_->HasStackMapAtCurrentPc()) {
+    // Ensure that we do not collide with the stack map of the previous instruction.
+    __ Nop();
+  }
+  codegen_->RecordPcInfo(info, info->GetDexPc());
+}
+
 void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
@@ -3327,8 +3532,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
         // Need to move to FP regs since FP results are returned in core registers.
         __ Mtc1(locations->GetTemp(1).AsRegister<Register>(),
                 locations->Out().AsFpuRegister<FRegister>());
-        __ Mthc1(locations->GetTemp(2).AsRegister<Register>(),
-                 locations->Out().AsFpuRegister<FRegister>());
+        __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         locations->Out().AsFpuRegister<FRegister>());
       }
     } else {
       if (!Primitive::IsFloatingPointType(type)) {
@@ -3448,8 +3653,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
       // Pass FP parameters in core registers.
       __ Mfc1(locations->GetTemp(1).AsRegister<Register>(),
               locations->InAt(1).AsFpuRegister<FRegister>());
-      __ Mfhc1(locations->GetTemp(2).AsRegister<Register>(),
-               locations->InAt(1).AsFpuRegister<FRegister>());
+      __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
+                         locations->InAt(1).AsFpuRegister<FRegister>());
     }
     codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store),
                             instruction,
@@ -4406,14 +4611,12 @@ void InstructionCodeGeneratorMIPS::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UN
   codegen_->GenerateFrameExit();
 }
 
-void LocationsBuilderMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+void LocationsBuilderMIPS::VisitRor(HRor* ror) {
+  HandleShift(ror);
 }
 
-void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
+void InstructionCodeGeneratorMIPS::VisitRor(HRor* ror) {
+  HandleShift(ror);
 }
 
 void LocationsBuilderMIPS::VisitShl(HShl* shl) {
@@ -4601,6 +4804,7 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
   Primitive::Type input_type = conversion->GetInputType();
   Primitive::Type result_type = conversion->GetResultType();
   DCHECK_NE(input_type, result_type);
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
 
   if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) ||
       (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) {
@@ -4608,8 +4812,9 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) {
   }
 
   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
-      (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) {
+  if (!isR6 &&
+      ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) ||
+       (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) {
     call_kind = LocationSummary::kCall;
   }
 
@@ -4647,6 +4852,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
   bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
 
   DCHECK_NE(input_type, result_type);
 
@@ -4692,7 +4899,37 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
                  << " to " << result_type;
     }
   } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) {
-    if (input_type != Primitive::kPrimLong) {
+    if (input_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>();
+        Register src_low = locations->InAt(0).AsRegisterPairLow<Register>();
+        FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+        __ Mtc1(src_low, FTMP);
+        __ Mthc1(src_high, FTMP);
+        if (result_type == Primitive::kPrimFloat) {
+          __ Cvtsl(dst, FTMP);
+        } else {
+          __ Cvtdl(dst, FTMP);
+        }
+      } else {
+        int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
+                                                                      : QUICK_ENTRY_POINT(pL2d);
+        bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
+                                                             : IsDirectEntrypoint(kQuickL2d);
+        codegen_->InvokeRuntime(entry_offset,
+                                conversion,
+                                conversion->GetDexPc(),
+                                nullptr,
+                                direct);
+        if (result_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+        } else {
+          CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+        }
+      }
+    } else {
       Register src = locations->InAt(0).AsRegister<Register>();
       FRegister dst = locations->Out().AsFpuRegister<FRegister>();
       __ Mtc1(src, FTMP);
@@ -4701,54 +4938,168 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi
       } else {
         __ Cvtdw(dst, FTMP);
       }
-    } else {
-      int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f)
-                                                                    : QUICK_ENTRY_POINT(pL2d);
-      bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickL2f)
-                                                           : IsDirectEntrypoint(kQuickL2d);
-      codegen_->InvokeRuntime(entry_offset,
-                              conversion,
-                              conversion->GetDexPc(),
-                              nullptr,
-                              direct);
-      if (result_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickL2f, float, int64_t>();
-      } else {
-        CheckEntrypointTypes<kQuickL2d, double, int64_t>();
-      }
     }
   } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
     CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
-    int32_t entry_offset;
-    bool direct;
-    if (result_type != Primitive::kPrimLong) {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz)
-                                                           : QUICK_ENTRY_POINT(pD2iz);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2iz)
-                                                      : IsDirectEntrypoint(kQuickD2iz);
+    if (result_type == Primitive::kPrimLong) {
+      if (isR6) {
+        // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary
+        // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction.
+        FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+        Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+        Register dst_low = locations->Out().AsRegisterPairLow<Register>();
+        MipsLabel truncate;
+        MipsLabel done;
+
+        // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive
+        // value when the input is either a NaN or is outside of the range of the output type
+        // after the truncation. IOW, the three special cases (NaN, too small, too big) produce
+        // the same result.
+        //
+        // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum
+        // value of the output type if the input is outside of the range after the truncation or
+        // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct
+        // results. This matches the desired float/double-to-int/long conversion exactly.
+        //
+        // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction.
+        //
+        // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+        // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+        // even though it must be NAN2008=1 on R6.
+        //
+        // The code takes care of the different behaviors by first comparing the input to the
+        // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int).
+        // If the input is greater than or equal to the minimum, it proceeds to the truncate
+        // instruction, which will handle such an input the same way irrespective of NAN2008.
+        // Otherwise the input is compared to itself to determine whether it is a NaN or not
+        // in order to return either zero or the minimum value.
+        //
+        // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+        // truncate instruction for MIPS64R6.
+        if (input_type == Primitive::kPrimFloat) {
+          uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, min_val);
+          __ Mtc1(TMP, FTMP);
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min());
+          __ LoadConst32(TMP, High32Bits(min_val));
+          __ Mtc1(ZERO, FTMP);
+          __ Mthc1(TMP, FTMP);
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ Move(dst_low, ZERO);
+        __ LoadConst32(dst_high, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst_high, dst_high, TMP);
+
+        __ B(&done);
+
+        __ Bind(&truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ TruncLS(FTMP, src);
+        } else {
+          __ TruncLD(FTMP, src);
+        }
+        __ Mfc1(dst_low, FTMP);
+        __ Mfhc1(dst_high, FTMP);
+
+        __ Bind(&done);
+      } else {
+        int32_t entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
+                                                                     : QUICK_ENTRY_POINT(pD2l);
+        bool direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
+                                                             : IsDirectEntrypoint(kQuickD2l);
+        codegen_->InvokeRuntime(entry_offset, conversion, conversion->GetDexPc(), nullptr, direct);
+        if (input_type == Primitive::kPrimFloat) {
+          CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        } else {
+          CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        }
+      }
     } else {
-      entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l)
-                                                           : QUICK_ENTRY_POINT(pD2l);
-      direct = (result_type == Primitive::kPrimFloat) ? IsDirectEntrypoint(kQuickF2l)
-                                                      : IsDirectEntrypoint(kQuickD2l);
-    }
-    codegen_->InvokeRuntime(entry_offset,
-                            conversion,
-                            conversion->GetDexPc(),
-                            nullptr,
-                            direct);
-    if (result_type != Primitive::kPrimLong) {
+      FRegister src = locations->InAt(0).AsFpuRegister<FRegister>();
+      Register dst = locations->Out().AsRegister<Register>();
+      MipsLabel truncate;
+      MipsLabel done;
+
+      // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate
+      // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6,
+      // even though it must be NAN2008=1 on R6.
+      //
+      // For details see the large comment above for the truncation of float/double to long on R6.
+      //
+      // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the
+      // truncate instruction for MIPS64R6.
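// The comparison dance described in the comments above, restated in portable
// C++ so the three special cases (NaN, too small, too big) are explicit.
// This mirrors the emitted MIPS code in intent only; it is not the generated
// instruction sequence.
#include <cstdint>
#include <limits>

namespace trunc_sketch {

int64_t FloatToInt64(float in) {
  constexpr float min_val =
      static_cast<float>(std::numeric_limits<int64_t>::min());  // Exact: -2^63.
  if (in >= min_val) {  // The comparison is false for NaN, so NaN skips this.
    if (in >= static_cast<float>(std::numeric_limits<int64_t>::max())) {
      return std::numeric_limits<int64_t>::max();  // Too big: cap at max.
    }
    return static_cast<int64_t>(in);  // In range: plain truncation.
  }
  // Below the minimum, or NaN; NaN is the only value unequal to itself.
  return (in == in) ? std::numeric_limits<int64_t>::min() : 0;
}

}  // namespace trunc_sketch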
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+        uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, min_val);
+        __ Mtc1(TMP, FTMP);
+      } else {
+        uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min());
+        __ LoadConst32(TMP, High32Bits(min_val));
+        __ Mtc1(ZERO, FTMP);
+        if (fpu_32bit) {
+          __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1));
+        } else {
+          __ Mthc1(TMP, FTMP);
+        }
+      }
+
+      if (isR6) {
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpLeS(FTMP, FTMP, src);
+        } else {
+          __ CmpLeD(FTMP, FTMP, src);
+        }
+        __ Bc1nez(FTMP, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CmpEqS(FTMP, src, src);
+        } else {
+          __ CmpEqD(FTMP, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Mfc1(TMP, FTMP);
+        __ And(dst, dst, TMP);
       } else {
-        CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+        if (input_type == Primitive::kPrimFloat) {
+          __ ColeS(0, FTMP, src);
+        } else {
+          __ ColeD(0, FTMP, src);
+        }
+        __ Bc1t(0, &truncate);
+
+        if (input_type == Primitive::kPrimFloat) {
+          __ CeqS(0, src, src);
+        } else {
+          __ CeqD(0, src, src);
+        }
+        __ LoadConst32(dst, std::numeric_limits<int32_t>::min());
+        __ Movf(dst, ZERO, 0);
       }
-    } else {
+
+      __ B(&done);
+
+      __ Bind(&truncate);
+
       if (input_type == Primitive::kPrimFloat) {
-        CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+        __ TruncWS(FTMP, src);
       } else {
-        CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+        __ TruncWD(FTMP, src);
       }
+      __ Mfc1(dst, FTMP);
+
+      __ Bind(&done);
     }
   } else if (Primitive::IsFloatingPointType(result_type) &&
              Primitive::IsFloatingPointType(input_type)) {
@@ -4792,83 +5143,83 @@ void InstructionCodeGeneratorMIPS::VisitBoundType(HBoundType* instruction ATTRIB
 }
 
 void LocationsBuilderMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitEqual(HEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitNotEqual(HNotEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThan(HLessThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThan(HGreaterThan* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelow(HBelow* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitBelowOrEqual(HBelowOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAbove(HAbove* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) {
-  VisitCondition(comp);
+  HandleCondition(comp);
 }
 
 void LocationsBuilderMIPS::VisitFakeString(HFakeString* instruction) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 1ee6bdef8e..c3d4851ee9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -185,6 +185,7 @@ class LocationsBuilderMIPS : public HGraphVisitor {
  private:
   void HandleInvoke(HInvoke* invoke);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
@@ -196,7 +197,7 @@ class LocationsBuilderMIPS : public HGraphVisitor {
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS);
 };
 
-class InstructionCodeGeneratorMIPS : public HGraphVisitor {
+class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
  public:
   InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
 
@@ -220,6 +221,7 @@ class InstructionCodeGeneratorMIPS : public HGraphVisitor {
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
   void HandleBinaryOp(HBinaryOperation* operation);
+  void HandleCondition(HCondition* instruction);
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index fb45ef938d..38c32cad06 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -391,24 +391,24 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
 
 class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit DeoptimizationSlowPathMIPS64(HInstruction* instruction)
+  explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
      : instruction_(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, instruction_->GetLocations());
-    DCHECK(instruction_->IsDeoptimize());
-    HDeoptimize* deoptimize = instruction_->AsDeoptimize();
-    uint32_t dex_pc = deoptimize->GetDexPc();
-    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
-    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+    mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
     CheckEntrypointTypes<kQuickDeoptimize, void, void>();
   }
 
   const char* GetDescription() const OVERRIDE {
return "DeoptimizationSlowPathMIPS64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; @@ -1113,7 +1113,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1247,7 +1247,7 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio } void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); Primitive::Type type = instr->GetResultType(); @@ -1265,7 +1265,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { } void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { - DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); + DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); Primitive::Type type = instr->GetType(); @@ -1290,13 +1290,19 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { ? static_cast<uint32_t>(rhs_imm & kMaxIntShiftValue) : static_cast<uint32_t>(rhs_imm & kMaxLongShiftValue); - if (type == Primitive::kPrimInt) { + if (shift_value == 0) { + if (dst != lhs) { + __ Move(dst, lhs); + } + } else if (type == Primitive::kPrimInt) { if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Sra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Srl(dst, lhs, shift_value); + } else { + __ Rotr(dst, lhs, shift_value); } } else { if (shift_value < 32) { @@ -1304,8 +1310,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl(dst, lhs, shift_value); + } else { + __ Drotr(dst, lhs, shift_value); } } else { shift_value -= 32; @@ -1313,8 +1321,10 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Dsll32(dst, lhs, shift_value); } else if (instr->IsShr()) { __ Dsra32(dst, lhs, shift_value); - } else { + } else if (instr->IsUShr()) { __ Dsrl32(dst, lhs, shift_value); + } else { + __ Drotr32(dst, lhs, shift_value); } } } @@ -1324,16 +1334,20 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Srav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Srlv(dst, lhs, rhs_reg); + } else { + __ Rotrv(dst, lhs, rhs_reg); } } else { if (instr->IsShl()) { __ Dsllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { __ Dsrav(dst, lhs, rhs_reg); - } else { + } else if (instr->IsUShr()) { __ Dsrlv(dst, lhs, rhs_reg); + } else { + __ Drotrv(dst, lhs, rhs_reg); } } } @@ -1752,11 +1766,7 @@ void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { Primitive::Type in_type = compare->InputAt(0)->GetType(); - LocationSummary::CallKind call_kind = Primitive::IsFloatingPointType(in_type) - ? 
LocationSummary::kCall - : LocationSummary::kNoCall; - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare, call_kind); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); switch (in_type) { case Primitive::kPrimLong: @@ -1766,13 +1776,11 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { break; case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - } default: LOG(FATAL) << "Unexpected type for compare operation " << in_type; @@ -1781,14 +1789,15 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); + GpuRegister res = locations->Out().AsRegister<GpuRegister>(); Primitive::Type in_type = instruction->InputAt(0)->GetType(); + bool gt_bias = instruction->IsGtBias(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { case Primitive::kPrimLong: { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); @@ -1803,35 +1812,52 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { rhs = rhs_location.AsRegister<GpuRegister>(); } __ Slt(TMP, lhs, rhs); - __ Slt(dst, rhs, lhs); - __ Subu(dst, dst, TMP); + __ Slt(res, rhs, lhs); + __ Subu(res, res, TMP); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - int32_t entry_point_offset; - if (in_type == Primitive::kPrimFloat) { - entry_point_offset = instruction->IsGtBias() ? QUICK_ENTRY_POINT(pCmpgFloat) - : QUICK_ENTRY_POINT(pCmplFloat); + case Primitive::kPrimFloat: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqS(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - entry_point_offset = instruction->IsGtBias() ? 
QUICK_ENTRY_POINT(pCmpgDouble) - : QUICK_ENTRY_POINT(pCmplDouble); + __ CmpLtS(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } - codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr); - if (in_type == Primitive::kPrimFloat) { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>(); - } else { - CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>(); - } + __ Bind(&done); + break; + } + + case Primitive::kPrimDouble: { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + Mips64Label done; + __ CmpEqD(FTMP, lhs, rhs); + __ LoadConst32(res, 0); + __ Bc1nez(FTMP, &done); + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + __ LoadConst32(res, -1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, 1); } else { - if (instruction->IsGtBias()) { - CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>(); - } else { - CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>(); - } + __ CmpLtD(FTMP, rhs, lhs); + __ LoadConst32(res, 1); + __ Bc1nez(FTMP, &done); + __ LoadConst32(res, -1); } + __ Bind(&done); break; } @@ -1840,143 +1866,67 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { } } -void LocationsBuilderMIPS64::VisitCondition(HCondition* instruction) { +void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + switch (instruction->InputAt(0)->GetType()) { + default: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + break; + + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + } if (instruction->NeedsMaterialization()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } -void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) { +void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { if (!instruction->NeedsMaterialization()) { return; } - // TODO: generalize to long - DCHECK_NE(instruction->InputAt(0)->GetType(), Primitive::kPrimLong); - + Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = locations->InAt(1); - - GpuRegister rhs_reg = ZERO; - int64_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond = instruction->GetCondition(); + Mips64Label true_label; - switch (if_cond) { - case kCondEQ: - case kCondNE: - if (use_imm && IsUint<16>(rhs_imm)) { - __ Xori(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Xor(dst, lhs, rhs_reg); - } - if (if_cond == kCondEQ) { - __ Sltiu(dst, dst, 1); - } else { - __ Sltu(dst, ZERO,
dst); - } - break; + switch (type) { + default: + // Integer case. + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); + return; + case Primitive::kPrimLong: + GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); + return; - case kCondLT: - case kCondGE: - if (use_imm && IsInt<16>(rhs_imm)) { - __ Slti(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, lhs, rhs_reg); - } - if (if_cond == kCondGE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the slt instruction but no sge. - __ Xori(dst, dst, 1); - } + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + // TODO: don't use branches. + GenerateFpCompareAndBranch(instruction->GetCondition(), + instruction->IsGtBias(), + type, + locations, + &true_label); break; + } - case kCondLE: - case kCondGT: - if (use_imm && IsInt<16>(rhs_imm + 1)) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Slti(dst, lhs, rhs_imm + 1); - if (if_cond == kCondGT) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the slti instruction but no sgti. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Slt(dst, rhs_reg, lhs); - if (if_cond == kCondLE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the slt instruction but no sle. - __ Xori(dst, dst, 1); - } - } - break; + // Convert the branches into the result. + Mips64Label done; - case kCondB: - case kCondAE: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7fff) { - __ Sltiu(dst, lhs, rhs_imm); - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, lhs, rhs_reg); - } - if (if_cond == kCondAE) { - // Simulate lhs >= rhs via !(lhs < rhs) since there's - // only the sltu instruction but no sgeu. - __ Xori(dst, dst, 1); - } - break; + // False case: result = 0. + __ LoadConst32(dst, 0); + __ Bc(&done); - case kCondBE: - case kCondA: - if (use_imm && 0 <= rhs_imm && rhs_imm <= 0x7ffe) { - // Simulate lhs <= rhs via lhs < rhs + 1. - __ Sltiu(dst, lhs, rhs_imm + 1); - if (if_cond == kCondA) { - // Simulate lhs > rhs via !(lhs <= rhs) since there's - // only the sltiu instruction but no sgtiu. - __ Xori(dst, dst, 1); - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - __ Sltu(dst, rhs_reg, lhs); - if (if_cond == kCondBE) { - // Simulate lhs <= rhs via !(rhs < lhs) since there's - // only the sltu instruction but no sleu. - __ Xori(dst, dst, 1); - } - } - break; - } + // True case: result = 1. + __ Bind(&true_label); + __ LoadConst32(dst, 1); + __ Bind(&done); } void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -2019,8 +1969,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm)); - DCHECK(IsPowerOfTwo(abs_imm)); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { @@ -2202,7 +2151,7 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
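// AbsOrMin replaces std::abs here and in the x86/x86-64 backends below
// because std::abs on the most negative value overflows, which is undefined
// behavior for signed integers. A minimal sketch of the helper's contract,
// assuming the usual two's-complement layout (the real helper ships with
// ART's bit utilities):
//   int64_t AbsOrMin(int64_t v) {
//     return (v == std::numeric_limits<int64_t>::min()) ? v : std::abs(v);
//   }
// The minimum is returned unchanged; cast to uint64_t it equals 2^63, so
// the IsPowerOfTwo/CTZ arithmetic in DivRemByPowerOfTwo still treats it
// correctly.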
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (IsPowerOfTwo(std::abs(imm))) { + } else if (IsPowerOfTwo(AbsOrMin(imm))) { DivRemByPowerOfTwo(instruction); } else { DCHECK(imm <= -2 || imm >= 2); @@ -2375,6 +2324,329 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary } } +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* locations) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + if (cond == kCondEQ) { + __ Sltiu(dst, dst, 1); + } else { + __ Sltu(dst, ZERO, dst); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + if (cond == kCondGE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the slt instruction but no sge. + __ Xori(dst, dst, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm_plus_one); + if (cond == kCondGT) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the slti instruction but no sgti. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + if (cond == kCondLE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the slt instruction but no sle. + __ Xori(dst, dst, 1); + } + } + break; + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + if (cond == kCondAE) { + // Simulate lhs >= rhs via !(lhs < rhs) since there's + // only the sltu instruction but no sgeu. + __ Xori(dst, dst, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. 
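+ // Worked example: for kCondBE with rhs_imm == 0x7ffe, rhs_imm_plus_one ==
+ // 0x7fff still satisfies IsInt<16>, and Sltiu computes lhs < 0x7fff,
+ // which is exactly lhs <= 0x7ffe. For rhs_imm == -1 (unsigned all-ones),
+ // rhs_imm_plus_one wraps to 0, so the (rhs_imm_plus_one != 0) guard above
+ // rejects the immediate form and the register sequence below is used
+ // instead.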
+ __ Sltiu(dst, lhs, rhs_imm_plus_one); + if (cond == kCondA) { + // Simulate lhs > rhs via !(lhs <= rhs) since there's + // only the sltiu instruction but no sgtiu. + __ Xori(dst, dst, 1); + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + if (cond == kCondBE) { + // Simulate lhs <= rhs via !(rhs < lhs) since there's + // only the sltu instruction but no sleu. + __ Xori(dst, dst, 1); + } + } + break; + } +} + +void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + + if (use_imm && rhs_imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqzc(lhs, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnezc(lhs, label); + break; + case kCondLT: + __ Bltzc(lhs, label); + break; + case kCondGE: + __ Bgezc(lhs, label); + break; + case kCondLE: + __ Blezc(lhs, label); + break; + case kCondGT: + __ Bgtzc(lhs, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ Bc(label); + break; + } + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + switch (cond) { + case kCondEQ: + __ Beqc(lhs, rhs_reg, label); + break; + case kCondNE: + __ Bnec(lhs, rhs_reg, label); + break; + case kCondLT: + __ Bltc(lhs, rhs_reg, label); + break; + case kCondGE: + __ Bgec(lhs, rhs_reg, label); + break; + case kCondLE: + __ Bgec(rhs_reg, lhs, label); + break; + case kCondGT: + __ Bltc(rhs_reg, lhs, label); + break; + case kCondB: + __ Bltuc(lhs, rhs_reg, label); + break; + case kCondAE: + __ Bgeuc(lhs, rhs_reg, label); + break; + case kCondBE: + __ Bgeuc(rhs_reg, lhs, label); + break; + case kCondA: + __ Bltuc(rhs_reg, lhs, label); + break; + } + } +} + +void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == Primitive::kPrimFloat) { + switch (cond) { + case kCondEQ: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqS(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtS(FTMP, lhs, rhs); + } else { + __ CmpUltS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeS(FTMP, lhs, rhs); + } else { + __ CmpUleS(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltS(FTMP, rhs, lhs); + } else { + __ CmpLtS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleS(FTMP, rhs, lhs); + } else { + __ CmpLeS(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } else { + DCHECK_EQ(type, Primitive::kPrimDouble); + switch 
(cond) { + case kCondEQ: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1nez(FTMP, label); + break; + case kCondNE: + __ CmpEqD(FTMP, lhs, rhs); + __ Bc1eqz(FTMP, label); + break; + case kCondLT: + if (gt_bias) { + __ CmpLtD(FTMP, lhs, rhs); + } else { + __ CmpUltD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondLE: + if (gt_bias) { + __ CmpLeD(FTMP, lhs, rhs); + } else { + __ CmpUleD(FTMP, lhs, rhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGT: + if (gt_bias) { + __ CmpUltD(FTMP, rhs, lhs); + } else { + __ CmpLtD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + case kCondGE: + if (gt_bias) { + __ CmpUleD(FTMP, rhs, lhs); + } else { + __ CmpLeD(FTMP, rhs, lhs); + } + __ Bc1nez(FTMP, label); + break; + default: + LOG(FATAL) << "Unexpected non-floating-point condition"; + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Mips64Label* true_target, @@ -2420,97 +2692,27 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. HCondition* condition = cond->AsCondition(); + Primitive::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Mips64Label* branch_target = true_target; - GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>(); - Location rhs_location = condition->GetLocations()->InAt(1); - GpuRegister rhs_reg = ZERO; - int32_t rhs_imm = 0; - bool use_imm = rhs_location.IsConstant(); - if (use_imm) { - rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); - } else { - rhs_reg = rhs_location.AsRegister<GpuRegister>(); - } - - IfCondition if_cond; - Mips64Label* non_fallthrough_target; if (true_target == nullptr) { if_cond = condition->GetOppositeCondition(); - non_fallthrough_target = false_target; - } else { - if_cond = condition->GetCondition(); - non_fallthrough_target = true_target; - } - - if (use_imm && rhs_imm == 0) { - switch (if_cond) { - case kCondEQ: - __ Beqzc(lhs, non_fallthrough_target); - break; - case kCondNE: - __ Bnezc(lhs, non_fallthrough_target); - break; - case kCondLT: - __ Bltzc(lhs, non_fallthrough_target); - break; - case kCondGE: - __ Bgezc(lhs, non_fallthrough_target); - break; - case kCondLE: - __ Blezc(lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bgtzc(lhs, non_fallthrough_target); - break; - case kCondB: - break; // always false - case kCondBE: - __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero - break; - case kCondA: - __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero - break; - case kCondAE: - __ Bc(non_fallthrough_target); // always true - break; - } - } else { - if (use_imm) { - rhs_reg = TMP; - __ LoadConst32(rhs_reg, rhs_imm); - } - switch (if_cond) { - case kCondEQ: - __ Beqc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondNE: - __ Bnec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLT: - __ Bltc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondGE: - __ Bgec(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondLE: - __ Bgec(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondGT: - __ Bltc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondB: - __ Bltuc(lhs, rhs_reg, non_fallthrough_target); - break; - case kCondAE: - __ Bgeuc(lhs, rhs_reg, non_fallthrough_target); - 
break; - case kCondBE: - __ Bgeuc(rhs_reg, lhs, non_fallthrough_target); - break; - case kCondA: - __ Bltuc(rhs_reg, lhs, non_fallthrough_target); - break; - } + branch_target = false_target; + } + + switch (type) { + default: + GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target); + break; + case Primitive::kPrimLong: + GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; } } @@ -2547,15 +2749,26 @@ void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathMIPS64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCodeMIPS64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), /* false_target */ nullptr); } +void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ Nop(); + } + codegen_->RecordPcInfo(info, info->GetDexPc()); +} + void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info ATTRIBUTE_UNUSED) { LocationSummary* locations = @@ -3525,14 +3738,12 @@ void InstructionCodeGeneratorMIPS64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ codegen_->GenerateFrameExit(); } -void LocationsBuilderMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void LocationsBuilderMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } -void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror ATTRIBUTE_UNUSED) { - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); +void InstructionCodeGeneratorMIPS64::VisitRor(HRor* ror) { + HandleShift(ror); } void LocationsBuilderMIPS64::VisitShl(HShl* shl) { @@ -3721,36 +3932,18 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type))) { - call_kind = LocationSummary::kCall; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); - - if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - } + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); - if (Primitive::IsFloatingPointType(result_type)) { - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); - } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + if (Primitive::IsFloatingPointType(input_type)) { + locations->SetInAt(0, 
Location::RequiresFpuRegister()); } else { - InvokeRuntimeCallingConvention calling_convention; - - if (Primitive::IsFloatingPointType(input_type)) { - locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - } else { - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - } + locations->SetInAt(0, Location::RequiresRegister()); + } - locations->SetOut(calling_convention.GetReturnLocation(result_type)); + if (Primitive::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } @@ -3795,55 +3988,107 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver << " to " << result_type; } } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type != Primitive::kPrimLong) { - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - __ Mtc1(src, FTMP); + FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); + if (input_type == Primitive::kPrimLong) { + __ Dmtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - __ Cvtsw(dst, FTMP); + __ Cvtsl(dst, FTMP); } else { - __ Cvtdw(dst, FTMP); + __ Cvtdl(dst, FTMP); } } else { - int32_t entry_offset = (result_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pL2f) - : QUICK_ENTRY_POINT(pL2d); - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); + __ Mtc1(src, FTMP); if (result_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickL2f, float, int64_t>(); + __ Cvtsw(dst, FTMP); } else { - CheckEntrypointTypes<kQuickL2d, double, int64_t>(); + __ Cvtdw(dst, FTMP); } } } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); - int32_t entry_offset; - if (result_type != Primitive::kPrimLong) { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2iz) - : QUICK_ENTRY_POINT(pD2iz); + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); + Mips64Label truncate; + Mips64Label done; + + // When NAN2008=0 (R2 and before), the truncate instruction produces the maximum positive + // value when the input is either a NaN or is outside of the range of the output type + // after the truncation. IOW, the three special cases (NaN, too small, too big) produce + // the same result. + // + // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum + // value of the output type if the input is outside of the range after the truncation or + // produces 0 when the input is a NaN. IOW, the three special cases produce three distinct + // results. This matches the desired float/double-to-int/long conversion exactly. + // + // So, NAN2008 affects handling of negative values and NaNs by the truncate instruction. + // + // The following code supports both NAN2008=0 and NAN2008=1 behaviors of the truncate + // instruction, the reason being that the emulator implements NAN2008=0 on MIPS64R6, + // even though it must be NAN2008=1 on R6. 
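+ //
+ // Illustrative float-to-int cases (Java semantics, which NAN2008=1
+ // matches exactly):
+ //   (int) NaN    ==> 0
+ //   (int) -1e30f ==> INT32_MIN (below range)
+ //   (int) 1e30f  ==> INT32_MAX (above range)
+ // whereas a NAN2008=0 truncate returns INT32_MAX for all three, which is
+ // why the code below cannot rely on the truncate result alone.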
+ // + // The code takes care of the different behaviors by first comparing the input to the + // minimum output value (-2**63 for truncating to long, -2**31 for truncating to int). + // If the input is greater than or equal to the minimum, it proceeds to the truncate + // instruction, which will handle such an input the same way irrespective of NAN2008. + // Otherwise the input is compared to itself to determine whether it is a NaN or not + // in order to return either zero or the minimum value. + // + // TODO: simplify this when the emulator correctly implements NAN2008=1 behavior of the + // truncate instruction for MIPS64R6. + if (input_type == Primitive::kPrimFloat) { + uint32_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint32_t, float>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); + __ LoadConst32(TMP, min_val); + __ Mtc1(TMP, FTMP); + __ CmpLeS(FTMP, FTMP, src); } else { - entry_offset = (input_type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pF2l) - : QUICK_ENTRY_POINT(pD2l); + uint64_t min_val = (result_type == Primitive::kPrimLong) + ? bit_cast<uint64_t, double>(std::numeric_limits<int64_t>::min()) + : bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); + __ LoadConst64(TMP, min_val); + __ Dmtc1(TMP, FTMP); + __ CmpLeD(FTMP, FTMP, src); } - codegen_->InvokeRuntime(entry_offset, - conversion, - conversion->GetDexPc(), - nullptr); - if (result_type != Primitive::kPrimLong) { + + __ Bc1nez(FTMP, &truncate); + + if (input_type == Primitive::kPrimFloat) { + __ CmpEqS(FTMP, src, src); + } else { + __ CmpEqD(FTMP, src, src); + } + if (result_type == Primitive::kPrimLong) { + __ LoadConst64(dst, std::numeric_limits<int64_t>::min()); + } else { + __ LoadConst32(dst, std::numeric_limits<int32_t>::min()); + } + __ Mfc1(TMP, FTMP); + __ And(dst, dst, TMP); + + __ Bc(&done); + + __ Bind(&truncate); + + if (result_type == Primitive::kPrimLong) { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2iz, int32_t, float>(); + __ TruncLS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2iz, int32_t, double>(); + __ TruncLD(FTMP, src); } + __ Dmfc1(dst, FTMP); } else { if (input_type == Primitive::kPrimFloat) { - CheckEntrypointTypes<kQuickF2l, int64_t, float>(); + __ TruncWS(FTMP, src); } else { - CheckEntrypointTypes<kQuickD2l, int64_t, double>(); + __ TruncWD(FTMP, src); } + __ Mfc1(dst, FTMP); } + + __ Bind(&done); } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); @@ -3886,83 +4131,83 @@ void InstructionCodeGeneratorMIPS64::VisitBoundType(HBoundType* instruction ATTR } void LocationsBuilderMIPS64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); +
HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderMIPS64::VisitFakeString(HFakeString* instruction) { diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 85e3a4a3ce..7182e8e987 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -189,6 +189,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -200,7 +201,7 @@ class LocationsBuilderMIPS64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderMIPS64); }; -class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { +class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen); @@ -224,6 +225,7 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -237,6 +239,16 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor { void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* 
locations); + void GenerateIntLongCompareAndBranch(IfCondition cond, + bool is64bit, + LocationSummary* locations, + Mips64Label* label); + void GenerateFpCompareAndBranch(IfCondition cond, + bool gt_bias, + Primitive::Type type, + LocationSummary* locations, + Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); Mips64Assembler* const assembler_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 469dd49a8e..c24d25876c 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -365,11 +365,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode { class DeoptimizationSlowPathX86 : public SlowPathCode { public: - explicit DeoptimizationSlowPathX86(HInstruction* instruction) + explicit DeoptimizationSlowPathX86(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { - DCHECK(instruction_->IsDeoptimize()); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); @@ -383,7 +382,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86); }; @@ -892,7 +891,7 @@ void CodeGeneratorX86::UpdateBlockedPairRegisters() const { } InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1335,9 +1334,10 @@ void LocationsBuilderX86::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } +template<class LabelType> void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, - Label* true_label, - Label* false_label) { + LabelType* true_label, + LabelType* false_label) { if (cond->IsFPConditionTrueIfNaN()) { __ j(kUnordered, true_label); } else if (cond->IsFPConditionFalseIfNaN()) { @@ -1346,9 +1346,10 @@ void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, __ j(X86UnsignedOrFPCondition(cond->GetCondition()), true_label); } +template<class LabelType> void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, - Label* true_label, - Label* false_label) { + LabelType* true_label, + LabelType* false_label) { LocationSummary* locations = cond->GetLocations(); Location left = locations->InAt(0); Location right = locations->InAt(1); @@ -1437,14 +1438,15 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond, __ j(final_condition, true_label); } +template<class LabelType> void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition, - Label* true_target_in, - Label* false_target_in) { + LabelType* true_target_in, + LabelType* false_target_in) { // Generated branching requires both targets to be explicit. If either of the // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. - Label fallthrough_target; - Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; - Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LabelType fallthrough_target; + LabelType* true_target = true_target_in == nullptr ? 
&fallthrough_target : true_target_in; + LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); @@ -1486,10 +1488,11 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); } +template<class LabelType> void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + LabelType* true_target, + LabelType* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -1554,7 +1557,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio Location lhs = condition->GetLocations()->InAt(0); Location rhs = condition->GetLocations()->InAt(1); - // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition). + // LHS is guaranteed to be in a register (see LocationsBuilderX86::HandleCondition). if (rhs.IsRegister()) { __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>()); } else if (rhs.IsConstant()) { @@ -1607,13 +1610,23 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target */ static_cast<Label*>(nullptr)); +} + +void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } + codegen_->RecordPcInfo(info, info->GetDexPc()); } void LocationsBuilderX86::VisitLocal(HLocal* local) { @@ -1659,7 +1672,7 @@ void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) { } -void LocationsBuilderX86::VisitCondition(HCondition* cond) { +void LocationsBuilderX86::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. 
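// The Label -> LabelType templates introduced above let the same helpers emit
// either full Labels (32-bit displacements, needed for cross-block branches)
// or NearLabels (8-bit displacements) when the target is known to be a few
// instructions away, as in the materialized conditions handled here. A sketch
// of the two instantiations (illustrative only; names as in this patch):
//   NearLabel t, f;
//   GenerateFPJumps(cond, &t, &f);  // instantiates GenerateFPJumps<NearLabel>
//   GenerateTestAndBranch(deopt, 0, slow_path->GetEntryLabel(),
//                         static_cast<Label*>(nullptr));  // GenerateTestAndBranch<Label>
// The static_cast supplies a concrete LabelType when one target is null and
// template deduction would otherwise fail.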
@@ -1692,7 +1705,7 @@ void LocationsBuilderX86::VisitCondition(HCondition* cond) { } } -void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { +void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { if (!cond->NeedsMaterialization()) { return; } @@ -1701,7 +1714,7 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { Location lhs = locations->InAt(0); Location rhs = locations->InAt(1); Register reg = locations->Out().AsRegister<Register>(); - Label true_label, false_label; + NearLabel true_label, false_label; switch (cond->InputAt(0)->GetType()) { default: { @@ -1753,83 +1766,83 @@ void InstructionCodeGeneratorX86::VisitCondition(HCondition* cond) { } void LocationsBuilderX86::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { @@ -3211,11 +3224,12 @@ void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { Register out_register = locations->Out().AsRegister<Register>(); Register input_register = locations->InAt(0).AsRegister<Register>(); int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - 
DCHECK(IsPowerOfTwo(std::abs(imm))); Register num = locations->GetTemp(0).AsRegister<Register>(); - __ leal(num, Address(input_register, std::abs(imm) - 1)); + __ leal(num, Address(input_register, abs_imm - 1)); __ testl(input_register, input_register); __ cmovl(kGreaterEqual, num, input_register); int shift = CTZ(imm); @@ -3328,7 +3342,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(std::abs(imm))) { + } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); @@ -4156,7 +4170,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 712179920b..c65c423eae 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_ +#include "arch/x86/instruction_set_features_x86.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -166,6 +167,7 @@ class LocationsBuilderX86 : public HGraphVisitor { private: void HandleBitwiseOperation(HBinaryOperation* instruction); void HandleInvoke(HInvoke* invoke); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); @@ -176,7 +178,7 @@ class LocationsBuilderX86 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86); }; -class InstructionCodeGeneratorX86 : public HGraphVisitor { +class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen); @@ -212,6 +214,7 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void DivByPowerOfTwo(HDiv* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem* rem); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* instruction); void GenerateShlLong(const Location& loc, Register shifter); void GenerateShrLong(const Location& loc, Register shifter); @@ -264,15 +267,22 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor { void GenerateImplicitNullCheck(HNullCheck* instruction); void GenerateExplicitNullCheck(HNullCheck* instruction); + template<class LabelType> void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + LabelType* true_target, + LabelType* false_target); + template<class LabelType> void GenerateCompareTestAndBranch(HCondition* condition, - Label* true_target, - Label* false_target); - void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); - void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label); + LabelType* true_target, + LabelType* false_target); + template<class LabelType> + void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* 
false_label); + template<class LabelType> + void GenerateLongComparesAndJumps(HCondition* cond, + LabelType* true_label, + LabelType* false_label); + void HandleGoto(HInstruction* got, HBasicBlock* successor); void GenPackedSwitchWithCompares(Register value_reg, int32_t lower_bound, @@ -506,6 +516,19 @@ class CodeGeneratorX86 : public CodeGenerator { // artReadBarrierForRootSlow. void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. + // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores. + void MemoryFence(bool non_temporal = false) { + if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(ESP, 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2c5fbc78bf..294b40e3d4 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -387,18 +387,16 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { class DeoptimizationSlowPathX86_64 : public SlowPathCode { public: - explicit DeoptimizationSlowPathX86_64(HInstruction* instruction) + explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction) : instruction_(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, instruction_->GetLocations()); - DCHECK(instruction_->IsDeoptimize()); - HDeoptimize* deoptimize = instruction_->AsDeoptimize(); x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), - deoptimize, - deoptimize->GetDexPc(), + instruction_, + instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickDeoptimize, void, void>(); } @@ -406,7 +404,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } private: - HInstruction* const instruction_; + HDeoptimize* const instruction_; DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64); }; @@ -786,7 +784,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: // temp = thread->string_init_entrypoint - __ gs()->movl(temp.AsRegister<CpuRegister>(), + __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(invoke->GetStringInitOffset(), /* no_rip */ true)); break; case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: @@ -1000,7 +998,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen) - : HGraphVisitor(graph), + : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} @@ -1370,9 +1368,10 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) { void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } +template<class LabelType> void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, - Label* 
true_label, - Label* false_label) { + LabelType* true_label, + LabelType* false_label) { if (cond->IsFPConditionTrueIfNaN()) { __ j(kUnordered, true_label); } else if (cond->IsFPConditionFalseIfNaN()) { @@ -1381,14 +1380,15 @@ void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, __ j(X86_64FPCondition(cond->GetCondition()), true_label); } +template<class LabelType> void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, - Label* true_target_in, - Label* false_target_in) { + LabelType* true_target_in, + LabelType* false_target_in) { // Generated branching requires both targets to be explicit. If either of the // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. - Label fallthrough_target; - Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; - Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; + LabelType fallthrough_target; + LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; + LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; LocationSummary* locations = condition->GetLocations(); Location left = locations->InAt(0); @@ -1470,10 +1470,11 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); } +template<class LabelType> void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target) { + LabelType* true_target, + LabelType* false_target) { HInstruction* cond = instruction->InputAt(condition_input_index); if (true_target == nullptr && false_target == nullptr) { @@ -1591,13 +1592,23 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { } void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) - DeoptimizationSlowPathX86_64(deoptimize); - codegen_->AddSlowPath(slow_path); + SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); GenerateTestAndBranch(deoptimize, /* condition_input_index */ 0, slow_path->GetEntryLabel(), - /* false_target */ nullptr); + /* false_target */ static_cast<Label*>(nullptr)); +} + +void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + new (GetGraph()->GetArena()) LocationSummary(info); +} + +void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { + if (codegen_->HasStackMapAtCurrentPc()) { + // Ensure that we do not collide with the stack map of the previous instruction. + __ nop(); + } + codegen_->RecordPcInfo(info, info->GetDexPc()); } void LocationsBuilderX86_64::VisitLocal(HLocal* local) { @@ -1643,7 +1654,7 @@ void LocationsBuilderX86_64::VisitStoreLocal(HStoreLocal* store) { void InstructionCodeGeneratorX86_64::VisitStoreLocal(HStoreLocal* store ATTRIBUTE_UNUSED) { } -void LocationsBuilderX86_64::VisitCondition(HCondition* cond) { +void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. 
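// The mfence -> MemoryFence() replacement later in this file mirrors the x86
// helper added to code_generator_x86.h: when the instruction-set features
// report PrefersLockedAddSynchronization(), a locked add of zero to the top
// of the stack acts as a full barrier and is typically cheaper than mfence.
// A sketch of the x86-64 counterpart, with the signature assumed from the
// x86 declaration rather than quoted from this patch:
//   void MemoryFence(bool non_temporal = false) {
//     if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
//       assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
//     } else {
//       assembler_.mfence();
//     }
//   }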
@@ -1667,7 +1678,7 @@ void LocationsBuilderX86_64::VisitCondition(HCondition* cond) { } } -void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { +void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { if (!cond->NeedsMaterialization()) { return; } @@ -1676,7 +1687,7 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { Location lhs = locations->InAt(0); Location rhs = locations->InAt(1); CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); - Label true_label, false_label; + NearLabel true_label, false_label; switch (cond->InputAt(0)->GetType()) { default: @@ -1765,83 +1776,83 @@ void InstructionCodeGeneratorX86_64::VisitCondition(HCondition* cond) { } void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { - VisitCondition(comp); + HandleCondition(comp); } void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { @@ -3339,13 +3350,13 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - - DCHECK(IsPowerOfTwo(std::abs(imm))); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); 
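  // Note on the AbsOrMin change just above (a hedged reading; the helper is
  // defined elsewhere in the tree, not in this hunk): std::abs is undefined
  // for the most negative int32_t/int64_t, yet their magnitudes 2^31 and 2^63
  // are powers of two this path must still handle. AbsOrMin behaves like
  //   AbsOrMin(v) == (v == numeric_limits<T>::min()) ? v : std::abs(v)
  // so casting the result to uint64_t, as abs_imm does, yields the true
  // magnitude in every case, and abs_imm - 1 below is always the right mask.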
CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); if (instruction->GetResultType() == Primitive::kPrimInt) { - __ leal(tmp, Address(numerator, std::abs(imm) - 1)); + __ leal(tmp, Address(numerator, abs_imm - 1)); __ testl(numerator, numerator); __ cmov(kGreaterEqual, tmp, numerator); int shift = CTZ(imm); @@ -3360,7 +3371,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); - codegen_->Load64BitValue(rdx, std::abs(imm) - 1); + codegen_->Load64BitValue(rdx, abs_imm - 1); __ addq(rdx, numerator); __ testq(numerator, numerator); __ cmov(kGreaterEqual, rdx, numerator); @@ -3518,7 +3529,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. } else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) { + } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { DivByPowerOfTwo(instruction->AsDiv()); } else { DCHECK(imm <= -2 || imm >= 2); @@ -4033,7 +4044,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { */ switch (kind) { case MemBarrierKind::kAnyAny: { - __ mfence(); + MemoryFence(); break; } case MemBarrierKind::kAnyStore: @@ -5739,7 +5750,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); - Label done; + NearLabel done; // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { __ testl(obj, obj); @@ -6377,7 +6388,7 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins if (index != num_entries) { // There are an odd number of entries. Handle the last one. 
DCHECK_EQ(index + 1, num_entries); - __ cmpl(value_reg_in, Immediate(lower_bound + index)); + __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index))); __ j(kEqual, codegen_->GetLabelOf(successors[index])); } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dda9ea22d9..505c9dcdad 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_ +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator.h" #include "dex/compiler_enums.h" #include "driver/compiler_options.h" @@ -171,6 +172,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info); void HandleFieldGet(HInstruction* instruction); @@ -181,7 +183,7 @@ class LocationsBuilderX86_64 : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86_64); }; -class InstructionCodeGeneratorX86_64 : public HGraphVisitor { +class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { public: InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen); @@ -212,6 +214,7 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void DivByPowerOfTwo(HDiv* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); + void HandleCondition(HCondition* condition); void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, @@ -255,14 +258,18 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor { void GenerateExplicitNullCheck(HNullCheck* instruction); void PushOntoFPStack(Location source, uint32_t temp_offset, uint32_t stack_adjustment, bool is_float); + template<class LabelType> void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, - Label* true_target, - Label* false_target); + LabelType* true_target, + LabelType* false_target); + template<class LabelType> void GenerateCompareTestAndBranch(HCondition* condition, - Label* true_target, - Label* false_target); - void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label); + LabelType* true_target, + LabelType* false_target); + template<class LabelType> + void GenerateFPJumps(HCondition* cond, LabelType* true_label, LabelType* false_label); + void HandleGoto(HInstruction* got, HBasicBlock* successor); X86_64Assembler* const assembler_; @@ -479,6 +486,18 @@ class CodeGeneratorX86_64 : public CodeGenerator { int64_t v, HInstruction* instruction); + // Ensure that prior stores complete to memory before subsequent loads. + // The locked add implementation will avoid serializing device memory, but will + // touch (but not change) the top of the stack. The locked add should not be used for + // ordering non-temporal stores. 
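  // Background on the locked-add idiom (general x86 behavior, not specific
  // to this patch): any LOCK-prefixed read-modify-write, e.g.
  //     lock addl $0, 0(%rsp)
  // acts as a full StoreLoad barrier for ordinary write-back memory, and the
  // no-op add only touches the already-hot top-of-stack cache line, so it is
  // typically cheaper than MFENCE. MFENCE additionally orders non-temporal
  // stores, which is why the force_mfence escape hatch below is kept.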
+ void MemoryFence(bool force_mfence = false) { + if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) { + assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0)); + } else { + assembler_.mfence(); + } + } + private: // Factored implementation of GenerateFieldLoadWithBakerReadBarrier // and GenerateArrayLoadWithBakerReadBarrier. diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index e469c8d6d0..a8f65bf516 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -32,7 +32,7 @@ namespace art { /** * Fixture class for the constant folding and dce tests. */ -class ConstantFoldingTest : public testing::Test { +class ConstantFoldingTest : public CommonCompilerTest { public: ConstantFoldingTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -56,7 +56,7 @@ class ConstantFoldingTest : public testing::Test { const std::string& expected_after_dce, std::function<void(HGraph*)> check_after_cf) { ASSERT_NE(graph_, nullptr); - graph_->TryBuildingSsa(); + TransformToSsa(graph_); StringPrettyPrinter printer_before(graph_); printer_before.VisitInsertionOrder(); diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index 02e5dab3d4..67ff87a759 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -165,6 +165,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { if (!inst->HasSideEffects() && !inst->CanThrow() && !inst->IsSuspendCheck() + && !inst->IsNativeDebugInfo() // If we added an explicit barrier then we should keep it. && !inst->IsMemoryBarrier() && !inst->IsParameterValue() diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index 2c6a1ef63d..f0f98efadb 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -26,6 +26,8 @@ namespace art { +class DeadCodeEliminationTest : public CommonCompilerTest {}; + static void TestCode(const uint16_t* data, const std::string& expected_before, const std::string& expected_after) { @@ -34,7 +36,7 @@ static void TestCode(const uint16_t* data, HGraph* graph = CreateCFG(&allocator, data); ASSERT_NE(graph, nullptr); - graph->TryBuildingSsa(); + TransformToSsa(graph); StringPrettyPrinter printer_before(graph); printer_before.VisitInsertionOrder(); @@ -55,7 +57,6 @@ static void TestCode(const uint16_t* data, ASSERT_EQ(actual_after, expected_after); } - /** * Small three-register program. * @@ -69,7 +70,7 @@ static void TestCode(const uint16_t* data, * L1: v2 <- v0 + v1 5. add-int v2, v0, v1 * return-void 7. return */ -TEST(DeadCodeElimination, AdditionAndConditionalJump) { +TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, @@ -131,7 +132,7 @@ TEST(DeadCodeElimination, AdditionAndConditionalJump) { * L3: v2 <- v1 + 4 11. add-int/lit16 v2, v1, #+4 * return 13. 
return-void */ -TEST(DeadCodeElimination, AdditionsAndInconditionalJumps) { +TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::CONST_4 | 1 << 8 | 1 << 12, diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index dfc363f9fd..6d0bdbe19b 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -24,6 +24,7 @@ #include "base/arena_containers.h" #include "base/bit_vector-inl.h" #include "base/stringprintf.h" +#include "handle_scope-inl.h" namespace art { @@ -594,6 +595,17 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) { } } } + + // Ensure that reference type instructions have reference type info. + if (instruction->GetType() == Primitive::kPrimNot) { + ScopedObjectAccess soa(Thread::Current()); + if (!instruction->GetReferenceTypeInfo().IsValid()) { + AddError(StringPrintf("Reference type instruction %s:%d does not have " + "valid reference type information.", + instruction->DebugName(), + instruction->GetId())); + } + } } static Primitive::Type PrimitiveKind(Primitive::Type type) { @@ -751,6 +763,14 @@ void SSAChecker::VisitPhi(HPhi* phi) { phi->GetId(), phi->GetRegNumber(), type_str.str().c_str())); + } else if (phi->GetType() == Primitive::kPrimNot) { + std::stringstream type_str; + type_str << other_phi->GetType(); + AddError(StringPrintf( + "Equivalent non-reference phi (%d) found for VReg %d with type: %s.", + phi->GetId(), + phi->GetRegNumber(), + type_str.str().c_str())); } else { ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true); if (!IsConstantEquivalent(phi, other_phi, &visited)) { @@ -901,4 +921,16 @@ void SSAChecker::VisitConstant(HConstant* instruction) { } } +void SSAChecker::VisitBoundType(HBoundType* instruction) { + VisitInstruction(instruction); + + ScopedObjectAccess soa(Thread::Current()); + if (!instruction->GetUpperBound().IsValid()) { + AddError(StringPrintf( + "%s %d does not have a valid upper bound RTI.", + instruction->DebugName(), + instruction->GetId())); + } +} + } // namespace art diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index d5ddbabc8c..2e16bfe245 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -128,6 +128,7 @@ class SSAChecker : public GraphChecker { void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE; void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE; void VisitConstant(HConstant* instruction) OVERRIDE; + void VisitBoundType(HBoundType* instruction) OVERRIDE; void HandleBooleanInput(HInstruction* instruction, size_t input_index); diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index fee56c7f9e..d10df4ce3f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -17,8 +17,6 @@ #include "graph_checker.h" #include "optimizing_unit_test.h" -#include "gtest/gtest.h" - namespace art { /** @@ -43,7 +41,6 @@ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { return graph; } - static void TestCode(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -61,8 +58,7 @@ static void TestCodeSSA(const uint16_t* data) { HGraph* graph = CreateCFG(&allocator, data); ASSERT_NE(graph, nullptr); - graph->BuildDominatorTree(); - graph->TransformToSsa(); + TransformToSsa(graph); SSAChecker ssa_checker(graph); 
ssa_checker.Run(); @@ -145,7 +141,9 @@ TEST(GraphChecker, BlockEndingWithNonBranchInstruction) { ASSERT_FALSE(graph_checker.IsValid()); } -TEST(SSAChecker, SSAPhi) { +class SSACheckerTest : public CommonCompilerTest {}; + +TEST_F(SSACheckerTest, SSAPhi) { // This code creates one Phi function during the conversion to SSA form. const uint16_t data[] = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index e9fdb84d1e..5f1328f545 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -30,6 +30,7 @@ #include "optimization.h" #include "reference_type_propagation.h" #include "register_allocator.h" +#include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" @@ -505,7 +506,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } else { StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId(); } - } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName) + } else if ((IsPass(SsaBuilder::kSsaBuilderPassName) || IsPass(HInliner::kInlinerPassName)) && (instruction->GetType() == Primitive::kPrimNot)) { ReferenceTypeInfo info = instruction->IsLoadClass() @@ -519,21 +520,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha; } else if (instruction->IsLoadClass()) { StartAttributeStream("klass") << "unresolved"; - } else if (instruction->IsNullConstant()) { + } else { // The NullConstant may be added to the graph during other passes that happen between // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner // doesn't run or doesn't inline anything, the NullConstant remains untyped. // So we should check NullConstants for validity only after reference type propagation. - // - // Note: The infrastructure to properly type NullConstants everywhere is to complex to add - // for the benefits. - StartAttributeStream("klass") << "not_set"; - DCHECK(!is_after_pass_ - || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)) - << " Expected a valid rti after reference type propagation"; - } else { - DCHECK(!is_after_pass_) - << "Expected a valid rti after reference type propagation"; + DCHECK(graph_in_bad_state_ || + (!is_after_pass_ && IsPass(SsaBuilder::kSsaBuilderPassName))) + << instruction->DebugName() << instruction->GetId() << " has invalid rti " + << (is_after_pass_ ? 
"after" : "before") << " pass " << pass_name_; } } if (disasm_info_ != nullptr) { diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index de60cf21aa..1f4eaf3cfd 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -21,14 +21,14 @@ #include "optimizing_unit_test.h" #include "side_effects_analysis.h" -#include "gtest/gtest.h" - namespace art { -TEST(GVNTest, LocalFieldElimination) { +class GVNTest : public CommonCompilerTest {}; + +TEST_F(GVNTest, LocalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -100,7 +100,7 @@ TEST(GVNTest, LocalFieldElimination) { ASSERT_EQ(different_offset->GetBlock(), block); ASSERT_EQ(use_after_kill->GetBlock(), block); - graph->TryBuildingSsa(); + TransformToSsa(graph); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -110,10 +110,10 @@ TEST(GVNTest, LocalFieldElimination) { ASSERT_EQ(use_after_kill->GetBlock(), block); } -TEST(GVNTest, GlobalFieldElimination) { +TEST_F(GVNTest, GlobalFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -182,7 +182,7 @@ TEST(GVNTest, GlobalFieldElimination) { 0)); join->AddInstruction(new (&allocator) HExit()); - graph->TryBuildingSsa(); + TransformToSsa(graph); SideEffectsAnalysis side_effects(graph); side_effects.Run(); GVNOptimization(graph, side_effects).Run(); @@ -193,10 +193,10 @@ TEST(GVNTest, GlobalFieldElimination) { ASSERT_TRUE(join->GetFirstInstruction()->IsExit()); } -TEST(GVNTest, LoopFieldElimination) { +TEST_F(GVNTest, LoopFieldElimination) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HGraph* graph = CreateGraph(&allocator); HBasicBlock* entry = new (&allocator) HBasicBlock(graph); @@ -288,7 +288,7 @@ TEST(GVNTest, LoopFieldElimination) { ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body); ASSERT_EQ(field_get_in_exit->GetBlock(), exit); - graph->TryBuildingSsa(); + TransformToSsa(graph); { SideEffectsAnalysis side_effects(graph); side_effects.Run(); @@ -316,10 +316,10 @@ TEST(GVNTest, LoopFieldElimination) { } // Test that inner loops affect the side effects of the outer loop. 
-TEST(GVNTest, LoopSideEffects) { +TEST_F(GVNTest, LoopSideEffects) { ArenaPool pool; ArenaAllocator allocator(&pool); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC(); @@ -364,7 +364,7 @@ TEST(GVNTest, LoopSideEffects) { inner_loop_exit->AddInstruction(new (&allocator) HGoto()); outer_loop_exit->AddInstruction(new (&allocator) HExit()); - graph->TryBuildingSsa(); + TransformToSsa(graph); ASSERT_TRUE(inner_loop_header->GetLoopInformation()->IsIn( *outer_loop_header->GetLoopInformation())); diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 19e6cbd314..eef6cef5f0 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -706,7 +706,6 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::LookupInfo(HLoopInf } } if (loop->IsDefinedOutOfTheLoop(instruction)) { - DCHECK(instruction->GetBlock()->Dominates(loop->GetPreHeader())); InductionInfo* info = CreateInvariantFetch(instruction); AssignInfo(loop, instruction, info); return info; diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 5de94f43c9..29a1845658 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -18,7 +18,6 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "gtest/gtest.h" #include "induction_var_analysis.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -28,7 +27,7 @@ namespace art { /** * Fixture class for the InductionVarAnalysis tests. */ -class InductionVarAnalysisTest : public testing::Test { +class InductionVarAnalysisTest : public CommonCompilerTest { public: InductionVarAnalysisTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -86,6 +85,7 @@ class InductionVarAnalysisTest : public testing::Test { constant0_ = graph_->GetIntConstant(0); constant1_ = graph_->GetIntConstant(1); constant100_ = graph_->GetIntConstant(100); + float_constant0_ = graph_->GetFloatConstant(0.0f); induc_ = new (&allocator_) HLocal(n); entry_->AddInstruction(induc_); entry_->AddInstruction(new (&allocator_) HStoreLocal(induc_, constant0_)); @@ -102,6 +102,7 @@ class InductionVarAnalysisTest : public testing::Test { basic_[d] = new (&allocator_) HLocal(d); entry_->AddInstruction(basic_[d]); loop_preheader_[d]->AddInstruction(new (&allocator_) HStoreLocal(basic_[d], constant0_)); + loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto()); HInstruction* load = new (&allocator_) HLoadLocal(basic_[d], Primitive::kPrimInt); loop_header_[d]->AddInstruction(load); HInstruction* compare = new (&allocator_) HLessThan(load, constant100_); @@ -156,8 +157,10 @@ class InductionVarAnalysisTest : public testing::Test { HInstruction* InsertArrayStore(HLocal* subscript, int d) { HInstruction* load = InsertInstruction( new (&allocator_) HLoadLocal(subscript, Primitive::kPrimInt), d); + // ArraySet is given a float value in order to avoid SsaBuilder typing + // it from the array's non-existent reference type info. return InsertInstruction(new (&allocator_) HArraySet( - parameter_, load, constant0_, Primitive::kPrimInt, 0), d); + parameter_, load, float_constant0_, Primitive::kPrimFloat, 0), d); } // Returns induction information of instruction in loop at depth d. 
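The GVN and induction-variable tests in this region all switch from asserting graph_->TryBuildingSsa() directly to a shared TransformToSsa(graph) helper, and their fixtures now derive from CommonCompilerTest: SSA building now runs reference type propagation, which needs a runtime and the mutator lock. The helper itself is not part of this diff (it presumably lives in optimizing_unit_test.h); a plausible reconstruction, inferred from the callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess call in inliner.cc further down, would be:

    // Hypothetical sketch, assuming ART's test headers; not the actual helper.
    #include "gtest/gtest.h"
    #include "handle_scope-inl.h"            // StackHandleScopeCollection
    #include "nodes.h"                       // HGraph, kBuildSsaSuccess
    #include "scoped_thread_state_change.h"  // ScopedObjectAccess

    inline void TransformToSsa(HGraph* graph) {
      // Reference type info touches mirror:: objects, so hold the mutator lock.
      ScopedObjectAccess soa(Thread::Current());
      StackHandleScopeCollection handles(soa.Self());
      EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess);
    }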
@@ -168,7 +171,7 @@ class InductionVarAnalysisTest : public testing::Test {
   // Performs InductionVarAnalysis (after proper set up).
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     iva_ = new (&allocator_) HInductionVarAnalysis(graph_);
     iva_->Run();
   }
@@ -187,6 +190,7 @@ class InductionVarAnalysisTest : public testing::Test {
   HInstruction* constant0_;
   HInstruction* constant1_;
   HInstruction* constant100_;
+  HInstruction* float_constant0_;
   HLocal* induc_;  // "vreg_n", the "k"
   HLocal* tmp_;    // "vreg_n+1"
   HLocal* dum_;    // "vreg_n+2"
@@ -212,7 +216,7 @@ TEST_F(InductionVarAnalysisTest, ProperLoopSetup) {
   //   ..
   // }
   BuildLoopNest(10);
-  ASSERT_TRUE(graph_->TryBuildingSsa());
+  TransformToSsa(graph_);
   ASSERT_EQ(entry_->GetLoopInformation(), nullptr);
   for (int d = 0; d < 1; d++) {
     ASSERT_EQ(loop_preheader_[d]->GetLoopInformation(),
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index 5c0bdd7c4c..eda9c01a01 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -16,7 +16,6 @@
 #include "base/arena_allocator.h"
 #include "builder.h"
-#include "gtest/gtest.h"
 #include "induction_var_analysis.h"
 #include "induction_var_range.h"
 #include "nodes.h"
@@ -29,7 +28,7 @@ using Value = InductionVarRange::Value;
 /**
  * Fixture class for the InductionVarRange tests.
  */
-class InductionVarRangeTest : public testing::Test {
+class InductionVarRangeTest : public CommonCompilerTest {
  public:
   InductionVarRangeTest()
       : pool_(),
@@ -113,7 +112,7 @@ class InductionVarRangeTest : public testing::Test {
   /** Constructs SSA and performs induction variable analysis. */
   void PerformInductionVarAnalysis() {
-    ASSERT_TRUE(graph_->TryBuildingSsa());
+    TransformToSsa(graph_);
     iva_->Run();
   }
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a4dcb3aeba..48d32999b7 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -33,6 +33,7 @@
 #include "reference_type_propagation.h"
 #include "register_allocator.h"
 #include "sharpening.h"
+#include "ssa_builder.h"
 #include "ssa_phi_elimination.h"
 #include "scoped_thread_state_change.h"
 #include "thread.h"
@@ -41,7 +42,14 @@ namespace art {
-static constexpr size_t kMaximumNumberOfHInstructions = 12;
+static constexpr size_t kMaximumNumberOfHInstructions = 32;
+
+// Limit the number of dex registers that we accumulate while inlining
+// to avoid creating a large number of nested environments.
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+
+// Avoid inlining within a huge method due to memory pressure.
+static constexpr size_t kMaximumCodeUnitSize = 4096;
 
 void HInliner::Run() {
   const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
@@ -49,6 +57,9 @@ void HInliner::Run() {
       || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
     return;
   }
+  if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
+    return;
+  }
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
@@ -215,6 +226,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method;
+  ArtMethod* actual_method = nullptr;
   if (invoke_instruction->IsInvokeStaticOrDirect()) {
     if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
       VLOG(compiler) << "Not inlining a String.<init> method";
@@ -226,9 +238,15 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
         : class_linker->FindDexCache(soa.Self(), *ref.dex_file);
     resolved_method = dex_cache->GetResolvedMethod(
         ref.dex_method_index, class_linker->GetImagePointerSize());
+    // actual_method == resolved_method for direct or static calls.
+    actual_method = resolved_method;
   } else {
     resolved_method = caller_compilation_unit_.GetDexCache().Get()->GetResolvedMethod(
         method_index, class_linker->GetImagePointerSize());
+    if (resolved_method != nullptr) {
+      // Check if we can statically find the method.
+      actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
+    }
   }
 
   if (resolved_method == nullptr) {
@@ -238,15 +256,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
     return false;
   }
 
-  if (invoke_instruction->IsInvokeStaticOrDirect()) {
-    return TryInline(invoke_instruction, resolved_method);
-  }
-
-  // Check if we can statically find the method.
-  ArtMethod* actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
   if (actual_method != nullptr) {
     return TryInline(invoke_instruction, actual_method);
   }
+  DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
   // Check if we can use an inline cache.
   ArtMethod* caller = graph_->GetArtMethod();
@@ -372,6 +385,18 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UN
 bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
+
+  // Check whether we're allowed to inline. The outermost compilation unit is the relevant
+  // dex file here (though the transitivity of an inline chain would allow checking the caller).
+  if (!compiler_driver_->MayInline(method->GetDexFile(),
+                                   outer_compilation_unit_.GetDexFile())) {
+    VLOG(compiler) << "Won't inline " << PrettyMethod(method) << " in "
+                   << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+                   << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+                   << method->GetDexFile()->GetLocation();
+    return false;
+  }
+
   uint32_t method_index = FindMethodIndexIn(
       method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
   if (method_index == DexFile::kDexNoIndex) {
@@ -514,7 +539,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
     return false;
   }
 
-  if (!callee_graph->TryBuildingSsa()) {
+  if (callee_graph->TryBuildingSsa(handles_) != kBuildSsaSuccess) {
     VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
                    << " could not be transformed to SSA";
     return false;
   }
@@ -549,14 +574,12 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
   // Run simple optimizations on the graph.
HDeadCodeElimination dce(callee_graph, stats_); HConstantFolding fold(callee_graph); - ReferenceTypePropagation type_propagation(callee_graph, handles_); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_); InstructionSimplifier simplify(callee_graph, stats_); IntrinsicsRecognizer intrinsics(callee_graph, compiler_driver_); HOptimization* optimizations[] = { &intrinsics, - &type_propagation, &sharpening, &simplify, &fold, @@ -578,6 +601,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, compiler_driver_, handles_, stats_, + total_number_of_dex_registers_ + code_item->registers_size_, depth_ + 1); inliner.Run(); number_of_instructions_budget += inliner.number_of_inlined_instructions_; @@ -609,6 +633,10 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, HReversePostOrderIterator it(*callee_graph); it.Advance(); // Past the entry block, it does not contain instructions that prevent inlining. size_t number_of_instructions = 0; + + bool can_inline_environment = + total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; + for (; !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsLoopHeader()) { @@ -622,10 +650,17 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, instr_it.Advance()) { if (number_of_instructions++ == number_of_instructions_budget) { VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) - << " could not be inlined because it is too big."; + << " is not inlined because its caller has reached" + << " its instruction budget limit."; return false; } HInstruction* current = instr_it.Current(); + if (!can_inline_environment && current->NeedsEnvironment()) { + VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file) + << " is not inlined because its caller has reached" + << " its environment budget limit."; + return false; + } if (current->IsInvokeInterface()) { // Disable inlining of interface calls. The cost in case of entering the @@ -677,42 +712,36 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, DCHECK_EQ(graph_, return_replacement->GetBlock()->GetGraph()); } - // When merging the graph we might create a new NullConstant in the caller graph which does - // not have the chance to be typed. We assign the correct type here so that we can keep the - // assertion that every reference has a valid type. This also simplifies checks along the way. - HNullConstant* null_constant = graph_->GetNullConstant(); - if (!null_constant->GetReferenceTypeInfo().IsValid()) { - ReferenceTypeInfo::TypeHandle obj_handle = - handles_->NewHandle(class_linker->GetClassRoot(ClassLinker::kJavaLangObject)); - null_constant->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(obj_handle, false /* is_exact */)); - } - // Check the integrity of reference types and run another type propagation if needed. - if ((return_replacement != nullptr) - && (return_replacement->GetType() == Primitive::kPrimNot)) { - if (!return_replacement->GetReferenceTypeInfo().IsValid()) { - // Make sure that we have a valid type for the return. We may get an invalid one when - // we inline invokes with multiple branches and create a Phi for the result. - // TODO: we could be more precise by merging the phi inputs but that requires - // some functionality from the reference type propagation. 
- DCHECK(return_replacement->IsPhi()); - size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - ReferenceTypeInfo::TypeHandle return_handle = - handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size)); - return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( - return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); - } + if (return_replacement != nullptr) { + if (return_replacement->GetType() == Primitive::kPrimNot) { + if (!return_replacement->GetReferenceTypeInfo().IsValid()) { + // Make sure that we have a valid type for the return. We may get an invalid one when + // we inline invokes with multiple branches and create a Phi for the result. + // TODO: we could be more precise by merging the phi inputs but that requires + // some functionality from the reference type propagation. + DCHECK(return_replacement->IsPhi()); + size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + ReferenceTypeInfo::TypeHandle return_handle = + handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size)); + return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( + return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); + } - if (do_rtp) { - // If the return type is a refinement of the declared type run the type propagation again. - ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); - ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); - if (invoke_rti.IsStrictSupertypeOf(return_rti) - || (return_rti.IsExact() && !invoke_rti.IsExact()) - || !return_replacement->CanBeNull()) { - ReferenceTypePropagation rtp_fixup(graph_, handles_); - rtp_fixup.Run(); + if (do_rtp) { + // If the return type is a refinement of the declared type run the type propagation again. + ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); + ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); + if (invoke_rti.IsStrictSupertypeOf(return_rti) + || (return_rti.IsExact() && !invoke_rti.IsExact()) + || !return_replacement->CanBeNull()) { + ReferenceTypePropagation(graph_, handles_).Run(); + } + } + } else if (return_replacement->IsInstanceOf()) { + if (do_rtp) { + // Inlining InstanceOf into an If may put a tighter bound on reference types. 
+ ReferenceTypePropagation(graph_, handles_).Run(); } } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 7b9fb73ccf..8de510ea37 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -40,13 +40,15 @@ class HInliner : public HOptimization { CompilerDriver* compiler_driver, StackHandleScopeCollection* handles, OptimizingCompilerStats* stats, - size_t depth = 0) + size_t total_number_of_dex_registers, + size_t depth) : HOptimization(outer_graph, kInlinerPassName, stats), outermost_graph_(outermost_graph), outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), codegen_(codegen), compiler_driver_(compiler_driver), + total_number_of_dex_registers_(total_number_of_dex_registers), depth_(depth), number_of_inlined_instructions_(0), handles_(handles) {} @@ -88,6 +90,7 @@ class HInliner : public HOptimization { const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; CompilerDriver* const compiler_driver_; + const size_t total_number_of_dex_registers_; const size_t depth_; size_t number_of_inlined_instructions_; StackHandleScopeCollection* const handles_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 67097deaeb..b90afb1d73 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -211,19 +211,6 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, // Try to replace a binary operation flanked by one UShr and one Shl with a bitfield rotation. bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. - const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return false; - } DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); HInstruction* left = op->GetLeft(); HInstruction* right = op->GetRight(); @@ -777,13 +764,6 @@ void InstructionSimplifierVisitor::VisitLessThanOrEqual(HLessThanOrEqual* condit void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { // Try to fold an HCompare into this HCondition. - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS64. - InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - if (instruction_set == kMips64) { - return; - } - HInstruction* left = condition->GetLeft(); HInstruction* right = condition->GetRight(); // We can only replace an HCondition which compares a Compare to 0. @@ -1268,19 +1248,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK_EQ(invoke->GetOriginalInvokeType(), InvokeType::kStatic); - // This simplification is currently supported on x86, x86_64, ARM and ARM64. - // TODO: Implement it for MIPS/64. - const InstructionSet instruction_set = GetGraph()->GetInstructionSet(); - switch (instruction_set) { - case kArm: - case kArm64: - case kThumb2: - case kX86: - case kX86_64: - break; - default: - return; - } HInstruction* value = invoke->InputAt(0); HInstruction* distance = invoke->InputAt(1); // Replace the invoke with an HRor. 
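With the per-ISA guards deleted above, TryReplaceWithRotate and SimplifyRotate now run for every back end (the MIPS/64 TODOs are gone, presumably because HRor support landed everywhere). The pattern they recognize is the classic two-shift rotation idiom; the identity being exploited, as a self-contained C++ check independent of ART's HInstruction classes:

    #include <cassert>
    #include <cstdint>

    uint32_t RotateRight(uint32_t x, uint32_t d) {
      d &= 31;  // rotation distances are taken modulo the register width
      return (x >> d) | (x << ((32 - d) & 31));
    }

    int main() {
      const uint32_t x = 0x12345678u;
      for (uint32_t d = 1; d < 32; ++d) {
        // The UShr/Shl/Or shape matched by TryReplaceWithRotate:
        uint32_t shift_pair = (x >> d) | (x << (32 - d));
        assert(shift_pair == RotateRight(x, d));
      }
      return 0;
    }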
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6a34b13320..6bbc751bee 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -49,6 +49,7 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio GetGraph()->GetIntConstant(mirror::Array::DataOffset(access_size).Uint32Value()); HArm64IntermediateAddress* address = new (arena) HArm64IntermediateAddress(array, offset, kNoDexPc); + address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); access->GetBlock()->InsertInstructionBefore(address, access); access->ReplaceInput(address, 0); // Both instructions must depend on GC to prevent any instruction that can diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 7127215c51..c6da9a3f5e 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -36,8 +36,8 @@ static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { switch (i) { case Intrinsics::kNone: return kInterface; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ - case Intrinsics::k ## Name: \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ + case Intrinsics::k ## Name: \ return IsStatic; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -52,8 +52,8 @@ static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsic switch (i) { case Intrinsics::kNone: return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ - case Intrinsics::k ## Name: \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ + case Intrinsics::k ## Name: \ return NeedsEnvironmentOrCache; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -63,6 +63,38 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) return kNeedsEnvironmentOrCache; } +// Function that returns whether an intrinsic has side effects. +static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kAllSideEffects; +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ + case Intrinsics::k ## Name: \ + return SideEffects; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kAllSideEffects; +} + +// Function that returns whether an intrinsic can throw exceptions. +static inline IntrinsicExceptions GetExceptions(Intrinsics i) { + switch (i) { + case Intrinsics::kNone: + return kCanThrow; +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ + case Intrinsics::k ## Name: \ + return Exceptions; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + } + return kCanThrow; +} + static Primitive::Type GetType(uint64_t data, bool is_op_size) { if (is_op_size) { switch (static_cast<OpSize>(data)) { @@ -248,7 +280,7 @@ static Intrinsics GetIntrinsic(InlineMethod method) { // Thread.currentThread. case kIntrinsicCurrentThread: - return Intrinsics::kThreadCurrentThread; + return Intrinsics::kThreadCurrentThread; // Memory.peek. 
case kIntrinsicPeek: @@ -473,7 +505,10 @@ void IntrinsicsRecognizer::Run() { << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile()) << invoke->DebugName(); } else { - invoke->SetIntrinsic(intrinsic, NeedsEnvironmentOrCache(intrinsic)); + invoke->SetIntrinsic(intrinsic, + NeedsEnvironmentOrCache(intrinsic), + GetSideEffects(intrinsic), + GetExceptions(intrinsic)); } } } @@ -487,7 +522,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { case Intrinsics::kNone: os << "None"; break; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ case Intrinsics::k ## Name: \ os << # Name; \ break; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index e459516e59..9f50d1814e 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -27,6 +27,9 @@ namespace art { class CompilerDriver; class DexFile; +// Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751 +static constexpr bool kRoundIsPlusPointFive = false; + // Recognize intrinsics from HInvoke nodes. class IntrinsicsRecognizer : public HOptimization { public: @@ -54,9 +57,9 @@ class IntrinsicVisitor : public ValueObject { switch (invoke->GetIntrinsic()) { case Intrinsics::kNone: return; -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ - case Intrinsics::k ## Name: \ - Visit ## Name(invoke); \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \ + case Intrinsics::k ## Name: \ + Visit ## Name(invoke); \ return; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -69,7 +72,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironment, SideEffects, Exceptions) \ virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } #include "intrinsics_list.h" diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index e8181bbb06..b1fbf28204 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -502,9 +502,6 @@ static void GenUnsafeGet(HInvoke* invoke, bool is_volatile, CodeGeneratorARM* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); ArmAssembler* assembler = codegen->GetAssembler(); Location base_loc = locations->InAt(1); Register base = base_loc.AsRegister<Register>(); // Object pointer. @@ -512,30 +509,67 @@ static void GenUnsafeGet(HInvoke* invoke, Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only. 
Location trg_loc = locations->Out(); - if (type == Primitive::kPrimLong) { - Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); - __ add(IP, base, ShifterOperand(offset)); - if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { - Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); - __ ldrexd(trg_lo, trg_hi, IP); - } else { - __ ldrd(trg_lo, Address(IP)); + switch (type) { + case Primitive::kPrimInt: { + Register trg = trg_loc.AsRegister<Register>(); + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + break; } - } else { - Register trg = trg_loc.AsRegister<Register>(); - __ ldr(trg, Address(base, offset)); - } - if (is_volatile) { - __ dmb(ISH); - } + case Primitive::kPrimNot: { + Register trg = trg_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateArrayLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + if (is_volatile) { + __ dmb(ISH); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + } + } else { + __ ldr(trg, Address(base, offset)); + if (is_volatile) { + __ dmb(ISH); + } + __ MaybeUnpoisonHeapReference(trg); + } + break; + } - if (type == Primitive::kPrimNot) { - codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc); + case Primitive::kPrimLong: { + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); + __ add(IP, base, ShifterOperand(offset)); + if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); + __ ldrexd(trg_lo, trg_hi, IP); + } else { + __ ldrd(trg_lo, Address(IP)); + } + if (is_volatile) { + __ dmb(ISH); + } + break; + } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); @@ -548,25 +582,30 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. 
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeGet(HInvoke* invoke) {
@@ -808,6 +847,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
   }
   // Prevent reordering with prior memory operations.
+  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
+  // latter allows a preceding load to be delayed past the STXR
+  // instruction below.
   __ dmb(ISH);
   __ add(tmp_ptr, base, ShifterOperand(offset));
@@ -825,8 +867,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
   Label loop_head;
   __ Bind(&loop_head);
+  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
+  // the reference stored in the object before attempting the CAS,
+  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
+  // implementation.
+  //
+  // Note that this code is not (yet) used when read barriers are
+  // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
+  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
   __ ldrex(tmp_lo, tmp_ptr);
-  // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
   __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
@@ -852,15 +901,17 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) {
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic does not always work when heap
-  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
-  // off temporarily as a quick fix.
+  // The UnsafeCASObject intrinsic is missing a read barrier, and
+  // therefore sometimes does not work as expected (b/25883050).
+  // Turn it off temporarily as a quick fix, until the read barrier is
+  // implemented (see TODO in GenCAS below).
   //
-  // TODO(rpl): Fix it and turn it back on.
+  // Also, the UnsafeCASObject intrinsic does not always work when heap
+  // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
+  // off temporarily as a quick fix (b/26204023).
   //
-  // TODO(rpl): Also, we should investigate whether we need a read
-  // barrier in the generated code.
-  if (kPoisonHeapReferences) {
+  // TODO(rpl): Fix these two issues and re-enable this intrinsic.
+ if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h index 127e9a4aa0..e01b6fffb8 100644 --- a/compiler/optimizing/intrinsics_arm.h +++ b/compiler/optimizing/intrinsics_arm.h @@ -40,7 +40,7 @@ class IntrinsicLocationsBuilderARM FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -67,7 +67,7 @@ class IntrinsicCodeGeneratorARM FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 6b34daadf0..81cab86c83 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -614,7 +614,10 @@ static void GenMathRound(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) { - CreateFPToIntPlusTempLocations(arena_, invoke); + // See intrinsics.h. + if (kRoundIsPlusPointFive) { + CreateFPToIntPlusTempLocations(arena_, invoke); + } } void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { @@ -622,7 +625,10 @@ void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { - CreateFPToIntPlusTempLocations(arena_, invoke); + // See intrinsics.h. + if (kRoundIsPlusPointFive) { + CreateFPToIntPlusTempLocations(arena_, invoke); + } } void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { @@ -1029,12 +1035,21 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Stlxr(tmp_32, value, MemOperand(tmp_ptr)); __ Cbnz(tmp_32, &loop_head); } else { - __ Dmb(InnerShareable, BarrierWrites); + // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction + // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST) + // one, as the latter allows a preceding load to be delayed past + // the STXR instruction below. + __ Dmb(InnerShareable, BarrierAll); __ Bind(&loop_head); - __ Ldxr(tmp_value, MemOperand(tmp_ptr)); - // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`? + // TODO: When `type == Primitive::kPrimNot`, add a read barrier for + // the reference stored in the object before attempting the CAS, + // similar to the one in the art::Unsafe_compareAndSwapObject JNI + // implementation. + // // Note that this code is not (yet) used when read barriers are // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject). 
+ DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier)); + __ Ldxr(tmp_value, MemOperand(tmp_ptr)); __ Cmp(tmp_value, expected); __ B(&exit_loop, ne); __ Stxr(tmp_32, value, MemOperand(tmp_ptr)); @@ -1057,15 +1072,17 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { CreateIntIntIntIntIntToInt(arena_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { - // The UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix. + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS below). // - // TODO(rpl): Fix it and turn it back on. + // Also, the UnsafeCASObject intrinsic does not always work when heap + // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it + // off temporarily as a quick fix (b/26204023). // - // TODO(rpl): Also, we should investigate whether we need a read - // barrier in the generated code. - if (kPoisonHeapReferences) { + // TODO(rpl): Fix these two issues and re-enable this intrinsic. + if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { return; } diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 4250ecf358..d47448a9c3 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -41,7 +41,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -65,7 +65,7 @@ class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 96f43a0f74..2e87546282 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -22,97 +22,97 @@ // environment. 
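The widened rows below feed the usual X-macro scheme: consumers such as GetSideEffects and GetExceptions in intrinsics.cc above define OPTIMIZING_INTRINSICS to select the columns they need and then re-include the list. A toy, self-contained version of the mechanism (the rows and flag values here are illustrative, not taken from the real table):

    #include <iostream>

    enum InvokeKind { kStatic, kDirect };
    enum EnvOrCache { kNeedsEnvironmentOrCache };
    enum SideEffectsKind { kNoSideEffects, kAllSideEffects };
    enum ExceptionKind { kNoThrow, kCanThrow };

    // Two stand-in rows in the five-column shape used below.
    #define EXAMPLE_INTRINSICS_LIST(V)                                           \
      V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
      V(StringCharAt, kDirect, kNeedsEnvironmentOrCache, kNoSideEffects, kCanThrow)

    // A consumer extracts one column per expansion, as GetExceptions does.
    #define PRINT_THROWS(Name, IsStatic, NeedsEnvOrCache, SideEffects, Exceptions) \
      std::cout << #Name << " can throw: "                                         \
                << (Exceptions == kCanThrow ? "yes" : "no") << "\n";

    int main() {
      EXAMPLE_INTRINSICS_LIST(PRINT_THROWS)
      return 0;
    }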
#define INTRINSICS_LIST(V) \ - V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache) \ - V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache) \ - V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache) \ - V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache) \ - V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache) \ - V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache) \ - V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \ - V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \ - V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache) \ - V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache) \ - V(LongReverse, kStatic, kNeedsEnvironmentOrCache) \ - V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache) \ - V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache) \ - V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache) \ - V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache) \ - V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache) \ - V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAbsInt, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache) \ - V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache) \ - V(MathCos, kStatic, kNeedsEnvironmentOrCache) \ - V(MathSin, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAcos, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAsin, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAtan, kStatic, kNeedsEnvironmentOrCache) \ - V(MathAtan2, kStatic, kNeedsEnvironmentOrCache) \ - V(MathCbrt, kStatic, kNeedsEnvironmentOrCache) \ - V(MathCosh, kStatic, kNeedsEnvironmentOrCache) \ - V(MathExp, kStatic, kNeedsEnvironmentOrCache) \ - V(MathExpm1, kStatic, kNeedsEnvironmentOrCache) \ - V(MathHypot, kStatic, kNeedsEnvironmentOrCache) \ - V(MathLog, kStatic, kNeedsEnvironmentOrCache) \ - V(MathLog10, kStatic, kNeedsEnvironmentOrCache) \ - V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache) \ - V(MathSinh, kStatic, kNeedsEnvironmentOrCache) \ - V(MathTan, kStatic, kNeedsEnvironmentOrCache) \ - V(MathTanh, kStatic, kNeedsEnvironmentOrCache) \ - V(MathSqrt, kStatic, kNeedsEnvironmentOrCache) \ - V(MathCeil, kStatic, kNeedsEnvironmentOrCache) \ - V(MathFloor, kStatic, kNeedsEnvironmentOrCache) \ - V(MathRint, kStatic, kNeedsEnvironmentOrCache) \ - V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache) \ - V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache) \ - V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache) \ - V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache) \ - V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPokeLongNative, 
kStatic, kNeedsEnvironmentOrCache) \ - V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache) \ - V(StringCharAt, kDirect, kNeedsEnvironmentOrCache) \ - V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache) \ - V(StringEquals, kDirect, kNeedsEnvironmentOrCache) \ - V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache) \ - V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache) \ - V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache) \ - V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache) \ - V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache) \ - V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache) \ - V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePut, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache) \ - V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache) \ - V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache) + V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(DoubleLongBitsToDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(FloatFloatToRawIntBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(ShortReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAbsDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAbsFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAbsLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAbsInt, kStatic, 
kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMinDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMinFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMinLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMinIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMaxDoubleDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMaxFloatFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMaxLongLong, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathMaxIntInt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathCos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathSin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAcos, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAsin, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAtan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathAtan2, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathCbrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathCosh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathExp, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathExpm1, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathHypot, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathLog, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathLog10, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathNextAfter, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathSinh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathTan, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathTanh, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathSqrt, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathCeil, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathFloor, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathRint, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathRoundDouble, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MathRoundFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(SystemArrayCopyChar, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(SystemArrayCopy, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(ThreadCurrentThread, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(MemoryPeekByte, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(MemoryPeekIntNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(MemoryPeekLongNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(MemoryPeekShortNative, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(MemoryPokeByte, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ + V(MemoryPokeIntNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ + V(MemoryPokeLongNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ + V(MemoryPokeShortNative, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kCanThrow) \ + V(StringCharAt, kDirect, 
kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(StringCompareTo, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(StringEquals, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(StringGetCharsNoCheck, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(StringIndexOf, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(StringIndexOfAfter, kDirect, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow) \ + V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(StringNewStringFromChars, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(StringNewStringFromString, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeCASInt, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeCASLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeCASObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeGet, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeGetVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeGetObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeGetObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeGetLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafeGetLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePut, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutObject, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutObjectOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutObjectVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutLong, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutLongOrdered, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(UnsafePutLongVolatile, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) \ + V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow) #endif // ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ #undef ART_COMPILER_OPTIMIZING_INTRINSICS_LIST_H_ // #define is only for lint. 
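[Note on intrinsics_list.h] The table above is an X-macro: every client defines OPTIMIZING_INTRINSICS to select the columns it needs and then expands INTRINSICS_LIST, which is why each per-architecture header in this change only grows two extra macro parameters. A self-contained sketch of the idiom with the new SideEffects and Exceptions columns (toy list and toy names, not the real table):

    enum ToySideEffects { kNoSideEffects, kReadSideEffects };
    enum ToyExceptions { kNoThrow, kCanThrow };

    // Two-row stand-in for INTRINSICS_LIST.
    #define TOY_INTRINSICS_LIST(V)                  \
      V(MathSqrt, kNoSideEffects, kNoThrow)         \
      V(StringCharAt, kReadSideEffects, kCanThrow)

    // A client extracts only the columns it cares about: here, one constant
    // per intrinsic recording whether the intrinsic can throw.
    #define DEFINE_CAN_THROW(Name, SideEffects, Exceptions) \
      constexpr bool k##Name##CanThrow = ((Exceptions) == kCanThrow);
    TOY_INTRINSICS_LIST(DEFINE_CAN_THROW)
    #undef DEFINE_CAN_THROW

    static_assert(!kMathSqrtCanThrow, "pure math intrinsic");
    static_assert(kStringCharAtCanThrow, "bounds check can throw");

The real consumer appears in the nodes.cc hunk further down: HInvoke::SetIntrinsic() maps the two new columns onto the instruction's side-effect set and can-throw flag, so passes such as GVN and LICM can reason about intrinsics precisely instead of assuming the worst.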
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 06fab616ad..bc126a2716 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -43,14 +43,18 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { return codegen_->GetGraph()->GetArena(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() { +inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const { return codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); } -inline bool IntrinsicCodeGeneratorMIPS::IsR6() { +inline bool IntrinsicCodeGeneratorMIPS::IsR6() const { return codegen_->GetInstructionSetFeatures().IsR6(); } +inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { + return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); +} + #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, @@ -162,7 +166,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* Register out_hi = locations->Out().AsRegisterPairHigh<Register>(); __ Mfc1(out_lo, in); - __ Mfhc1(out_hi, in); + __ MoveFromFpuHigh(out_hi, in); } else { Register out = locations->Out().AsRegister<Register>(); @@ -204,7 +208,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); __ Mtc1(in_lo, out); - __ Mthc1(in_hi, out); + __ MoveToFpuHigh(in_hi, out); } else { Register in = locations->InAt(0).AsRegister<Register>(); diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index 19ad5255d5..575a7d0a23 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -60,15 +60,16 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS - bool IsR2OrNewer(void); - bool IsR6(void); + bool IsR2OrNewer() const; + bool IsR6() const; + bool Is32BitFPU() const; private: MipsAssembler* GetAssembler(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 8aa7d9ff6f..8b45ea7c4f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1299,6 +1299,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (type == Primitive::kPrimLong) { __ Lld(out, TMP); } else { + // Note: We will need a read barrier here, when read barrier + // support is added to the MIPS64 back end. 
__ Ll(out, TMP); } __ Dsubu(out, out, expected); // If we didn't get the 'expected' diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 1481d24c9e..4137fbd1b6 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -60,7 +60,7 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index fd454d8322..677f2e9c81 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -720,6 +720,11 @@ void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { // Note that 32 bit x86 doesn't have the capability to inline MathRoundDouble, // as it needs 64 bit instructions. void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { + // See intrinsics.h. + if (!kRoundIsPlusPointFive) { + return; + } + // Do we have instruction support? if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { LocationSummary* locations = new (arena_) LocationSummary(invoke, @@ -2005,7 +2010,7 @@ static void GenUnsafePut(LocationSummary* locations, } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -2085,6 +2090,17 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -2136,6 +2152,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ PoisonHeapReference(value); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject). 
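[Note on the x86 GenUnsafePut hunk above] Replacing the hard-coded MFENCE with codegen->MemoryFence() lets the back end pick the cheapest full barrier for the target CPU. A plausible reading of the intent (an assumption based on the rename, not confirmed by this diff alone): a LOCKed read-modify-write of a dead stack slot has full-fence semantics on x86 and is often cheaper than MFENCE. A stand-alone sketch of the two strategies (GCC/Clang inline assembly, 32-bit x86; illustrative, not ART's actual helper):

    // Full memory barrier via MFENCE.
    inline void FenceMfence() {
      __asm__ __volatile__("mfence" ::: "memory");
    }

    // Full memory barrier via a LOCKed no-op add to the top of the stack.
    // LOCKed instructions order all earlier loads and stores against all
    // later ones, and this form avoids MFENCE's higher latency on many
    // microarchitectures.
    inline void FenceLockedAdd() {
      __asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory", "cc");
    }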
+ DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2145,11 +2168,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code __ setb(kZero, out.AsRegister<Register>()); __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>()); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value` has been moved to a temporary register, no need to diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index fefe9c6143..08bd197400 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index ce737e3f7e..690cf3d413 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -610,7 +610,10 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { - CreateSSE41FPToIntLocations(arena_, invoke, codegen_); + // See intrinsics.h. + if (kRoundIsPlusPointFive) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); + } } void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { @@ -657,7 +660,10 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { - CreateSSE41FPToIntLocations(arena_, invoke, codegen_); + // See intrinsics.h. + if (kRoundIsPlusPointFive) { + CreateSSE41FPToIntLocations(arena_, invoke, codegen_); + } } void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { @@ -2080,7 +2086,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool } if (is_volatile) { - __ mfence(); + codegen->MemoryFence(); } if (type == Primitive::kPrimNot) { @@ -2150,6 +2156,17 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). 
+ // Turn it off temporarily as a quick fix, until the read barrier is + // implemented. + // + // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // this intrinsic. + if (kEmitCompilerReadBarrier) { + return; + } + CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); } @@ -2200,6 +2217,13 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ PoisonHeapReference(CpuRegister(value_reg)); } + // TODO: Add a read barrier for the reference stored in the object + // before attempting the CAS, similar to the one in the + // art::Unsafe_compareAndSwapObject JNI implementation. + // + // Note that this code is not (yet) used when read barriers are + // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject). + DCHECK(!kEmitCompilerReadBarrier); __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg)); // LOCK CMPXCHG has full barrier semantics, and we don't need @@ -2209,11 +2233,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ setcc(kZero, out); __ movzxb(out, out); - // In the case of the `UnsafeCASObject` intrinsic, accessing an - // object in the heap with LOCK CMPXCHG does not require a read - // barrier, as we do not keep a reference to this heap location. - // However, if heap poisoning is enabled, we need to unpoison the - // values that were poisoned earlier. + // If heap poisoning is enabled, we need to unpoison the values + // that were poisoned earlier. if (kPoisonHeapReferences) { if (base_equals_value) { // `value_reg` has been moved to a temporary register, no need diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 6894e1b527..155ff6548b 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -61,7 +61,7 @@ class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { // Define visitor methods. -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" INTRINSICS_LIST(OPTIMIZING_INTRINSICS) diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 2bb769a430..2b63ec8971 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -16,7 +16,6 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "gtest/gtest.h" #include "licm.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -27,7 +26,7 @@ namespace art { /** * Fixture class for the LICM tests. */ -class LICMTest : public testing::Test { +class LICMTest : public CommonCompilerTest { public: LICMTest() : pool_(), allocator_(&pool_) { graph_ = CreateGraph(&allocator_); @@ -66,20 +65,21 @@ class LICMTest : public testing::Test { // Provide boiler-plate instructions. 
parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot); entry_->AddInstruction(parameter_); - constant_ = graph_->GetIntConstant(42); + int_constant_ = graph_->GetIntConstant(42); + float_constant_ = graph_->GetFloatConstant(42.0f); loop_preheader_->AddInstruction(new (&allocator_) HGoto()); loop_header_->AddInstruction(new (&allocator_) HIf(parameter_)); loop_body_->AddInstruction(new (&allocator_) HGoto()); + return_->AddInstruction(new (&allocator_) HReturnVoid()); exit_->AddInstruction(new (&allocator_) HExit()); } // Performs LICM optimizations (after proper set up). void PerformLICM() { - ASSERT_TRUE(graph_->TryBuildingSsa()); + TransformToSsa(graph_); SideEffectsAnalysis side_effects(graph_); side_effects.Run(); - LICM licm(graph_, side_effects); - licm.Run(); + LICM(graph_, side_effects).Run(); } // General building fields. @@ -96,7 +96,8 @@ class LICMTest : public testing::Test { HBasicBlock* exit_; HInstruction* parameter_; // "this" - HInstruction* constant_; + HInstruction* int_constant_; + HInstruction* float_constant_; }; // @@ -107,7 +108,7 @@ TEST_F(LICMTest, FieldHoisting) { BuildLoop(); // Populate the loop with instructions: set/get field with different types. - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, Primitive::kPrimLong, MemberOffset(10), @@ -119,7 +120,7 @@ TEST_F(LICMTest, FieldHoisting) { 0); loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); HInstruction* set_field = new (&allocator_) HInstanceFieldSet( - parameter_, constant_, Primitive::kPrimInt, MemberOffset(20), + parameter_, int_constant_, Primitive::kPrimInt, MemberOffset(20), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), dex_cache, 0); loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); @@ -134,7 +135,7 @@ TEST_F(LICMTest, NoFieldHoisting) { BuildLoop(); // Populate the loop with instructions: set/get field with same types. - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, Primitive::kPrimLong, MemberOffset(10), @@ -168,11 +169,13 @@ TEST_F(LICMTest, ArrayHoisting) { BuildLoop(); // Populate the loop with instructions: set/get array with different types. + // ArrayGet is typed as kPrimByte and ArraySet given a float value in order to + // avoid SsaBuilder's typing of ambiguous array operations from reference type info. HInstruction* get_array = new (&allocator_) HArrayGet( - parameter_, constant_, Primitive::kPrimLong, 0); + parameter_, int_constant_, Primitive::kPrimByte, 0); loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); HInstruction* set_array = new (&allocator_) HArraySet( - parameter_, constant_, constant_, Primitive::kPrimInt, 0); + parameter_, int_constant_, float_constant_, Primitive::kPrimShort, 0); loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); EXPECT_EQ(get_array->GetBlock(), loop_body_); @@ -186,11 +189,13 @@ TEST_F(LICMTest, NoArrayHoisting) { BuildLoop(); // Populate the loop with instructions: set/get array with same types. + // ArrayGet is typed as kPrimByte and ArraySet given a float value in order to + // avoid SsaBuilder's typing of ambiguous array operations from reference type info. 
HInstruction* get_array = new (&allocator_) HArrayGet( - parameter_, constant_, Primitive::kPrimLong, 0); + parameter_, int_constant_, Primitive::kPrimByte, 0); loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); HInstruction* set_array = new (&allocator_) HArraySet( - parameter_, get_array, constant_, Primitive::kPrimLong, 0); + parameter_, get_array, float_constant_, Primitive::kPrimByte, 0); loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); EXPECT_EQ(get_array->GetBlock(), loop_body_); diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index a059766e00..ed275b1544 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -29,13 +29,12 @@ #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" -#include "ssa_builder.h" #include "ssa_liveness_analysis.h" -#include "gtest/gtest.h" - namespace art { +class LinearizeTest : public CommonCompilerTest {}; + template <size_t number_of_blocks> static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) { ArenaPool pool; @@ -46,7 +45,7 @@ static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[numb bool graph_built = builder.BuildGraph(*item); ASSERT_TRUE(graph_built); - graph->TryBuildingSsa(); + TransformToSsa(graph); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); @@ -60,7 +59,7 @@ static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[numb } } -TEST(LinearizeTest, CFG1) { +TEST_F(LinearizeTest, CFG1) { // Structure of this graph (+ are back edges) // Block0 // | @@ -85,7 +84,7 @@ TEST(LinearizeTest, CFG1) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG2) { +TEST_F(LinearizeTest, CFG2) { // Structure of this graph (+ are back edges) // Block0 // | @@ -110,7 +109,7 @@ TEST(LinearizeTest, CFG2) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG3) { +TEST_F(LinearizeTest, CFG3) { // Structure of this graph (+ are back edges) // Block0 // | @@ -137,7 +136,7 @@ TEST(LinearizeTest, CFG3) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG4) { +TEST_F(LinearizeTest, CFG4) { /* Structure of this graph (+ are back edges) // Block0 // | @@ -167,7 +166,7 @@ TEST(LinearizeTest, CFG4) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG5) { +TEST_F(LinearizeTest, CFG5) { /* Structure of this graph (+ are back edges) // Block0 // | @@ -197,7 +196,7 @@ TEST(LinearizeTest, CFG5) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG6) { +TEST_F(LinearizeTest, CFG6) { // Block0 // | // Block1 @@ -223,7 +222,7 @@ TEST(LinearizeTest, CFG6) { TestCode(data, blocks); } -TEST(LinearizeTest, CFG7) { +TEST_F(LinearizeTest, CFG7) { // Structure of this graph (+ are back edges) // Block0 // | diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 7f67560692..926f9399a5 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -27,10 +27,10 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "gtest/gtest.h" - namespace art { +class LiveRangesTest : public CommonCompilerTest {}; + static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { HGraph* graph = CreateGraph(allocator); HGraphBuilder builder(graph); @@ -39,13 +39,13 @@ static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { // Suspend checks 
implementation may change in the future, and this test relies // on how instructions are ordered. RemoveSuspendChecks(graph); - graph->TryBuildingSsa(); + TransformToSsa(graph); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); return graph; } -TEST(LiveRangesTest, CFG1) { +TEST_F(LiveRangesTest, CFG1) { /* * Test the following snippet: * return 0; @@ -83,7 +83,7 @@ TEST(LiveRangesTest, CFG1) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, CFG2) { +TEST_F(LiveRangesTest, CFG2) { /* * Test the following snippet: * var a = 0; @@ -131,7 +131,7 @@ TEST(LiveRangesTest, CFG2) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, CFG3) { +TEST_F(LiveRangesTest, CFG3) { /* * Test the following snippet: * var a = 0; @@ -204,7 +204,7 @@ TEST(LiveRangesTest, CFG3) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, Loop1) { +TEST_F(LiveRangesTest, Loop1) { /* * Test the following snippet: * var a = 0; @@ -284,7 +284,7 @@ TEST(LiveRangesTest, Loop1) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, Loop2) { +TEST_F(LiveRangesTest, Loop2) { /* * Test the following snippet: * var a = 0; @@ -360,7 +360,7 @@ TEST(LiveRangesTest, Loop2) { ASSERT_TRUE(range->GetNext() == nullptr); } -TEST(LiveRangesTest, CFG4) { +TEST_F(LiveRangesTest, CFG4) { /* * Test the following snippet: * var a = 0; diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 9d7d0b6c67..7736eedae1 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -27,10 +27,10 @@ #include "prepare_for_register_allocation.h" #include "ssa_liveness_analysis.h" -#include "gtest/gtest.h" - namespace art { +class LivenessTest : public CommonCompilerTest {}; + static void DumpBitVector(BitVector* vector, std::ostream& buffer, size_t count, @@ -51,7 +51,7 @@ static void TestCode(const uint16_t* data, const char* expected) { const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); bool graph_built = builder.BuildGraph(*item); ASSERT_TRUE(graph_built); - graph->TryBuildingSsa(); + TransformToSsa(graph); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); std::unique_ptr<const X86InstructionSetFeatures> features_x86( @@ -75,7 +75,7 @@ static void TestCode(const uint16_t* data, const char* expected) { ASSERT_STREQ(expected, buffer.str().c_str()); } -TEST(LivenessTest, CFG1) { +TEST_F(LivenessTest, CFG1) { const char* expected = "Block 0\n" " live in: (0)\n" @@ -98,7 +98,7 @@ TEST(LivenessTest, CFG1) { TestCode(data, expected); } -TEST(LivenessTest, CFG2) { +TEST_F(LivenessTest, CFG2) { const char* expected = "Block 0\n" " live in: (0)\n" @@ -120,7 +120,7 @@ TEST(LivenessTest, CFG2) { TestCode(data, expected); } -TEST(LivenessTest, CFG3) { +TEST_F(LivenessTest, CFG3) { const char* expected = "Block 0\n" // entry block " live in: (000)\n" @@ -149,7 +149,7 @@ TEST(LivenessTest, CFG3) { TestCode(data, expected); } -TEST(LivenessTest, CFG4) { +TEST_F(LivenessTest, CFG4) { // var a; // if (0 == 0) { // a = 5; @@ -197,7 +197,7 @@ TEST(LivenessTest, CFG4) { TestCode(data, expected); } -TEST(LivenessTest, CFG5) { +TEST_F(LivenessTest, CFG5) { // var a = 0; // if (0 == 0) { // } else { @@ -242,7 +242,7 @@ TEST(LivenessTest, CFG5) { TestCode(data, expected); } -TEST(LivenessTest, Loop1) { +TEST_F(LivenessTest, Loop1) { // Simple loop with one preheader and one back edge. 
// var a = 0; // while (a == a) { @@ -288,7 +288,7 @@ TEST(LivenessTest, Loop1) { TestCode(data, expected); } -TEST(LivenessTest, Loop3) { +TEST_F(LivenessTest, Loop3) { // Test that the returned value stays live in a preceding loop. // var a = 0; // while (a == a) { @@ -335,7 +335,7 @@ TEST(LivenessTest, Loop3) { } -TEST(LivenessTest, Loop4) { +TEST_F(LivenessTest, Loop4) { // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. // var a = 0; @@ -387,7 +387,7 @@ TEST(LivenessTest, Loop4) { TestCode(data, expected); } -TEST(LivenessTest, Loop5) { +TEST_F(LivenessTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: @@ -443,7 +443,7 @@ TEST(LivenessTest, Loop5) { TestCode(data, expected); } -TEST(LivenessTest, Loop6) { +TEST_F(LivenessTest, Loop6) { // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2) const char* expected = @@ -494,7 +494,7 @@ TEST(LivenessTest, Loop6) { } -TEST(LivenessTest, Loop7) { +TEST_F(LivenessTest, Loop7) { // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = @@ -548,7 +548,7 @@ TEST(LivenessTest, Loop7) { TestCode(data, expected); } -TEST(LivenessTest, Loop8) { +TEST_F(LivenessTest, Loop8) { // var a = 0; // while (a == a) { // a = a + a; diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index adde00464b..2b313f6b81 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -119,10 +119,16 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { : ref_info_(ref_info), offset_(offset), index_(index), - declaring_class_def_index_(declaring_class_def_index) { + declaring_class_def_index_(declaring_class_def_index), + value_killed_by_loop_side_effects_(true) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); + if (ref_info->IsSingleton() && !IsArrayElement()) { + // Assume this location's value cannot be killed by loop side effects + // until proven otherwise. + value_killed_by_loop_side_effects_ = false; + } } ReferenceInfo* GetReferenceInfo() const { return ref_info_; } @@ -139,11 +145,22 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return index_ != nullptr; } + bool IsValueKilledByLoopSideEffects() const { + return value_killed_by_loop_side_effects_; + } + + void SetValueKilledByLoopSideEffects(bool val) { + value_killed_by_loop_side_effects_ = val; + } + private: ReferenceInfo* const ref_info_; // reference for instance/static field or array access. const size_t offset_; // offset of static/instance field. HInstruction* const index_; // index of an array element. const int16_t declaring_class_def_index_; // declaring class's def's dex index. + bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop + // side effects because this location is stored + // into inside a loop. 
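[Note on the HeapLocation change below] The new value_killed_by_loop_side_effects_ flag makes load-store elimination less conservative around loops: previously any write inside a loop invalidated every tracked heap value at the loop header, whereas now (see the LSEVisitor hunk that follows) a location whose base reference is a singleton keeps its pre-header value unless the loop actually stores into that very location. The rule, reduced to a sketch with simplified, illustrative names:

    struct Loc {
      bool is_singleton;             // base reference cannot alias anything else
      bool stored_into_inside_loop;  // some store inside the loop writes it
    };

    // May a load at the loop header reuse the value from the pre-header?
    bool ValueSurvivesLoop(const Loc& loc, bool loop_writes_anything) {
      if (!loop_writes_anything) return true;  // loop has no write effects
      if (!loc.is_singleton) return false;     // possible aliasing: give up
      return !loc.stored_into_inside_loop;     // singleton: tracked exactly
    }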
DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -370,13 +387,13 @@ class HeapLocationCollector : public HGraphVisitor { return heap_locations_[heap_location_idx]; } - void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { + HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) { if (field_info.IsVolatile()) { has_volatile_ = true; } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); } void VisitArrayAccess(HInstruction* array, HInstruction* index) { @@ -390,8 +407,11 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { - VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); + HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo()); has_heap_stores_ = true; + if (instruction->GetBlock()->GetLoopInformation() != nullptr) { + location->SetValueKilledByLoopSideEffects(true); + } } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { @@ -565,23 +585,26 @@ class LSEVisitor : public HGraphVisitor { HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); ArenaVector<HInstruction*>& pre_header_heap_values = heap_values_for_[pre_header->GetBlockId()]; + // Inherit the values from pre-header. + for (size_t i = 0; i < heap_values.size(); i++) { + heap_values[i] = pre_header_heap_values[i]; + } + // We do a single pass in reverse post order. For loops, use the side effects as a hint // to see if the heap values should be killed. if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) { - for (size_t i = 0; i < pre_header_heap_values.size(); i++) { - // heap value is killed by loop side effects, need to keep the last store. - KeepIfIsStore(pre_header_heap_values[i]); - } - if (kIsDebugBuild) { - // heap_values should all be kUnknownHeapValue that it is inited with. - for (size_t i = 0; i < heap_values.size(); i++) { - DCHECK_EQ(heap_values[i], kUnknownHeapValue); - } - } - } else { - // Inherit the values from pre-header. for (size_t i = 0; i < heap_values.size(); i++) { - heap_values[i] = pre_header_heap_values[i]; + HeapLocation* location = heap_location_collector_.GetHeapLocation(i); + ReferenceInfo* ref_info = location->GetReferenceInfo(); + if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) { + // heap value is killed by loop side effects (stored into directly, or due to + // aliasing). + KeepIfIsStore(pre_header_heap_values[i]); + heap_values[i] = kUnknownHeapValue; + } else { + // A singleton's field that's not stored into inside a loop is invariant throughout + // the loop. + } } } } @@ -683,21 +706,14 @@ class LSEVisitor : public HGraphVisitor { // Get the real heap value of the store. heap_value = store->InputAt(1); } - if ((heap_value != kUnknownHeapValue) && - // Keep the load due to possible I/F, J/D array aliasing. - // See b/22538329 for details. - (heap_value->GetType() == instruction->GetType())) { - removed_loads_.push_back(instruction); - substitute_instructions_for_loads_.push_back(heap_value); - TryRemovingNullCheck(instruction); - return; - } - - // Load isn't eliminated. if (heap_value == kUnknownHeapValue) { - // Put the load as the value into the HeapLocation. + // Load isn't eliminated. 
Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. heap_values[idx] = instruction; + } else { + removed_loads_.push_back(instruction); + substitute_instructions_for_loads_.push_back(heap_value); + TryRemovingNullCheck(instruction); } } @@ -751,6 +767,9 @@ class LSEVisitor : public HGraphVisitor { if (loop_info != nullptr) { // instruction is a store in the loop so the loop must do writes. DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + // If it's a singleton, IsValueKilledByLoopSideEffects() must be true. + DCHECK(!ref_info->IsSingleton() || + heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects()); if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 926bc156cf..8de9700250 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -198,10 +198,38 @@ void HGraph::ComputeDominanceInformation() { } } -void HGraph::TransformToSsa() { - DCHECK(!reverse_post_order_.empty()); - SsaBuilder ssa_builder(this); - ssa_builder.BuildSsa(); +BuildSsaResult HGraph::TryBuildingSsa(StackHandleScopeCollection* handles) { + BuildDominatorTree(); + + // The SSA builder requires loops to all be natural. Specifically, the dead phi + // elimination phase checks the consistency of the graph when doing a post-order + // visit for eliminating dead phis: a dead phi can only have loop header phi + // users remaining when being visited. + BuildSsaResult result = AnalyzeNaturalLoops(); + if (result != kBuildSsaSuccess) { + return result; + } + + // Precompute per-block try membership before entering the SSA builder, + // which needs the information to build catch block phis from values of + // locals at throwing instructions inside try blocks. + ComputeTryBlockInformation(); + + // Create the inexact Object reference type and store it in the HGraph. + ScopedObjectAccess soa(Thread::Current()); + ClassLinker* linker = Runtime::Current()->GetClassLinker(); + inexact_object_rti_ = ReferenceTypeInfo::Create( + handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)), + /* is_exact */ false); + + // Transforms graph to SSA form. + result = SsaBuilder(this, handles).BuildSsa(); + if (result != kBuildSsaSuccess) { + return result; + } + + in_ssa_form_ = true; + return kBuildSsaSuccess; } HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) { @@ -410,7 +438,7 @@ void HGraph::SimplifyCFG() { } } -bool HGraph::AnalyzeNaturalLoops() const { // Order does not matter. for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); @@ -418,16 +446,16 @@ if (block->IsCatchBlock()) { // TODO: Dealing with exceptional back edges could be tricky because // they only approximate the real control flow. Bail out for now. - return false; + return kBuildSsaFailThrowCatchLoop; } HLoopInformation* info = block->GetLoopInformation(); if (!info->Populate()) { // Abort if the loop is non-natural. We currently bail out in such cases.
- return false; + return kBuildSsaFailNonNaturalLoop; } } } - return true; + return kBuildSsaSuccess; } void HGraph::InsertConstant(HConstant* constant) { @@ -446,8 +474,13 @@ HNullConstant* HGraph::GetNullConstant(uint32_t dex_pc) { // id and/or any invariants the graph is assuming when adding new instructions. if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) { cached_null_constant_ = new (arena_) HNullConstant(dex_pc); + cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_); InsertConstant(cached_null_constant_); } + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(cached_null_constant_->GetReferenceTypeInfo().IsValid()); + } return cached_null_constant_; } @@ -777,6 +810,10 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user_record.GetInstruction()->RemoveEnvironmentUser(user_record.GetUseNode()); } +HInstruction::InstructionKind HInstruction::GetKind() const { + return GetKindInternal(); +} + HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { @@ -960,7 +997,7 @@ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ } -FOR_EACH_INSTRUCTION(DEFINE_ACCEPT) +FOR_EACH_CONCRETE_INSTRUCTION(DEFINE_ACCEPT) #undef DEFINE_ACCEPT @@ -2023,6 +2060,16 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { new_pre_header->SetTryCatchInformation(try_catch_info); } +static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (rti.IsValid()) { + DCHECK(upper_bound_rti.IsSupertypeOf(rti)) + << " upper_bound_rti: " << upper_bound_rti + << " rti: " << rti; + DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact()); + } +} + void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { if (kIsDebugBuild) { DCHECK_EQ(GetType(), Primitive::kPrimNot); @@ -2031,16 +2078,23 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { if (IsBoundType()) { // Having the test here spares us from making the method virtual just for // the sake of a DCHECK. 
- ReferenceTypeInfo upper_bound_rti = AsBoundType()->GetUpperBound(); - DCHECK(upper_bound_rti.IsSupertypeOf(rti)) - << " upper_bound_rti: " << upper_bound_rti - << " rti: " << rti; - DCHECK(!upper_bound_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes() || rti.IsExact()); + CheckAgainstUpperBound(rti, AsBoundType()->GetUpperBound()); } } reference_type_info_ = rti; } +void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) { + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(upper_bound.IsValid()); + DCHECK(!upper_bound_.IsValid()) << "Upper bound should only be set once."; + CheckAgainstUpperBound(GetReferenceTypeInfo(), upper_bound); + } + upper_bound_ = upper_bound; + upper_can_be_null_ = can_be_null; +} + ReferenceTypeInfo::ReferenceTypeInfo() : type_handle_(TypeHandle()), is_exact_(false) {} ReferenceTypeInfo::ReferenceTypeInfo(TypeHandle type_handle, bool is_exact) @@ -2087,12 +2141,31 @@ bool HInstruction::HasAnyEnvironmentUseBefore(HInstruction* other) { } void HInvoke::SetIntrinsic(Intrinsics intrinsic, - IntrinsicNeedsEnvironmentOrCache needs_env_or_cache) { + IntrinsicNeedsEnvironmentOrCache needs_env_or_cache, + IntrinsicSideEffects side_effects, + IntrinsicExceptions exceptions) { intrinsic_ = intrinsic; IntrinsicOptimizations opt(this); + + // Adjust method's side effects from intrinsic table. + switch (side_effects) { + case kNoSideEffects: SetSideEffects(SideEffects::None()); break; + case kReadSideEffects: SetSideEffects(SideEffects::AllReads()); break; + case kWriteSideEffects: SetSideEffects(SideEffects::AllWrites()); break; + case kAllSideEffects: SetSideEffects(SideEffects::AllExceptGCDependency()); break; + } + if (needs_env_or_cache == kNoEnvironmentOrCache) { opt.SetDoesNotNeedDexCache(); opt.SetDoesNotNeedEnvironment(); + } else { + // If we need an environment, that means there will be a call, which can trigger GC. + SetSideEffects(GetSideEffects().Union(SideEffects::CanTriggerGC())); + } + // Adjust method's exception status from intrinsic table. 
+ switch (exceptions) { + case kNoThrow: SetCanThrow(false); break; + case kCanThrow: SetCanThrow(true); break; } } @@ -2220,4 +2293,19 @@ HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* } } +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs) { + os << "[" + << " source=" << rhs.GetSource() + << " destination=" << rhs.GetDestination() + << " type=" << rhs.GetType() + << " instruction="; + if (rhs.GetInstruction() != nullptr) { + os << rhs.GetInstruction()->DebugName() << ' ' << rhs.GetInstruction()->GetId(); + } else { + os << "null"; + } + os << " ]"; + return os; +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 1f8ef4717c..fdb14fcb07 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -98,6 +98,13 @@ enum IfCondition { kCondAE, // >= }; +enum BuildSsaResult { + kBuildSsaFailNonNaturalLoop, + kBuildSsaFailThrowCatchLoop, + kBuildSsaFailAmbiguousArrayOp, + kBuildSsaSuccess, +}; + class HInstructionList : public ValueObject { public: HInstructionList() : first_instruction_(nullptr), last_instruction_(nullptr) {} @@ -143,6 +150,122 @@ class HInstructionList : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HInstructionList); }; +class ReferenceTypeInfo : ValueObject { + public: + typedef Handle<mirror::Class> TypeHandle; + + static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) { + // The constructor will check that the type_handle is valid. + return ReferenceTypeInfo(type_handle, is_exact); + } + + static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); } + + static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) { + return handle.GetReference() != nullptr; + } + + bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) { + return IsValidHandle(type_handle_); + } + + bool IsExact() const { return is_exact_; } + + bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsObjectClass(); + } + + bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsStringClass(); + } + + bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass(); + } + + bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsInterface(); + } + + bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsArrayClass(); + } + + bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsPrimitiveArray(); + } + + bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); + } + + bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + if (!IsExact()) return false; + if (!IsArrayClass()) return false; + return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + + bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + if (!IsExact()) return false; + if (!IsArrayClass()) return false; + if (!rti.IsArrayClass()) return false; + return 
GetTypeHandle()->GetComponentType()->IsAssignableFrom( + rti.GetTypeHandle()->GetComponentType()); + } + + Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } + + bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + DCHECK(rti.IsValid()); + return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + + bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + DCHECK(IsValid()); + DCHECK(rti.IsValid()); + return GetTypeHandle().Get() != rti.GetTypeHandle().Get() && + GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); + } + + // Returns true if the type information provide the same amount of details. + // Note that it does not mean that the instructions have the same actual type + // (because the type can be the result of a merge). + bool IsEqual(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { + if (!IsValid() && !rti.IsValid()) { + // Invalid types are equal. + return true; + } + if (!IsValid() || !rti.IsValid()) { + // One is valid, the other not. + return false; + } + return IsExact() == rti.IsExact() + && GetTypeHandle().Get() == rti.GetTypeHandle().Get(); + } + + private: + ReferenceTypeInfo(); + ReferenceTypeInfo(TypeHandle type_handle, bool is_exact); + + // The class of the object. + TypeHandle type_handle_; + // Whether or not the type is exact or a superclass of the actual type. + // Whether or not we have any information about this type. + bool is_exact_; +}; + +std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); + // Control-flow graph of a method. Contains a list of basic blocks. class HGraph : public ArenaObject<kArenaAllocGraph> { public: @@ -179,7 +302,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), - cached_current_method_(nullptr) { + cached_current_method_(nullptr), + inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) { blocks_.reserve(kDefaultNumberOfBlocks); } @@ -197,36 +321,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { void AddBlock(HBasicBlock* block); - // Try building the SSA form of this graph, with dominance computation and loop - // recognition. Returns whether it was successful in doing all these steps. - bool TryBuildingSsa() { - BuildDominatorTree(); - // The SSA builder requires loops to all be natural. Specifically, the dead phi - // elimination phase checks the consistency of the graph when doing a post-order - // visit for eliminating dead phis: a dead phi can only have loop header phi - // users remaining when being visited. - if (!AnalyzeNaturalLoops()) return false; - // Precompute per-block try membership before entering the SSA builder, - // which needs the information to build catch block phis from values of - // locals at throwing instructions inside try blocks. - ComputeTryBlockInformation(); - TransformToSsa(); - in_ssa_form_ = true; - return true; - } + // Try building the SSA form of this graph, with dominance computation and + // loop recognition. Returns a code specifying that it was successful or the + // reason for failure. 
+ BuildSsaResult TryBuildingSsa(StackHandleScopeCollection* handles); void ComputeDominanceInformation(); void ClearDominanceInformation(); void BuildDominatorTree(); - void TransformToSsa(); void SimplifyCFG(); void SimplifyCatchBlocks(); - // Analyze all natural loops in this graph. Returns false if one - // loop is not natural, that is the header does not dominate the - // back edge. - bool AnalyzeNaturalLoops() const; + // Analyze all natural loops in this graph. Returns a code indicating + // success or the reason for failure. The method will fail if a loop is not + // natural, that is, the header does not dominate a back edge, or if it is + // a throw-catch loop, i.e. the header is a catch block. + BuildSsaResult AnalyzeNaturalLoops() const; // Iterate over blocks to compute try block membership. Needs reverse post // order and loop information. @@ -487,6 +598,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // (such as when the superclass could not be found). ArtMethod* art_method_; + // Keep the RTI of inexact Object to avoid having to pass a stack handle + // collection pointer to passes which may create NullConstant. + ReferenceTypeInfo inexact_object_rti_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); @@ -1034,7 +1149,6 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ - M(Condition, BinaryOperation) \ M(CurrentMethod, Instruction) \ M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ @@ -1067,6 +1181,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(MemoryBarrier, Instruction) \ M(MonitorOperation, Instruction) \ M(Mul, BinaryOperation) \ + M(NativeDebugInfo, Instruction) \ M(Neg, UnaryOperation) \ M(NewArray, Instruction) \ M(NewInstance, Instruction) \ @@ -1141,27 +1256,34 @@ class HLoopInformationOutwardIterator : public ValueObject { FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \ FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) -#define FOR_EACH_INSTRUCTION(M) \ - FOR_EACH_CONCRETE_INSTRUCTION(M) \ +#define FOR_EACH_ABSTRACT_INSTRUCTION(M) \ + M(Condition, BinaryOperation) \ M(Constant, Instruction) \ M(UnaryOperation, Instruction) \ M(BinaryOperation, Instruction) \ M(Invoke, Instruction) +#define FOR_EACH_INSTRUCTION(M) \ + FOR_EACH_CONCRETE_INSTRUCTION(M) \ + FOR_EACH_ABSTRACT_INSTRUCTION(M) + #define FORWARD_DECLARATION(type, super) class H##type; FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) #undef FORWARD_DECLARATION #define DECLARE_INSTRUCTION(type) \ - InstructionKind GetKind() const OVERRIDE { return k##type; } \ + InstructionKind GetKindInternal() const OVERRIDE { return k##type; } \ const char* DebugName() const OVERRIDE { return #type; } \ - const H##type* As##type() const OVERRIDE { return this; } \ - H##type* As##type() OVERRIDE { return this; } \ bool InstructionTypeEquals(HInstruction* other) const OVERRIDE { \ return other->Is##type(); \ } \ void Accept(HGraphVisitor* visitor) OVERRIDE +#define DECLARE_ABSTRACT_INSTRUCTION(type) \ + bool Is##type() const { return As##type() != nullptr; } \ + const H##type* As##type() const { return this; } \ + H##type* As##type() { return this; } + template <typename T> class HUseList; template <typename T> @@ -1674,122 +1796,6 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { DISALLOW_COPY_AND_ASSIGN(HEnvironment); }; -class ReferenceTypeInfo : ValueObject 
{ - public: - typedef Handle<mirror::Class> TypeHandle; - - static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact) { - // The constructor will check that the type_handle is valid. - return ReferenceTypeInfo(type_handle, is_exact); - } - - static ReferenceTypeInfo CreateInvalid() { return ReferenceTypeInfo(); } - - static bool IsValidHandle(TypeHandle handle) SHARED_REQUIRES(Locks::mutator_lock_) { - return handle.GetReference() != nullptr; - } - - bool IsValid() const SHARED_REQUIRES(Locks::mutator_lock_) { - return IsValidHandle(type_handle_); - } - - bool IsExact() const { return is_exact_; } - - bool IsObjectClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsObjectClass(); - } - - bool IsStringClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsStringClass(); - } - - bool IsObjectArray() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return IsArrayClass() && GetTypeHandle()->GetComponentType()->IsObjectClass(); - } - - bool IsInterface() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsInterface(); - } - - bool IsArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsArrayClass(); - } - - bool IsPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsPrimitiveArray(); - } - - bool IsNonPrimitiveArrayClass() const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray(); - } - - bool CanArrayHold(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - if (!IsExact()) return false; - if (!IsArrayClass()) return false; - return GetTypeHandle()->GetComponentType()->IsAssignableFrom(rti.GetTypeHandle().Get()); - } - - bool CanArrayHoldValuesOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - if (!IsExact()) return false; - if (!IsArrayClass()) return false; - if (!rti.IsArrayClass()) return false; - return GetTypeHandle()->GetComponentType()->IsAssignableFrom( - rti.GetTypeHandle()->GetComponentType()); - } - - Handle<mirror::Class> GetTypeHandle() const { return type_handle_; } - - bool IsSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - DCHECK(rti.IsValid()); - return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); - } - - bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) { - DCHECK(IsValid()); - DCHECK(rti.IsValid()); - return GetTypeHandle().Get() != rti.GetTypeHandle().Get() && - GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get()); - } - - // Returns true if the type information provide the same amount of details. - // Note that it does not mean that the instructions have the same actual type - // (because the type can be the result of a merge). - bool IsEqual(ReferenceTypeInfo rti) SHARED_REQUIRES(Locks::mutator_lock_) { - if (!IsValid() && !rti.IsValid()) { - // Invalid types are equal. - return true; - } - if (!IsValid() || !rti.IsValid()) { - // One is valid, the other not. 
- return false; - } - return IsExact() == rti.IsExact() - && GetTypeHandle().Get() == rti.GetTypeHandle().Get(); - } - - private: - ReferenceTypeInfo(); - ReferenceTypeInfo(TypeHandle type_handle, bool is_exact); - - // The class of the object. - TypeHandle type_handle_; - // Whether or not the type is exact or a superclass of the actual type. - // Whether or not we have any information about this type. - bool is_exact_; -}; - -std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); - class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: HInstruction(SideEffects side_effects, uint32_t dex_pc) @@ -1862,6 +1868,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return false; } + virtual bool IsActualObject() const { + return GetType() == Primitive::kPrimNot; + } + void SetReferenceTypeInfo(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { @@ -1972,11 +1982,18 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void MoveBeforeFirstUserAndOutOfLoops(); #define INSTRUCTION_TYPE_CHECK(type, super) \ + bool Is##type() const; \ + const H##type* As##type() const; \ + H##type* As##type(); + + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK + +#define INSTRUCTION_TYPE_CHECK(type, super) \ bool Is##type() const { return (As##type() != nullptr); } \ virtual const H##type* As##type() const { return nullptr; } \ virtual H##type* As##type() { return nullptr; } - - FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK) + FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK // Returns whether the instruction can be moved within the graph. @@ -1999,7 +2016,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // 2) Their inputs are identical. bool Equals(HInstruction* other) const; - virtual InstructionKind GetKind() const = 0; + // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744) + // is adopted and implemented by our C++ compiler(s). For now, we need to hide + // the virtual function because the __attribute__((__pure__)) doesn't really + // apply its strong requirement to virtual functions, which prevents optimizations. + InstructionKind GetKind() const PURE; + virtual InstructionKind GetKindInternal() const = 0; virtual size_t ComputeHashCode() const { size_t result = GetKind(); @@ -2045,6 +2067,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { protected: virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0; virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0; + void SetSideEffects(SideEffects other) { side_effects_ = other; } private: void RemoveEnvironmentUser(HUseListNode<HEnvironment*>* use_node) { env_uses_.Remove(use_node); } @@ -2297,7 +2320,7 @@ class HConstant : public HExpression<0> { virtual uint64_t GetValueAsUint64() const = 0; - DECLARE_INSTRUCTION(Constant); + DECLARE_ABSTRACT_INSTRUCTION(Constant); private: DISALLOW_COPY_AND_ASSIGN(HConstant); @@ -2468,8 +2491,10 @@ class HTryBoundary : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition. class HDeoptimize : public HTemplateInstruction<1> { public: + // We set CanTriggerGC to prevent any intermediate address from being live + // at the point of the `HDeoptimize`. 
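+ // (An intermediate address, e.g. HArm64IntermediateAddress, is a raw + // pointer into an object rather than an actual object reference, so the + // GC cannot visit it; see IsActualObject().)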
HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cond); } @@ -2558,7 +2583,7 @@ class HUnaryOperation : public HExpression<1> { virtual HConstant* Evaluate(HIntConstant* x) const = 0; virtual HConstant* Evaluate(HLongConstant* x) const = 0; - DECLARE_INSTRUCTION(UnaryOperation); + DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation); private: DISALLOW_COPY_AND_ASSIGN(HUnaryOperation); @@ -2651,7 +2676,7 @@ class HBinaryOperation : public HExpression<2> { // one. Otherwise it returns null. HInstruction* GetLeastConstantLeft() const; - DECLARE_INSTRUCTION(BinaryOperation); + DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation); private: DISALLOW_COPY_AND_ASSIGN(HBinaryOperation); @@ -2679,7 +2704,7 @@ class HCondition : public HBinaryOperation { // `instruction`, and disregard moves in between. bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; - DECLARE_INSTRUCTION(Condition); + DECLARE_ABSTRACT_INSTRUCTION(Condition); virtual IfCondition GetCondition() const = 0; @@ -3228,7 +3253,8 @@ class HDoubleConstant : public HConstant { }; enum class Intrinsics { -#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) k ## Name, +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions) \ + k ## Name, #include "intrinsics_list.h" kNone, INTRINSICS_LIST(OPTIMIZING_INTRINSICS) @@ -3242,6 +3268,18 @@ enum IntrinsicNeedsEnvironmentOrCache { kNeedsEnvironmentOrCache // Intrinsic requires an environment or requires a dex cache. }; +enum IntrinsicSideEffects { + kNoSideEffects, // Intrinsic does not have any heap memory side effects. + kReadSideEffects, // Intrinsic may read heap memory. + kWriteSideEffects, // Intrinsic may write heap memory. + kAllSideEffects // Intrinsic may read or write heap memory, or trigger GC. +}; + +enum IntrinsicExceptions { + kNoThrow, // Intrinsic does not throw any exceptions. + kCanThrow // Intrinsic may throw exceptions. 
+}; + class HInvoke : public HInstruction { public: size_t InputCount() const OVERRIDE { return inputs_.size(); } @@ -3260,7 +3298,6 @@ class HInvoke : public HInstruction { Primitive::Type GetType() const OVERRIDE { return return_type_; } - uint32_t GetDexMethodIndex() const { return dex_method_index_; } const DexFile& GetDexFile() const { return GetEnvironment()->GetDexFile(); } @@ -3270,13 +3307,22 @@ class HInvoke : public HInstruction { return intrinsic_; } - void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache); + void SetIntrinsic(Intrinsics intrinsic, + IntrinsicNeedsEnvironmentOrCache needs_env_or_cache, + IntrinsicSideEffects side_effects, + IntrinsicExceptions exceptions); bool IsFromInlinedInvoke() const { return GetEnvironment()->IsFromInlinedInvoke(); } - bool CanThrow() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return can_throw_; } + + bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); } + + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_; + } uint32_t* GetIntrinsicOptimizations() { return &intrinsic_optimizations_; @@ -3288,7 +3334,7 @@ class HInvoke : public HInstruction { bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; } - DECLARE_INSTRUCTION(Invoke); + DECLARE_ABSTRACT_INSTRUCTION(Invoke); protected: HInvoke(ArenaAllocator* arena, @@ -3306,6 +3352,7 @@ class HInvoke : public HInstruction { return_type_(return_type), dex_method_index_(dex_method_index), original_invoke_type_(original_invoke_type), + can_throw_(true), intrinsic_(Intrinsics::kNone), intrinsic_optimizations_(0) { } @@ -3318,11 +3365,14 @@ class HInvoke : public HInstruction { inputs_[index] = input; } + void SetCanThrow(bool can_throw) { can_throw_ = can_throw; } + uint32_t number_of_arguments_; ArenaVector<HUserRecord<HInstruction*>> inputs_; const Primitive::Type return_type_; const uint32_t dex_method_index_; const InvokeType original_invoke_type_; + bool can_throw_; Intrinsics intrinsic_; // A magic word holding optimizations for intrinsics. See intrinsics.h. @@ -3960,8 +4010,10 @@ class HRem : public HBinaryOperation { class HDivZeroCheck : public HExpression<1> { public: + // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` + // constructor. HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4417,7 +4469,16 @@ class HPhi : public HInstruction { void RemoveInputAt(size_t index); Primitive::Type GetType() const OVERRIDE { return type_; } - void SetType(Primitive::Type type) { type_ = type; } + void SetType(Primitive::Type new_type) { + // Make sure that only valid type changes occur. The following are allowed: + // (1) int -> float/ref (primitive type propagation), + // (2) long -> double (primitive type propagation). 
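+ // These transitions happen when an equivalent of a phi is requested for a + // different type (see SsaBuilder::GetFloatOrDoubleEquivalent and + // SsaBuilder::GetReferenceTypeEquivalent); anything else indicates a bug, + // hence the DCHECK below.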
+ DCHECK(type_ == new_type || + (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) || + (type_ == Primitive::kPrimInt && new_type == Primitive::kPrimNot) || + (type_ == Primitive::kPrimLong && new_type == Primitive::kPrimDouble)); + type_ = new_type; + } bool CanBeNull() const OVERRIDE { return can_be_null_; } void SetCanBeNull(bool can_be_null) { can_be_null_ = can_be_null; } @@ -4473,8 +4534,10 @@ class HPhi : public HInstruction { class HNullCheck : public HExpression<1> { public: + // `HNullCheck` can trigger GC, as it may call the `NullPointerException` + // constructor. HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::None(), dex_pc) { + : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } @@ -4657,7 +4720,21 @@ class HArrayGet : public HExpression<2> { return false; } - void SetType(Primitive::Type type) { type_ = type; } + bool IsEquivalentOf(HArrayGet* other) const { + bool result = (GetDexPc() == other->GetDexPc()); + if (kIsDebugBuild && result) { + DCHECK_EQ(GetBlock(), other->GetBlock()); + DCHECK_EQ(GetArray(), other->GetArray()); + DCHECK_EQ(GetIndex(), other->GetIndex()); + if (Primitive::IsIntOrLongType(GetType())) { + DCHECK(Primitive::IsFloatingPointType(other->GetType())); + } else { + DCHECK(Primitive::IsFloatingPointType(GetType())); + DCHECK(Primitive::IsIntOrLongType(other->GetType())); + } + } + return result; + } HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } @@ -4781,8 +4858,10 @@ class HArrayLength : public HExpression<1> { class HBoundsCheck : public HExpression<2> { public: + // `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException` + // constructor. HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc) - : HExpression(index->GetType(), SideEffects::None(), dex_pc) { + : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { DCHECK(index->GetType() == Primitive::kPrimInt); SetRawInputAt(0, index); SetRawInputAt(1, length); @@ -4854,6 +4933,23 @@ class HSuspendCheck : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; +// Pseudo-instruction which provides the native debugger with mapping information. +// It ensures that we can generate line numbers and local variable information at +// this point. +class HNativeDebugInfo : public HTemplateInstruction<0> { + public: + explicit HNativeDebugInfo(uint32_t dex_pc) + : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {} + + bool NeedsEnvironment() const OVERRIDE { + return true; + } + + DECLARE_INSTRUCTION(NativeDebugInfo); + + private: + DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo); +}; + /** * Instruction to load a Class object. */ @@ -5347,24 +5443,19 @@ class HInstanceOf : public HExpression<2> { class HBoundType : public HExpression<1> { public: - // Constructs an HBoundType with the given upper_bound. - // Ensures that the upper_bound is valid. 
- HBoundType(HInstruction* input, - ReferenceTypeInfo upper_bound, - bool upper_can_be_null, - uint32_t dex_pc = kNoDexPc) + HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc), - upper_bound_(upper_bound), - upper_can_be_null_(upper_can_be_null), - can_be_null_(upper_can_be_null) { + upper_bound_(ReferenceTypeInfo::CreateInvalid()), + upper_can_be_null_(true), + can_be_null_(true) { DCHECK_EQ(input->GetType(), Primitive::kPrimNot); SetRawInputAt(0, input); - SetReferenceTypeInfo(upper_bound_); } - // GetUpper* should only be used in reference type propagation. + // {Get,Set}Upper* should only be used in reference type propagation. const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; } bool GetUpperCanBeNull() const { return upper_can_be_null_; } + void SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null); void SetCanBeNull(bool can_be_null) { DCHECK(upper_can_be_null_ || !can_be_null); @@ -5382,10 +5473,10 @@ class HBoundType : public HExpression<1> { // if (x instanceof ClassX) { // // upper_bound_ will be ClassX // } - const ReferenceTypeInfo upper_bound_; + ReferenceTypeInfo upper_bound_; // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this // is false then can_be_null_ cannot be true). - const bool upper_can_be_null_; + bool upper_can_be_null_; bool can_be_null_; DISALLOW_COPY_AND_ASSIGN(HBoundType); @@ -5534,8 +5625,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { } bool IsPending() const { - DCHECK(!source_.IsInvalid() || destination_.IsInvalid()); - return destination_.IsInvalid() && !source_.IsInvalid(); + DCHECK(source_.IsValid() || destination_.IsInvalid()); + return destination_.IsInvalid() && source_.IsValid(); } // True if this blocks a move from the given location. @@ -5579,6 +5670,8 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { HInstruction* instruction_; }; +std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs); + static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { @@ -5869,6 +5962,18 @@ inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) { return &lhs == &rhs; } +#define INSTRUCTION_TYPE_CHECK(type, super) \ + inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ + inline const H##type* HInstruction::As##type() const { \ + return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + } \ + inline H##type* HInstruction::As##type() { \ + return Is##type() ? 
static_cast<H##type*>(this) : nullptr; \ + } + + FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) +#undef INSTRUCTION_TYPE_CHECK + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h index 18405f2623..445cdab191 100644 --- a/compiler/optimizing/nodes_arm64.h +++ b/compiler/optimizing/nodes_arm64.h @@ -107,6 +107,7 @@ class HArm64IntermediateAddress : public HExpression<2> { bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } + bool IsActualObject() const OVERRIDE { return false; } HInstruction* GetBaseAddress() const { return InputAt(0); } HInstruction* GetOffset() const { return InputAt(1); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 831b626c4f..988e32bc1a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -17,6 +17,7 @@ #include "optimizing_compiler.h" #include <fstream> +#include <memory> #include <stdint.h> #ifdef ART_ENABLE_CODEGEN_arm64 @@ -52,6 +53,8 @@ #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" +#include "dwarf/method_debug_info.h" +#include "elf_writer_debug.h" #include "elf_writer_quick.h" #include "graph_checker.h" #include "graph_visualizer.h" @@ -60,6 +63,7 @@ #include "inliner.h" #include "instruction_simplifier.h" #include "intrinsics.h" +#include "jit/debugger_interface.h" #include "jit/jit_code_cache.h" #include "licm.h" #include "jni/quick/jni_compiler.h" @@ -68,6 +72,7 @@ #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator.h" +#include "oat_quick_method_header.h" #include "sharpening.h" #include "side_effects_analysis.h" #include "ssa_builder.h" @@ -426,8 +431,18 @@ static void MaybeRunInliner(HGraph* graph, if (!should_inline) { return; } + size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; HInliner* inliner = new (graph->GetArena()) HInliner( - graph, graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats); + graph, + graph, + codegen, + dex_compilation_unit, + dex_compilation_unit, + driver, + handles, + stats, + number_of_dex_registers, + /* depth */ 0); HOptimization* optimizations[] = { inliner }; RunOptimizations(optimizations, arraysize(optimizations), pass_observer); @@ -501,11 +516,8 @@ static void RunOptimizations(HGraph* graph, CompilerDriver* driver, OptimizingCompilerStats* stats, const DexCompilationUnit& dex_compilation_unit, - PassObserver* pass_observer) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScopeCollection handles(soa.Self()); - ScopedThreadSuspension sts(soa.Self(), kNative); - + PassObserver* pass_observer, + StackHandleScopeCollection* handles) { ArenaAllocator* arena = graph->GetArena(); HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName); @@ -522,29 +534,23 @@ static void RunOptimizations(HGraph* graph, LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); - ReferenceTypePropagation* type_propagation = - new (arena) 
ReferenceTypePropagation(graph, &handles); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, stats, "instruction_simplifier_after_types"); - InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_after_bce"); - InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( + InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier_before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, driver); HOptimization* optimizations1[] = { intrinsics, + sharpening, fold1, simplify1, - type_propagation, - sharpening, dce1, - simplify2 }; RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); - MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles); + MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); HOptimization* optimizations2[] = { // BooleanSimplifier depends on the InstructionSimplifier removing @@ -557,13 +563,13 @@ static void RunOptimizations(HGraph* graph, induction, bce, fold3, // evaluates code generated by dynamic bce - simplify3, + simplify2, lse, dce2, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - simplify4, + simplify3, }; RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); @@ -768,14 +774,29 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, } VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName(); + if (run_optimizations_) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + ScopedThreadSuspension sts(soa.Self(), kNative); + { PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer); - if (!graph->TryBuildingSsa()) { - // We could not transform the graph to SSA, bailout. - LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName() - << ": it contains a non natural loop"; - MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA); + BuildSsaResult result = graph->TryBuildingSsa(&handles); + if (result != kBuildSsaSuccess) { + switch (result) { + case kBuildSsaFailNonNaturalLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledNonNaturalLoop); + break; + case kBuildSsaFailThrowCatchLoop: + MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + break; + case kBuildSsaFailAmbiguousArrayOp: + MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + break; + case kBuildSsaSuccess: + UNREACHABLE(); + } pass_observer.SetGraphInBadState(); return nullptr; } @@ -786,7 +807,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver, compilation_stats_.get(), dex_compilation_unit, - &pass_observer); + &pass_observer, + &handles); codegen->CompileOptimized(code_allocator); } else { codegen->CompileBaseline(code_allocator); @@ -880,7 +902,11 @@ Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { bool IsCompilingWithCoreImage() { const std::string& image = Runtime::Current()->GetImageLocation(); - return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art"); + // TODO: This is under-approximating... 
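+ // Only the exact suffixes "core.art" and "core-optimizing.art" are + // matched, so other variants of the core image are not recognized here.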
+ if (EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art")) { + return true; + } + return false; } bool OptimizingCompiler::JitCompile(Thread* self, @@ -947,6 +973,39 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } + if (GetCompilerDriver()->GetCompilerOptions().GetGenerateDebugInfo()) { + const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); + const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); + CompiledMethod compiled_method( + GetCompilerDriver(), + codegen->GetInstructionSet(), + ArrayRef<const uint8_t>(code_allocator.GetMemory()), + codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(), + codegen->GetCoreSpillMask(), + codegen->GetFpuSpillMask(), + ArrayRef<const SrcMapElem>(), + ArrayRef<const uint8_t>(), // mapping_table. + ArrayRef<const uint8_t>(stack_map_data, stack_map_size), + ArrayRef<const uint8_t>(), // native_gc_map. + ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), + ArrayRef<const LinkerPatch>()); + dwarf::MethodDebugInfo method_debug_info { + dex_file, + class_def_idx, + method_idx, + access_flags, + code_item, + false, // deduped. + code_address, + code_address + code_allocator.GetSize(), + &compiled_method + }; + ArrayRef<const uint8_t> elf_file = dwarf::WriteDebugElfFileForMethod(method_debug_info); + CreateJITCodeEntryForAddress(code_address, + std::unique_ptr<const uint8_t[]>(elf_file.data()), + elf_file.size()); + } + return true; } diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 6296eedfb0..bca1632e31 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -38,7 +38,9 @@ enum MethodCompilationStat { kRemovedDeadInstruction, kRemovedNullCheck, kNotCompiledBranchOutsideMethodCode, - kNotCompiledCannotBuildSSA, + kNotCompiledNonNaturalLoop, + kNotCompiledThrowCatchLoop, + kNotCompiledAmbiguousArrayOp, kNotCompiledHugeMethod, kNotCompiledLargeMethodNoBranches, kNotCompiledMalformedOpcode, @@ -104,7 +106,9 @@ class OptimizingCompilerStats { case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break; case kRemovedNullCheck: name = "RemovedNullCheck"; break; case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break; - case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break; + case kNotCompiledNonNaturalLoop : name = "NotCompiledNonNaturalLoop"; break; + case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break; + case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break; case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break; case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break; case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break; diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 350f0b14ab..af3a005304 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -19,9 +19,13 @@ #include "nodes.h" #include "builder.h" +#include "common_compiler_test.h" #include "compiler/dex/pass_manager.h" #include "dex_file.h" #include "dex_instruction.h" +#include "handle_scope-inl.h" +#include "scoped_thread_state_change.h" +#include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "gtest/gtest.h" @@ -42,7 +46,6 @@ namespace art { #define 
FIVE_REGISTERS_CODE_ITEM(...) N_REGISTERS_CODE_ITEM(5, __VA_ARGS__) #define SIX_REGISTERS_CODE_ITEM(...) N_REGISTERS_CODE_ITEM(6, __VA_ARGS__) - LiveInterval* BuildInterval(const size_t ranges[][2], size_t number_of_ranges, ArenaAllocator* allocator, @@ -111,6 +114,12 @@ inline bool IsRemoved(HInstruction* instruction) { return instruction->GetBlock() == nullptr; } +inline void TransformToSsa(HGraph* graph) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScopeCollection handles(soa.Self()); + EXPECT_EQ(graph->TryBuildingSsa(&handles), kBuildSsaSuccess); +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index 176c50ce21..9d136f3ae6 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include <iostream> #include "parallel_move_resolver.h" @@ -172,7 +171,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { i = -1; } else if (required_swap != nullptr) { // A move is required to swap. We walk back the cycle to find the - // move by just returning from this `PerforrmMove`. + // move by just returning from this `PerformMove`. moves_[index]->ClearPending(destination); return required_swap; } @@ -201,7 +200,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else { for (MoveOperands* other_move : moves_) { if (other_move->Blocks(destination)) { - DCHECK(other_move->IsPending()); + DCHECK(other_move->IsPending()) << "move=" << *move << " other_move=" << *other_move; if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index d1770b75ab..63ef600756 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -96,7 +96,7 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) { if (can_merge_with_load_class && !load_class->HasUses()) { load_class->GetBlock()->RemoveInstruction(load_class); } - } else if (can_merge_with_load_class) { + } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) { // Pass the initialization duty to the `HLoadClass` instruction, // and remove the instruction from the graph. load_class->SetMustGenerateClinitCheck(true); diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc deleted file mode 100644 index bde54ee977..0000000000 --- a/compiler/optimizing/primitive_type_propagation.cc +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "primitive_type_propagation.h" - -#include "nodes.h" -#include "ssa_builder.h" - -namespace art { - -static Primitive::Type MergeTypes(Primitive::Type existing, Primitive::Type new_type) { - // We trust the verifier has already done the necessary checking. - switch (existing) { - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - case Primitive::kPrimNot: - return existing; - default: - // Phis are initialized with a void type, so if we are asked - // to merge with a void type, we should use the existing one. - return new_type == Primitive::kPrimVoid - ? existing - : HPhi::ToPhiType(new_type); - } -} - -// Re-compute and update the type of the instruction. Returns -// whether or not the type was changed. -bool PrimitiveTypePropagation::UpdateType(HPhi* phi) { - DCHECK(phi->IsLive()); - Primitive::Type existing = phi->GetType(); - - Primitive::Type new_type = existing; - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - Primitive::Type input_type = phi->InputAt(i)->GetType(); - new_type = MergeTypes(new_type, input_type); - } - phi->SetType(new_type); - - if (new_type == Primitive::kPrimDouble - || new_type == Primitive::kPrimFloat - || new_type == Primitive::kPrimNot) { - // If the phi is of floating point type, we need to update its inputs to that - // type. For inputs that are phis, we need to recompute their types. - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - HInstruction* input = phi->InputAt(i); - if (input->GetType() != new_type) { - HInstruction* equivalent = (new_type == Primitive::kPrimNot) - ? SsaBuilder::GetReferenceTypeEquivalent(input) - : SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type); - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - AddToWorklist(equivalent->AsPhi()); - } else if (equivalent == input) { - // The input has changed its type. It can be an input of other phis, - // so we need to put phi users in the work list. - AddDependentInstructionsToWorklist(equivalent); - } - } - } - } - - return existing != new_type; -} - -void PrimitiveTypePropagation::Run() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - ProcessWorklist(); -} - -void PrimitiveTypePropagation::VisitBasicBlock(HBasicBlock* block) { - if (block->IsLoopHeader()) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - if (phi->IsLive()) { - AddToWorklist(phi); - } - } - } else { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - // Eagerly compute the type of the phi, for quicker convergence. Note - // that we don't need to add users to the worklist because we are - // doing a reverse post-order visit, therefore either the phi users are - // non-loop phi and will be visited later in the visit, or are loop-phis, - // and they are already in the work list. 
- HPhi* phi = it.Current()->AsPhi(); - if (phi->IsLive()) { - UpdateType(phi); - } - } - } -} - -void PrimitiveTypePropagation::ProcessWorklist() { - while (!worklist_.empty()) { - HPhi* instruction = worklist_.back(); - worklist_.pop_back(); - if (UpdateType(instruction)) { - AddDependentInstructionsToWorklist(instruction); - } - } -} - -void PrimitiveTypePropagation::AddToWorklist(HPhi* instruction) { - DCHECK(instruction->IsLive()); - worklist_.push_back(instruction); -} - -void PrimitiveTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { - for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr && phi->IsLive() && phi->GetType() != instruction->GetType()) { - AddToWorklist(phi); - } - } -} - -} // namespace art diff --git a/compiler/optimizing/primitive_type_propagation.h b/compiler/optimizing/primitive_type_propagation.h deleted file mode 100644 index 212fcfc69f..0000000000 --- a/compiler/optimizing/primitive_type_propagation.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ -#define ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ - -#include "base/arena_containers.h" -#include "nodes.h" - -namespace art { - -// Compute and propagate primitive types of phis in the graph. 
-class PrimitiveTypePropagation : public ValueObject { - public: - explicit PrimitiveTypePropagation(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocPrimitiveTypePropagation)) { - worklist_.reserve(kDefaultWorklistSize); - } - - void Run(); - - private: - void VisitBasicBlock(HBasicBlock* block); - void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HInstruction* instruction); - bool UpdateType(HPhi* phi); - - HGraph* const graph_; - ArenaVector<HPhi*> worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; - - DISALLOW_COPY_AND_ASSIGN(PrimitiveTypePropagation); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_PRIMITIVE_TYPE_PROPAGATION_H_ diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index fea903d9cf..1c25e4824c 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -40,7 +40,6 @@ class RTPVisitor : public HGraphDelegateVisitor { throwable_class_handle_(throwable_class_handle), worklist_(worklist) {} - void VisitNullConstant(HNullConstant* null_constant) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; void VisitLoadClass(HLoadClass* load_class) OVERRIDE; void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE; @@ -57,6 +56,7 @@ class RTPVisitor : public HGraphDelegateVisitor { void VisitInvoke(HInvoke* instr) OVERRIDE; void VisitArrayGet(HArrayGet* instr) OVERRIDE; void VisitCheckCast(HCheckCast* instr) OVERRIDE; + void VisitBoundType(HBoundType* instr) OVERRIDE; void VisitNullCheck(HNullCheck* instr) OVERRIDE; void VisitFakeString(HFakeString* instr) OVERRIDE; void UpdateReferenceTypeInfo(HInstruction* instr, @@ -71,8 +71,6 @@ class RTPVisitor : public HGraphDelegateVisitor { ReferenceTypeInfo::TypeHandle string_class_handle_; ReferenceTypeInfo::TypeHandle throwable_class_handle_; ArenaVector<HInstruction*>* worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; }; ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, @@ -127,87 +125,6 @@ void ReferenceTypePropagation::ValidateTypes() { } } -static void CheckHasNoTypedInputs(HInstruction* root_instr) { - ArenaAllocatorAdapter<void> adapter = - root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation); - - ArenaVector<HPhi*> visited_phis(adapter); - ArenaVector<HInstruction*> worklist(adapter); - worklist.push_back(root_instr); - - while (!worklist.empty()) { - HInstruction* instr = worklist.back(); - worklist.pop_back(); - - if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) { - // Expect that both `root_instr` and its inputs have invalid RTI. - ScopedObjectAccess soa(Thread::Current()); - DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI."; - - // Insert all unvisited inputs to the worklist. - for (HInputIterator it(instr); !it.Done(); it.Advance()) { - HInstruction* input = it.Current(); - if (input->IsPhi()) { - if (ContainsElement(visited_phis, input->AsPhi())) { - continue; - } else { - visited_phis.push_back(input->AsPhi()); - } - } - worklist.push_back(input); - } - } else if (instr->IsNullConstant()) { - // The only input of `root_instr` allowed to have valid RTI because it is ignored. 
- } else { - LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI " - << instr->GetReferenceTypeInfo(); - UNREACHABLE(); - } - } -} - -template<typename Functor> -static void ForEachUntypedInstruction(HGraph* graph, Functor fn) { - ScopedObjectAccess soa(Thread::Current()); - for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) { - for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { - fn(instr); - } - } - for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instr = it.Current(); - if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { - fn(instr); - } - } - } -} - -void ReferenceTypePropagation::SetUntypedInstructionsToObject() { - // In some cases, the fix-point iteration will leave kPrimNot instructions with - // invalid RTI because bytecode does not provide enough typing information. - // Set the RTI of such instructions to Object. - // Example: - // MyClass a = null, b = null; - // while (a == null) { - // if (cond) { a = b; } else { b = a; } - // } - - if (kIsDebugBuild) { - // Test that if we are going to set RTI from invalid to Object, that - // instruction did not have any typed instructions in its def-use chain - // and therefore its type could not be inferred. - ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); }); - } - - ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false); - ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) { - instr->SetReferenceTypeInfo(obj_rti); - }); -} - void ReferenceTypePropagation::Run() { // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. @@ -217,7 +134,6 @@ void ReferenceTypePropagation::Run() { } ProcessWorklist(); - SetUntypedInstructionsToObject(); ValidateTypes(); } @@ -245,34 +161,6 @@ void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { BoundTypeForIfInstanceOf(block); } -// Create a bound type for the given object narrowing the type as much as possible. -// The BoundType upper values for the super type and can_be_null will be taken from -// load_class.GetLoadedClassRTI() and upper_can_be_null. -static HBoundType* CreateBoundType(ArenaAllocator* arena, - HInstruction* obj, - HLoadClass* load_class, - bool upper_can_be_null) - SHARED_REQUIRES(Locks::mutator_lock_) { - ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); - ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); - DCHECK(class_rti.IsValid()); - HBoundType* bound_type = new (arena) HBoundType(obj, class_rti, upper_can_be_null); - // Narrow the type as much as possible. 
- if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { - bound_type->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true)); - } else if (obj_rti.IsValid() && class_rti.IsSupertypeOf(obj_rti)) { - bound_type->SetReferenceTypeInfo(obj_rti); - } else { - bound_type->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); - } - if (upper_can_be_null) { - bound_type->SetCanBeNull(obj->CanBeNull()); - } - return bound_type; -} - // Check if we should create a bound type for the given object at the specified // position. Because of inlining and the fact we run RTP more than once and we // might have a HBoundType already. If we do, we should not create a new one. @@ -358,8 +246,8 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( object_class_handle_, /* is_exact */ true); if (ShouldCreateBoundType(insert_point, obj, object_rti, nullptr, notNullBlock)) { - bound_type = new (graph_->GetArena()) HBoundType( - obj, object_rti, /* bound_can_be_null */ false); + bound_type = new (graph_->GetArena()) HBoundType(obj); + bound_type->SetUpperBound(object_rti, /* bound_can_be_null */ false); if (obj->GetReferenceTypeInfo().IsValid()) { bound_type->SetReferenceTypeInfo(obj->GetReferenceTypeInfo()); } @@ -376,6 +264,75 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { } } +// Returns true if one of the patterns below has been recognized. If so, the +// InstanceOf instruction together with the true branch of `ifInstruction` will +// be returned using the out parameters. +// Recognized patterns: +// (1) patterns equivalent to `if (obj instanceof X)` +// (a) InstanceOf -> Equal to 1 -> If +// (b) InstanceOf -> NotEqual to 0 -> If +// (c) InstanceOf -> If +// (2) patterns equivalent to `if (!(obj instanceof X))` +// (a) InstanceOf -> Equal to 0 -> If +// (b) InstanceOf -> NotEqual to 1 -> If +// (c) InstanceOf -> BooleanNot -> If +static bool MatchIfInstanceOf(HIf* ifInstruction, + /* out */ HInstanceOf** instanceOf, + /* out */ HBasicBlock** trueBranch) { + HInstruction* input = ifInstruction->InputAt(0); + + if (input->IsEqual()) { + HInstruction* rhs = input->AsEqual()->GetConstantRight(); + if (rhs != nullptr) { + HInstruction* lhs = input->AsEqual()->GetLeastConstantLeft(); + if (lhs->IsInstanceOf() && rhs->IsIntConstant()) { + if (rhs->AsIntConstant()->IsOne()) { + // Case (1a) + *trueBranch = ifInstruction->IfTrueSuccessor(); + } else { + // Case (2a) + DCHECK(rhs->AsIntConstant()->IsZero()); + *trueBranch = ifInstruction->IfFalseSuccessor(); + } + *instanceOf = lhs->AsInstanceOf(); + return true; + } + } + } else if (input->IsNotEqual()) { + HInstruction* rhs = input->AsNotEqual()->GetConstantRight(); + if (rhs != nullptr) { + HInstruction* lhs = input->AsNotEqual()->GetLeastConstantLeft(); + if (lhs->IsInstanceOf() && rhs->IsIntConstant()) { + if (rhs->AsIntConstant()->IsZero()) { + // Case (1b) + *trueBranch = ifInstruction->IfTrueSuccessor(); + } else { + // Case (2b) + DCHECK(rhs->AsIntConstant()->IsOne()); + *trueBranch = ifInstruction->IfFalseSuccessor(); + } + *instanceOf = lhs->AsInstanceOf(); + return true; + } + } + } else if (input->IsInstanceOf()) { + // Case (1c) + *instanceOf = input->AsInstanceOf(); + *trueBranch = ifInstruction->IfTrueSuccessor(); + return true; + } else if (input->IsBooleanNot()) { + HInstruction* not_input = input->InputAt(0); + if 
(not_input->IsInstanceOf()) { + // Case (2c) + *instanceOf = not_input->AsInstanceOf(); + *trueBranch = ifInstruction->IfFalseSuccessor(); + return true; + } + } + + return false; +} + // Detects if `block` is the True block for the pattern // `if (x instanceof ClassX) { }` // If that's the case insert an HBoundType instruction to bound the type of `x` @@ -385,22 +342,11 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { if (ifInstruction == nullptr) { return; } - HInstruction* ifInput = ifInstruction->InputAt(0); - HInstruction* instanceOf = nullptr; - HBasicBlock* instanceOfTrueBlock = nullptr; - // The instruction simplifier has transformed: - // - `if (a instanceof A)` into an HIf with an HInstanceOf input - // - `if (!(a instanceof A)` into an HIf with an HBooleanNot input (which in turn - // has an HInstanceOf input) - // So we should not see the usual HEqual here. - if (ifInput->IsInstanceOf()) { - instanceOf = ifInput; - instanceOfTrueBlock = ifInstruction->IfTrueSuccessor(); - } else if (ifInput->IsBooleanNot() && ifInput->InputAt(0)->IsInstanceOf()) { - instanceOf = ifInput->InputAt(0); - instanceOfTrueBlock = ifInstruction->IfFalseSuccessor(); - } else { + // Try to recognize common `if (instanceof)` and `if (!instanceof)` patterns. + HInstanceOf* instanceOf = nullptr; + HBasicBlock* instanceOfTrueBlock = nullptr; + if (!MatchIfInstanceOf(ifInstruction, &instanceOf, &instanceOfTrueBlock)) { return; } @@ -435,11 +381,8 @@ void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { ScopedObjectAccess soa(Thread::Current()); HInstruction* insert_point = instanceOfTrueBlock->GetFirstInstruction(); if (ShouldCreateBoundType(insert_point, obj, class_rti, nullptr, instanceOfTrueBlock)) { - bound_type = CreateBoundType( - graph_->GetArena(), - obj, - load_class, - false /* InstanceOf ensures the object is not null. */); + bound_type = new (graph_->GetArena()) HBoundType(obj); + bound_type->SetUpperBound(class_rti, /* InstanceOf fails for null. */ false); instanceOfTrueBlock->InsertInstructionBefore(bound_type, insert_point); } else { // We already have a bound type on the position we would need to insert @@ -505,13 +448,6 @@ void RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr, SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); } -void RTPVisitor::VisitNullConstant(HNullConstant* instr) { - // TODO: The null constant could be bound contextually (e.g. based on return statements) - // to a more precise type. - instr->SetReferenceTypeInfo( - ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false)); -} - void RTPVisitor::VisitNewInstance(HNewInstance* instr) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), /* is_exact */ true); } @@ -523,7 +459,11 @@ void RTPVisitor::VisitNewArray(HNewArray* instr) { static mirror::Class* GetClassFromDexCache(Thread* self, const DexFile& dex_file, uint16_t type_idx) SHARED_REQUIRES(Locks::mutator_lock_) { mirror::DexCache* dex_cache = - Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, false); + Runtime::Current()->GetClassLinker()->FindDexCache(self, dex_file, /* allow_failure */ true); + if (dex_cache == nullptr) { + // Dex cache could not be found. This should only happen during gtests. + return nullptr; + } // Get type from dex cache assuming it was populated by the verifier. 
return dex_cache->GetResolvedType(type_idx); } @@ -540,17 +480,24 @@ void RTPVisitor::VisitParameterValue(HParameterValue* instr) { void RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info) { - // The field index is unknown only during tests. - if (instr->GetType() != Primitive::kPrimNot || info.GetFieldIndex() == kUnknownFieldIndex) { + if (instr->GetType() != Primitive::kPrimNot) { return; } ScopedObjectAccess soa(Thread::Current()); - ClassLinker* cl = Runtime::Current()->GetClassLinker(); - ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get()); - // TODO: There are certain cases where we can't resolve the field. - // b/21914925 is open to keep track of a repro case for this issue. - mirror::Class* klass = (field == nullptr) ? nullptr : field->GetType<false>(); + mirror::Class* klass = nullptr; + + // The field index is unknown only during tests. + if (info.GetFieldIndex() != kUnknownFieldIndex) { + ClassLinker* cl = Runtime::Current()->GetClassLinker(); + ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get()); + // TODO: There are certain cases where we can't resolve the field. + // b/21914925 is open to keep track of a repro case for this issue. + if (field != nullptr) { + klass = field->GetType<false>(); + } + } + SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } @@ -625,48 +572,66 @@ void RTPVisitor::VisitFakeString(HFakeString* instr) { instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(string_class_handle_, /* is_exact */ true)); } +void RTPVisitor::VisitBoundType(HBoundType* instr) { + ScopedObjectAccess soa(Thread::Current()); + + ReferenceTypeInfo class_rti = instr->GetUpperBound(); + if (class_rti.IsValid()) { + // Narrow the type as much as possible. + HInstruction* obj = instr->InputAt(0); + ReferenceTypeInfo obj_rti = obj->GetReferenceTypeInfo(); + if (class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) { + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ true)); + } else if (obj_rti.IsValid()) { + if (class_rti.IsSupertypeOf(obj_rti)) { + // Object type is more specific. + instr->SetReferenceTypeInfo(obj_rti); + } else { + // Upper bound is more specific. + instr->SetReferenceTypeInfo( + ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false)); + } + } else { + // Object not typed yet. Leave BoundType untyped for now rather than + // assign the type conservatively. + } + instr->SetCanBeNull(obj->CanBeNull() && instr->GetUpperCanBeNull()); + } else { + // The owner of the BoundType was already visited. If the class is unresolved, + // the BoundType should have been removed from the data flow and this method + // should remove it from the graph. + DCHECK(!instr->HasUses()); + instr->GetBlock()->RemoveInstruction(instr); + } +} + void RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { + ScopedObjectAccess soa(Thread::Current()); + HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass(); ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI(); - { - ScopedObjectAccess soa(Thread::Current()); - if (!class_rti.IsValid()) { - // He have loaded an unresolved class. Don't bother bounding the type. - return; - } + HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); + if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { + // The next instruction is not an uninitialized BoundType. This must be + // an RTP pass after SsaBuilder and we do not need to do anything. 
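+ // (On the first run, the CheckCast is followed by a BoundType whose upper + // bound is still invalid; it is initialized below.)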
+ return; } - HInstruction* obj = check_cast->InputAt(0); - HBoundType* bound_type = nullptr; - for (HUseIterator<HInstruction*> it(obj->GetUses()); !it.Done(); it.Advance()) { - HInstruction* user = it.Current()->GetUser(); - if (check_cast->StrictlyDominates(user)) { - if (bound_type == nullptr) { - ScopedObjectAccess soa(Thread::Current()); - if (ShouldCreateBoundType(check_cast->GetNext(), obj, class_rti, check_cast, nullptr)) { - bound_type = CreateBoundType( - GetGraph()->GetArena(), - obj, - load_class, - true /* CheckCast succeeds for nulls. */); - check_cast->GetBlock()->InsertInstructionAfter(bound_type, check_cast); - } else { - // Update nullability of the existing bound type, which may not have known - // that its input was not null when it was being created. - bound_type = check_cast->GetNext()->AsBoundType(); - bound_type->SetCanBeNull(obj->CanBeNull()); - // We already have a bound type on the position we would need to insert - // the new one. The existing bound type should dominate all the users - // (dchecked) so there's no need to continue. - break; - } - } - user->ReplaceInput(bound_type, it.Current()->GetIndex()); - } + DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0)); + + if (class_rti.IsValid()) { + // This is the first run of RTP and class is resolved. + bound_type->SetUpperBound(class_rti, /* CheckCast succeeds for nulls. */ true); + } else { + // This is the first run of RTP and class is unresolved. Remove the binding. + // The instruction itself is removed in VisitBoundType so as to not + // invalidate HInstructionIterator. + bound_type->ReplaceWith(bound_type->InputAt(0)); } } void ReferenceTypePropagation::VisitPhi(HPhi* phi) { - if (phi->GetType() != Primitive::kPrimNot) { + if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) { return; } @@ -824,6 +789,8 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { // NullConstant inputs are ignored during merging as they do not provide any useful information. // If all the inputs are NullConstants then the type of the phi will be set to Object. void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { + DCHECK(instr->IsLive()); + size_t input_count = instr->InputCount(); size_t first_input_index_not_null = 0; while (first_input_index_not_null < input_count && @@ -868,7 +835,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. 
bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { - DCHECK(instr->IsPhi() + DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive()) || instr->IsBoundType() || instr->IsNullCheck() || instr->IsArrayGet()); @@ -916,7 +883,7 @@ void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { HInstruction* user = it.Current()->GetUser(); - if (user->IsPhi() + if ((user->IsPhi() && user->AsPhi()->IsLive()) || user->IsBoundType() || user->IsNullCheck() || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) { diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 21789e1331..5c05592726 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -57,7 +57,6 @@ class ReferenceTypePropagation : public HOptimization { SHARED_REQUIRES(Locks::mutator_lock_); void ValidateTypes(); - void SetUntypedInstructionsToObject(); StackHandleScopeCollection* handles_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index d399bc2d7a..eb0419b6e0 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -1677,6 +1677,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { LocationSummary* locations = safepoint_position->GetLocations(); if ((current->GetType() == Primitive::kPrimNot) && current->GetParent()->HasSpillSlot()) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize); } @@ -1689,6 +1692,9 @@ void RegisterAllocator::ConnectSiblings(LiveInterval* interval) { maximum_number_of_live_fp_registers_); } if (current->GetType() == Primitive::kPrimNot) { + DCHECK(interval->GetDefinedBy()->IsActualObject()) + << interval->GetDefinedBy()->DebugName() + << "@" << safepoint_position->GetInstruction()->DebugName(); locations->SetRegisterBit(source.reg()); } break; diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 080f970756..306a457a9c 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -28,13 +28,13 @@ #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" -#include "gtest/gtest.h" - namespace art { // Note: the register allocator tests rely on the fact that constants have live // intervals and registers get allocated to them. +class RegisterAllocatorTest : public CommonCompilerTest {}; + static bool Check(const uint16_t* data) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -42,7 +42,7 @@ static bool Check(const uint16_t* data) { HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); - graph->TryBuildingSsa(); + TransformToSsa(graph); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); @@ -57,7 +57,7 @@ static bool Check(const uint16_t* data) { * Unit testing of RegisterAllocator::ValidateIntervals. 
Register allocator * tests are based on this validation method. */ -TEST(RegisterAllocatorTest, ValidateIntervals) { +TEST_F(RegisterAllocatorTest, ValidateIntervals) { ArenaPool pool; ArenaAllocator allocator(&pool); HGraph* graph = CreateGraph(&allocator); @@ -146,7 +146,7 @@ TEST(RegisterAllocatorTest, ValidateIntervals) { } } -TEST(RegisterAllocatorTest, CFG1) { +TEST_F(RegisterAllocatorTest, CFG1) { /* * Test the following snippet: * return 0; @@ -166,7 +166,7 @@ TEST(RegisterAllocatorTest, CFG1) { ASSERT_TRUE(Check(data)); } -TEST(RegisterAllocatorTest, Loop1) { +TEST_F(RegisterAllocatorTest, Loop1) { /* * Test the following snippet: * int a = 0; @@ -205,7 +205,7 @@ TEST(RegisterAllocatorTest, Loop1) { ASSERT_TRUE(Check(data)); } -TEST(RegisterAllocatorTest, Loop2) { +TEST_F(RegisterAllocatorTest, Loop2) { /* * Test the following snippet: * int a = 0; @@ -259,11 +259,11 @@ static HGraph* BuildSSAGraph(const uint16_t* data, ArenaAllocator* allocator) { HGraphBuilder builder(graph); const DexFile::CodeItem* item = reinterpret_cast<const DexFile::CodeItem*>(data); builder.BuildGraph(*item); - graph->TryBuildingSsa(); + TransformToSsa(graph); return graph; } -TEST(RegisterAllocatorTest, Loop3) { +TEST_F(RegisterAllocatorTest, Loop3) { /* * Test the following snippet: * int a = 0 @@ -326,7 +326,7 @@ TEST(RegisterAllocatorTest, Loop3) { ASSERT_EQ(phi_interval->GetRegister(), ret->InputAt(0)->GetLiveInterval()->GetRegister()); } -TEST(RegisterAllocatorTest, FirstRegisterUse) { +TEST_F(RegisterAllocatorTest, FirstRegisterUse) { const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8, @@ -366,7 +366,7 @@ TEST(RegisterAllocatorTest, FirstRegisterUse) { ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition()); } -TEST(RegisterAllocatorTest, DeadPhi) { +TEST_F(RegisterAllocatorTest, DeadPhi) { /* Test for a dead loop phi taking as back-edge input a phi that also has * this loop phi as input. Walking backwards in SsaDeadPhiElimination * does not solve the problem because the loop phi will be visited last. @@ -407,7 +407,7 @@ TEST(RegisterAllocatorTest, DeadPhi) { * that share the same register. It should split the interval it is currently * allocating for at the minimum lifetime position between the two inactive intervals. 
*/ -TEST(RegisterAllocatorTest, FreeUntil) { +TEST_F(RegisterAllocatorTest, FreeUntil) { const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -472,7 +472,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, HInstruction** input2) { HGraph* graph = CreateGraph(allocator); HBasicBlock* entry = new (allocator) HBasicBlock(graph); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue( @@ -539,7 +539,7 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, PhiHint) { +TEST_F(RegisterAllocatorTest, PhiHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HPhi *phi; @@ -624,7 +624,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, HInstruction** field, HInstruction** ret) { HGraph* graph = CreateGraph(allocator); - NullHandle<mirror::DexCache> dex_cache; + ScopedNullHandle<mirror::DexCache> dex_cache; HBasicBlock* entry = new (allocator) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); @@ -658,7 +658,7 @@ static HGraph* BuildFieldReturn(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, ExpectedInRegisterHint) { +TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *field, *ret; @@ -726,7 +726,7 @@ static HGraph* BuildTwoSubs(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, SameAsFirstInputHint) { +TEST_F(RegisterAllocatorTest, SameAsFirstInputHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *first_sub, *second_sub; @@ -795,7 +795,7 @@ static HGraph* BuildDiv(ArenaAllocator* allocator, return graph; } -TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { +TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { ArenaPool pool; ArenaAllocator allocator(&pool); HInstruction *div; @@ -819,7 +819,7 @@ TEST(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint) { // Test a bug in the register allocator, where allocating a blocked // register would lead to spilling an inactive interval at the wrong // position. -TEST(RegisterAllocatorTest, SpillInactive) { +TEST_F(RegisterAllocatorTest, SpillInactive) { ArenaPool pool; // Create a synthesized graph to please the register_allocator and diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 9e6cfbe653..f6bab8efcb 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -17,214 +17,11 @@ #include "ssa_builder.h" #include "nodes.h" -#include "primitive_type_propagation.h" +#include "reference_type_propagation.h" #include "ssa_phi_elimination.h" namespace art { -// Returns whether this is a loop header phi which was eagerly created but later -// found inconsistent due to the vreg being undefined in one of its predecessors. -// Such phi is marked dead and should be ignored until its removal in SsaPhiElimination. -static bool IsUndefinedLoopHeaderPhi(HPhi* phi) { - return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size(); -} - -/** - * A debuggable application may require to reviving phis, to ensure their - * associated DEX register is available to a debugger. This class implements - * the logic for statement (c) of the SsaBuilder (see ssa_builder.h). 
It - * also makes sure that phis with incompatible input types are not revived - * (statement (b) of the SsaBuilder). - * - * This phase must be run after detecting dead phis through the - * DeadPhiElimination phase, and before deleting the dead phis. - */ -class DeadPhiHandling : public ValueObject { - public: - explicit DeadPhiHandling(HGraph* graph) - : graph_(graph), worklist_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { - worklist_.reserve(kDefaultWorklistSize); - } - - void Run(); - - private: - void VisitBasicBlock(HBasicBlock* block); - void ProcessWorklist(); - void AddToWorklist(HPhi* phi); - void AddDependentInstructionsToWorklist(HPhi* phi); - bool UpdateType(HPhi* phi); - - HGraph* const graph_; - ArenaVector<HPhi*> worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; - - DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); -}; - -static bool HasConflictingEquivalent(HPhi* phi) { - if (phi->GetNext() == nullptr) { - return false; - } - HPhi* next = phi->GetNext()->AsPhi(); - if (next->GetRegNumber() == phi->GetRegNumber()) { - if (next->GetType() == Primitive::kPrimVoid) { - // We only get a void type for an equivalent phi we processed and found out - // it was conflicting. - return true; - } else { - // Go to the next phi, in case it is also an equivalent. - return HasConflictingEquivalent(next); - } - } - return false; -} - -bool DeadPhiHandling::UpdateType(HPhi* phi) { - if (phi->IsDead()) { - // Phi was rendered dead while waiting in the worklist because it was replaced - // with an equivalent. - return false; - } - - Primitive::Type existing = phi->GetType(); - - bool conflict = false; - Primitive::Type new_type = existing; - for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - HInstruction* input = phi->InputAt(i); - if (input->IsPhi() && input->AsPhi()->IsDead()) { - // We are doing a reverse post order visit of the graph, reviving - // phis that have environment uses and updating their types. If an - // input is a phi, and it is dead (because its input types are - // conflicting), this phi must be marked dead as well. - conflict = true; - break; - } - Primitive::Type input_type = HPhi::ToPhiType(input->GetType()); - - // The only acceptable transitions are: - // - From void to typed: first time we update the type of this phi. - // - From int to reference (or reference to int): the phi has to change - // to reference type. If the integer input cannot be converted to a - // reference input, the phi will remain dead. - if (new_type == Primitive::kPrimVoid) { - new_type = input_type; - } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { - if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) { - // If we already asked for an equivalent of the input phi, but that equivalent - // ended up conflicting, make this phi conflicting too. - conflict = true; - break; - } - HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); - if (equivalent == nullptr) { - conflict = true; - break; - } - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); - // We created a new phi, but that phi has the same inputs as the old phi. We - // add it to the worklist to ensure its inputs can also be converted to reference. - // If not, it will remain dead, and the algorithm will make the current phi dead - // as well. 
- equivalent->AsPhi()->SetLive(); - AddToWorklist(equivalent->AsPhi()); - } - } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { - new_type = Primitive::kPrimNot; - // Start over, we may request reference equivalents for the inputs of the phi. - i = -1; - } else if (new_type != input_type) { - conflict = true; - break; - } - } - - if (conflict) { - phi->SetType(Primitive::kPrimVoid); - phi->SetDead(); - return true; - } else if (existing == new_type) { - return false; - } - - DCHECK(phi->IsLive()); - phi->SetType(new_type); - - // There might exist a `new_type` equivalent of `phi` already. In that case, - // we replace the equivalent with the, now live, `phi`. - HPhi* equivalent = phi->GetNextEquivalentPhiWithSameType(); - if (equivalent != nullptr) { - // There cannot be more than two equivalents with the same type. - DCHECK(equivalent->GetNextEquivalentPhiWithSameType() == nullptr); - // If doing fix-point iteration, the equivalent might be in `worklist_`. - // Setting it dead will make UpdateType skip it. - equivalent->SetDead(); - equivalent->ReplaceWith(phi); - } - - return true; -} - -void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->AsPhi(); - if (IsUndefinedLoopHeaderPhi(phi)) { - DCHECK(phi->IsDead()); - continue; - } - if (phi->IsDead() && phi->HasEnvironmentUses()) { - phi->SetLive(); - if (block->IsLoopHeader()) { - // Loop phis must have a type to guarantee convergence of the algorithm. - DCHECK_NE(phi->GetType(), Primitive::kPrimVoid); - AddToWorklist(phi); - } else { - // Because we are doing a reverse post order visit, all inputs of - // this phi have been visited and therefore had their (initial) type set. - UpdateType(phi); - } - } - } -} - -void DeadPhiHandling::ProcessWorklist() { - while (!worklist_.empty()) { - HPhi* instruction = worklist_.back(); - worklist_.pop_back(); - // Note that the same equivalent phi can be added multiple times in the work list, if - // used by multiple phis. The first call to `UpdateType` will know whether the phi is - // dead or live. - if (instruction->IsLive() && UpdateType(instruction)) { - AddDependentInstructionsToWorklist(instruction); - } - } -} - -void DeadPhiHandling::AddToWorklist(HPhi* instruction) { - DCHECK(instruction->IsLive()); - worklist_.push_back(instruction); -} - -void DeadPhiHandling::AddDependentInstructionsToWorklist(HPhi* instruction) { - for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { - HPhi* phi = it.Current()->GetUser()->AsPhi(); - if (phi != nullptr && !phi->IsDead()) { - AddToWorklist(phi); - } - } -} - -void DeadPhiHandling::Run() { - for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - ProcessWorklist(); -} - void SsaBuilder::SetLoopHeaderPhiInputs() { for (size_t i = loop_headers_.size(); i > 0; --i) { HBasicBlock* block = loop_headers_[i - 1]; @@ -285,10 +82,11 @@ void SsaBuilder::EquivalentPhisCleanup() { HPhi* phi = it.Current()->AsPhi(); HPhi* next = phi->GetNextEquivalentPhiWithSameType(); if (next != nullptr) { - // Make sure we do not replace a live phi with a dead phi. A live phi has been - // handled by the type propagation phase, unlike a dead phi. + // Make sure we do not replace a live phi with a dead phi. A live phi + // has been handled by the type propagation phase, unlike a dead phi. 
if (next->IsLive()) { phi->ReplaceWith(next); + phi->SetDead(); } else { next->ReplaceWith(phi); } @@ -300,64 +98,7 @@ void SsaBuilder::EquivalentPhisCleanup() { } } -void SsaBuilder::BuildSsa() { - // 1) Visit in reverse post order. We need to have all predecessors of a block visited - // (with the exception of loops) in order to create the right environment for that - // block. For loops, we create phis whose inputs will be set in 2). - for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { - VisitBasicBlock(it.Current()); - } - - // 2) Set inputs of loop phis. - SetLoopHeaderPhiInputs(); - - // 3) Mark dead phis. This will mark phis that are only used by environments: - // at the DEX level, the type of these phis does not need to be consistent, but - // our code generator will complain if the inputs of a phi do not have the same - // type. The marking allows the type propagation to know which phis it needs - // to handle. We mark but do not eliminate: the elimination will be done in - // step 9). - SsaDeadPhiElimination dead_phis_for_type_propagation(GetGraph()); - dead_phis_for_type_propagation.MarkDeadPhis(); - - // 4) Propagate types of phis. At this point, phis are typed void in the general - // case, or float/double/reference when we created an equivalent phi. So we - // need to propagate the types across phis to give them a correct type. - PrimitiveTypePropagation type_propagation(GetGraph()); - type_propagation.Run(); - - // 5) When creating equivalent phis we copy the inputs of the original phi which - // may be improperly typed. This was fixed during the type propagation in 4) but - // as a result we may end up with two equivalent phis with the same type for - // the same dex register. This pass cleans them up. - EquivalentPhisCleanup(); - - // 6) Mark dead phis again. Step 4) may have introduced new phis. - // Step 5) might enable the death of new phis. - SsaDeadPhiElimination dead_phis(GetGraph()); - dead_phis.MarkDeadPhis(); - - // 7) Now that the graph is correctly typed, we can get rid of redundant phis. - // Note that we cannot do this phase before type propagation, otherwise - // we could get rid of phi equivalents, whose presence is a requirement for the - // type propagation phase. Note that this is to satisfy statement (a) of the - // SsaBuilder (see ssa_builder.h). - SsaRedundantPhiElimination redundant_phi(GetGraph()); - redundant_phi.Run(); - - // 8) Fix the type for null constants which are part of an equality comparison. - // We need to do this after redundant phi elimination, to ensure the only cases - // that we can see are reference comparison against 0. The redundant phi - // elimination ensures we do not see a phi taking two 0 constants in a HEqual - // or HNotEqual. - FixNullConstantType(); - - // 9) Make sure environments use the right phi "equivalent": a phi marked dead - // can have a phi equivalent that is not dead. We must therefore update - // all environment uses of the dead phi to use its equivalent. Note that there - // can be multiple phis for the same Dex register that are live (for example - // when merging constants), in which case it is OK for the environments - // to just reference one. 
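The new FixEnvironmentPhis below (like EquivalentPhisCleanup above) leans on ART's convention that phi equivalents for the same dex register are placed next to each other in a block's phi list, so a live replacement can be found with a short forward walk. A minimal standalone sketch of that lookup, under the assumption stated here that SimplePhi and FindLiveEquivalent are illustrative stand-ins and not ART's HPhi API:

struct SimplePhi {
  int reg_number;     // Dex register this phi represents.
  int type;           // Simplified stand-in for Primitive::Type.
  bool is_live;
  SimplePhi* next;    // Next phi in the block's phi list.
};

// Mirrors the GetNextEquivalentPhiWithSameType idiom: equivalents share a
// register number and are adjacent, so we scan forward while the register
// number matches and return the first live phi of the requested type.
SimplePhi* FindLiveEquivalent(SimplePhi* phi) {
  for (SimplePhi* cur = phi->next;
       cur != nullptr && cur->reg_number == phi->reg_number;
       cur = cur->next) {
    if (cur->type == phi->type && cur->is_live) {
      return cur;
    }
  }
  return nullptr;
}

Environment uses of a dead phi can then be redirected to the returned live equivalent, which is what the pass needs for deoptimization and try/catch to see values for all live vregs.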
+void SsaBuilder::FixEnvironmentPhis() { for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) { @@ -378,24 +119,375 @@ void SsaBuilder::BuildSsa() { phi->ReplaceWith(next); } } +} + +static void AddDependentInstructionsToWorklist(HInstruction* instruction, + ArenaVector<HPhi*>* worklist) { + // If `instruction` is a dead phi, type conflict was just identified. All its + // live phi users, and transitively users of those users, therefore need to be + // marked dead/conflicting too, so we add them to the worklist. Otherwise we + // add users whose type does not match and needs to be updated. + bool add_all_live_phis = instruction->IsPhi() && instruction->AsPhi()->IsDead(); + for (HUseIterator<HInstruction*> it(instruction->GetUses()); !it.Done(); it.Advance()) { + HInstruction* user = it.Current()->GetUser(); + if (user->IsPhi() && user->AsPhi()->IsLive()) { + if (add_all_live_phis || user->GetType() != instruction->GetType()) { + worklist->push_back(user->AsPhi()); + } + } + } +} + +// Find a candidate primitive type for `phi` by merging the type of its inputs. +// Return false if conflict is identified. +static bool TypePhiFromInputs(HPhi* phi) { + Primitive::Type common_type = phi->GetType(); + + for (HInputIterator it(phi); !it.Done(); it.Advance()) { + HInstruction* input = it.Current(); + if (input->IsPhi() && input->AsPhi()->IsDead()) { + // Phis are constructed live so if an input is a dead phi, it must have + // been made dead due to type conflict. Mark this phi conflicting too. + return false; + } - // 10) Deal with phis to guarantee liveness of phis in case of a debuggable - // application. This is for satisfying statement (c) of the SsaBuilder - // (see ssa_builder.h). - if (GetGraph()->IsDebuggable()) { - DeadPhiHandling dead_phi_handler(GetGraph()); - dead_phi_handler.Run(); + Primitive::Type input_type = HPhi::ToPhiType(input->GetType()); + if (common_type == input_type) { + // No change in type. + } else if (Primitive::Is64BitType(common_type) != Primitive::Is64BitType(input_type)) { + // Types are of different sizes, e.g. int vs. long. Must be a conflict. + return false; + } else if (Primitive::IsIntegralType(common_type)) { + // Previous inputs were integral, this one is not but is of the same size. + // This does not imply conflict since some bytecode instruction types are + // ambiguous. TypeInputsOfPhi will either type them or detect a conflict. + DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot); + common_type = input_type; + } else if (Primitive::IsIntegralType(input_type)) { + // Input is integral, common type is not. Same as in the previous case, if + // there is a conflict, it will be detected during TypeInputsOfPhi. + DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot); + } else { + // Combining float and reference types. Clearly a conflict. + DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) || + (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat)); + return false; + } } - // 11) Now that the right phis are used for the environments, and we - // have potentially revive dead phis in case of a debuggable application, - // we can eliminate phis we do not need. 
Regardless of the debuggable status, - // this phase is necessary for statement (b) of the SsaBuilder (see ssa_builder.h), - // as well as for the code generation, which does not deal with phis of conflicting + // We have found a candidate type for the phi. Set it and return true. We may + // still discover conflict whilst typing the individual inputs in TypeInputsOfPhi. + phi->SetType(common_type); + return true; +} + +// Replace inputs of `phi` to match its type. Return false if conflict is identified. +bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { + Primitive::Type common_type = phi->GetType(); + if (common_type == Primitive::kPrimVoid || Primitive::IsIntegralType(common_type)) { + // Phi either contains only other untyped phis (common_type == kPrimVoid), + // or `common_type` is integral and we do not need to retype ambiguous inputs + // because they are always constructed with the integral type candidate. + if (kIsDebugBuild) { + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (common_type == Primitive::kPrimVoid) { + DCHECK(input->IsPhi() && input->GetType() == Primitive::kPrimVoid); + } else { + DCHECK((input->IsPhi() && input->GetType() == Primitive::kPrimVoid) || + HPhi::ToPhiType(input->GetType()) == common_type); + } + } + } + // Inputs did not need to be replaced, hence no conflict. Report success. + return true; + } else { + DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type)); + for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { + HInstruction* input = phi->InputAt(i); + if (input->GetType() != common_type) { + // Input type does not match phi's type. Try to retype the input or + // generate a suitably typed equivalent. + HInstruction* equivalent = (common_type == Primitive::kPrimNot) + ? GetReferenceTypeEquivalent(input) + : GetFloatOrDoubleEquivalent(input, common_type); + if (equivalent == nullptr) { + // Input could not be typed. Report conflict. + return false; + } + // Make sure the input did not change its type and we do not need to + // update its users. + DCHECK_NE(input, equivalent); + + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + worklist->push_back(equivalent->AsPhi()); + } + } + } + // All inputs either matched the type of the phi or we successfully replaced + // them with a suitable equivalent. Report success. + return true; + } +} + +// Attempt to set the primitive type of `phi` to match its inputs. Return whether +// it was changed by the algorithm or not. +bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { + DCHECK(phi->IsLive()); + Primitive::Type original_type = phi->GetType(); + + // Try to type the phi in two stages: + // (1) find a candidate type for the phi by merging types of all its inputs, + // (2) try to type the phi's inputs to that candidate type. + // Either of these stages may detect a type conflict and fail, in which case + // we immediately abort. + if (!TypePhiFromInputs(phi) || !TypeInputsOfPhi(phi, worklist)) { + // Conflict detected. Mark the phi dead and return true because it changed. + phi->SetDead(); + return true; + } + + // Return true if the type of the phi has changed. 
+ return phi->GetType() != original_type; +} + +void SsaBuilder::RunPrimitiveTypePropagation() { + ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter()); + + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (block->IsLoopHeader()) { + for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + HPhi* phi = phi_it.Current()->AsPhi(); + if (phi->IsLive()) { + worklist.push_back(phi); + } + } + } else { + for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { + // Eagerly compute the type of the phi, for quicker convergence. Note + // that we don't need to add users to the worklist because we are + // doing a reverse post-order visit, therefore either the phi users are + // non-loop phi and will be visited later in the visit, or are loop-phis, + // and they are already in the work list. + HPhi* phi = phi_it.Current()->AsPhi(); + if (phi->IsLive()) { + UpdatePrimitiveType(phi, &worklist); + } + } + } + } + + ProcessPrimitiveTypePropagationWorklist(&worklist); + EquivalentPhisCleanup(); +} + +void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) { + // Process worklist + while (!worklist->empty()) { + HPhi* phi = worklist->back(); + worklist->pop_back(); + // The phi could have been made dead as a result of conflicts while in the + // worklist. If it is now dead, there is no point in updating its type. + if (phi->IsLive() && UpdatePrimitiveType(phi, worklist)) { + AddDependentInstructionsToWorklist(phi, worklist); + } + } +} + +static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { + Primitive::Type type = aget->GetType(); + DCHECK(Primitive::IsIntOrLongType(type)); + HArrayGet* next = aget->GetNext()->AsArrayGet(); + return (next != nullptr && next->IsEquivalentOf(aget)) ? next : nullptr; +} + +static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { + Primitive::Type type = aget->GetType(); + DCHECK(Primitive::IsIntOrLongType(type)); + DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr); + + HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet( + aget->GetArray(), + aget->GetIndex(), + type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble, + aget->GetDexPc()); + aget->GetBlock()->InsertInstructionAfter(equivalent, aget); + return equivalent; +} + +static Primitive::Type GetPrimitiveArrayComponentType(HInstruction* array) + SHARED_REQUIRES(Locks::mutator_lock_) { + ReferenceTypeInfo array_type = array->GetReferenceTypeInfo(); + DCHECK(array_type.IsPrimitiveArrayClass()); + return array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType(); +} + +bool SsaBuilder::FixAmbiguousArrayOps() { + if (ambiguous_agets_.empty() && ambiguous_asets_.empty()) { + return true; + } + + // The wrong ArrayGet equivalent may still have Phi uses coming from ArraySet + // uses (because they are untyped) and environment uses (if --debuggable). + // After resolving all ambiguous ArrayGets, we will re-run primitive type + // propagation on the Phis which need to be updated. + ArenaVector<HPhi*> worklist(GetGraph()->GetArena()->Adapter()); + + { + ScopedObjectAccess soa(Thread::Current()); + + for (HArrayGet* aget_int : ambiguous_agets_) { + HInstruction* array = aget_int->GetArray(); + if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) { + // RTP did not type the input array. Bail. 
+ return false; + } + + HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int); + Primitive::Type array_type = GetPrimitiveArrayComponentType(array); + DCHECK_EQ(Primitive::Is64BitType(aget_int->GetType()), Primitive::Is64BitType(array_type)); + + if (Primitive::IsIntOrLongType(array_type)) { + if (aget_float != nullptr) { + // There is a float/double equivalent. We must replace it and re-run + // primitive type propagation on all dependent instructions. + aget_float->ReplaceWith(aget_int); + aget_float->GetBlock()->RemoveInstruction(aget_float); + AddDependentInstructionsToWorklist(aget_int, &worklist); + } + } else { + DCHECK(Primitive::IsFloatingPointType(array_type)); + if (aget_float == nullptr) { + // This is a float/double ArrayGet but there were no typed uses which + // would create the typed equivalent. Create it now. + aget_float = CreateFloatOrDoubleEquivalentOfArrayGet(aget_int); + } + // Replace the original int/long instruction. Note that it may have phi + // uses, environment uses, as well as real uses (from untyped ArraySets). + // We need to re-run primitive type propagation on its dependent instructions. + aget_int->ReplaceWith(aget_float); + aget_int->GetBlock()->RemoveInstruction(aget_int); + AddDependentInstructionsToWorklist(aget_float, &worklist); + } + } + + // Set a flag stating that types of ArrayGets have been resolved. Requesting + // equivalent of the wrong type with GetFloatOrDoubleEquivalentOfArrayGet + // will fail from now on. + agets_fixed_ = true; + + for (HArraySet* aset : ambiguous_asets_) { + HInstruction* array = aset->GetArray(); + if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) { + // RTP did not type the input array. Bail. + return false; + } + + HInstruction* value = aset->GetValue(); + Primitive::Type value_type = value->GetType(); + Primitive::Type array_type = GetPrimitiveArrayComponentType(array); + DCHECK_EQ(Primitive::Is64BitType(value_type), Primitive::Is64BitType(array_type)); + + if (Primitive::IsFloatingPointType(array_type)) { + if (!Primitive::IsFloatingPointType(value_type)) { + DCHECK(Primitive::IsIntegralType(value_type)); + // Array elements are floating-point but the value has not been replaced + // with its floating-point equivalent. The replacement must always + // succeed in code validated by the verifier. + HInstruction* equivalent = GetFloatOrDoubleEquivalent(value, array_type); + DCHECK(equivalent != nullptr); + aset->ReplaceInput(equivalent, /* input_index */ 2); + if (equivalent->IsPhi()) { + // Returned equivalent is a phi which may not have had its inputs + // replaced yet. We need to run primitive type propagation on it. + worklist.push_back(equivalent->AsPhi()); + } + } + } else { + // Array elements are integral and the value assigned to it initially + // was integral too. Nothing to do. + DCHECK(Primitive::IsIntegralType(array_type)); + DCHECK(Primitive::IsIntegralType(value_type)); + } + } + } + + if (!worklist.empty()) { + ProcessPrimitiveTypePropagationWorklist(&worklist); + EquivalentPhisCleanup(); + } + + return true; +} + +BuildSsaResult SsaBuilder::BuildSsa() { + // 1) Visit in reverse post order. We need to have all predecessors of a block + // visited (with the exception of loops) in order to create the right environment + // for that block. For loops, we create phis whose inputs will be set in 2). + for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) { + VisitBasicBlock(it.Current()); + } + + // 2) Set inputs of loop header phis. 
+ SetLoopHeaderPhiInputs(); + + // 3) Propagate types of phis. At this point, phis are typed void in the general + // case, or float/double/reference if we created an equivalent phi. So we need + // to propagate the types across phis to give them a correct type. If a type + // conflict is detected in this stage, the phi is marked dead. + RunPrimitiveTypePropagation(); + + // 4) Now that the correct primitive types have been assigned, we can get rid + // of redundant phis. Note that we cannot do this phase before type propagation, + // otherwise we could get rid of phi equivalents, whose presence is a requirement + // for the type propagation phase. Note that this is to satisfy statement (a) + // of the SsaBuilder (see ssa_builder.h). + SsaRedundantPhiElimination(GetGraph()).Run(); + + // 5) Fix the type for null constants which are part of an equality comparison. + // We need to do this after redundant phi elimination, to ensure the only cases + // that we can see are reference comparison against 0. The redundant phi + // elimination ensures we do not see a phi taking two 0 constants in a HEqual + // or HNotEqual. + FixNullConstantType(); + + // 6) Compute type of reference type instructions. The pass assumes that + // NullConstant has been fixed up. + ReferenceTypePropagation(GetGraph(), handles_).Run(); + + // 7) Step 1) duplicated ArrayGet instructions with ambiguous type (int/float + // or long/double) and marked ArraySets with ambiguous input type. Now that RTP + // computed the type of the array input, the ambiguity can be resolved and the + // correct equivalents kept. + if (!FixAmbiguousArrayOps()) { + return kBuildSsaFailAmbiguousArrayOp; + } + + // 8) Mark dead phis. This will mark phis which are not used by instructions + // or other live phis. If compiling as debuggable code, phis will also be kept + // live if they have an environment use. + SsaDeadPhiElimination dead_phi_elimination(GetGraph()); + dead_phi_elimination.MarkDeadPhis(); + + // 9) Make sure environments use the right phi equivalent: a phi marked dead + // can have a phi equivalent that is not dead. In that case we have to replace + // it with the live equivalent because deoptimization and try/catch rely on + // environments containing values of all live vregs at that point. Note that + // there can be multiple phis for the same Dex register that are live + // (for example when merging constants), in which case it is okay for the + // environments to just reference one. + FixEnvironmentPhis(); + + // 10) Now that the right phis are used for the environments, we can eliminate + // phis we do not need. Regardless of the debuggable status, this phase is + // necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well + // as for the code generation, which does not deal with phis of conflicting // input types. - dead_phis.EliminateDeadPhis(); + dead_phi_elimination.EliminateDeadPhis(); - // 12) Clear locals. + // 11) Clear locals. for (HInstructionIterator it(GetGraph()->GetEntryBlock()->GetInstructions()); !it.Done(); it.Advance()) { @@ -404,6 +496,8 @@ void SsaBuilder::BuildSsa() { current->GetBlock()->RemoveInstruction(current); } } + + return kBuildSsaSuccess; } ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) { @@ -591,6 +685,8 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { * phi with a floating point / reference type.
*/ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { + DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one."; + // We place the floating point /reference phi next to this phi. HInstruction* next = phi->GetNext(); if (next != nullptr @@ -606,35 +702,50 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: ArenaAllocator* allocator = phi->GetBlock()->GetGraph()->GetArena(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), phi->InputCount(), type); for (size_t i = 0, e = phi->InputCount(); i < e; ++i) { - // Copy the inputs. Note that the graph may not be correctly typed by doing this copy, - // but the type propagation phase will fix it. + // Copy the inputs. Note that the graph may not be correctly typed + // by doing this copy, but the type propagation phase will fix it. new_phi->SetRawInputAt(i, phi->InputAt(i)); } phi->GetBlock()->InsertPhiAfter(new_phi, phi); + DCHECK(new_phi->IsLive()); return new_phi; } else { + // An existing equivalent was found. If it is dead, conflict was previously + // identified and we return nullptr instead. HPhi* next_phi = next->AsPhi(); DCHECK_EQ(next_phi->GetType(), type); - if (next_phi->IsDead()) { - // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis) - // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This - // cannot revive undefined loop header phis because they cannot have uses. - DCHECK(!IsUndefinedLoopHeaderPhi(next_phi)); - next_phi->SetLive(); + return next_phi->IsLive() ? next_phi : nullptr; + } +} + +HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { + DCHECK(Primitive::IsIntegralType(aget->GetType())); + + if (!Primitive::IsIntOrLongType(aget->GetType())) { + // Cannot type boolean, char, byte, short to float/double. + return nullptr; + } + + DCHECK(ContainsElement(ambiguous_agets_, aget)); + if (agets_fixed_) { + // This used to be an ambiguous ArrayGet but its type has been resolved to + // int/long. Requesting a float/double equivalent should lead to a conflict. + if (kIsDebugBuild) { + ScopedObjectAccess soa(Thread::Current()); + DCHECK(Primitive::IsIntOrLongType(GetPrimitiveArrayComponentType(aget->GetArray()))); } - return next_phi; + return nullptr; + } else { + // This is an ambiguous ArrayGet which has not been resolved yet. Return an + // equivalent float/double instruction to use until it is resolved. + HArrayGet* equivalent = FindFloatOrDoubleEquivalentOfArrayGet(aget); + return (equivalent == nullptr) ? CreateFloatOrDoubleEquivalentOfArrayGet(aget) : equivalent; } } -HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, - HInstruction* value, - Primitive::Type type) { +HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) { if (value->IsArrayGet()) { - // The verifier has checked that values in arrays cannot be used for both - // floating point and non-floating point operations. It is therefore safe to just - // change the type of the operation. 
- value->AsArrayGet()->SetType(type); - return value; + return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet()); } else if (value->IsLongConstant()) { return GetDoubleEquivalent(value->AsLongConstant()); } else if (value->IsIntConstant()) { @@ -642,12 +753,7 @@ HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* user, } else if (value->IsPhi()) { return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), type); } else { - // For other instructions, we assume the verifier has checked that the dex format is correctly - // typed and the value in a dex register will not be used for both floating point and - // non-floating point operations. So the only reason an instruction would want a floating - // point equivalent is for an unused phi that will be removed by the dead phi elimination phase. - DCHECK(user->IsPhi()) << "is actually " << user->DebugName() << " (" << user->GetId() << ")"; - return value; + return nullptr; } } @@ -662,15 +768,17 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { } void SsaBuilder::VisitLoadLocal(HLoadLocal* load) { + Primitive::Type load_type = load->GetType(); HInstruction* value = (*current_locals_)[load->GetLocal()->GetRegNumber()]; // If the operation requests a specific type, we make sure its input is of that type. - if (load->GetType() != value->GetType()) { - if (load->GetType() == Primitive::kPrimFloat || load->GetType() == Primitive::kPrimDouble) { - value = GetFloatOrDoubleEquivalent(load, value, load->GetType()); - } else if (load->GetType() == Primitive::kPrimNot) { + if (load_type != value->GetType()) { + if (load_type == Primitive::kPrimFloat || load_type == Primitive::kPrimDouble) { + value = GetFloatOrDoubleEquivalent(value, load_type); + } else if (load_type == Primitive::kPrimNot) { value = GetReferenceTypeEquivalent(value); } } + load->ReplaceWith(value); load->GetBlock()->RemoveInstruction(load); } @@ -760,4 +868,21 @@ void SsaBuilder::VisitTemporary(HTemporary* temp) { temp->GetBlock()->RemoveInstruction(temp); } +void SsaBuilder::VisitArrayGet(HArrayGet* aget) { + Primitive::Type type = aget->GetType(); + DCHECK(!Primitive::IsFloatingPointType(type)); + if (Primitive::IsIntOrLongType(type)) { + ambiguous_agets_.push_back(aget); + } + VisitInstruction(aget); +} + +void SsaBuilder::VisitArraySet(HArraySet* aset) { + Primitive::Type type = aset->GetValue()->GetType(); + if (Primitive::IsIntOrLongType(type)) { + ambiguous_asets_.push_back(aset); + } + VisitInstruction(aset); +} + } // namespace art diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index dcce5e4c2c..0fcc3a1306 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -49,17 +49,21 @@ static constexpr int kDefaultNumberOfLoops = 2; */ class SsaBuilder : public HGraphVisitor { public: - explicit SsaBuilder(HGraph* graph) + explicit SsaBuilder(HGraph* graph, StackHandleScopeCollection* handles) : HGraphVisitor(graph), + handles_(handles), + agets_fixed_(false), current_locals_(nullptr), loop_headers_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), + ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), + ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), locals_for_(graph->GetBlocks().size(), ArenaVector<HInstruction*>(graph->GetArena()->Adapter(kArenaAllocSsaBuilder)), graph->GetArena()->Adapter(kArenaAllocSsaBuilder)) { loop_headers_.reserve(kDefaultNumberOfLoops); } - void BuildSsa(); + BuildSsaResult BuildSsa(); // Returns 
locals vector for `block`. If it is a catch block, the vector will be // prepopulated with catch phis for vregs which are defined in `current_locals_`. @@ -71,23 +75,39 @@ class SsaBuilder : public HGraphVisitor { void VisitStoreLocal(HStoreLocal* store); void VisitInstruction(HInstruction* instruction); void VisitTemporary(HTemporary* instruction); - - static HInstruction* GetFloatOrDoubleEquivalent(HInstruction* user, - HInstruction* instruction, - Primitive::Type type); - - static HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + void VisitArrayGet(HArrayGet* aget); + void VisitArraySet(HArraySet* aset); static constexpr const char* kSsaBuilderPassName = "ssa_builder"; private: void SetLoopHeaderPhiInputs(); + void FixEnvironmentPhis(); void FixNullConstantType(); void EquivalentPhisCleanup(); + void RunPrimitiveTypePropagation(); + + // Attempts to resolve types of aget(-wide) instructions and type values passed + // to aput(-wide) instructions from reference type information on the array + // input. Returns false if the type of an array is unknown. + bool FixAmbiguousArrayOps(); + + bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist); + bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist); + void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist); - static HFloatConstant* GetFloatEquivalent(HIntConstant* constant); - static HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant); - static HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); + HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type); + HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); + + HFloatConstant* GetFloatEquivalent(HIntConstant* constant); + HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant); + HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); + HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); + + StackHandleScopeCollection* const handles_; + + // True if types of ambiguous ArrayGets have been resolved. + bool agets_fixed_; // Locals for the current block being visited. ArenaVector<HInstruction*>* current_locals_; @@ -96,6 +116,9 @@ class SsaBuilder : public HGraphVisitor { // over these blocks to set the inputs of their phis. ArenaVector<HBasicBlock*> loop_headers_; + ArenaVector<HArrayGet*> ambiguous_agets_; + ArenaVector<HArraySet*> ambiguous_asets_; + // HEnvironment for each block. 
ArenaVector<ArenaVector<HInstruction*>> locals_for_; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index a3219dcc38..2eef307295 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -17,6 +17,7 @@ #include "ssa_phi_elimination.h" #include "base/arena_containers.h" +#include "base/bit_vector-inl.h" namespace art { @@ -40,15 +41,17 @@ void SsaDeadPhiElimination::MarkDeadPhis() { continue; } - bool has_non_phi_use = false; - for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { - if (!use_it.Current()->GetUser()->IsPhi()) { - has_non_phi_use = true; - break; + bool keep_alive = (graph_->IsDebuggable() && phi->HasEnvironmentUses()); + if (!keep_alive) { + for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { + if (!use_it.Current()->GetUser()->IsPhi()) { + keep_alive = true; + break; + } } } - if (has_non_phi_use) { + if (keep_alive) { worklist_.push_back(phi); } else { phi->SetDead(); @@ -94,8 +97,8 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) { HInstruction* user = use_it.Current()->GetUser(); - DCHECK(user->IsLoopHeaderPhi()) << user->GetId(); - DCHECK(user->AsPhi()->IsDead()) << user->GetId(); + DCHECK(user->IsLoopHeaderPhi()); + DCHECK(user->AsPhi()->IsDead()); } } // Remove the phi from use lists of its inputs. @@ -127,6 +130,9 @@ void SsaRedundantPhiElimination::Run() { } } + ArenaSet<uint32_t> visited_phis_in_cycle(graph_->GetArena()->Adapter()); + ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter()); + while (!worklist_.empty()) { HPhi* phi = worklist_.back(); worklist_.pop_back(); @@ -141,46 +147,92 @@ void SsaRedundantPhiElimination::Run() { continue; } - // Find if the inputs of the phi are the same instruction. - HInstruction* candidate = phi->InputAt(0); - // A loop phi cannot have itself as the first phi. Note that this - // check relies on our simplification pass ensuring the pre-header - // block is first in the list of predecessors of the loop header. - DCHECK(!phi->IsLoopHeaderPhi() || phi->GetBlock()->IsLoopPreHeaderFirstPredecessor()); - DCHECK_NE(phi, candidate); - - for (size_t i = 1; i < phi->InputCount(); ++i) { - HInstruction* input = phi->InputAt(i); - // For a loop phi, if the input is the phi, the phi is still candidate for - // elimination. - if (input != candidate && input != phi) { + HInstruction* candidate = nullptr; + visited_phis_in_cycle.clear(); + cycle_worklist.clear(); + + cycle_worklist.push_back(phi); + visited_phis_in_cycle.insert(phi->GetId()); + bool catch_phi_in_cycle = phi->IsCatchPhi(); + + // First do a simple loop over inputs and check if they are all the same. + for (size_t j = 0; j < phi->InputCount(); ++j) { + HInstruction* input = phi->InputAt(j); + if (input == phi) { + continue; + } else if (candidate == nullptr) { + candidate = input; + } else if (candidate != input) { candidate = nullptr; break; } } - // If the inputs are not the same, continue. + // If we haven't found a candidate, check for a phi cycle. Note that we need to detect + // such cycles to avoid having reference and non-reference equivalents. We check this + // invariant in the graph checker. if (candidate == nullptr) { - continue; + // We iterate over the array as long as it grows. 
+ for (size_t i = 0; i < cycle_worklist.size(); ++i) { + HPhi* current = cycle_worklist[i]; + DCHECK(!current->IsLoopHeaderPhi() || + current->GetBlock()->IsLoopPreHeaderFirstPredecessor()); + + for (size_t j = 0; j < current->InputCount(); ++j) { + HInstruction* input = current->InputAt(j); + if (input == current) { + continue; + } else if (input->IsPhi()) { + if (!ContainsElement(visited_phis_in_cycle, input->GetId())) { + cycle_worklist.push_back(input->AsPhi()); + visited_phis_in_cycle.insert(input->GetId()); + catch_phi_in_cycle |= input->AsPhi()->IsCatchPhi(); + } else { + // Already visited, nothing to do. + } + } else if (candidate == nullptr) { + candidate = input; + } else if (candidate != input) { + candidate = nullptr; + // Clear the cycle worklist to break out of the outer loop. + cycle_worklist.clear(); + break; + } + } + } } - // The candidate may not dominate a phi in a catch block. - if (phi->IsCatchPhi() && !candidate->StrictlyDominates(phi)) { + if (candidate == nullptr) { continue; } - // Because we're updating the users of this phi, we may have new candidates - // for elimination. Add phis that use this phi to the worklist. - for (HUseIterator<HInstruction*> it(phi->GetUses()); !it.Done(); it.Advance()) { - HUseListNode<HInstruction*>* current = it.Current(); - HInstruction* user = current->GetUser(); - if (user->IsPhi()) { - worklist_.push_back(user->AsPhi()); + for (HPhi* current : cycle_worklist) { + // The candidate may not dominate a phi in a catch block: there may be non-throwing + // instructions at the beginning of a try range, that may be the first input of + // catch phis. + // TODO(dbrazdil): Remove this situation by moving those non-throwing instructions + // before the try entry. + if (catch_phi_in_cycle) { + if (!candidate->StrictlyDominates(current)) { + continue; + } + } else { + DCHECK(candidate->StrictlyDominates(current)); + } + + // Because we're updating the users of this phi, we may have new candidates + // for elimination. Add phis that use this phi to the worklist. + for (HUseIterator<HInstruction*> it(current->GetUses()); !it.Done(); it.Advance()) { + HUseListNode<HInstruction*>* use = it.Current(); + HInstruction* user = use->GetUser(); + if (user->IsPhi() && !ContainsElement(visited_phis_in_cycle, user->GetId())) { + worklist_.push_back(user->AsPhi()); + } } + DCHECK(candidate->StrictlyDominates(current)); + current->ReplaceWith(candidate); + current->GetBlock()->RemovePhi(current); } - - phi->ReplaceWith(candidate); - phi->GetBlock()->RemovePhi(phi); } } diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index 024278f4b2..d2885a8fd7 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -28,6 +28,8 @@ namespace art { +class SsaTest : public CommonCompilerTest {}; + class SsaPrettyPrinter : public HPrettyPrinter { public: explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {} @@ -83,11 +85,10 @@ static void TestCode(const uint16_t* data, const char* expected) { bool graph_built = builder.BuildGraph(*item); ASSERT_TRUE(graph_built); - graph->BuildDominatorTree(); + TransformToSsa(graph); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. RemoveSuspendChecks(graph); - graph->TransformToSsa(); ReNumberInstructions(graph); // Test that phis had their type set. 
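The growing cycle worklist above is the heart of the rewritten SsaRedundantPhiElimination::Run: a group of phis is redundant only if, across the whole cycle, exactly one concrete value feeds it. As a standalone illustration of that search (Node is a simplified stand-in for HPhi/HInstruction, and the dominance and catch-phi checks are deliberately omitted), a sketch:

#include <cstddef>
#include <unordered_set>
#include <vector>

struct Node {
  bool is_phi = false;
  std::vector<Node*> inputs;  // Only meaningful for phis in this sketch.
};

// Walks the phi cycle reachable from `root` and returns the single non-phi
// value feeding it, or nullptr if no value or two distinct values are found.
Node* FindCycleCandidate(Node* root) {
  Node* candidate = nullptr;
  std::vector<Node*> worklist = {root};
  std::unordered_set<Node*> visited = {root};
  for (size_t i = 0; i < worklist.size(); ++i) {  // Worklist grows as we discover the cycle.
    for (Node* input : worklist[i]->inputs) {
      if (input == worklist[i]) {
        continue;  // A loop phi referencing itself: ignore.
      }
      if (input->is_phi) {
        if (visited.insert(input).second) {
          worklist.push_back(input);  // New phi in the cycle: visit its inputs too.
        }
      } else if (candidate == nullptr) {
        candidate = input;  // First concrete value seen.
      } else if (candidate != input) {
        return nullptr;  // The cycle merges two distinct values: not redundant.
      }
    }
  }
  return candidate;
}

If a candidate is found, every phi in the visited set can be replaced with it, which is why the real pass records visited phi ids and replaces the whole cycle at once.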
@@ -103,7 +104,7 @@ static void TestCode(const uint16_t* data, const char* expected) { ASSERT_STREQ(expected, printer.str().c_str()); } -TEST(SsaTest, CFG1) { +TEST_F(SsaTest, CFG1) { // Test that we get rid of loads and stores. const char* expected = "BasicBlock 0, succ: 1\n" @@ -131,7 +132,7 @@ TEST(SsaTest, CFG1) { TestCode(data, expected); } -TEST(SsaTest, CFG2) { +TEST_F(SsaTest, CFG2) { // Test that we create a phi for the join block of an if control flow instruction // when there is only code in the else branch. const char* expected = @@ -162,7 +163,7 @@ TEST(SsaTest, CFG2) { TestCode(data, expected); } -TEST(SsaTest, CFG3) { +TEST_F(SsaTest, CFG3) { // Test that we create a phi for the join block of an if control flow instruction // when both branches update a local. const char* expected = @@ -195,7 +196,7 @@ TEST(SsaTest, CFG3) { TestCode(data, expected); } -TEST(SsaTest, Loop1) { +TEST_F(SsaTest, Loop1) { // Test that we create a phi for an initialized local at entry of a loop. const char* expected = "BasicBlock 0, succ: 1\n" @@ -228,7 +229,7 @@ TEST(SsaTest, Loop1) { TestCode(data, expected); } -TEST(SsaTest, Loop2) { +TEST_F(SsaTest, Loop2) { // Simple loop with one preheader and one back edge. const char* expected = "BasicBlock 0, succ: 1\n" @@ -258,7 +259,7 @@ TEST(SsaTest, Loop2) { TestCode(data, expected); } -TEST(SsaTest, Loop3) { +TEST_F(SsaTest, Loop3) { // Test that a local not yet defined at the entry of a loop is handled properly. const char* expected = "BasicBlock 0, succ: 1\n" @@ -290,7 +291,7 @@ TEST(SsaTest, Loop3) { TestCode(data, expected); } -TEST(SsaTest, Loop4) { +TEST_F(SsaTest, Loop4) { // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. const char* expected = @@ -325,7 +326,7 @@ TEST(SsaTest, Loop4) { TestCode(data, expected); } -TEST(SsaTest, Loop5) { +TEST_F(SsaTest, Loop5) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. const char* expected = @@ -367,7 +368,7 @@ TEST(SsaTest, Loop5) { TestCode(data, expected); } -TEST(SsaTest, Loop6) { +TEST_F(SsaTest, Loop6) { // Test a loop with one preheader and two back edges (e.g. continue). const char* expected = "BasicBlock 0, succ: 1\n" @@ -406,7 +407,7 @@ TEST(SsaTest, Loop6) { TestCode(data, expected); } -TEST(SsaTest, Loop7) { +TEST_F(SsaTest, Loop7) { // Test a loop with one preheader, one back edge, and two exit edges (e.g. break). const char* expected = "BasicBlock 0, succ: 1\n" @@ -448,7 +449,7 @@ TEST(SsaTest, Loop7) { TestCode(data, expected); } -TEST(SsaTest, DeadLocal) { +TEST_F(SsaTest, DeadLocal) { // Test that we correctly handle a local not being used. const char* expected = "BasicBlock 0, succ: 1\n" @@ -466,7 +467,7 @@ TEST(SsaTest, DeadLocal) { TestCode(data, expected); } -TEST(SsaTest, LocalInIf) { +TEST_F(SsaTest, LocalInIf) { // Test that we do not create a phi in the join block when one predecessor // does not update the local. const char* expected = @@ -496,7 +497,7 @@ TEST(SsaTest, LocalInIf) { TestCode(data, expected); } -TEST(SsaTest, MultiplePredecessors) { +TEST_F(SsaTest, MultiplePredecessors) { // Test that we do not create a phi when one predecessor // does not update the local. 
const char* expected = diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index c60a4eacaa..4784de1380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -270,7 +270,7 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map.SetStackMask(stack_map_encoding_, *entry.sp_mask); } - if (entry.num_dex_registers == 0) { + if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) { // No dex map available. stack_map.SetDexRegisterMapOffset(stack_map_encoding_, StackMap::kNoDexRegisterMap); } else { diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index 560502fde6..604787fd92 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -614,6 +614,10 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); + number_of_dex_registers = 1; + stream.BeginStackMapEntry(1, 67, 0x4, &sp_mask, number_of_dex_registers, 0); + stream.EndStackMapEntry(); + size_t size = stream.PrepareForFillIn(); void* memory = arena.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); @@ -622,7 +626,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { CodeInfo code_info(region); StackMapEncoding encoding = code_info.ExtractEncoding(); ASSERT_EQ(0u, encoding.NumberOfBytesForStackMask()); - ASSERT_EQ(1u, code_info.GetNumberOfStackMaps()); + ASSERT_EQ(2u, code_info.GetNumberOfStackMaps()); uint32_t number_of_location_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(); ASSERT_EQ(0u, number_of_location_catalog_entries); @@ -638,6 +642,16 @@ TEST(StackMapTest, TestNoDexRegisterMap) { ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); + + stack_map = code_info.GetStackMapAt(1, encoding); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding))); + ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(67, encoding))); + ASSERT_EQ(1u, stack_map.GetDexPc(encoding)); + ASSERT_EQ(67u, stack_map.GetNativePcOffset(encoding)); + ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding)); + + ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding)); + ASSERT_FALSE(stack_map.HasInlineInfo(encoding)); } TEST(StackMapTest, InlineTest) { diff --git a/compiler/profile_assistant.cc b/compiler/profile_assistant.cc new file mode 100644 index 0000000000..81f2a5692d --- /dev/null +++ b/compiler/profile_assistant.cc @@ -0,0 +1,69 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "profile_assistant.h" + +namespace art { + +// Minimum number of new methods that profiles must contain to enable recompilation. 
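+// (As used below: if no input profile contains more than this many methods,
+// ProcessProfiles() still returns true but sets *profile_compilation_info to
+// null, signalling that recompilation is not yet worthwhile.)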
+static constexpr const uint32_t kMinNewMethodsForCompilation = 10; + +bool ProfileAssistant::ProcessProfiles( + const std::vector<std::string>& profile_files, + const std::vector<std::string>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info) { + DCHECK(!profile_files.empty()); + DCHECK(reference_profile_files.empty() || + (profile_files.size() == reference_profile_files.size())); + + std::vector<ProfileCompilationInfo> new_info(profile_files.size()); + bool should_compile = false; + // Read the main profile files. + for (size_t i = 0; i < profile_files.size(); i++) { + if (!new_info[i].Load(profile_files[i])) { + LOG(WARNING) << "Could not load profile file: " << profile_files[i]; + return false; + } + // Do we have enough new profiled methods to make the compilation worthwhile? + should_compile |= (new_info[i].GetNumberOfMethods() > kMinNewMethodsForCompilation); + } + if (!should_compile) { + *profile_compilation_info = nullptr; + return true; + } + + std::unique_ptr<ProfileCompilationInfo> result(new ProfileCompilationInfo()); + for (size_t i = 0; i < new_info.size(); i++) { + // Merge all data into a single object. + result->Load(new_info[i]); + // If we have any reference profile information, merge it with + // the current profiles and save the result back to disk. + if (!reference_profile_files.empty()) { + if (!new_info[i].Load(reference_profile_files[i])) { + LOG(WARNING) << "Could not load reference profile file: " << reference_profile_files[i]; + return false; + } + if (!new_info[i].Save(reference_profile_files[i])) { + LOG(WARNING) << "Could not save reference profile file: " << reference_profile_files[i]; + return false; + } + } + } + *profile_compilation_info = result.release(); + return true; +} + +} // namespace art diff --git a/compiler/profile_assistant.h b/compiler/profile_assistant.h new file mode 100644 index 0000000000..088c8bd1c7 --- /dev/null +++ b/compiler/profile_assistant.h @@ -0,0 +1,61 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_PROFILE_ASSISTANT_H_ +#define ART_COMPILER_PROFILE_ASSISTANT_H_ + +#include <string> +#include <vector> + +#include "jit/offline_profiling_info.h" + +namespace art { + +class ProfileAssistant { + public: + // Process the profile information present in the given files. Returns true + // if the analysis completed successfully (i.e. no errors during reading, + // merging or writing of the profile files). + // + // If the returned value is true and there is a significant difference between + // profile_files and reference_profile_files: + // - profile_compilation_info is set to a non-null object that + // can be used to drive compilation. It will be the merge of all the data + // found in profile_files and reference_profile_files. + // - the data from profile_files[i] is merged into + // reference_profile_files[i] and the corresponding backing file is + // updated.
+ // + // If the returned value is false or the difference is insignificant, + // profile_compilation_info will be set to null. + // + // Additional notes: + // - as mentioned above, this function may update the content of the files + // passed as reference_profile_files. + // - if reference_profile_files is not empty it must be the same size as + // profile_files. + static bool ProcessProfiles( + const std::vector<std::string>& profile_files, + const std::vector<std::string>& reference_profile_files, + /*out*/ ProfileCompilationInfo** profile_compilation_info); + + private: + DISALLOW_COPY_AND_ASSIGN(ProfileAssistant); +}; + +} // namespace art + +#endif // ART_COMPILER_PROFILE_ASSISTANT_H_ diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index b79c2f0f4e..f96376d9fe 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -501,6 +501,8 @@ class ArmAssembler : public Assembler { virtual void cmp(Register rn, const ShifterOperand& so, Condition cond = AL) = 0; + // Note: CMN updates flags based on addition of its operands. Do not confuse + // the "N" suffix with bitwise inversion performed by MVN. virtual void cmn(Register rn, const ShifterOperand& so, Condition cond = AL) = 0; virtual void orr(Register rd, Register rn, const ShifterOperand& so, diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index f341030c15..52023a67ee 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -3428,10 +3428,10 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, CHECK(rn != IP); // If rd != rn, use rd as temp. This allows 16-bit ADD/SUB in more situations than using IP. Register temp = (rd != rn) ? rd : IP; - if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, set_cc, &shifter_op)) { + if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) { mvn(temp, shifter_op, cond, kCcKeep); add(rd, rn, ShifterOperand(temp), cond, set_cc); - } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), set_cc, &shifter_op)) { + } else if (ShifterOperandCanHold(temp, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) { mvn(temp, shifter_op, cond, kCcKeep); sub(rd, rn, ShifterOperand(temp), cond, set_cc); } else if (High16Bits(-value) == 0) { @@ -3449,22 +3449,32 @@ void Thumb2Assembler::AddConstant(Register rd, Register rn, int32_t value, } void Thumb2Assembler::CmpConstant(Register rn, int32_t value, Condition cond) { - // We prefer to select the shorter code sequence rather than selecting add for - // positive values and sub for negatives ones, which would slightly improve - // the readability of generated code for some constants. + // We prefer to select the shorter code sequence over plain cmp and cmn, even though + // the latter would slightly improve the readability of generated code for some constants.
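+  // A sketch of the resulting priority (assuming the Thumb2 modified-immediate
+  // encodability checks performed by ShifterOperandCanHold):
+  //   1. CMP rn, #value                   when value itself is encodable;
+  //   2. CMN rn, #-value                  when the negated value is encodable;
+  //   3. MVN ip, #~value;    CMP rn, ip
+  //   4. MVN ip, #~(-value); CMN rn, ip
+  //   5. MOVW ip, #(-value); CMN rn, ip   when -value fits in 16 bits;
+  //   6. MOVW ip, #low16(value) (+ MOVT); CMP rn, ip  as the general fallback.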
ShifterOperand shifter_op; if (ShifterOperandCanHold(kNoRegister, rn, CMP, value, kCcSet, &shifter_op)) { cmp(rn, shifter_op, cond); - } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, ~value, kCcSet, &shifter_op)) { + } else if (ShifterOperandCanHold(kNoRegister, rn, CMN, -value, kCcSet, &shifter_op)) { cmn(rn, shifter_op, cond); } else { CHECK(rn != IP); - movw(IP, Low16Bits(value), cond); - uint16_t value_high = High16Bits(value); - if (value_high != 0) { - movt(IP, value_high, cond); + if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~value, kCcKeep, &shifter_op)) { + mvn(IP, shifter_op, cond, kCcKeep); + cmp(rn, ShifterOperand(IP), cond); + } else if (ShifterOperandCanHold(IP, kNoRegister, MVN, ~(-value), kCcKeep, &shifter_op)) { + mvn(IP, shifter_op, cond, kCcKeep); + cmn(rn, ShifterOperand(IP), cond); + } else if (High16Bits(-value) == 0) { + movw(IP, Low16Bits(-value), cond); + cmn(rn, ShifterOperand(IP), cond); + } else { + movw(IP, Low16Bits(value), cond); + uint16_t value_high = High16Bits(value); + if (value_high != 0) { + movt(IP, value_high, cond); + } + cmp(rn, ShifterOperand(IP), cond); } - cmp(rn, ShifterOperand(IP), cond); } } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 0ef0dc19e6..2df9b177bf 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1626,6 +1626,76 @@ TEST(Thumb2AssemblerTest, AddConstant) { EmitAndCheck(&assembler, "AddConstant"); } +TEST(Thumb2AssemblerTest, CmpConstant) { + arm::Thumb2Assembler assembler; + + __ CmpConstant(R0, 0); // 16-bit CMP. + __ CmpConstant(R1, 1); // 16-bit CMP. + __ CmpConstant(R0, 7); // 16-bit CMP. + __ CmpConstant(R1, 8); // 16-bit CMP. + __ CmpConstant(R0, 255); // 16-bit CMP. + __ CmpConstant(R1, 256); // 32-bit CMP. + __ CmpConstant(R0, 257); // MVN+CMN. + __ CmpConstant(R1, 0xfff); // MOVW+CMP. + __ CmpConstant(R0, 0x1000); // 32-bit CMP. + __ CmpConstant(R1, 0x1001); // MVN+CMN. + __ CmpConstant(R0, 0x1002); // MOVW+CMP. + __ CmpConstant(R1, 0xffff); // MOVW+CMP. + __ CmpConstant(R0, 0x10000); // 32-bit CMP. + __ CmpConstant(R1, 0x10001); // 32-bit CMP. + __ CmpConstant(R0, 0x10002); // MVN+CMN. + __ CmpConstant(R1, 0x10003); // MOVW+MOVT+CMP. + __ CmpConstant(R0, -1); // 32-bit CMP. + __ CmpConstant(R1, -7); // CMN. + __ CmpConstant(R0, -8); // CMN. + __ CmpConstant(R1, -255); // CMN. + __ CmpConstant(R0, -256); // CMN. + __ CmpConstant(R1, -257); // MVN+CMP. + __ CmpConstant(R0, -0xfff); // MOVW+CMN. + __ CmpConstant(R1, -0x1000); // CMN. + __ CmpConstant(R0, -0x1001); // MVN+CMP. + __ CmpConstant(R1, -0x1002); // MOVW+CMN. + __ CmpConstant(R0, -0xffff); // MOVW+CMN. + __ CmpConstant(R1, -0x10000); // CMN. + __ CmpConstant(R0, -0x10001); // CMN. + __ CmpConstant(R1, -0x10002); // MVN+CMP. + __ CmpConstant(R0, -0x10003); // MOVW+MOVT+CMP. + + __ CmpConstant(R8, 0); // 32-bit CMP. + __ CmpConstant(R9, 1); // 32-bit CMP. + __ CmpConstant(R8, 7); // 32-bit CMP. + __ CmpConstant(R9, 8); // 32-bit CMP. + __ CmpConstant(R8, 255); // 32-bit CMP. + __ CmpConstant(R9, 256); // 32-bit CMP. + __ CmpConstant(R8, 257); // MVN+CMN. + __ CmpConstant(R9, 0xfff); // MOVW+CMP. + __ CmpConstant(R8, 0x1000); // 32-bit CMP. + __ CmpConstant(R9, 0x1001); // MVN+CMN. + __ CmpConstant(R8, 0x1002); // MOVW+CMP. + __ CmpConstant(R9, 0xffff); // MOVW+CMP. + __ CmpConstant(R8, 0x10000); // 32-bit CMP. + __ CmpConstant(R9, 0x10001); // 32-bit CMP. + __ CmpConstant(R8, 0x10002); // MVN+CMN. + __ CmpConstant(R9, 0x10003); // MOVW+MOVT+CMP.
+ __ CmpConstant(R8, -1); // 32-bit CMP. + __ CmpConstant(R9, -7); // CMN. + __ CmpConstant(R8, -8); // CMN. + __ CmpConstant(R9, -255); // CMN. + __ CmpConstant(R8, -256); // CMN. + __ CmpConstant(R9, -257); // MVN+CMP. + __ CmpConstant(R8, -0xfff); // MOVW+CMN. + __ CmpConstant(R9, -0x1000); // CMN. + __ CmpConstant(R8, -0x1001); // MVN+CMP. + __ CmpConstant(R9, -0x1002); // MOVW+CMN. + __ CmpConstant(R8, -0xffff); // MOVW+CMN. + __ CmpConstant(R9, -0x10000); // CMN. + __ CmpConstant(R8, -0x10001); // CMN. + __ CmpConstant(R9, -0x10002); // MVN+CMP. + __ CmpConstant(R8, -0x10003); // MOVW+MOVT+CMP. + + EmitAndCheck(&assembler, "CmpConstant"); +} + #undef __ } // namespace arm } // namespace art diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index f07f8c74d7..6736015bf1 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -1,4 +1,4 @@ -const char* SimpleMovResults[] = { +const char* const SimpleMovResults[] = { " 0: 0008 movs r0, r1\n", " 2: 4608 mov r0, r1\n", " 4: 46c8 mov r8, r9\n", @@ -6,18 +6,18 @@ const char* SimpleMovResults[] = { " 8: f04f 0809 mov.w r8, #9\n", nullptr }; -const char* SimpleMov32Results[] = { +const char* const SimpleMov32Results[] = { " 0: ea4f 0001 mov.w r0, r1\n", " 4: ea4f 0809 mov.w r8, r9\n", nullptr }; -const char* SimpleMovAddResults[] = { +const char* const SimpleMovAddResults[] = { " 0: 4608 mov r0, r1\n", " 2: 1888 adds r0, r1, r2\n", " 4: 1c08 adds r0, r1, #0\n", nullptr }; -const char* DataProcessingRegisterResults[] = { +const char* const DataProcessingRegisterResults[] = { " 0: ea6f 0001 mvn.w r0, r1\n", " 4: eb01 0002 add.w r0, r1, r2\n", " 8: eba1 0002 sub.w r0, r1, r2\n", @@ -129,7 +129,7 @@ const char* DataProcessingRegisterResults[] = { " 120: eb01 0c00 add.w ip, r1, r0\n", nullptr }; -const char* DataProcessingImmediateResults[] = { +const char* const DataProcessingImmediateResults[] = { " 0: 2055 movs r0, #85 ; 0x55\n", " 2: f06f 0055 mvn.w r0, #85 ; 0x55\n", " 6: f101 0055 add.w r0, r1, #85 ; 0x55\n", @@ -154,7 +154,7 @@ const char* DataProcessingImmediateResults[] = { " 48: 1f48 subs r0, r1, #5\n", nullptr }; -const char* DataProcessingModifiedImmediateResults[] = { +const char* const DataProcessingModifiedImmediateResults[] = { " 0: f04f 1055 mov.w r0, #5570645 ; 0x550055\n", " 4: f06f 1055 mvn.w r0, #5570645 ; 0x550055\n", " 8: f101 1055 add.w r0, r1, #5570645 ; 0x550055\n", @@ -173,7 +173,7 @@ const char* DataProcessingModifiedImmediateResults[] = { " 3c: f110 1f55 cmn.w r0, #5570645 ; 0x550055\n", nullptr }; -const char* DataProcessingModifiedImmediatesResults[] = { +const char* const DataProcessingModifiedImmediatesResults[] = { " 0: f04f 1055 mov.w r0, #5570645 ; 0x550055\n", " 4: f04f 2055 mov.w r0, #1426085120 ; 0x55005500\n", " 8: f04f 3055 mov.w r0, #1431655765 ; 0x55555555\n", @@ -183,7 +183,7 @@ const char* DataProcessingModifiedImmediatesResults[] = { " 18: f44f 70d4 mov.w r0, #424 ; 0x1a8\n", nullptr }; -const char* DataProcessingShiftedRegisterResults[] = { +const char* const DataProcessingShiftedRegisterResults[] = { " 0: 0123 lsls r3, r4, #4\n", " 2: 0963 lsrs r3, r4, #5\n", " 4: 11a3 asrs r3, r4, #6\n", @@ -201,7 +201,7 @@ const char* DataProcessingShiftedRegisterResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; -const char* ShiftImmediateResults[] = { +const char* const ShiftImmediateResults[] = { " 0: 0123 lsls r3, r4, #4\n", " 2: 0963 lsrs r3, r4, #5\n", " 4: 11a3
asrs r3, r4, #6\n", @@ -219,7 +219,7 @@ const char* ShiftImmediateResults[] = { " 32: ea5f 0834 movs.w r8, r4, rrx\n", nullptr }; -const char* BasicLoadResults[] = { +const char* const BasicLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: 7e23 ldrb r3, [r4, #24]\n", " 4: 8b23 ldrh r3, [r4, #24]\n", @@ -233,7 +233,7 @@ const char* BasicLoadResults[] = { " 20: f9b4 8018 ldrsh.w r8, [r4, #24]\n", nullptr }; -const char* BasicStoreResults[] = { +const char* const BasicStoreResults[] = { " 0: 61a3 str r3, [r4, #24]\n", " 2: 7623 strb r3, [r4, #24]\n", " 4: 8323 strh r3, [r4, #24]\n", @@ -243,7 +243,7 @@ const char* BasicStoreResults[] = { " 10: f8a4 8018 strh.w r8, [r4, #24]\n", nullptr }; -const char* ComplexLoadResults[] = { +const char* const ComplexLoadResults[] = { " 0: 69a3 ldr r3, [r4, #24]\n", " 2: f854 3f18 ldr.w r3, [r4, #24]!\n", " 6: f854 3b18 ldr.w r3, [r4], #24\n", @@ -276,7 +276,7 @@ const char* ComplexLoadResults[] = { " 6e: f934 3918 ldrsh.w r3, [r4], #-24\n", nullptr }; -const char* ComplexStoreResults[] = { +const char* const ComplexStoreResults[] = { " 0: 61a3 str r3, [r4, #24]\n", " 2: f844 3f18 str.w r3, [r4, #24]!\n", " 6: f844 3b18 str.w r3, [r4], #24\n", @@ -297,7 +297,7 @@ const char* ComplexStoreResults[] = { " 3e: f824 3918 strh.w r3, [r4], #-24\n", nullptr }; -const char* NegativeLoadStoreResults[] = { +const char* const NegativeLoadStoreResults[] = { " 0: f854 3c18 ldr.w r3, [r4, #-24]\n", " 4: f854 3d18 ldr.w r3, [r4, #-24]!\n", " 8: f854 3918 ldr.w r3, [r4], #-24\n", @@ -348,12 +348,12 @@ const char* NegativeLoadStoreResults[] = { " bc: f824 3b18 strh.w r3, [r4], #24\n", nullptr }; -const char* SimpleLoadStoreDualResults[] = { +const char* const SimpleLoadStoreDualResults[] = { " 0: e9c0 2306 strd r2, r3, [r0, #24]\n", " 4: e9d0 2306 ldrd r2, r3, [r0, #24]\n", nullptr }; -const char* ComplexLoadStoreDualResults[] = { +const char* const ComplexLoadStoreDualResults[] = { " 0: e9c0 2306 strd r2, r3, [r0, #24]\n", " 4: e9e0 2306 strd r2, r3, [r0, #24]!\n", " 8: e8e0 2306 strd r2, r3, [r0], #24\n", @@ -368,7 +368,7 @@ const char* ComplexLoadStoreDualResults[] = { " 2c: e870 2306 ldrd r2, r3, [r0], #-24\n", nullptr }; -const char* NegativeLoadStoreDualResults[] = { +const char* const NegativeLoadStoreDualResults[] = { " 0: e940 2306 strd r2, r3, [r0, #-24]\n", " 4: e960 2306 strd r2, r3, [r0, #-24]!\n", " 8: e860 2306 strd r2, r3, [r0], #-24\n", @@ -383,7 +383,7 @@ const char* NegativeLoadStoreDualResults[] = { " 2c: e8f0 2306 ldrd r2, r3, [r0], #24\n", nullptr }; -const char* SimpleBranchResults[] = { +const char* const SimpleBranchResults[] = { " 0: 2002 movs r0, #2\n", " 2: 2101 movs r1, #1\n", " 4: e7fd b.n 2 <SimpleBranch+0x2>\n", @@ -403,7 +403,7 @@ const char* SimpleBranchResults[] = { " 20: 2006 movs r0, #6\n", nullptr }; -const char* LongBranchResults[] = { +const char* const LongBranchResults[] = { " 0: f04f 0002 mov.w r0, #2\n", " 4: f04f 0101 mov.w r1, #1\n", " 8: f7ff bffc b.w 4 <LongBranch+0x4>\n", @@ -423,14 +423,14 @@ const char* LongBranchResults[] = { " 40: f04f 0006 mov.w r0, #6\n", nullptr }; -const char* LoadMultipleResults[] = { +const char* const LoadMultipleResults[] = { " 0: cc09 ldmia r4!, {r0, r3}\n", " 2: e934 4800 ldmdb r4!, {fp, lr}\n", " 6: e914 4800 ldmdb r4, {fp, lr}\n", " a: f854 5b04 ldr.w r5, [r4], #4\n", nullptr }; -const char* StoreMultipleResults[] = { +const char* const StoreMultipleResults[] = { " 0: c409 stmia r4!, {r0, r3}\n", " 2: e8a4 4800 stmia.w r4!, {fp, lr}\n", " 6: e884 4800 stmia.w r4, {fp, lr}\n", @@ -438,7 
+438,7 @@ const char* StoreMultipleResults[] = { " e: f844 5d04 str.w r5, [r4, #-4]!\n", nullptr }; -const char* MovWMovTResults[] = { +const char* const MovWMovTResults[] = { " 0: f240 0400 movw r4, #0\n", " 4: f240 0434 movw r4, #52 ; 0x34\n", " 8: f240 0934 movw r9, #52 ; 0x34\n", @@ -449,7 +449,7 @@ const char* MovWMovTResults[] = { " 1c: f6cf 71ff movt r1, #65535 ; 0xffff\n", nullptr }; -const char* SpecialAddSubResults[] = { +const char* const SpecialAddSubResults[] = { " 0: aa14 add r2, sp, #80 ; 0x50\n", " 2: b014 add sp, #80 ; 0x50\n", " 4: f10d 0850 add.w r8, sp, #80 ; 0x50\n", @@ -463,7 +463,7 @@ const char* SpecialAddSubResults[] = { " 22: f6ad 7dfc subw sp, sp, #4092 ; 0xffc\n", nullptr }; -const char* LoadFromOffsetResults[] = { +const char* const LoadFromOffsetResults[] = { " 0: 68e2 ldr r2, [r4, #12]\n", " 2: f8d4 2fff ldr.w r2, [r4, #4095] ; 0xfff\n", " 6: f504 5280 add.w r2, r4, #4096 ; 0x1000\n", @@ -514,7 +514,7 @@ const char* LoadFromOffsetResults[] = { " 9e: f9b4 200c ldrsh.w r2, [r4, #12]\n", nullptr }; -const char* StoreToOffsetResults[] = { +const char* const StoreToOffsetResults[] = { " 0: 60e2 str r2, [r4, #12]\n", " 2: f8c4 2fff str.w r2, [r4, #4095] ; 0xfff\n", " 6: f504 5c80 add.w ip, r4, #4096 ; 0x1000\n", @@ -563,7 +563,7 @@ const char* StoreToOffsetResults[] = { " a4: 7322 strb r2, [r4, #12]\n", nullptr }; -const char* IfThenResults[] = { +const char* const IfThenResults[] = { " 0: bf08 it eq\n", " 2: 2101 moveq r1, #1\n", " 4: bf04 itt eq\n", @@ -587,7 +587,7 @@ const char* IfThenResults[] = { " 28: 2404 movne r4, #4\n", nullptr }; -const char* CbzCbnzResults[] = { +const char* const CbzCbnzResults[] = { " 0: b10a cbz r2, 6 <CbzCbnz+0x6>\n", " 2: 2103 movs r1, #3\n", " 4: 2203 movs r2, #3\n", @@ -598,7 +598,7 @@ const char* CbzCbnzResults[] = { " 10: 2204 movs r2, #4\n", nullptr }; -const char* MultiplyResults[] = { +const char* const MultiplyResults[] = { " 0: 4348 muls r0, r1\n", " 2: fb01 f002 mul.w r0, r1, r2\n", " 6: fb09 f808 mul.w r8, r9, r8\n", @@ -611,21 +611,21 @@ const char* MultiplyResults[] = { " 22: fbaa 890b umull r8, r9, sl, fp\n", nullptr }; -const char* DivideResults[] = { +const char* const DivideResults[] = { " 0: fb91 f0f2 sdiv r0, r1, r2\n", " 4: fb99 f8fa sdiv r8, r9, sl\n", " 8: fbb1 f0f2 udiv r0, r1, r2\n", " c: fbb9 f8fa udiv r8, r9, sl\n", nullptr }; -const char* VMovResults[] = { +const char* const VMovResults[] = { " 0: eef7 0a00 vmov.f32 s1, #112 ; 0x70\n", " 4: eeb7 1b00 vmov.f64 d1, #112 ; 0x70\n", " 8: eef0 0a41 vmov.f32 s1, s2\n", " c: eeb0 1b42 vmov.f64 d1, d2\n", nullptr }; -const char* BasicFloatingPointResults[] = { +const char* const BasicFloatingPointResults[] = { " 0: ee30 0a81 vadd.f32 s0, s1, s2\n", " 4: ee30 0ac1 vsub.f32 s0, s1, s2\n", " 8: ee20 0a81 vmul.f32 s0, s1, s2\n", @@ -646,7 +646,7 @@ const char* BasicFloatingPointResults[] = { " 44: eeb1 0bc1 vsqrt.f64 d0, d1\n", nullptr }; -const char* FloatingPointConversionsResults[] = { +const char* const FloatingPointConversionsResults[] = { " 0: eeb7 1bc2 vcvt.f32.f64 s2, d2\n", " 4: eeb7 2ac1 vcvt.f64.f32 d2, s2\n", " 8: eefd 0ac1 vcvt.s32.f32 s1, s2\n", @@ -659,35 +659,35 @@ const char* FloatingPointConversionsResults[] = { " 24: eeb8 1b41 vcvt.f64.u32 d1, s2\n", nullptr }; -const char* FloatingPointComparisonsResults[] = { +const char* const FloatingPointComparisonsResults[] = { " 0: eeb4 0a60 vcmp.f32 s0, s1\n", " 4: eeb4 0b41 vcmp.f64 d0, d1\n", " 8: eeb5 1a40 vcmp.f32 s2, #0.0\n", " c: eeb5 2b40 vcmp.f64 d2, #0.0\n", nullptr }; -const char* CallsResults[] = 
{ +const char* const CallsResults[] = { " 0: 47f0 blx lr\n", " 2: 4770 bx lr\n", nullptr }; -const char* BreakpointResults[] = { +const char* const BreakpointResults[] = { " 0: be00 bkpt 0x0000\n", nullptr }; -const char* StrR1Results[] = { +const char* const StrR1Results[] = { " 0: 9111 str r1, [sp, #68] ; 0x44\n", " 2: f8cd 142c str.w r1, [sp, #1068] ; 0x42c\n", nullptr }; -const char* VPushPopResults[] = { +const char* const VPushPopResults[] = { " 0: ed2d 1a04 vpush {s2-s5}\n", " 4: ed2d 2b08 vpush {d2-d5}\n", " 8: ecbd 1a04 vpop {s2-s5}\n", " c: ecbd 2b08 vpop {d2-d5}\n", nullptr }; -const char* Max16BitBranchResults[] = { +const char* const Max16BitBranchResults[] = { " 0: e3ff b.n 802 <Max16BitBranch+0x802>\n", " 2: 2300 movs r3, #0\n", " 4: 2302 movs r3, #2\n", @@ -1716,7 +1716,7 @@ const char* Max16BitBranchResults[] = { " 802: 4611 mov r1, r2\n", nullptr }; -const char* Branch32Results[] = { +const char* const Branch32Results[] = { " 0: f000 bc01 b.w 806 <Branch32+0x806>\n", " 4: 2300 movs r3, #0\n", " 6: 2302 movs r3, #2\n", @@ -2746,7 +2746,7 @@ const char* Branch32Results[] = { " 806: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchMaxResults[] = { +const char* const CompareAndBranchMaxResults[] = { " 0: b3fc cbz r4, 82 <CompareAndBranchMax+0x82>\n", " 2: 2300 movs r3, #0\n", " 4: 2302 movs r3, #2\n", @@ -2815,7 +2815,7 @@ const char* CompareAndBranchMaxResults[] = { " 82: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchRelocation16Results[] = { +const char* const CompareAndBranchRelocation16Results[] = { " 0: 2c00 cmp r4, #0\n", " 2: d040 beq.n 86 <CompareAndBranchRelocation16+0x86>\n", " 4: 2300 movs r3, #0\n", @@ -2886,7 +2886,7 @@ const char* CompareAndBranchRelocation16Results[] = { " 86: 4611 mov r1, r2\n", nullptr }; -const char* CompareAndBranchRelocation32Results[] = { +const char* const CompareAndBranchRelocation32Results[] = { " 0: 2c00 cmp r4, #0\n", " 2: f000 8401 beq.w 808 <CompareAndBranchRelocation32+0x808>\n", " 6: 2300 movs r3, #0\n", @@ -3917,7 +3917,7 @@ const char* CompareAndBranchRelocation32Results[] = { " 808: 4611 mov r1, r2\n", nullptr }; -const char* MixedBranch32Results[] = { +const char* const MixedBranch32Results[] = { " 0: f000 bc03 b.w 80a <MixedBranch32+0x80a>\n", " 4: 2300 movs r3, #0\n", " 6: 2302 movs r3, #2\n", @@ -4948,7 +4948,7 @@ const char* MixedBranch32Results[] = { " 80a: 4611 mov r1, r2\n", nullptr }; -const char* ShiftsResults[] = { +const char* const ShiftsResults[] = { " 0: 0148 lsls r0, r1, #5\n", " 2: 0948 lsrs r0, r1, #5\n", " 4: 1148 asrs r0, r1, #5\n", @@ -4997,7 +4997,7 @@ const char* ShiftsResults[] = { " 98: fa51 f008 asrs.w r0, r1, r8\n", nullptr }; -const char* LoadStoreRegOffsetResults[] = { +const char* const LoadStoreRegOffsetResults[] = { " 0: 5888 ldr r0, [r1, r2]\n", " 2: 5088 str r0, [r1, r2]\n", " 4: f851 0012 ldr.w r0, [r1, r2, lsl #1]\n", @@ -5012,7 +5012,7 @@ const char* LoadStoreRegOffsetResults[] = { " 28: f841 0008 str.w r0, [r1, r8]\n", nullptr }; -const char* LoadStoreLiteralResults[] = { +const char* const LoadStoreLiteralResults[] = { " 0: 4801 ldr r0, [pc, #4] ; (8 <LoadStoreLiteral+0x8>)\n", " 2: f8cf 0004 str.w r0, [pc, #4] ; 8 <LoadStoreLiteral+0x8>\n", " 6: f85f 0008 ldr.w r0, [pc, #-8] ; 0 <LoadStoreLiteral>\n", @@ -5023,7 +5023,7 @@ const char* LoadStoreLiteralResults[] = { " 18: f8cf 07ff str.w r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n", nullptr }; -const char* LoadStoreLimitsResults[] = { +const char* const LoadStoreLimitsResults[] = { " 0: 6fe0 ldr r0, [r4, #124] 
; 0x7c\n", " 2: f8d4 0080 ldr.w r0, [r4, #128] ; 0x80\n", " 6: 7fe0 ldrb r0, [r4, #31]\n", @@ -5042,7 +5042,7 @@ const char* LoadStoreLimitsResults[] = { " 30: f8a4 0040 strh.w r0, [r4, #64] ; 0x40\n", nullptr }; -const char* CompareAndBranchResults[] = { +const char* const CompareAndBranchResults[] = { " 0: b130 cbz r0, 10 <CompareAndBranch+0x10>\n", " 2: f1bb 0f00 cmp.w fp, #0\n", " 6: d003 beq.n 10 <CompareAndBranch+0x10>\n", @@ -5052,7 +5052,7 @@ const char* CompareAndBranchResults[] = { nullptr }; -const char* AddConstantResults[] = { +const char* const AddConstantResults[] = { " 0: 4608 mov r0, r1\n", " 2: 1c48 adds r0, r1, #1\n", " 4: 1dc8 adds r0, r1, #7\n", @@ -5370,6 +5370,104 @@ const char* AddConstantResults[] = { nullptr }; +const char* const CmpConstantResults[] = { + " 0: 2800 cmp r0, #0\n", + " 2: 2901 cmp r1, #1\n", + " 4: 2807 cmp r0, #7\n", + " 6: 2908 cmp r1, #8\n", + " 8: 28ff cmp r0, #255 ; 0xff\n", + " a: f5b1 7f80 cmp.w r1, #256 ; 0x100\n", + " e: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 12: eb10 0f0c cmn.w r0, ip\n", + " 16: f640 7cff movw ip, #4095 ; 0xfff\n", + " 1a: 4561 cmp r1, ip\n", + " 1c: f5b0 5f80 cmp.w r0, #4096 ; 0x1000\n", + " 20: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 24: eb11 0f0c cmn.w r1, ip\n", + " 28: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 2c: 4560 cmp r0, ip\n", + " 2e: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 32: 4561 cmp r1, ip\n", + " 34: f5b0 3f80 cmp.w r0, #65536 ; 0x10000\n", + " 38: f1b1 1f01 cmp.w r1, #65537 ; 0x10001\n", + " 3c: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 40: eb10 0f0c cmn.w r0, ip\n", + " 44: f240 0c03 movw ip, #3\n", + " 48: f2c0 0c01 movt ip, #1\n", + " 4c: 4561 cmp r1, ip\n", + " 4e: f1b0 3fff cmp.w r0, #4294967295 ; 0xffffffff\n", + " 52: f111 0f07 cmn.w r1, #7\n", + " 56: f110 0f08 cmn.w r0, #8\n", + " 5a: f111 0fff cmn.w r1, #255 ; 0xff\n", + " 5e: f510 7f80 cmn.w r0, #256 ; 0x100\n", + " 62: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 66: 4561 cmp r1, ip\n", + " 68: f640 7cff movw ip, #4095 ; 0xfff\n", + " 6c: eb10 0f0c cmn.w r0, ip\n", + " 70: f511 5f80 cmn.w r1, #4096 ; 0x1000\n", + " 74: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 78: 4560 cmp r0, ip\n", + " 7a: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 7e: eb11 0f0c cmn.w r1, ip\n", + " 82: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 86: eb10 0f0c cmn.w r0, ip\n", + " 8a: f511 3f80 cmn.w r1, #65536 ; 0x10000\n", + " 8e: f110 1f01 cmn.w r0, #65537 ; 0x10001\n", + " 92: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 96: 4561 cmp r1, ip\n", + " 98: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 9c: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " a0: 4560 cmp r0, ip\n", + " a2: f1b8 0f00 cmp.w r8, #0\n", + " a6: f1b9 0f01 cmp.w r9, #1\n", + " aa: f1b8 0f07 cmp.w r8, #7\n", + " ae: f1b9 0f08 cmp.w r9, #8\n", + " b2: f1b8 0fff cmp.w r8, #255 ; 0xff\n", + " b6: f5b9 7f80 cmp.w r9, #256 ; 0x100\n", + " ba: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " be: eb18 0f0c cmn.w r8, ip\n", + " c2: f640 7cff movw ip, #4095 ; 0xfff\n", + " c6: 45e1 cmp r9, ip\n", + " c8: f5b8 5f80 cmp.w r8, #4096 ; 0x1000\n", + " cc: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " d0: eb19 0f0c cmn.w r9, ip\n", + " d4: f241 0c02 movw ip, #4098 ; 0x1002\n", + " d8: 45e0 cmp r8, ip\n", + " da: f64f 7cff movw ip, #65535 ; 0xffff\n", + " de: 45e1 cmp r9, ip\n", + " e0: f5b8 3f80 cmp.w r8, #65536 ; 0x10000\n", + " e4: f1b9 1f01 cmp.w r9, #65537 ; 0x10001\n", + " e8: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " ec: eb18 0f0c cmn.w r8, ip\n", + " f0: f240 0c03 movw ip, #3\n", + " f4: f2c0 0c01 movt ip, #1\n", + " 
f8: 45e1 cmp r9, ip\n", + " fa: f1b8 3fff cmp.w r8, #4294967295 ; 0xffffffff\n", + " fe: f119 0f07 cmn.w r9, #7\n", + " 102: f118 0f08 cmn.w r8, #8\n", + " 106: f119 0fff cmn.w r9, #255 ; 0xff\n", + " 10a: f518 7f80 cmn.w r8, #256 ; 0x100\n", + " 10e: f46f 7c80 mvn.w ip, #256 ; 0x100\n", + " 112: 45e1 cmp r9, ip\n", + " 114: f640 7cff movw ip, #4095 ; 0xfff\n", + " 118: eb18 0f0c cmn.w r8, ip\n", + " 11c: f519 5f80 cmn.w r9, #4096 ; 0x1000\n", + " 120: f46f 5c80 mvn.w ip, #4096 ; 0x1000\n", + " 124: 45e0 cmp r8, ip\n", + " 126: f241 0c02 movw ip, #4098 ; 0x1002\n", + " 12a: eb19 0f0c cmn.w r9, ip\n", + " 12e: f64f 7cff movw ip, #65535 ; 0xffff\n", + " 132: eb18 0f0c cmn.w r8, ip\n", + " 136: f519 3f80 cmn.w r9, #65536 ; 0x10000\n", + " 13a: f118 1f01 cmn.w r8, #65537 ; 0x10001\n", + " 13e: f06f 1c01 mvn.w ip, #65537 ; 0x10001\n", + " 142: 45e1 cmp r9, ip\n", + " 144: f64f 7cfd movw ip, #65533 ; 0xfffd\n", + " 148: f6cf 7cfe movt ip, #65534 ; 0xfffe\n", + " 14c: 45e0 cmp r8, ip\n", + nullptr +}; + std::map<std::string, const char* const*> test_results; void setup_results() { test_results["SimpleMov"] = SimpleMovResults; @@ -5421,4 +5519,5 @@ void setup_results() { test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; test_results["AddConstant"] = AddConstantResults; + test_results["CmpConstant"] = CmpConstantResults; } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index afca8adcbb..ac9c097892 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -400,6 +400,20 @@ void MipsAssembler::Srav(Register rd, Register rt, Register rs) { EmitR(0, rs, rt, rd, 0, 0x07); } +void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) { + CHECK(IsUint<5>(pos)) << pos; + CHECK(0 < size && size <= 32) << size; + CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size; + EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00); +} + +void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) { + CHECK(IsUint<5>(pos)) << pos; + CHECK(0 < size && size <= 32) << size; + CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size; + EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04); +} + void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) { EmitI(0x20, rs, rt, imm16); } @@ -1021,6 +1035,22 @@ void MipsAssembler::Movt(Register rd, Register rs, int cc) { EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); } +void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); +} + +void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); +} + +void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); +} + void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); } @@ -1037,6 +1067,14 @@ void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); } +void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); +} + +void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { + EmitFR(0x11, 0x15, 
static_cast<FRegister>(0), fs, fd, 0x21); +} + void MipsAssembler::Mfc1(Register rt, FRegister fs) { EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } @@ -1053,6 +1091,24 @@ void MipsAssembler::Mthc1(Register rt, FRegister fs) { EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); } +void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) { + if (Is32BitFPU()) { + CHECK_EQ(fs % 2, 0) << fs; + Mfc1(rt, static_cast<FRegister>(fs + 1)); + } else { + Mfhc1(rt, fs); + } +} + +void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) { + if (Is32BitFPU()) { + CHECK_EQ(fs % 2, 0) << fs; + Mtc1(rt, static_cast<FRegister>(fs + 1)); + } else { + Mthc1(rt, fs); + } +} + void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) { EmitI(0x31, rs, static_cast<Register>(ft), imm16); } @@ -1121,8 +1177,14 @@ void MipsAssembler::LoadConst32(Register rd, int32_t value) { } void MipsAssembler::LoadConst64(Register reg_hi, Register reg_lo, int64_t value) { - LoadConst32(reg_lo, Low32Bits(value)); - LoadConst32(reg_hi, High32Bits(value)); + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + LoadConst32(reg_lo, low); + if (high != low) { + LoadConst32(reg_hi, high); + } else { + Move(reg_hi, reg_lo); + } } void MipsAssembler::StoreConst32ToOffset(int32_t value, @@ -1136,7 +1198,11 @@ void MipsAssembler::StoreConst32ToOffset(int32_t value, base = AT; offset = 0; } - LoadConst32(temp, value); + if (value == 0) { + temp = ZERO; + } else { + LoadConst32(temp, value); + } Sw(temp, base, offset); } @@ -1152,22 +1218,48 @@ void MipsAssembler::StoreConst64ToOffset(int64_t value, base = AT; offset = 0; } - LoadConst32(temp, Low32Bits(value)); - Sw(temp, base, offset); - LoadConst32(temp, High32Bits(value)); - Sw(temp, base, offset + kMipsWordSize); + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + if (low == 0) { + Sw(ZERO, base, offset); + } else { + LoadConst32(temp, low); + Sw(temp, base, offset); + } + if (high == 0) { + Sw(ZERO, base, offset + kMipsWordSize); + } else { + if (high != low) { + LoadConst32(temp, high); + } + Sw(temp, base, offset + kMipsWordSize); + } } void MipsAssembler::LoadSConst32(FRegister r, int32_t value, Register temp) { - LoadConst32(temp, value); + if (value == 0) { + temp = ZERO; + } else { + LoadConst32(temp, value); + } Mtc1(temp, r); } void MipsAssembler::LoadDConst64(FRegister rd, int64_t value, Register temp) { - LoadConst32(temp, Low32Bits(value)); - Mtc1(temp, rd); - LoadConst32(temp, High32Bits(value)); - Mthc1(temp, rd); + uint32_t low = Low32Bits(value); + uint32_t high = High32Bits(value); + if (low == 0) { + Mtc1(ZERO, rd); + } else { + LoadConst32(temp, low); + Mtc1(temp, rd); + } + if (high == 0) { + MoveToFpuHigh(ZERO, rd); + } else { + LoadConst32(temp, high); + MoveToFpuHigh(temp, rd); + } } void MipsAssembler::Addiu32(Register rt, Register rs, int32_t value, Register temp) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index f569aa858c..01c6490f88 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -156,6 +156,8 @@ class MipsAssembler FINAL : public Assembler { void Srlv(Register rd, Register rt, Register rs); void Rotrv(Register rd, Register rt, Register rs); // R2+ void Srav(Register rd, Register rt, Register rs); + void Ext(Register rd, Register rt, int pos, int size); // R2+ + void Ins(Register rd, Register rt, int pos, int size); // R2+ void 
Lb(Register rt, Register rs, uint16_t imm16); void Lh(Register rt, Register rs, uint16_t imm16); @@ -263,15 +265,23 @@ class MipsAssembler FINAL : public Assembler { void Movf(Register rd, Register rs, int cc); // R2 void Movt(Register rd, Register rs, int cc); // R2 + void TruncLS(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncLD(FRegister fd, FRegister fs); // R2+, FR=1 + void TruncWS(FRegister fd, FRegister fs); + void TruncWD(FRegister fd, FRegister fs); void Cvtsw(FRegister fd, FRegister fs); void Cvtdw(FRegister fd, FRegister fs); void Cvtsd(FRegister fd, FRegister fs); void Cvtds(FRegister fd, FRegister fs); + void Cvtsl(FRegister fd, FRegister fs); // R2+, FR=1 + void Cvtdl(FRegister fd, FRegister fs); // R2+, FR=1 void Mfc1(Register rt, FRegister fs); void Mtc1(Register rt, FRegister fs); void Mfhc1(Register rt, FRegister fs); void Mthc1(Register rt, FRegister fs); + void MoveFromFpuHigh(Register rt, FRegister fs); + void MoveToFpuHigh(Register rt, FRegister fs); void Lwc1(FRegister ft, Register rs, uint16_t imm16); void Ldc1(FRegister ft, Register rs, uint16_t imm16); void Swc1(FRegister ft, Register rs, uint16_t imm16); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 6f8b3e8c57..5fc3deebd3 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -367,6 +367,44 @@ TEST_F(AssemblerMIPSTest, Srav) { DriverStr(RepeatRRR(&mips::MipsAssembler::Srav, "srav ${reg1}, ${reg2}, ${reg3}"), "Srav"); } +TEST_F(AssemblerMIPSTest, Ins) { + std::vector<mips::Register*> regs = GetRegisters(); + WarnOnCombinations(regs.size() * regs.size() * 33 * 16); + std::string expected; + for (mips::Register* reg1 : regs) { + for (mips::Register* reg2 : regs) { + for (int32_t pos = 0; pos < 32; pos++) { + for (int32_t size = 1; pos + size <= 32; size++) { + __ Ins(*reg1, *reg2, pos, size); + std::ostringstream instr; + instr << "ins $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n"; + expected += instr.str(); + } + } + } + } + DriverStr(expected, "Ins"); +} + +TEST_F(AssemblerMIPSTest, Ext) { + std::vector<mips::Register*> regs = GetRegisters(); + WarnOnCombinations(regs.size() * regs.size() * 33 * 16); + std::string expected; + for (mips::Register* reg1 : regs) { + for (mips::Register* reg2 : regs) { + for (int32_t pos = 0; pos < 32; pos++) { + for (int32_t size = 1; pos + size <= 32; size++) { + __ Ext(*reg1, *reg2, pos, size); + std::ostringstream instr; + instr << "ext $" << *reg1 << ", $" << *reg2 << ", " << pos << ", " << size << "\n"; + expected += instr.str(); + } + } + } + } + DriverStr(expected, "Ext"); +} + TEST_F(AssemblerMIPSTest, Lb) { DriverStr(RepeatRRIb(&mips::MipsAssembler::Lb, -16, "lb ${reg1}, {imm}(${reg2})"), "Lb"); } @@ -561,6 +599,14 @@ TEST_F(AssemblerMIPSTest, CvtDW) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdw, "cvt.d.w ${reg1}, ${reg2}"), "CvtDW"); } +TEST_F(AssemblerMIPSTest, CvtSL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsl, "cvt.s.l ${reg1}, ${reg2}"), "CvtSL"); +} + +TEST_F(AssemblerMIPSTest, CvtDL) { + DriverStr(RepeatFF(&mips::MipsAssembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "CvtDL"); +} + TEST_F(AssemblerMIPSTest, CvtSD) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsd, "cvt.s.d ${reg1}, ${reg2}"), "CvtSD"); } @@ -569,6 +615,22 @@ TEST_F(AssemblerMIPSTest, CvtDS) { DriverStr(RepeatFF(&mips::MipsAssembler::Cvtds, "cvt.d.s ${reg1}, ${reg2}"), "CvtDS"); } +TEST_F(AssemblerMIPSTest, TruncWS) { + 
DriverStr(RepeatFF(&mips::MipsAssembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "TruncWS"); +} + +TEST_F(AssemblerMIPSTest, TruncWD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "TruncWD"); +} + +TEST_F(AssemblerMIPSTest, TruncLS) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "TruncLS"); +} + +TEST_F(AssemblerMIPSTest, TruncLD) { + DriverStr(RepeatFF(&mips::MipsAssembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "TruncLD"); +} + TEST_F(AssemblerMIPSTest, Mfc1) { DriverStr(RepeatRF(&mips::MipsAssembler::Mfc1, "mfc1 ${reg1}, ${reg2}"), "Mfc1"); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 107d5bb572..f9ff2df8bb 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -616,6 +616,14 @@ void Mips64Assembler::Bnezc(GpuRegister rs, uint32_t imm21) { EmitI21(0x3E, rs, imm21); } +void Mips64Assembler::Bc1eqz(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0x9, ft, imm16); +} + +void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) { + EmitFI(0x11, 0xD, ft, imm16); +} + void Mips64Assembler::EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, @@ -669,6 +677,14 @@ void Mips64Assembler::EmitBcondc(BranchCondition cond, case kCondGEU: Bgeuc(rs, rt, imm16_21); break; + case kCondF: + CHECK_EQ(rt, ZERO); + Bc1eqz(static_cast<FpuRegister>(rs), imm16_21); + break; + case kCondT: + CHECK_EQ(rt, ZERO); + Bc1nez(static_cast<FpuRegister>(rs), imm16_21); + break; case kUncond: LOG(FATAL) << "Unexpected branch condition " << cond; UNREACHABLE(); @@ -755,6 +771,22 @@ void Mips64Assembler::RoundWD(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xc); } +void Mips64Assembler::TruncLS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncLD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0x9); +} + +void Mips64Assembler::TruncWS(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + +void Mips64Assembler::TruncWD(FpuRegister fd, FpuRegister fs) { + EmitFR(0x11, 0x11, static_cast<FpuRegister>(0), fs, fd, 0xd); +} + void Mips64Assembler::CeilLS(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0xa); } @@ -827,6 +859,86 @@ void Mips64Assembler::MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); } +void Mips64Assembler::CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrS(FpuRegister fd, FpuRegister fs, 
FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x14, ft, fs, fd, 0x13); +} + +void Mips64Assembler::CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x01); +} + +void Mips64Assembler::CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x02); +} + +void Mips64Assembler::CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x03); +} + +void Mips64Assembler::CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x04); +} + +void Mips64Assembler::CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x05); +} + +void Mips64Assembler::CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x06); +} + +void Mips64Assembler::CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x07); +} + +void Mips64Assembler::CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x11); +} + +void Mips64Assembler::CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x12); +} + +void Mips64Assembler::CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft) { + EmitFR(0x11, 0x15, ft, fs, fd, 0x13); +} + void Mips64Assembler::Cvtsw(FpuRegister fd, FpuRegister fs) { EmitFR(0x11, 0x14, static_cast<FpuRegister>(0), fs, fd, 0x20); } @@ -1134,6 +1246,10 @@ Mips64Assembler::Branch::Branch(uint32_t location, CHECK_NE(lhs_reg, ZERO); CHECK_EQ(rhs_reg, ZERO); break; + case kCondF: + case kCondT: + CHECK_EQ(rhs_reg, ZERO); + break; case kUncond: UNREACHABLE(); } @@ -1188,6 +1304,10 @@ Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition( return kCondGEU; case kCondGEU: return kCondLTU; + case kCondF: + return kCondT; + case kCondT: + return kCondF; case kUncond: LOG(FATAL) << "Unexpected branch condition " << cond; } @@ -1567,7 +1687,7 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) { case Branch::kCondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); EmitBcondc(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the forbidden slot. + Nop(); // TODO: improve by filling the forbidden/delay slot. 
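+      // (Background note: on MIPS R6, compact branches such as those emitted by
+      // EmitBcondc are followed by a "forbidden slot" that must not contain
+      // another control-transfer instruction, so the conservative NOP emitted
+      // here is always safe, merely not optimal.)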
break; case Branch::kCall: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -1657,6 +1777,14 @@ void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) { Bcond(label, kCondNEZ, rs); } +void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO); +} + +void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { + Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); +} + void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset) { if (!IsInt<16>(offset)) { diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 57fc19a6e9..3262640ce7 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -227,6 +227,8 @@ class Mips64Assembler FINAL : public Assembler { void Bnec(GpuRegister rs, GpuRegister rt, uint16_t imm16); void Beqzc(GpuRegister rs, uint32_t imm21); void Bnezc(GpuRegister rs, uint32_t imm21); + void Bc1eqz(FpuRegister ft, uint16_t imm16); + void Bc1nez(FpuRegister ft, uint16_t imm16); void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft); @@ -248,6 +250,10 @@ class Mips64Assembler FINAL : public Assembler { void RoundLD(FpuRegister fd, FpuRegister fs); void RoundWS(FpuRegister fd, FpuRegister fs); void RoundWD(FpuRegister fd, FpuRegister fs); + void TruncLS(FpuRegister fd, FpuRegister fs); + void TruncLD(FpuRegister fd, FpuRegister fs); + void TruncWS(FpuRegister fd, FpuRegister fs); + void TruncWD(FpuRegister fd, FpuRegister fs); void CeilLS(FpuRegister fd, FpuRegister fs); void CeilLD(FpuRegister fd, FpuRegister fs); void CeilWS(FpuRegister fd, FpuRegister fs); @@ -266,6 +272,26 @@ class Mips64Assembler FINAL : public Assembler { void MinD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxS(FpuRegister fd, FpuRegister fs, FpuRegister ft); void MaxD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeS(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUnD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpEqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUeqD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLtD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUltD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpLeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUleD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpOrD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpUneD(FpuRegister fd, FpuRegister fs, FpuRegister ft); + void CmpNeD(FpuRegister fd, FpuRegister fs, FpuRegister ft); void Cvtsw(FpuRegister fd, FpuRegister fs); void Cvtdw(FpuRegister fd, FpuRegister fs); @@ -317,6 +343,8 @@ class Mips64Assembler FINAL : public Assembler { void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label); void 
Beqzc(GpuRegister rs, Mips64Label* label); void Bnezc(GpuRegister rs, Mips64Label* label); + void Bc1eqz(FpuRegister ft, Mips64Label* label); + void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); @@ -474,6 +502,8 @@ class Mips64Assembler FINAL : public Assembler { kCondNEZ, kCondLTU, kCondGEU, + kCondF, // Floating-point predicate false. + kCondT, // Floating-point predicate true. kUncond, }; friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 29a5a88316..7d79be2731 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -403,6 +403,106 @@ TEST_F(AssemblerMIPS64Test, MaxD) { DriverStr(RepeatFFF(&mips64::Mips64Assembler::MaxD, "max.d ${reg1}, ${reg2}, ${reg3}"), "max.d"); } +TEST_F(AssemblerMIPS64Test, CmpUnS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnS, "cmp.un.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqS, "cmp.eq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqS, "cmp.ueq.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtS, "cmp.lt.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltS, "cmp.ult.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeS, "cmp.le.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUleS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleS, "cmp.ule.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrS, "cmp.or.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneS, "cmp.une.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeS) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeS, "cmp.ne.s ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.s"); +} + +TEST_F(AssemblerMIPS64Test, CmpUnD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUnD, "cmp.un.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.un.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpEqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpEqD, "cmp.eq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.eq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUeqD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUeqD, "cmp.ueq.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ueq.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLtD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLtD, "cmp.lt.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.lt.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUltD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUltD, "cmp.ult.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ult.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpLeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpLeD, "cmp.le.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.le.d"); 
+} + +TEST_F(AssemblerMIPS64Test, CmpUleD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUleD, "cmp.ule.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ule.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpOrD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpOrD, "cmp.or.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.or.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpUneD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpUneD, "cmp.une.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.une.d"); +} + +TEST_F(AssemblerMIPS64Test, CmpNeD) { + DriverStr(RepeatFFF(&mips64::Mips64Assembler::CmpNeD, "cmp.ne.d ${reg1}, ${reg2}, ${reg3}"), + "cmp.ne.d"); +} + TEST_F(AssemblerMIPS64Test, CvtDL) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtdl, "cvt.d.l ${reg1}, ${reg2}"), "cvt.d.l"); } @@ -427,6 +527,22 @@ TEST_F(AssemblerMIPS64Test, CvtSW) { DriverStr(RepeatFF(&mips64::Mips64Assembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "cvt.s.w"); } +TEST_F(AssemblerMIPS64Test, TruncWS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWS, "trunc.w.s ${reg1}, ${reg2}"), "trunc.w.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncWD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncWD, "trunc.w.d ${reg1}, ${reg2}"), "trunc.w.d"); +} + +TEST_F(AssemblerMIPS64Test, TruncLS) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLS, "trunc.l.s ${reg1}, ${reg2}"), "trunc.l.s"); +} + +TEST_F(AssemblerMIPS64Test, TruncLD) { + DriverStr(RepeatFF(&mips64::Mips64Assembler::TruncLD, "trunc.l.d ${reg1}, ${reg2}"), "trunc.l.d"); +} + //////////////// // CALL / JMP // //////////////// @@ -591,6 +707,58 @@ TEST_F(AssemblerMIPS64Test, Bgeuc) { BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc"); } +TEST_F(AssemblerMIPS64Test, Bc1eqz) { + mips64::Mips64Label label; + __ Bc1eqz(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1eqz(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1eqz $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1eqz $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1eqz"); +} + +TEST_F(AssemblerMIPS64Test, Bc1nez) { + mips64::Mips64Label label; + __ Bc1nez(mips64::F0, &label); + constexpr size_t kAdduCount1 = 63; + for (size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bind(&label); + constexpr size_t kAdduCount2 = 64; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO); + } + __ Bc1nez(mips64::F31, &label); + + std::string expected = + ".set noreorder\n" + "bc1nez $f0, 1f\n" + "nop\n" + + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") + + "1:\n" + + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") + + "bc1nez $f31, 1b\n" + "nop\n"; + DriverStr(expected, "Bc1nez"); +} + TEST_F(AssemblerMIPS64Test, LongBeqc) { mips64::Mips64Label label; __ Beqc(mips64::A0, mips64::A1, &label); diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc index 42ed8810f8..244a5fedbe 100644 --- a/compiler/utils/swap_space.cc +++ b/compiler/utils/swap_space.cc @@ -18,6 +18,7 @@ #include <algorithm> #include <numeric> +#include <sys/mman.h> #include "base/logging.h" #include "base/macros.h" @@ -44,23 +45,17 @@ static void 
   }
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void RemoveChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        typename FreeBySizeSet::const_iterator free_by_size_pos) {
+void SwapSpace::RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) {
   auto free_by_start_pos = free_by_size_pos->second;
-  free_by_size->erase(free_by_size_pos);
-  free_by_start->erase(free_by_start_pos);
+  free_by_size_.erase(free_by_size_pos);
+  free_by_start_.erase(free_by_start_pos);
 }
 
-template <typename FreeByStartSet, typename FreeBySizeSet>
-static void InsertChunk(FreeByStartSet* free_by_start,
-                        FreeBySizeSet* free_by_size,
-                        const SpaceChunk& chunk) {
+inline void SwapSpace::InsertChunk(const SpaceChunk& chunk) {
   DCHECK_NE(chunk.size, 0u);
-  auto insert_result = free_by_start->insert(chunk);
+  auto insert_result = free_by_start_.insert(chunk);
   DCHECK(insert_result.second);
-  free_by_size->emplace(chunk.size, insert_result.first);
+  free_by_size_.emplace(chunk.size, insert_result.first);
 }
 
 SwapSpace::SwapSpace(int fd, size_t initial_size)
@@ -69,10 +64,18 @@ SwapSpace::SwapSpace(int fd, size_t initial_size)
       lock_("SwapSpace lock", static_cast<LockLevel>(LockLevel::kDefaultMutexLevel - 1)) {
   // Assume that the file is unlinked.
-  InsertChunk(&free_by_start_, &free_by_size_, NewFileChunk(initial_size));
+  InsertChunk(NewFileChunk(initial_size));
 }
 
 SwapSpace::~SwapSpace() {
+  // Unmap all mmapped chunks. Nothing should be allocated anymore at
+  // this point, so there should be only full size chunks in free_by_start_.
+  for (const SpaceChunk& chunk : free_by_start_) {
+    if (munmap(chunk.ptr, chunk.size) != 0) {
+      PLOG(ERROR) << "Failed to unmap swap space chunk at "
+                  << static_cast<const void*>(chunk.ptr) << " size=" << chunk.size;
+    }
+  }
   // All arenas are backed by the same file. Just close the descriptor.
   close(fd_);
 }
@@ -113,7 +116,7 @@ void* SwapSpace::Alloc(size_t size) {
       : free_by_size_.lower_bound(FreeBySizeEntry { size, free_by_start_.begin() });
   if (it != free_by_size_.end()) {
     old_chunk = *it->second;
-    RemoveChunk(&free_by_start_, &free_by_size_, it);
+    RemoveChunk(it);
   } else {
     // Not a big enough free chunk, need to increase file size.
     old_chunk = NewFileChunk(size);
@@ -124,13 +127,13 @@ void* SwapSpace::Alloc(size_t size) {
   if (old_chunk.size != size) {
     // Insert the remainder.
     SpaceChunk new_chunk = { old_chunk.ptr + size, old_chunk.size - size };
-    InsertChunk(&free_by_start_, &free_by_size_, new_chunk);
+    InsertChunk(new_chunk);
   }
   return ret;
 }
 
-SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
+SwapSpace::SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
 #if !defined(__APPLE__)
   size_t next_part = std::max(RoundUp(min_size, kPageSize), RoundUp(kMininumMapSize, kPageSize));
   int result = TEMP_FAILURE_RETRY(ftruncate64(fd_, size_ + next_part));
@@ -159,7 +162,7 @@ SpaceChunk SwapSpace::NewFileChunk(size_t min_size) {
 }
 
 // TODO: Full coalescing.
-void SwapSpace::Free(void* ptrV, size_t size) {
+void SwapSpace::Free(void* ptr, size_t size) {
   MutexLock lock(Thread::Current(), lock_);
   size = RoundUp(size, 8U);
@@ -168,7 +171,7 @@ void SwapSpace::Free(void* ptrV, size_t size) {
     free_before = CollectFree(free_by_start_, free_by_size_);
   }
 
-  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptrV), size };
+  SpaceChunk chunk = { reinterpret_cast<uint8_t*>(ptr), size };
   auto it = free_by_start_.lower_bound(chunk);
   if (it != free_by_start_.begin()) {
     auto prev = it;
@@ -180,7 +183,7 @@ void SwapSpace::Free(void* ptrV, size_t size) {
       chunk.ptr -= prev->size;
       auto erase_pos = free_by_size_.find(FreeBySizeEntry { prev->size, prev });
       DCHECK(erase_pos != free_by_size_.end());
-      RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+      RemoveChunk(erase_pos);
       // "prev" is invalidated but "it" remains valid.
     }
   }
@@ -191,11 +194,11 @@ void SwapSpace::Free(void* ptrV, size_t size) {
     chunk.size += it->size;
     auto erase_pos = free_by_size_.find(FreeBySizeEntry { it->size, it });
     DCHECK(erase_pos != free_by_size_.end());
-    RemoveChunk(&free_by_start_, &free_by_size_, erase_pos);
+    RemoveChunk(erase_pos);
     // "it" is invalidated but we don't need it anymore.
   }
 
-  InsertChunk(&free_by_start_, &free_by_size_, chunk);
+  InsertChunk(chunk);
 
   if (kCheckFreeMaps) {
     size_t free_after = CollectFree(free_by_start_, free_by_size_);
diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h
index 9127b6b096..b659f1d3c7 100644
--- a/compiler/utils/swap_space.h
+++ b/compiler/utils/swap_space.h
@@ -19,42 +19,17 @@
 
 #include <cstdlib>
 #include <list>
+#include <vector>
 #include <set>
 #include <stdint.h>
 #include <stddef.h>
 
-#include "base/debug_stack.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "mem_map.h"
 
 namespace art {
 
-// Chunk of space.
-struct SpaceChunk {
-  uint8_t* ptr;
-  size_t size;
-
-  uintptr_t Start() const {
-    return reinterpret_cast<uintptr_t>(ptr);
-  }
-  uintptr_t End() const {
-    return reinterpret_cast<uintptr_t>(ptr) + size;
-  }
-};
-
-inline bool operator==(const SpaceChunk& lhs, const SpaceChunk& rhs) {
-  return (lhs.size == rhs.size) && (lhs.ptr == rhs.ptr);
-}
-
-class SortChunkByPtr {
- public:
-  bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
-    return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
-  }
-};
-
 // An arena pool that creates arenas backed by an mmaped file.
 class SwapSpace {
  public:
@@ -68,17 +43,27 @@ class SwapSpace {
   }
 
  private:
-  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+  // Chunk of space.
+  struct SpaceChunk {
+    uint8_t* ptr;
+    size_t size;
 
-  int fd_;
-  size_t size_;
-  std::list<SpaceChunk> maps_;
+    uintptr_t Start() const {
+      return reinterpret_cast<uintptr_t>(ptr);
+    }
+    uintptr_t End() const {
+      return reinterpret_cast<uintptr_t>(ptr) + size;
+    }
+  };
 
-  // NOTE: Boost.Bimap would be useful for the two following members.
+  class SortChunkByPtr {
+   public:
+    bool operator()(const SpaceChunk& a, const SpaceChunk& b) const {
+      return reinterpret_cast<uintptr_t>(a.ptr) < reinterpret_cast<uintptr_t>(b.ptr);
+    }
+  };
 
-  // Map start of a free chunk to its size.
   typedef std::set<SpaceChunk, SortChunkByPtr> FreeByStartSet;
-  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
 
   // Map size to an iterator to free_by_start_'s entry.
   typedef std::pair<size_t, FreeByStartSet::const_iterator> FreeBySizeEntry;
@@ -92,6 +77,21 @@ class SwapSpace {
     }
   };
   typedef std::set<FreeBySizeEntry, FreeBySizeComparator> FreeBySizeSet;
+
+  SpaceChunk NewFileChunk(size_t min_size) REQUIRES(lock_);
+
+  void RemoveChunk(FreeBySizeSet::const_iterator free_by_size_pos) REQUIRES(lock_);
+  void InsertChunk(const SpaceChunk& chunk) REQUIRES(lock_);
+
+  int fd_;
+  size_t size_;
+  std::list<SpaceChunk> maps_;
+
+  // NOTE: Boost.Bimap would be useful for the two following members.
+
+  // Map start of a free chunk to its size.
+  FreeByStartSet free_by_start_ GUARDED_BY(lock_);
+
   // Free chunks ordered by size.
   FreeBySizeSet free_by_size_ GUARDED_BY(lock_);
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -126,6 +126,9 @@ class SwapAllocator<void> {
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
@@ -201,9 +204,22 @@ class SwapAllocator {
 
   template <typename U>
   friend class SwapAllocator;
+
+  template <typename U>
+  friend bool operator==(const SwapAllocator<U>& lhs, const SwapAllocator<U>& rhs);
 };
 
 template <typename T>
+inline bool operator==(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return lhs.swap_space_ == rhs.swap_space_;
+}
+
+template <typename T>
+inline bool operator!=(const SwapAllocator<T>& lhs, const SwapAllocator<T>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename T>
 using SwapVector = std::vector<T, SwapAllocator<T>>;
 template <typename T, typename Comparator>
 using SwapSet = std::set<T, Comparator, SwapAllocator<T>>;
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 9eb5e67041..db072678ef 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1213,6 +1213,7 @@ void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
 
 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOperandSizeOverride();
   EmitOptionalRex32(address);
   EmitComplex(7, address, imm);
@@ -1221,6 +1222,7 @@ void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOptionalRex32(reg);
   EmitComplex(7, Operand(reg), imm);
 }
@@ -1252,6 +1254,7 @@ void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
 
 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  CHECK(imm.is_int32());
   EmitOptionalRex32(address);
   EmitComplex(7, address, imm);
 }
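
Editor's note on the MIPS64 additions above: MIPS release 6 drops the old c.cond.fmt condition-code model, so cmp.cond.s/cmp.cond.d write an all-ones or all-zeros mask into a destination FPU register, and bc1eqz/bc1nez branch on whether bit 0 of that register is clear or set. A minimal sketch of how the two new assembler entry points chain together; the register choices and the helper name are illustrative, not taken from the patch:

// Branch to 'taken' when $f2 < $f4 (single-precision), R6 style:
// cmp.lt.s writes ~0 into $f0 on true and 0 on false; bc1nez tests bit 0.
void EmitFloatLessBranch(mips64::Mips64Assembler* assembler, mips64::Mips64Label* taken) {
  assembler->CmpLtS(mips64::F0, mips64::F2, mips64::F4);  // $f0 = ($f2 < $f4) ? ~0 : 0
  assembler->Bc1nez(mips64::F0, taken);                   // branch if predicate true
}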
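Editor's note on the swap_space changes: SwapSpace keeps two ordered views of its free list, free_by_start_ (by address, for coalescing in Free()) and free_by_size_ (by size, for best-fit lookup in Alloc()); RemoveChunk/InsertChunk exist to keep the two in sync, which is why the patch turns them into members. A self-contained toy model of the same dual-index scheme, not ART code, with no locking or file backing:

#include <cstddef>
#include <cstdint>
#include <iterator>
#include <map>

// Toy dual-index free list: 'by_start_' drives coalescing on free,
// 'by_size_' drives best-fit lookup on allocation. Illustrative only.
class ToyFreeList {
 public:
  uint8_t* Alloc(size_t size) {
    auto it = by_size_.lower_bound(size);  // best fit: smallest chunk >= size
    if (it == by_size_.end()) {
      return nullptr;  // no free chunk is large enough
    }
    uint8_t* ptr = it->second;
    size_t old_size = it->first;
    by_size_.erase(it);
    by_start_.erase(ptr);
    if (old_size != size) {
      InsertChunk(ptr + size, old_size - size);  // keep the remainder free
    }
    return ptr;
  }

  void Free(uint8_t* ptr, size_t size) {
    // Merge with the lower neighbor if it ends exactly at 'ptr'.
    auto next = by_start_.lower_bound(ptr);
    if (next != by_start_.begin()) {
      auto prev = std::prev(next);
      if (prev->first + prev->second == ptr) {
        ptr = prev->first;
        size += prev->second;
        EraseBySize(prev->second, prev->first);
        by_start_.erase(prev);
      }
    }
    // Merge with the upper neighbor if it starts exactly at 'ptr + size'.
    if (next != by_start_.end() && ptr + size == next->first) {
      size += next->second;
      EraseBySize(next->second, next->first);
      by_start_.erase(next);
    }
    InsertChunk(ptr, size);
  }

 private:
  void InsertChunk(uint8_t* ptr, size_t size) {
    by_start_.emplace(ptr, size);
    by_size_.emplace(size, ptr);
  }

  void EraseBySize(size_t size, uint8_t* ptr) {
    // Linear scan over equal-size entries; fine for a toy.
    for (auto it = by_size_.lower_bound(size); it != by_size_.end() && it->first == size; ++it) {
      if (it->second == ptr) {
        by_size_.erase(it);
        return;
      }
    }
  }

  std::map<uint8_t*, size_t> by_start_;      // free chunks keyed by start address
  std::multimap<size_t, uint8_t*> by_size_;  // free chunks keyed by size
};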
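Editor's note on the SwapAllocator equality operators: the standard allocator model requires allocators to be equality-comparable, with equality meaning storage allocated by one can be freed by the other; for SwapAllocator that holds exactly when both wrap the same SwapSpace. A hedged usage sketch, assuming SwapAllocator keeps its converting constructor from SwapSpace*; the helper name is hypothetical:

// Two SwapAllocators are interchangeable exactly when they share a SwapSpace.
void AllocatorEqualitySketch(SwapSpace* swap) {
  SwapAllocator<int> a(swap);
  SwapAllocator<int> b(swap);
  SwapAllocator<int> detached(nullptr);
  CHECK(a == b);         // same backing SwapSpace: storage is interchangeable
  CHECK(a != detached);  // different backing: containers must not mix them
  SwapVector<int> v(a);  // vector whose element storage lives in the swap file
  v.push_back(42);
}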
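Editor's note on the CHECK(imm.is_int32()) guards: x86-64 ALU instructions such as cmp encode at most a 32-bit immediate, which the CPU sign-extends to 64 bits, so a wider constant must first be materialized in a register. A standalone sketch of the range test a caller would apply; the function name is illustrative, not ART's:

#include <cstdint>
#include <limits>

// True when 'value' can be used directly as an x86-64 cmp immediate:
// the hardware sign-extends the 32-bit immediate field to 64 bits.
bool FitsInCmpImmediate(int64_t value) {
  return value >= std::numeric_limits<int32_t>::min() &&
         value <= std::numeric_limits<int32_t>::max();
}

A code generator would test this first and otherwise load the constant into a scratch register (for example with movq) before comparing; the new CHECKs turn a silently truncated immediate into an immediate, visible failure.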