Diffstat (limited to 'compiler')
46 files changed, 1594 insertions, 555 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk
index 6f32e07ce1..87eff82982 100644
--- a/compiler/Android.mk
+++ b/compiler/Android.mk
@@ -66,7 +66,6 @@ LIBART_COMPILER_SRC_FILES := \
 	jit/jit_compiler.cc \
 	jni/quick/calling_convention.cc \
 	jni/quick/jni_compiler.cc \
-	optimizing/boolean_simplifier.cc \
 	optimizing/bounds_check_elimination.cc \
 	optimizing/builder.cc \
 	optimizing/code_generator.cc \
@@ -94,6 +93,7 @@ LIBART_COMPILER_SRC_FILES := \
 	optimizing/prepare_for_register_allocation.cc \
 	optimizing/reference_type_propagation.cc \
 	optimizing/register_allocator.cc \
+	optimizing/select_generator.cc \
 	optimizing/sharpening.cc \
 	optimizing/side_effects_analysis.cc \
 	optimizing/ssa_builder.cc \
diff --git a/compiler/dex/quick/quick_cfi_test.cc b/compiler/dex/quick/quick_cfi_test.cc
index c5df134493..0cd41bbf4c 100644
--- a/compiler/dex/quick/quick_cfi_test.cc
+++ b/compiler/dex/quick/quick_cfi_test.cc
@@ -71,6 +71,7 @@ class QuickCFITest : public CFITest {
       nullptr,
       false,
       "",
+      false,
       false);
   VerificationResults verification_results(&compiler_options);
   DexFileToMethodInlinerMap method_inliner_map;
diff --git a/compiler/dex/quick/x86/quick_assemble_x86_test.cc b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
index d63878d6b9..efdc333261 100644
--- a/compiler/dex/quick/x86/quick_assemble_x86_test.cc
+++ b/compiler/dex/quick/x86/quick_assemble_x86_test.cc
@@ -54,6 +54,7 @@ class QuickAssembleX86TestBase : public testing::Test {
        nullptr,
        false,
        "",
+       false,
        false));
    verification_results_.reset(new VerificationResults(compiler_options_.get()));
    method_inliner_map_.reset(new DexFileToMethodInlinerMap());
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c483f33ae6..f1b745895f 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -39,6 +39,7 @@
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
@@ -365,7 +366,7 @@ CompilerDriver::CompilerDriver(
       classes_to_compile_(compiled_classes),
       methods_to_compile_(compiled_methods),
       had_hard_verifier_failure_(false),
-      thread_count_(thread_count),
+      parallel_thread_count_(thread_count),
       stats_(new AOTCompilationStats),
       dump_stats_(dump_stats),
       dump_passes_(dump_passes),
@@ -435,24 +436,27 @@ void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
-  std::unique_ptr<ThreadPool> thread_pool(
-      new ThreadPool("Compiler driver thread pool", thread_count_ - 1));
+
+  InitializeThreadPools();
+
   VLOG(compiler) << "Before precompile " << GetMemoryUsageString(false);
   // Precompile:
   // 1) Load image classes
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, thread_pool.get(), timings);
+  PreCompile(class_loader, dex_files, timings);
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
   if (!GetCompilerOptions().VerifyAtRuntime()) {
-    Compile(class_loader, dex_files, thread_pool.get(), timings);
+    Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
     stats_->Dump();
   }
+
+  FreeThreadPools();
 }

 static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
@@ -653,8 +657,9 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
   std::vector<const DexFile*> dex_files;
   dex_files.push_back(dex_file);

-  std::unique_ptr<ThreadPool> thread_pool(new ThreadPool("Compiler driver thread pool", 0U));
-  PreCompile(jclass_loader, dex_files, thread_pool.get(), timings);
+  InitializeThreadPools();
+
+  PreCompile(jclass_loader, dex_files, timings);

   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -677,20 +682,147 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t
                     true,
                     dex_cache);

+  FreeThreadPools();
+
   self->GetJniEnv()->DeleteGlobalRef(jclass_loader);
 }

-void CompilerDriver::Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Resolve(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
+  // Resolution allocates classes and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* resolve_thread_pool = force_determinism
+      ? single_thread_pool_.get()
+      : parallel_thread_pool_.get();
+  size_t resolve_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    ResolveDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    ResolveDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   resolve_thread_pool,
+                   resolve_thread_count,
+                   timings);
+  }
+}
+
+// Resolve const-strings in the code. Done to have deterministic allocation behavior. Right now
+// this is single-threaded for simplicity.
+// TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
+//       stable order.
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const DexFile& dex_file,
+                                const DexFile::CodeItem* code_item) {
+  if (code_item == nullptr) {
+    // Abstract or native method.
+    return;
+  }
+
+  const uint16_t* code_ptr = code_item->insns_;
+  const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+
+  while (code_ptr < code_end) {
+    const Instruction* inst = Instruction::At(code_ptr);
+    switch (inst->Opcode()) {
+      case Instruction::CONST_STRING: {
+        uint32_t string_index = inst->VRegB_21c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+      case Instruction::CONST_STRING_JUMBO: {
+        uint32_t string_index = inst->VRegB_31c();
+        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        break;
+      }
+
+      default:
+        break;
+    }
+
+    code_ptr += inst->SizeInCodeUnits();
+  }
+}
+
+static void ResolveConstStrings(CompilerDriver* driver,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  for (const DexFile* dex_file : dex_files) {
+    TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
+
+    size_t class_def_count = dex_file->NumClassDefs();
+    for (size_t class_def_index = 0; class_def_index < class_def_count; ++class_def_index) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+
+      const uint8_t* class_data = dex_file->GetClassData(class_def);
+      if (class_data == nullptr) {
+        // empty class, probably a marker interface
+        continue;
+      }
+
+      ClassDataItemIterator it(*dex_file, class_data);
+      // Skip fields
+      while (it.HasNextStaticField()) {
+        it.Next();
+      }
+      while (it.HasNextInstanceField()) {
+        it.Next();
+      }
+
+      bool compilation_enabled = driver->IsClassToCompile(
+          dex_file->StringByTypeIdx(class_def.class_idx_));
+      if (!compilation_enabled) {
+        // Compilation is skipped, do not resolve const-string in code of this class.
+        // TODO: Make sure that inlining honors this.
+        continue;
+      }
+
+      // Direct methods.
+      int64_t previous_direct_method_idx = -1;
+      while (it.HasNextDirectMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_direct_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_direct_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      // Virtual methods.
+      int64_t previous_virtual_method_idx = -1;
+      while (it.HasNextVirtualMethod()) {
+        uint32_t method_idx = it.GetMemberIndex();
+        if (method_idx == previous_virtual_method_idx) {
+          // smali can create dex files with two encoded_methods sharing the same method_idx
+          // http://code.google.com/p/smali/issues/detail?id=119
+          it.Next();
+          continue;
+        }
+        previous_virtual_method_idx = method_idx;
+        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        it.Next();
+      }
+      DCHECK(!it.HasNext());
+    }
   }
 }

-void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                ThreadPool* thread_pool, TimingLogger* timings) {
+inline void CompilerDriver::CheckThreadPools() {
+  DCHECK(parallel_thread_pool_ != nullptr);
+  DCHECK(single_thread_pool_ != nullptr);
+}
+
+void CompilerDriver::PreCompile(jobject class_loader,
+                                const std::vector<const DexFile*>& dex_files,
+                                TimingLogger* timings) {
+  CheckThreadPools();
+
   LoadImageClasses(timings);
   VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);

@@ -700,20 +832,26 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De
   // We need to resolve for never_verify since it needs to run dex to dex to add the
   // RETURN_VOID_NO_BARRIER.
   if (never_verify || verification_enabled) {
-    Resolve(class_loader, dex_files, thread_pool, timings);
+    Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }

   if (never_verify) {
     VLOG(compiler) << "Verify none mode specified, skipping verification.";
-    SetVerified(class_loader, dex_files, thread_pool, timings);
+    SetVerified(class_loader, dex_files, timings);
   }

   if (!verification_enabled) {
     return;
   }

-  Verify(class_loader, dex_files, thread_pool, timings);
+  if (GetCompilerOptions().IsForceDeterminism() && IsBootImage()) {
+    // Resolve strings from const-string. Do this now to have a deterministic image.
+    ResolveConstStrings(this, dex_files, timings);
+    VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
+  }
+
+  Verify(class_loader, dex_files, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);

   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -721,7 +859,7 @@ void CompilerDriver::PreCompile(jobject class_loader, const std::vector<const De
                << "situations. Please check the log.";
   }

-  InitializeClasses(class_loader, dex_files, thread_pool, timings);
+  InitializeClasses(class_loader, dex_files, timings);
   VLOG(compiler) << "InitializeClasses: " << GetMemoryUsageString(false);

   UpdateImageClasses(timings);
@@ -1759,6 +1897,9 @@ class ParallelCompilationManager {
     // Wait for all the worker threads to finish.
     thread_pool_->Wait(self, true, false);
+
+    // And stop the workers accepting jobs.
+    thread_pool_->StopWorkers(self);
   }

   size_t NextIndex() {
@@ -1995,9 +2136,12 @@ class ResolveTypeVisitor : public CompilationVisitor {
   const ParallelCompilationManager* const manager_;
 };

-void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::ResolveDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();

   // TODO: we could resolve strings here, although the string table is largely filled with class
@@ -2010,27 +2154,43 @@ void CompilerDriver::ResolveDexFile(jobject class_loader, const DexFile& dex_fil
     // classdefs are resolved by ResolveClassFieldsAndMethods.
     TimingLogger::ScopedTiming t("Resolve Types", timings);
     ResolveTypeVisitor visitor(&context);
-    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count_);
+    context.ForAll(0, dex_file.NumTypeIds(), &visitor, thread_count);
   }

   TimingLogger::ScopedTiming t("Resolve MethodsAndFields", timings);
   ResolveClassFieldsAndMethodsVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }

-void CompilerDriver::SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                                 ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::SetVerified(jobject class_loader,
+                                 const std::vector<const DexFile*>& dex_files,
+                                 TimingLogger* timings) {
+  // This can be run in parallel.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    SetVerifiedDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    SetVerifiedDexFile(class_loader,
+                       *dex_file,
+                       dex_files,
+                       parallel_thread_pool_.get(),
+                       parallel_thread_count_,
+                       timings);
   }
 }

-void CompilerDriver::Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                            ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Verify(jobject class_loader,
+                            const std::vector<const DexFile*>& dex_files,
+                            TimingLogger* timings) {
+  // Note: verification should not be pulling in classes anymore when compiling the boot image,
+  //       as all should have been resolved before. As such, doing this in parallel should still
+  //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    VerifyDexFile(class_loader,
+                  *dex_file,
+                  dex_files,
+                  parallel_thread_pool_.get(),
+                  parallel_thread_count_,
+                  timings);
   }
 }

@@ -2104,15 +2264,18 @@ class VerifyClassVisitor : public CompilationVisitor {
   const ParallelCompilationManager* const manager_;
 };

-void CompilerDriver::VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::VerifyDexFile(jobject class_loader,
+                                   const DexFile& dex_file,
                                    const std::vector<const DexFile*>& dex_files,
-                                   ThreadPool* thread_pool, TimingLogger* timings) {
+                                   ThreadPool* thread_pool,
+                                   size_t thread_count,
+                                   TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   VerifyClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }

 class SetVerifiedClassVisitor : public CompilationVisitor {
@@ -2162,15 +2325,18 @@ class SetVerifiedClassVisitor : public CompilationVisitor {
   const ParallelCompilationManager* const manager_;
 };

-void CompilerDriver::SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::SetVerifiedDexFile(jobject class_loader,
+                                        const DexFile& dex_file,
                                         const std::vector<const DexFile*>& dex_files,
-                                        ThreadPool* thread_pool, TimingLogger* timings) {
+                                        ThreadPool* thread_pool,
+                                        size_t thread_count,
+                                        TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Verify Dex File", timings);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files,
                                      thread_pool);
   SetVerifiedClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }

 class InitializeClassVisitor : public CompilationVisitor {
@@ -2271,31 +2437,37 @@ class InitializeClassVisitor : public CompilationVisitor {
   const ParallelCompilationManager* const manager_;
 };

-void CompilerDriver::InitializeClasses(jobject jni_class_loader, const DexFile& dex_file,
+void CompilerDriver::InitializeClasses(jobject jni_class_loader,
+                                       const DexFile& dex_file,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   TimingLogger::ScopedTiming t("InitializeNoClinit", timings);
+
+  // Initialization allocates objects and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* init_thread_pool = force_determinism
+      ? single_thread_pool_.get()
+      : parallel_thread_pool_.get();
+  size_t init_thread_count = force_determinism ? 1U : parallel_thread_count_;
+
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
-                                     thread_pool);
-  size_t thread_count;
+                                     init_thread_pool);
   if (IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
-    thread_count = 1U;
-  } else {
-    thread_count = thread_count_;
+    init_thread_count = 1U;
   }
   InitializeClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, init_thread_count);
 }

 void CompilerDriver::InitializeClasses(jobject class_loader,
                                        const std::vector<const DexFile*>& dex_files,
-                                       ThreadPool* thread_pool, TimingLogger* timings) {
+                                       TimingLogger* timings) {
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
+    InitializeClasses(class_loader, *dex_file, dex_files, timings);
   }
   if (IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
@@ -2303,8 +2475,9 @@ void CompilerDriver::InitializeClasses(jobject class_loader,
   }
 }

-void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                             ThreadPool* thread_pool, TimingLogger* timings) {
+void CompilerDriver::Compile(jobject class_loader,
+                             const std::vector<const DexFile*>& dex_files,
+                             TimingLogger* timings) {
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] "
               << ((profile_compilation_info_ == nullptr)
@@ -2314,7 +2487,12 @@ void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFi
   for (size_t i = 0; i != dex_files.size(); ++i) {
     const DexFile* dex_file = dex_files[i];
     CHECK(dex_file != nullptr);
-    CompileDexFile(class_loader, *dex_file, dex_files, thread_pool, timings);
+    CompileDexFile(class_loader,
+                   *dex_file,
+                   dex_files,
+                   parallel_thread_pool_.get(),
+                   parallel_thread_count_,
+                   timings);
   }
   VLOG(compiler) << "Compile: " << GetMemoryUsageString(false);
 }
@@ -2421,14 +2599,17 @@ class CompileClassVisitor : public CompilationVisitor {
   const ParallelCompilationManager* const manager_;
 };

-void CompilerDriver::CompileDexFile(jobject class_loader, const DexFile& dex_file,
+void CompilerDriver::CompileDexFile(jobject class_loader,
+                                    const DexFile& dex_file,
                                     const std::vector<const DexFile*>& dex_files,
-                                    ThreadPool* thread_pool, TimingLogger* timings) {
+                                    ThreadPool* thread_pool,
+                                    size_t thread_count,
+                                    TimingLogger* timings) {
   TimingLogger::ScopedTiming t("Compile Dex File", timings);
   ParallelCompilationManager context(Runtime::Current()->GetClassLinker(), class_loader, this,
                                      &dex_file, dex_files, thread_pool);
   CompileClassVisitor visitor(&context);
-  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count_);
+  context.ForAll(0, dex_file.NumClassDefs(), &visitor, thread_count);
 }

 void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
@@ -2590,4 +2771,16 @@ bool CompilerDriver::MayInlineInternal(const DexFile* inlined_from,
   return true;
 }

+void CompilerDriver::InitializeThreadPools() {
+  size_t parallel_count = parallel_thread_count_ > 0 ? parallel_thread_count_ - 1 : 0;
+  parallel_thread_pool_.reset(
+      new ThreadPool("Compiler driver thread pool", parallel_count));
+  single_thread_pool_.reset(new ThreadPool("Single-threaded Compiler driver thread pool", 0));
+}
+
+void CompilerDriver::FreeThreadPools() {
+  parallel_thread_pool_.reset();
+  single_thread_pool_.reset();
+}
+
 }  // namespace art
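For illustration, the pool-selection pattern that compiler_driver.cc now uses in Resolve() and InitializeClasses() can be sketched standalone as follows. This is an illustrative sketch, not part of the commit: Pools and PoolFor are hypothetical names, and ART's ThreadPool is stubbed out.

    // Hypothetical, simplified sketch of the dual-pool pattern above.
    #include <cstddef>

    class ThreadPool {};  // stand-in for art::ThreadPool

    struct Pools {
      ThreadPool* parallel;        // corresponds to parallel_thread_pool_
      ThreadPool* single;          // corresponds to single_thread_pool_
      std::size_t parallel_count;  // corresponds to parallel_thread_count_
    };

    // Phases that allocate runtime objects (resolution, class initialization) pick
    // the single-threaded pool when determinism is forced, so allocation order is
    // stable across runs; read-mostly phases such as verification stay parallel.
    ThreadPool* PoolFor(bool force_determinism, const Pools& pools, std::size_t* thread_count) {
      *thread_count = force_determinism ? 1u : pools.parallel_count;
      return force_determinism ? pools.single : pools.parallel;
    }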
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 6a2f7bfd4e..5e35cbb309 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -411,7 +411,7 @@ class CompilerDriver {
   }

   size_t GetThreadCount() const {
-    return thread_count_;
+    return parallel_thread_count_;
   }

   bool GetDumpStats() const {
@@ -550,8 +550,9 @@ class CompilerDriver {
       SHARED_REQUIRES(Locks::mutator_lock_);

  private:
-  void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                  ThreadPool* thread_pool, TimingLogger* timings)
+  void PreCompile(jobject class_loader,
+                  const std::vector<const DexFile*>& dex_files,
+                  TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);

   void LoadImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
@@ -559,49 +560,71 @@ class CompilerDriver {
   // Attempt to resolve all type, methods, fields, and strings
   // referenced from code in the dex file following PathClassLoader
   // ordering semantics.
-  void Resolve(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings)
+  void Resolve(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);
-  void ResolveDexFile(jobject class_loader, const DexFile& dex_file,
+  void ResolveDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);

-  void Verify(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-              ThreadPool* thread_pool, TimingLogger* timings);
-  void VerifyDexFile(jobject class_loader, const DexFile& dex_file,
+  void Verify(jobject class_loader,
+              const std::vector<const DexFile*>& dex_files,
+              TimingLogger* timings);
+  void VerifyDexFile(jobject class_loader,
+                     const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
-                     ThreadPool* thread_pool, TimingLogger* timings)
+                     ThreadPool* thread_pool,
+                     size_t thread_count,
+                     TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);

-  void SetVerified(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                   ThreadPool* thread_pool, TimingLogger* timings);
-  void SetVerifiedDexFile(jobject class_loader, const DexFile& dex_file,
+  void SetVerified(jobject class_loader,
+                   const std::vector<const DexFile*>& dex_files,
+                   TimingLogger* timings);
+  void SetVerifiedDexFile(jobject class_loader,
+                          const DexFile& dex_file,
                           const std::vector<const DexFile*>& dex_files,
-                          ThreadPool* thread_pool, TimingLogger* timings)
+                          ThreadPool* thread_pool,
+                          size_t thread_count,
+                          TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);

-  void InitializeClasses(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
+  void InitializeClasses(jobject class_loader,
+                         const std::vector<const DexFile*>& dex_files,
+                         TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
-  void InitializeClasses(jobject class_loader, const DexFile& dex_file,
+  void InitializeClasses(jobject class_loader,
+                         const DexFile& dex_file,
                          const std::vector<const DexFile*>& dex_files,
-                         ThreadPool* thread_pool, TimingLogger* timings)
+                         TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
   void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
   static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
       SHARED_REQUIRES(Locks::mutator_lock_);

-  void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
-               ThreadPool* thread_pool, TimingLogger* timings);
-  void CompileDexFile(jobject class_loader, const DexFile& dex_file,
+  void Compile(jobject class_loader,
+               const std::vector<const DexFile*>& dex_files,
+               TimingLogger* timings);
+  void CompileDexFile(jobject class_loader,
+                      const DexFile& dex_file,
                       const std::vector<const DexFile*>& dex_files,
-                      ThreadPool* thread_pool, TimingLogger* timings)
+                      ThreadPool* thread_pool,
+                      size_t thread_count,
+                      TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_);

   bool MayInlineInternal(const DexFile* inlined_from, const DexFile* inlined_into) const;

+  void InitializeThreadPools();
+  void FreeThreadPools();
+  void CheckThreadPools();
+
   const CompilerOptions* const compiler_options_;
   VerificationResults* const verification_results_;
   DexFileToMethodInlinerMap* const method_inliner_map_;
@@ -652,7 +675,12 @@ class CompilerDriver {

   bool had_hard_verifier_failure_;

-  size_t thread_count_;
+  // A thread pool that can (potentially) run tasks in parallel.
+  std::unique_ptr<ThreadPool> parallel_thread_pool_;
+  size_t parallel_thread_count_;
+
+  // A thread pool that guarantees running single-threaded on the main thread.
+  std::unique_ptr<ThreadPool> single_thread_pool_;

   class AOTCompilationStats;
   std::unique_ptr<AOTCompilationStats> stats_;
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 9285b8c927..3bf89214d7 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -47,7 +47,8 @@ CompilerOptions::CompilerOptions()
       abort_on_hard_verifier_failure_(false),
       init_failure_output_(nullptr),
       dump_cfg_file_name_(""),
-      dump_cfg_append_(false) {
+      dump_cfg_append_(false),
+      force_determinism_(false) {
 }

 CompilerOptions::~CompilerOptions() {
@@ -76,7 +77,8 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
                                  std::ostream* init_failure_output,
                                  bool abort_on_hard_verifier_failure,
                                  const std::string& dump_cfg_file_name,
-                                 bool dump_cfg_append
+                                 bool dump_cfg_append,
+                                 bool force_determinism
                                  ) :  // NOLINT(whitespace/parens)
     compiler_filter_(compiler_filter),
     huge_method_threshold_(huge_method_threshold),
@@ -102,7 +104,8 @@ CompilerOptions::CompilerOptions(CompilerFilter compiler_filter,
     abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure),
     init_failure_output_(init_failure_output),
     dump_cfg_file_name_(dump_cfg_file_name),
-    dump_cfg_append_(dump_cfg_append) {
+    dump_cfg_append_(dump_cfg_append),
+    force_determinism_(force_determinism) {
 }

 void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6989bd5bae..39372b36b8 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -86,7 +86,8 @@ class CompilerOptions FINAL {
                   std::ostream* init_failure_output,
                   bool abort_on_hard_verifier_failure,
                   const std::string& dump_cfg_file_name,
-                  bool dump_cfg_append);
+                  bool dump_cfg_append,
+                  bool force_determinism);

   CompilerFilter GetCompilerFilter() const {
     return compiler_filter_;
@@ -245,6 +246,10 @@ class CompilerOptions FINAL {
     return dump_cfg_append_;
   }

+  bool IsForceDeterminism() const {
+    return force_determinism_;
+  }
+
  private:
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParsePassOptions(const StringPiece& option, UsageFn Usage);
@@ -300,6 +305,10 @@ class CompilerOptions FINAL {
   std::string dump_cfg_file_name_;
   bool dump_cfg_append_;

+  // Whether the compiler should trade performance for determinism to guarantee exactly
+  // reproducible outcomes.
+  bool force_determinism_;
+
   friend class Dex2Oat;

   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index b65fb36167..a5a7796614 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -119,6 +119,7 @@ void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
                                                 compiler_driver_->GetInstructionSet(),
                                                 compiler_driver_->GetInstructionSetFeatures(),
                                                 &key_value_store,
+                                                /* verify */ false,  // Dex files may be dex-to-dex-ed, don't verify.
                                                 &opened_dex_files_map,
                                                 &opened_dex_files);
     ASSERT_TRUE(dex_files_ok);
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 72c615e4bc..c8720eab8a 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1866,6 +1866,9 @@ void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
   orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this, oat_filename));
   FixupClassVisitor visitor(this, copy);
   static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
+
+  // Remove the clinitThreadId. This is required for image determinism.
+  copy->SetClinitThreadId(static_cast<pid_t>(0));
 }

 void ImageWriter::FixupObject(Object* orig, Object* copy) {
@@ -1993,6 +1996,10 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
+
+  // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
+  // compiler pointers in here will make the output non-deterministic.
+  copy_dex_cache->SetDexFile(nullptr);
 }

 const uint8_t* ImageWriter::GetOatAddress(OatAddress type) const {
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3a3275a5f4..67747586c4 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -112,7 +112,8 @@ JitCompiler::JitCompiler() : total_time_(0) {
       /* init_failure_output */ nullptr,
       /* abort_on_hard_verifier_failure */ false,
       /* dump_cfg_file_name */ "",
-      /* dump_cfg_append */ false));
+      /* dump_cfg_append */ false,
+      /* force_determinism */ false));
   for (const std::string& argument : Runtime::Current()->GetCompilerOptions()) {
     compiler_options_->ParseCompilerOption(argument, Usage);
   }
@@ -178,7 +179,7 @@ JitCompiler::JitCompiler() : total_time_(0) {

   if (compiler_options_->GetGenerateDebugInfo()) {
 #ifdef __ANDROID__
-    const char* prefix = GetAndroidData();
+    const char* prefix = "/data/misc/trace";
 #else
     const char* prefix = "/tmp";
 #endif
@@ -187,7 +188,8 @@ JitCompiler::JitCompiler() : total_time_(0) {
     std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
     perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
     if (perf_file_ == nullptr) {
-      LOG(FATAL) << "Could not create perf file at " << perf_filename;
+      LOG(ERROR) << "Could not create perf file at " << perf_filename <<
+          " Are you on a user build? Perf only works on userdebug/eng builds";
     }
   }
 }
@@ -222,7 +224,7 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
     ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
-    if (success && compiler_options_->GetGenerateDebugInfo()) {
+    if (success && perf_file_ != nullptr) {
       const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
       std::ostringstream stream;
       stream << std::hex
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index c0d15f3439..cff2f471bf 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -126,7 +126,8 @@ class OatTest : public CommonCompilerTest {

   bool WriteElf(File* file,
                 const std::vector<const DexFile*>& dex_files,
-                SafeMap<std::string, std::string>& key_value_store) {
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
     TimingLogger timings("WriteElf", false, false);
     OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
     for (const DexFile* dex_file : dex_files) {
@@ -139,12 +140,13 @@ class OatTest : public CommonCompilerTest {
         return false;
       }
     }
-    return DoWriteElf(file, oat_writer, key_value_store);
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
   }

   bool WriteElf(File* file,
                 const std::vector<const char*>& dex_filenames,
-                SafeMap<std::string, std::string>& key_value_store) {
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
     TimingLogger timings("WriteElf", false, false);
     OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
     for (const char* dex_filename : dex_filenames) {
@@ -152,24 +154,26 @@ class OatTest : public CommonCompilerTest {
         return false;
       }
     }
-    return DoWriteElf(file, oat_writer, key_value_store);
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
   }

   bool WriteElf(File* file,
                 ScopedFd&& zip_fd,
                 const char* location,
-                SafeMap<std::string, std::string>& key_value_store) {
+                SafeMap<std::string, std::string>& key_value_store,
+                bool verify) {
     TimingLogger timings("WriteElf", false, false);
     OatWriter oat_writer(/*compiling_boot_image*/false, &timings);
     if (!oat_writer.AddZippedDexFilesSource(std::move(zip_fd), location)) {
       return false;
     }
-    return DoWriteElf(file, oat_writer, key_value_store);
+    return DoWriteElf(file, oat_writer, key_value_store, verify);
   }

   bool DoWriteElf(File* file,
                   OatWriter& oat_writer,
-                  SafeMap<std::string, std::string>& key_value_store) {
+                  SafeMap<std::string, std::string>& key_value_store,
+                  bool verify) {
     std::unique_ptr<ElfWriter> elf_writer = CreateElfWriterQuick(
         compiler_driver_->GetInstructionSet(),
         &compiler_driver_->GetCompilerOptions(),
@@ -183,6 +187,7 @@ class OatTest : public CommonCompilerTest {
                                             compiler_driver_->GetInstructionSet(),
                                             compiler_driver_->GetInstructionSetFeatures(),
                                             &key_value_store,
+                                            verify,
                                             &opened_dex_files_map,
                                             &opened_dex_files)) {
       return false;
@@ -219,6 +224,9 @@ class OatTest : public CommonCompilerTest {
     return elf_writer->End();
   }

+  void TestDexFileInput(bool verify);
+  void TestZipFileInput(bool verify);
+
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
 };
@@ -354,7 +362,7 @@ TEST_F(OatTest, WriteRead) {
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "lue.art");
-  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store);
+  bool success = WriteElf(tmp.GetFile(), class_linker->GetBootClassPath(), key_value_store, false);
   ASSERT_TRUE(success);

   if (kCompile) {  // OatWriter strips the code, regenerate to compare
@@ -480,7 +488,7 @@ TEST_F(OatTest, EmptyTextSection) {
   ScratchFile tmp;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
-  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store);
+  bool success = WriteElf(tmp.GetFile(), dex_files, key_value_store, false);
   ASSERT_TRUE(success);

   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp.GetFilename(),
@@ -494,7 +502,15 @@ TEST_F(OatTest, EmptyTextSection) {
   EXPECT_LT(static_cast<size_t>(oat_file->Size()),
             static_cast<size_t>(tmp.GetFile()->GetLength()));
 }

-TEST_F(OatTest, DexFileInput) {
+static void MaybeModifyDexFileToFail(bool verify, std::unique_ptr<const DexFile>& data) {
+  // If in verify mode (= fail the verifier mode), make sure we fail early. We'll fail already
+  // because of the missing map, but that may lead to out of bounds reads.
+  if (verify) {
+    const_cast<DexFile::Header*>(&data->GetHeader())->checksum_++;
+  }
+}
+
+void OatTest::TestDexFileInput(bool verify) {
   TimingLogger timings("OatTest::DexFileInput", false, false);

   std::vector<const char*> input_filenames;
@@ -504,6 +520,9 @@
   builder1.AddField("Lsome.TestClass;", "int", "someField");
   builder1.AddMethod("Lsome.TestClass;", "()I", "foo");
   std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file1_data);
+
   bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
                                                  dex_file1_data->GetHeader().file_size_);
   ASSERT_TRUE(success);
@@ -516,6 +535,9 @@
   builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
   builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
   std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file2_data);
+
   success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
                                             dex_file2_data->GetHeader().file_size_);
   ASSERT_TRUE(success);
@@ -526,7 +548,14 @@
   ScratchFile oat_file;
   SafeMap<std::string, std::string> key_value_store;
   key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
-  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
+  success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store, verify);
+
+  // In verify mode, we expect failure.
+  if (verify) {
+    ASSERT_FALSE(success);
+    return;
+  }
+
   ASSERT_TRUE(success);

   std::string error_msg;
@@ -557,7 +586,15 @@
   ASSERT_EQ(dex_file2_data->GetLocation(), opened_dex_file2->GetLocation());
 }

-TEST_F(OatTest, ZipFileInput) {
+TEST_F(OatTest, DexFileInputCheckOutput) {
+  TestDexFileInput(false);
+}
+
+TEST_F(OatTest, DexFileInputCheckVerifier) {
+  TestDexFileInput(true);
+}
+
+void OatTest::TestZipFileInput(bool verify) {
   TimingLogger timings("OatTest::DexFileInput", false, false);

   ScratchFile zip_file;
@@ -568,6 +605,9 @@
   builder1.AddField("Lsome.TestClass;", "long", "someField");
   builder1.AddMethod("Lsome.TestClass;", "()D", "foo");
   std::unique_ptr<const DexFile> dex_file1_data = builder1.Build(dex_file1.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file1_data);
+
   bool success = dex_file1.GetFile()->WriteFully(&dex_file1_data->GetHeader(),
                                                  dex_file1_data->GetHeader().file_size_);
   ASSERT_TRUE(success);
@@ -583,6 +623,9 @@
   builder2.AddField("Land.AnotherTestClass;", "boolean", "someOtherField");
   builder2.AddMethod("Land.AnotherTestClass;", "()J", "bar");
   std::unique_ptr<const DexFile> dex_file2_data = builder2.Build(dex_file2.GetFilename());
+
+  MaybeModifyDexFileToFail(verify, dex_file2_data);
+
   success = dex_file2.GetFile()->WriteFully(&dex_file2_data->GetHeader(),
                                             dex_file2_data->GetHeader().file_size_);
   ASSERT_TRUE(success);
@@ -603,37 +646,42 @@
     std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]

     ScratchFile oat_file;
-    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store);
-    ASSERT_TRUE(success);
+    success = WriteElf(oat_file.GetFile(), input_filenames, key_value_store, verify);

-    std::string error_msg;
-    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
-                                                           oat_file.GetFilename(),
-                                                           nullptr,
-                                                           nullptr,
-                                                           false,
-                                                           nullptr,
-                                                           &error_msg));
-    ASSERT_TRUE(opened_oat_file != nullptr);
-    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
-    std::unique_ptr<const DexFile> opened_dex_file1 =
-        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
-    std::unique_ptr<const DexFile> opened_dex_file2 =
-        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
-
-    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
-    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
-                        &opened_dex_file1->GetHeader(),
-                        dex_file1_data->GetHeader().file_size_));
-    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
-              opened_dex_file1->GetLocation());
-
-    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
-    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
-                        &opened_dex_file2->GetHeader(),
-                        dex_file2_data->GetHeader().file_size_));
-    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
-              opened_dex_file2->GetLocation());
+    if (verify) {
+      ASSERT_FALSE(success);
+    } else {
+      ASSERT_TRUE(success);
+
+      std::string error_msg;
+      std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                             oat_file.GetFilename(),
+                                                             nullptr,
+                                                             nullptr,
+                                                             false,
+                                                             nullptr,
+                                                             &error_msg));
+      ASSERT_TRUE(opened_oat_file != nullptr);
+      ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+      std::unique_ptr<const DexFile> opened_dex_file1 =
+          opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+      std::unique_ptr<const DexFile> opened_dex_file2 =
+          opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+      ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                          &opened_dex_file1->GetHeader(),
+                          dex_file1_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+                opened_dex_file1->GetLocation());
+
+      ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                          &opened_dex_file2->GetHeader(),
+                          dex_file2_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+                opened_dex_file2->GetLocation());
+    }
   }

   {
@@ -645,38 +693,51 @@
     success = WriteElf(oat_file.GetFile(),
                        std::move(zip_fd),
                        zip_file.GetFilename().c_str(),
-                       key_value_store);
-    ASSERT_TRUE(success);
-
-    std::string error_msg;
-    std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
-                                                           oat_file.GetFilename(),
-                                                           nullptr,
-                                                           nullptr,
-                                                           false,
-                                                           nullptr,
-                                                           &error_msg));
-    ASSERT_TRUE(opened_oat_file != nullptr);
-    ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
-    std::unique_ptr<const DexFile> opened_dex_file1 =
-        opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
-    std::unique_ptr<const DexFile> opened_dex_file2 =
-        opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
-
-    ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
-    ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
-                        &opened_dex_file1->GetHeader(),
-                        dex_file1_data->GetHeader().file_size_));
-    ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
-              opened_dex_file1->GetLocation());
-
-    ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
-    ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
-                        &opened_dex_file2->GetHeader(),
-                        dex_file2_data->GetHeader().file_size_));
-    ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
-              opened_dex_file2->GetLocation());
+                       key_value_store,
+                       verify);
+    if (verify) {
+      ASSERT_FALSE(success);
+    } else {
+      ASSERT_TRUE(success);
+
+      std::string error_msg;
+      std::unique_ptr<OatFile> opened_oat_file(OatFile::Open(oat_file.GetFilename(),
+                                                             oat_file.GetFilename(),
+                                                             nullptr,
+                                                             nullptr,
+                                                             false,
+                                                             nullptr,
+                                                             &error_msg));
+      ASSERT_TRUE(opened_oat_file != nullptr);
+      ASSERT_EQ(2u, opened_oat_file->GetOatDexFiles().size());
+      std::unique_ptr<const DexFile> opened_dex_file1 =
+          opened_oat_file->GetOatDexFiles()[0]->OpenDexFile(&error_msg);
+      std::unique_ptr<const DexFile> opened_dex_file2 =
+          opened_oat_file->GetOatDexFiles()[1]->OpenDexFile(&error_msg);
+
+      ASSERT_EQ(dex_file1_data->GetHeader().file_size_, opened_dex_file1->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file1_data->GetHeader(),
+                          &opened_dex_file1->GetHeader(),
+                          dex_file1_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(0, zip_file.GetFilename().c_str()),
+                opened_dex_file1->GetLocation());
+
+      ASSERT_EQ(dex_file2_data->GetHeader().file_size_, opened_dex_file2->GetHeader().file_size_);
+      ASSERT_EQ(0, memcmp(&dex_file2_data->GetHeader(),
+                          &opened_dex_file2->GetHeader(),
+                          dex_file2_data->GetHeader().file_size_));
+      ASSERT_EQ(DexFile::GetMultiDexLocation(1, zip_file.GetFilename().c_str()),
+                opened_dex_file2->GetLocation());
+    }
   }
 }

+TEST_F(OatTest, ZipFileInputCheckOutput) {
+  TestZipFileInput(false);
+}
+
+TEST_F(OatTest, ZipFileInputCheckVerifier) {
+  TestZipFileInput(true);
+}
+
 }  // namespace art
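As an aside on the test strategy: MaybeModifyDexFileToFail above forces verifier rejection by bumping the stored dex checksum. A standalone sketch of why this works follows (illustrative only, not ART code; DexHeaderStub is a hypothetical, simplified layout — the real dex header stores an Adler-32 checksum over the file contents after the checksum field):

    #include <cstdint>

    struct DexHeaderStub {
      uint8_t magic[8];   // e.g. "dex\n035\0"
      uint32_t checksum;  // Adler-32 over everything after this field.
      // ... remaining header fields elided ...
    };

    void CorruptForVerifier(DexHeaderStub* header) {
      // Any increment makes the stored value disagree with the recomputed
      // checksum, so the dex file verifier rejects the input early, before
      // any deeper (possibly out-of-bounds) parsing.
      header->checksum++;
    }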
File: " << oat_dex_file.GetLocation() + << " Error: " << error_msg; return false; } } diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index d681998774..14c6d5054a 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -139,12 +139,15 @@ class OatWriter { CreateTypeLookupTable create_type_lookup_table = CreateTypeLookupTable::kDefault); dchecked_vector<const char*> GetSourceLocations() const; - // Write raw dex files to the .rodata section and open them from the oat file. + // Write raw dex files to the .rodata section and open them from the oat file. The verify + // setting dictates whether the dex file verifier should check the dex files. This is generally + // the case, and should only be false for tests. bool WriteAndOpenDexFiles(OutputStream* rodata, File* file, InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, SafeMap<std::string, std::string>* key_value_store, + bool verify, /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map, /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files); // Prepare layout of remaining data. @@ -258,6 +261,7 @@ class OatWriter { bool WriteOatDexFiles(OutputStream* rodata); bool ExtendForTypeLookupTables(OutputStream* rodata, File* file, size_t offset); bool OpenDexFiles(File* file, + bool verify, /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map, /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files); bool WriteTypeLookupTables(MemMap* opened_dex_files_map, diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc deleted file mode 100644 index f0cafc847f..0000000000 --- a/compiler/optimizing/boolean_simplifier.cc +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "boolean_simplifier.h" - -namespace art { - -void HBooleanSimplifier::TryRemovingNegatedCondition(HBasicBlock* block) { - DCHECK(block->EndsWithIf()); - - // Check if the condition is a Boolean negation. - HIf* if_instruction = block->GetLastInstruction()->AsIf(); - HInstruction* boolean_not = if_instruction->InputAt(0); - if (!boolean_not->IsBooleanNot()) { - return; - } - - // Make BooleanNot's input the condition of the If and swap branches. - if_instruction->ReplaceInput(boolean_not->InputAt(0), 0); - block->SwapSuccessors(); - - // Remove the BooleanNot if it is now unused. - if (!boolean_not->HasUses()) { - boolean_not->GetBlock()->RemoveInstruction(boolean_not); - } -} - -// Returns true if 'block1' and 'block2' are empty, merge into the same single -// successor and the successor can only be reached from them. 
-static bool BlocksDoMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
-  if (!block1->IsSingleGoto() || !block2->IsSingleGoto()) return false;
-  HBasicBlock* succ1 = block1->GetSuccessors()[0];
-  HBasicBlock* succ2 = block2->GetSuccessors()[0];
-  return succ1 == succ2 && succ1->GetPredecessors().size() == 2u;
-}
-
-// Returns true if the outcome of the branching matches the boolean value of
-// the branching condition.
-static bool PreservesCondition(HInstruction* input_true, HInstruction* input_false) {
-  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsOne()
-      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsZero();
-}
-
-// Returns true if the outcome of the branching is exactly opposite of the
-// boolean value of the branching condition.
-static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false) {
-  return input_true->IsIntConstant() && input_true->AsIntConstant()->IsZero()
-      && input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
-}
-
-void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
-  DCHECK(block->EndsWithIf());
-
-  // Find elements of the pattern.
-  HIf* if_instruction = block->GetLastInstruction()->AsIf();
-  HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
-  HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
-  if (!BlocksDoMergeTogether(true_block, false_block)) {
-    return;
-  }
-  HBasicBlock* merge_block = true_block->GetSuccessors()[0];
-  if (!merge_block->HasSinglePhi()) {
-    return;
-  }
-  HPhi* phi = merge_block->GetFirstPhi()->AsPhi();
-  HInstruction* true_value = phi->InputAt(merge_block->GetPredecessorIndexOf(true_block));
-  HInstruction* false_value = phi->InputAt(merge_block->GetPredecessorIndexOf(false_block));
-
-  // Check if the selection negates/preserves the value of the condition and
-  // if so, generate a suitable replacement instruction.
-  HInstruction* if_condition = if_instruction->InputAt(0);
-
-  // Don't change FP compares. The definition of compares involving NaNs forces
-  // the compares to be done as written by the user.
-  if (if_condition->IsCondition() &&
-      Primitive::IsFloatingPointType(if_condition->InputAt(0)->GetType())) {
-    return;
-  }
-
-  HInstruction* replacement;
-  if (NegatesCondition(true_value, false_value)) {
-    replacement = graph_->InsertOppositeCondition(if_condition, if_instruction);
-  } else if (PreservesCondition(true_value, false_value)) {
-    replacement = if_condition;
-  } else {
-    return;
-  }
-
-  // Replace the selection outcome with the new instruction.
-  phi->ReplaceWith(replacement);
-  merge_block->RemovePhi(phi);
-
-  // Delete the true branch and merge the resulting chain of blocks
-  // 'block->false_block->merge_block' into one.
-  true_block->DisconnectAndDelete();
-  block->MergeWith(false_block);
-  block->MergeWith(merge_block);
-
-  // No need to update any dominance information, as we are simplifying
-  // a simple diamond shape, where the join block is merged with the
-  // entry block. Any following blocks would have had the join block
-  // as a dominator, and `MergeWith` handles changing that to the
-  // entry block.
-}
-
-void HBooleanSimplifier::Run() {
-  // Iterate in post order in the unlikely case that removing one occurrence of
-  // the selection pattern empties a branch block of another occurrence.
-  // Otherwise the order does not matter.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-    if (!block->EndsWithIf()) continue;
-
-    // If condition is negated, remove the negation and swap the branches.
-    TryRemovingNegatedCondition(block);
-
-    // If this is a boolean-selection diamond pattern, replace its result with
-    // the condition value (or its negation) and simplify the graph.
-    TryRemovingBooleanSelection(block);
-  }
-}
-
-}  // namespace art
diff --git a/compiler/optimizing/boolean_simplifier.h b/compiler/optimizing/boolean_simplifier.h
deleted file mode 100644
index e12a12c95b..0000000000
--- a/compiler/optimizing/boolean_simplifier.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This optimization recognizes two common patterns:
-//  (a) Boolean selection: Casting a boolean to an integer or negating it is
-//      carried out with an If statement selecting from zero/one integer
-//      constants. Because Boolean values are represented as zero/one, the
-//      pattern can be replaced with the condition instruction itself or its
-//      negation, depending on the layout.
-//  (b) Negated condition: Instruction simplifier may replace an If's condition
-//      with a boolean value. If this value is the result of a Boolean negation,
-//      the true/false branches can be swapped and negation removed.
-
-// Example: Negating a boolean value
-// B1:
-//   z1   ParameterValue
-//   i2   IntConstant 0
-//   i3   IntConstant 1
-//   v4   Goto B2
-// B2:
-//   z5   NotEquals [ z1 i2 ]
-//   v6   If [ z5 ] then B3 else B4
-// B3:
-//   v7   Goto B5
-// B4:
-//   v8   Goto B5
-// B5:
-//   i9   Phi [ i3 i2 ]
-//   v10  Return [ i9 ]
-// turns into
-// B1:
-//   z1   ParameterValue
-//   i2   IntConstant 0
-//   v4   Goto B2
-// B2:
-//   z11  Equals [ z1 i2 ]
-//   v10  Return [ z11 ]
-// B3, B4, B5: removed

-// Note: in order to recognize empty blocks, this optimization must be run
-// after the instruction simplifier has removed redundant suspend checks.
-
-#ifndef ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
-#define ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
-
-#include "optimization.h"
-
-namespace art {
-
-class HBooleanSimplifier : public HOptimization {
- public:
-  explicit HBooleanSimplifier(HGraph* graph)
-    : HOptimization(graph, kBooleanSimplifierPassName) {}
-
-  void Run() OVERRIDE;
-
-  static constexpr const char* kBooleanSimplifierPassName = "boolean_simplifier";
-
- private:
-  void TryRemovingNegatedCondition(HBasicBlock* block);
-  void TryRemovingBooleanSelection(HBasicBlock* block);
-
-  DISALLOW_COPY_AND_ASSIGN(HBooleanSimplifier);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_BOOLEAN_SIMPLIFIER_H_
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 5f00f0a536..c2d9edd43e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1285,11 +1285,9 @@ void CodeGeneratorARM::MoveConstant(Location location, int32_t value) {
 }

 void CodeGeneratorARM::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  if (Primitive::Is64BitType(dst_type)) {
-    Move64(dst, src);
-  } else {
-    Move32(dst, src);
-  }
+  HParallelMove move(GetGraph()->GetArena());
+  move.AddMove(src, dst, dst_type, nullptr);
+  GetMoveResolver()->EmitNativeCode(&move);
 }

 void CodeGeneratorARM::AddLocationAsTemp(Location location, LocationSummary* locations) {
@@ -1612,6 +1610,32 @@ void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
                         /* false_target */ nullptr);
 }

+void LocationsBuilderARM::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
 void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   new (GetGraph()->GetArena()) LocationSummary(info);
 }
@@ -1632,7 +1656,7 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) {
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
       }
       break;
@@ -1641,7 +1665,7 @@
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetInAt(1, Location::RequiresFpuRegister());
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
       break;
@@ -1649,14 +1673,14 @@ void LocationsBuilderARM::HandleCondition(HCondition* cond) {
     default:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (cond->NeedsMaterialization()) {
+      if (!cond->IsEmittedAtUseSite()) {
        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
   }
 }

 void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
-  if (!cond->NeedsMaterialization()) {
+  if (cond->IsEmittedAtUseSite()) {
     return;
   }
@@ -4973,6 +4997,8 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
       __ Mov(destination.AsRegister<Register>(), source.AsRegister<Register>());
+    } else if (destination.IsFpuRegister()) {
+      __ vmovsr(destination.AsFpuRegister<SRegister>(), source.AsRegister<Register>());
     } else {
       DCHECK(destination.IsStackSlot());
       __ StoreToOffset(kStoreWord, source.AsRegister<Register>(),
@@ -4990,7 +5016,9 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
       __ StoreToOffset(kStoreWord, IP, SP, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegister()) {
-    if (destination.IsFpuRegister()) {
+    if (destination.IsRegister()) {
+      __ vmovrs(destination.AsRegister<Register>(), source.AsFpuRegister<SRegister>());
+    } else if (destination.IsFpuRegister()) {
       __ vmovs(destination.AsFpuRegister<SRegister>(), source.AsFpuRegister<SRegister>());
     } else {
       DCHECK(destination.IsStackSlot());
@@ -5014,6 +5042,10 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
     if (destination.IsRegisterPair()) {
       __ Mov(destination.AsRegisterPairLow<Register>(), source.AsRegisterPairLow<Register>());
       __ Mov(destination.AsRegisterPairHigh<Register>(), source.AsRegisterPairHigh<Register>());
+    } else if (destination.IsFpuRegisterPair()) {
+      __ vmovdrr(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
+                 source.AsRegisterPairLow<Register>(),
+                 source.AsRegisterPairHigh<Register>());
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       DCHECK(ExpectedPairLayout(source));
@@ -5021,7 +5053,11 @@ void ParallelMoveResolverARM::EmitMove(size_t index) {
           kStoreWordPair, source.AsRegisterPairLow<Register>(), SP, destination.GetStackIndex());
     }
   } else if (source.IsFpuRegisterPair()) {
-    if (destination.IsFpuRegisterPair()) {
+    if (destination.IsRegisterPair()) {
+      __ vmovrrd(destination.AsRegisterPairLow<Register>(),
+                 destination.AsRegisterPairHigh<Register>(),
+                 FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
+    } else if (destination.IsFpuRegisterPair()) {
       __ vmovd(FromLowSToD(destination.AsFpuRegisterPairLow<SRegister>()),
                FromLowSToD(source.AsFpuRegisterPairLow<SRegister>()));
     } else {
@@ -6599,6 +6635,29 @@ void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type
   }
 }

+void LocationsBuilderARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArmPointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
+  }
+  __ LoadFromOffset(kLoadWord,
+                    locations->Out().AsRegister<Register>(),
+                    locations->InAt(0).AsRegister<Register>(),
+                    method_offset);
+}
+
 #undef __
 #undef QUICK_ENTRY_POINT
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c0e3959933..a59024e139 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -2510,13 +2510,13 @@ void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
   }

-  if (instruction->NeedsMaterialization()) {
+  if (!instruction->IsEmittedAtUseSite()) {
     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }

 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
-  if (!instruction->NeedsMaterialization()) {
+  if (instruction->IsEmittedAtUseSite()) {
     return;
   }
@@ -3004,6 +3004,32 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
                         /* false_target */ nullptr);
 }

+void LocationsBuilderARM64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  vixl::Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
   new (GetGraph()->GetArena()) LocationSummary(info);
 }
@@ -4983,6 +5009,29 @@ void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instructio
   __ Bind(slow_path->GetExitLabel());
 }

+void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  uint32_t method_offset = 0;
+  if (instruction->GetTableKind() == HClassTableGet::kVTable) {
+    method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kArm64PointerSize).SizeValue();
+  } else {
+    method_offset = mirror::Class::EmbeddedImTableEntryOffset(
+        instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
+  }
+  __ Ldr(XRegisterFrom(locations->Out()),
+         MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
+}
+
+
 #undef __
 #undef QUICK_ENTRY_POINT
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5bd136a3f0..85ffd66ce8 100644
---
a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -614,6 +614,31 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false); } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) { Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true); + } else if ((loc1.IsRegister() && loc2.IsStackSlot()) || + (loc1.IsStackSlot() && loc2.IsRegister())) { + Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() + : loc2.AsRegister<Register>(); + intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex() + : loc2.GetStackIndex(); + __ Move(TMP, reg); + __ LoadFromOffset(kLoadWord, reg, SP, offset); + __ StoreToOffset(kStoreWord, TMP, SP, offset); + } else if ((loc1.IsRegisterPair() && loc2.IsDoubleStackSlot()) || + (loc1.IsDoubleStackSlot() && loc2.IsRegisterPair())) { + Register reg_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>() + : loc2.AsRegisterPairLow<Register>(); + Register reg_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>() + : loc2.AsRegisterPairHigh<Register>(); + intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex() + : loc2.GetStackIndex(); + intptr_t offset_h = loc1.IsDoubleStackSlot() ? loc1.GetHighStackIndex(kMipsWordSize) + : loc2.GetHighStackIndex(kMipsWordSize); + __ Move(TMP, reg_l); + __ Move(AT, reg_h); + __ LoadFromOffset(kLoadWord, reg_l, SP, offset_l); + __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h); + __ StoreToOffset(kStoreWord, TMP, SP, offset_l); + __ StoreToOffset(kStoreWord, AT, SP, offset_h); } else { LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported"; } @@ -2245,13 +2270,13 @@ void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) { locations->SetInAt(1, Location::RequiresFpuRegister()); break; } - if (instruction->NeedsMaterialization()) { + if (!instruction->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { - if (!instruction->NeedsMaterialization()) { + if (instruction->IsEmittedAtUseSite()) { return; } @@ -3381,6 +3406,32 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderMIPS::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + if (Primitive::IsFloatingPointType(select->GetType())) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + } + if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { + locations->SetInAt(2, Location::RequiresRegister()); + } + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + MipsLabel false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); +} + void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { new (GetGraph()->GetArena()) LocationSummary(info); } @@ -5236,6 +5287,14 @@ void 
InstructionCodeGeneratorMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invo codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); } +void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet*) { + UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips"; +} + +void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet*) { + UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips"; +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index e3115f416a..3c928dedde 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1872,13 +1872,13 @@ void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) { locations->SetInAt(1, Location::RequiresFpuRegister()); break; } - if (instruction->NeedsMaterialization()) { + if (!instruction->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { - if (!instruction->NeedsMaterialization()) { + if (instruction->IsEmittedAtUseSite()) { return; } @@ -2748,6 +2748,32 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + if (Primitive::IsFloatingPointType(select->GetType())) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + } + if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { + locations->SetInAt(2, Location::RequiresRegister()); + } + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + Mips64Label false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); +} + void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { new (GetGraph()->GetArena()) LocationSummary(info); } @@ -4267,5 +4293,14 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins } } +void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) { + UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64"; +} + +void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) { + UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64"; +} + } // namespace mips64 } // namespace art + diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index 644a3fb75e..96fe2a17e6 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -96,7 +96,7 @@ void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, } bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) { - return !cond_input->IsCondition() || cond_input->AsCondition()->NeedsMaterialization(); + return !cond_input->IsCondition() || !cond_input->IsEmittedAtUseSite(); } } // namespace art diff --git 
a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 50c4ba23c5..18d70daf47 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1218,11 +1218,14 @@ void CodeGeneratorX86::MoveConstant(Location location, int32_t value) { } void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { - if (Primitive::Is64BitType(dst_type)) { - Move64(dst, src); + HParallelMove move(GetGraph()->GetArena()); + if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) { + move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr); + move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr); } else { - Move32(dst, src); + move.AddMove(src, dst, dst_type, nullptr); } + GetMoveResolver()->EmitNativeCode(&move); } void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* locations) { @@ -1559,10 +1562,36 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize); - GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, - slow_path->GetEntryLabel(), - /* false_target */ static_cast<Label*>(nullptr)); + GenerateTestAndBranch<Label>(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); +} + +void LocationsBuilderX86::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + Primitive::Type select_type = select->GetType(); + HInstruction* cond = select->GetCondition(); + + if (Primitive::IsFloatingPointType(select_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetInAt(1, Location::Any()); + if (IsBooleanValueOrMaterializedCondition(cond)) { + locations->SetInAt(2, Location::Any()); + } + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + NearLabel false_target; + GenerateTestAndBranch<NearLabel>( + select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); } void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { @@ -1628,7 +1657,7 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) { case Primitive::kPrimLong: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); - if (cond->NeedsMaterialization()) { + if (!cond->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister()); } break; @@ -1637,7 +1666,7 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) { case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); - if (cond->NeedsMaterialization()) { + if (!cond->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister()); } break; @@ -1645,7 +1674,7 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) { default: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); - if (cond->NeedsMaterialization()) { + if 
(!cond->IsEmittedAtUseSite()) { // We need a byte register. locations->SetOut(Location::RegisterLocation(ECX)); } @@ -1654,7 +1683,7 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) { } void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { - if (!cond->NeedsMaterialization()) { + if (cond->IsEmittedAtUseSite()) { return; } @@ -2657,7 +2686,11 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + if (add->InputAt(1)->IsX86LoadFromConstantTable()) { + DCHECK(add->InputAt(1)->IsEmittedAtUseSite()); + } else { + locations->SetInAt(1, Location::Any()); + } locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2721,7 +2754,7 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ addss(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( const_area->GetConstant()->AsFloatConstant()->GetValue(), @@ -2738,7 +2771,7 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = add->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ addsd(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( const_area->GetConstant()->AsDoubleConstant()->GetValue(), @@ -2769,7 +2802,11 @@ void LocationsBuilderX86::VisitSub(HSub* sub) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { + DCHECK(sub->InputAt(1)->IsEmittedAtUseSite()); + } else { + locations->SetInAt(1, Location::Any()); + } locations->SetOut(Location::SameAsFirstInput()); break; } @@ -2819,7 +2856,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ subss(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( const_area->GetConstant()->AsFloatConstant()->GetValue(), @@ -2836,7 +2873,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = sub->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ subsd(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( const_area->GetConstant()->AsDoubleConstant()->GetValue(), @@ -2879,7 +2916,11 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, 
Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { + DCHECK(mul->InputAt(1)->IsEmittedAtUseSite()); + } else { + locations->SetInAt(1, Location::Any()); + } locations->SetOut(Location::SameAsFirstInput()); break; } @@ -3000,7 +3041,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ mulss(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( const_area->GetConstant()->AsFloatConstant()->GetValue(), @@ -3018,7 +3059,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = mul->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ mulsd(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( const_area->GetConstant()->AsDoubleConstant()->GetValue(), @@ -3372,7 +3413,11 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { case Primitive::kPrimFloat: case Primitive::kPrimDouble: { locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::Any()); + if (div->InputAt(1)->IsX86LoadFromConstantTable()) { + DCHECK(div->InputAt(1)->IsEmittedAtUseSite()); + } else { + locations->SetInAt(1, Location::Any()); + } locations->SetOut(Location::SameAsFirstInput()); break; } @@ -3399,7 +3444,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ divss(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( const_area->GetConstant()->AsFloatConstant()->GetValue(), @@ -3416,7 +3461,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) { HX86LoadFromConstantTable* const_area = div->InputAt(1)->AsX86LoadFromConstantTable(); - DCHECK(!const_area->NeedsMaterialization()); + DCHECK(const_area->IsEmittedAtUseSite()); __ divsd(first.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( const_area->GetConstant()->AsDoubleConstant()->GetValue(), @@ -3957,6 +4002,27 @@ void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) { } +void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t 
method_offset = 0; + if (instruction->GetTableKind() == HClassTableGet::kVTable) { + method_offset = mirror::Class::EmbeddedVTableEntryOffset( + instruction->GetIndex(), kX86PointerSize).SizeValue(); + } else { + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); + } + __ movl(locations->Out().AsRegister<Register>(), + Address(locations->InAt(0).AsRegister<Register>(), method_offset)); +} + void LocationsBuilderX86::VisitNot(HNot* not_) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); @@ -5465,13 +5531,31 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { if (source.IsRegister()) { if (destination.IsRegister()) { __ movl(destination.AsRegister<Register>(), source.AsRegister<Register>()); + } else if (destination.IsFpuRegister()) { + __ movd(destination.AsFpuRegister<XmmRegister>(), source.AsRegister<Register>()); } else { DCHECK(destination.IsStackSlot()); __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>()); } + } else if (source.IsRegisterPair()) { + size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt); + // Create stack space for 2 elements. + __ subl(ESP, Immediate(2 * elem_size)); + __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>()); + __ movl(Address(ESP, elem_size), source.AsRegisterPairHigh<Register>()); + __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, 0)); + // And remove the temporary stack space we allocated. + __ addl(ESP, Immediate(2 * elem_size)); } else if (source.IsFpuRegister()) { - if (destination.IsFpuRegister()) { + if (destination.IsRegister()) { + __ movd(destination.AsRegister<Register>(), source.AsFpuRegister<XmmRegister>()); + } else if (destination.IsFpuRegister()) { __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); + } else if (destination.IsRegisterPair()) { + XmmRegister src_reg = source.AsFpuRegister<XmmRegister>(); + __ movd(destination.AsRegisterPairLow<Register>(), src_reg); + __ psrlq(src_reg, Immediate(32)); + __ movd(destination.AsRegisterPairHigh<Register>(), src_reg); } else if (destination.IsStackSlot()) { __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } else { @@ -5488,7 +5572,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex()); } } else if (source.IsDoubleStackSlot()) { - if (destination.IsFpuRegister()) { + if (destination.IsRegisterPair()) { + __ movl(destination.AsRegisterPairLow<Register>(), Address(ESP, source.GetStackIndex())); + __ movl(destination.AsRegisterPairHigh<Register>(), + Address(ESP, source.GetHighStackIndex(kX86WordSize))); + } else if (destination.IsFpuRegister()) { __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; @@ -6865,7 +6953,7 @@ void LocationsBuilderX86::VisitX86LoadFromConstantTable( locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant())); // If we don't need to be materialized, we only need the inputs to be set. 
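// (Condensed from the nodes.h hunk later in this diff: the per-condition
//  needs_materialization_ flag, which defaulted to true, becomes an
//  emitted_at_use_site_ flag on HInstruction that defaults to false, so every
//  call site inverts its test, i.e.
//      cond->NeedsMaterialization()  ==  !cond->IsEmittedAtUseSite()
//  with the new accessors being
//      bool IsEmittedAtUseSite() const { return emitted_at_use_site_; }
//      void MarkEmittedAtUseSite() { emitted_at_use_site_ = true; }
//  The early return below is the same logic under the new, inverted name.)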
- if (!insn->NeedsMaterialization()) { + if (insn->IsEmittedAtUseSite()) { return; } @@ -6885,7 +6973,7 @@ void LocationsBuilderX86::VisitX86LoadFromConstantTable( } void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromConstantTable* insn) { - if (!insn->NeedsMaterialization()) { + if (insn->IsEmittedAtUseSite()) { return; } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index fd188346e4..86ffb0f70d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1558,10 +1558,36 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); - GenerateTestAndBranch(deoptimize, - /* condition_input_index */ 0, - slow_path->GetEntryLabel(), - /* false_target */ static_cast<Label*>(nullptr)); + GenerateTestAndBranch<Label>(deoptimize, + /* condition_input_index */ 0, + slow_path->GetEntryLabel(), + /* false_target */ nullptr); +} + +void LocationsBuilderX86_64::VisitSelect(HSelect* select) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + if (Primitive::IsFloatingPointType(select->GetType())) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + } + if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { + locations->SetInAt(2, Location::RequiresRegister()); + } + locations->SetOut(Location::SameAsFirstInput()); +} + +void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + NearLabel false_target; + GenerateTestAndBranch<NearLabel>(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); } void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { @@ -1638,13 +1664,13 @@ void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { locations->SetInAt(1, Location::Any()); break; } - if (cond->NeedsMaterialization()) { + if (!cond->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister()); } } void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { - if (!cond->NeedsMaterialization()) { + if (cond->IsEmittedAtUseSite()) { return; } @@ -3959,6 +3985,27 @@ void InstructionCodeGeneratorX86_64::VisitCurrentMethod( // Nothing to do, the method is already at its location. 
} +void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t method_offset = 0; + if (instruction->GetTableKind() == HClassTableGet::kVTable) { + method_offset = mirror::Class::EmbeddedVTableEntryOffset( + instruction->GetIndex(), kX86_64PointerSize).SizeValue(); + } else { + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); + } + __ movq(locations->Out().AsRegister<CpuRegister>(), + Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset)); +} + void LocationsBuilderX86_64::VisitNot(HNot* not_) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 19d63de499..322a577bbf 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -671,10 +671,10 @@ TEST_F(CodegenTest, NonMaterializedCondition) { then_block->AddInstruction(new (&allocator) HReturn(constant0)); else_block->AddInstruction(new (&allocator) HReturn(constant1)); - ASSERT_TRUE(equal->NeedsMaterialization()); + ASSERT_FALSE(equal->IsEmittedAtUseSite()); TransformToSsa(graph); PrepareForRegisterAllocation(graph).Run(); - ASSERT_FALSE(equal->NeedsMaterialization()); + ASSERT_TRUE(equal->IsEmittedAtUseSite()); auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 31136772c7..962e77dfc9 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -859,8 +859,12 @@ void SSAChecker::HandleBooleanInput(HInstruction* instruction, size_t input_inde value)); } } else if (input->GetType() == Primitive::kPrimInt - && (input->IsPhi() || input->IsAnd() || input->IsOr() || input->IsXor())) { - // TODO: We need a data-flow analysis to determine if the Phi or + && (input->IsPhi() || + input->IsAnd() || + input->IsOr() || + input->IsXor() || + input->IsSelect())) { + // TODO: We need a data-flow analysis to determine if the Phi or Select or // binary operation is actually Boolean. Allow for now. 
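// (Condensed, the widened acceptance test reads as a single predicate:
//      input->GetType() == Primitive::kPrimBoolean ||
//      (input->GetType() == Primitive::kPrimInt &&
//       (input->IsPhi() || input->IsAnd() || input->IsOr() ||
//        input->IsXor() || input->IsSelect()))
//  since an int-typed Phi/And/Or/Xor/Select may still carry a 0/1 value.)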
} else if (input->GetType() != Primitive::kPrimBoolean) { AddError(StringPrintf( @@ -893,6 +897,11 @@ void SSAChecker::VisitIf(HIf* instruction) { HandleBooleanInput(instruction, 0); } +void SSAChecker::VisitSelect(HSelect* instruction) { + VisitInstruction(instruction); + HandleBooleanInput(instruction, 2); +} + void SSAChecker::VisitBooleanNot(HBooleanNot* instruction) { VisitInstruction(instruction); HandleBooleanInput(instruction, 0); diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 2e16bfe245..8724cde5dd 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -126,6 +126,7 @@ class SSAChecker : public GraphChecker { void VisitCondition(HCondition* op) OVERRIDE; void VisitIf(HIf* instruction) OVERRIDE; void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE; + void VisitSelect(HSelect* instruction) OVERRIDE; void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE; void VisitConstant(HConstant* instruction) OVERRIDE; void VisitBoundType(HBoundType* instruction) OVERRIDE; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 280516252b..9d796c1004 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -507,6 +507,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { || IsPass(HDeadCodeElimination::kFinalDeadCodeEliminationPassName) || IsPass(HDeadCodeElimination::kInitialDeadCodeEliminationPassName) || IsPass(BoundsCheckElimination::kBoundsCheckEliminationPassName) + || IsPass(RegisterAllocator::kRegisterAllocatorPassName) || IsPass(SsaBuilder::kSsaBuilderPassName)) { HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (info == nullptr) { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 2e79df1b84..35109fa538 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -296,9 +296,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { return false; } +HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, + HInstruction* receiver, + uint32_t dex_pc) const { + ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); + DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); + return new (graph_->GetArena()) HInstanceFieldGet( + receiver, + Primitive::kPrimNot, + field->GetOffset(), + field->IsVolatile(), + field->GetDexFieldIndex(), + field->GetDeclaringClass()->GetDexClassDefIndex(), + *field->GetDexFile(), + handles_->NewHandle(field->GetDexCache()), + dex_pc); +} + bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, ArtMethod* resolved_method, const InlineCache& ic) { + DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface()) + << invoke_instruction->DebugName(); + const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); uint32_t class_index = FindClassIndexIn(ic.GetMonomorphicType(), caller_dex_file); if (class_index == DexFile::kDexNoIndex) { @@ -328,18 +348,8 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, } // We successfully inlined, now add a guard. 
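// (Shape of the monomorphic guard, condensed from the surrounding code; the
//  hunk below only swaps the open-coded field load for the new
//  BuildGetReceiverClass helper:
//      receiver_class = receiver.shadow$_klass_      // BuildGetReceiverClass
//      compare        = HNotEqual(load_class, receiver_class)
//      HDeoptimize(compare)  // leave compiled code if the receiver's class
//                            // is not the one seen in the inline cache.)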
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); - DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); - HInstanceFieldGet* field_get = new (graph_->GetArena()) HInstanceFieldGet( - receiver, - Primitive::kPrimNot, - field->GetOffset(), - field->IsVolatile(), - field->GetDexFieldIndex(), - field->GetDeclaringClass()->GetDexClassDefIndex(), - *field->GetDexFile(), - handles_->NewHandle(field->GetDexCache()), - invoke_instruction->GetDexPc()); + HInstanceFieldGet* receiver_class = BuildGetReceiverClass( + class_linker, receiver, invoke_instruction->GetDexPc()); bool is_referrer = (ic.GetMonomorphicType() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); @@ -351,16 +361,16 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, /* needs_access_check */ false, /* is_in_dex_cache */ true); - HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, field_get); + HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( compare, invoke_instruction->GetDexPc()); // TODO: Extend reference type propagation to understand the guard. if (cursor != nullptr) { - bb_cursor->InsertInstructionAfter(field_get, cursor); + bb_cursor->InsertInstructionAfter(receiver_class, cursor); } else { - bb_cursor->InsertInstructionBefore(field_get, bb_cursor->GetFirstInstruction()); + bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction()); } - bb_cursor->InsertInstructionAfter(load_class, field_get); + bb_cursor->InsertInstructionAfter(load_class, receiver_class); bb_cursor->InsertInstructionAfter(compare, load_class); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -374,13 +384,101 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, return true; } -bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction ATTRIBUTE_UNUSED, +bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, ArtMethod* resolved_method, - const InlineCache& ic ATTRIBUTE_UNUSED) { - // TODO - VLOG(compiler) << "Unimplemented polymorphic inlining for " - << PrettyMethod(resolved_method); - return false; + const InlineCache& ic) { + DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface()) + << invoke_instruction->DebugName(); + // This optimization only works under JIT for now. + DCHECK(Runtime::Current()->UseJit()); + if (graph_->GetInstructionSet() == kMips || graph_->GetInstructionSet() == kMips64) { + // TODO: Support HClassTableGet for mips and mips64. + return false; + } + ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + size_t pointer_size = class_linker->GetImagePointerSize(); + + DCHECK(resolved_method != nullptr); + ArtMethod* actual_method = nullptr; + // Check whether we are actually calling the same method among + // the different types seen. 
+  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
+    if (ic.GetTypeAt(i) == nullptr) {
+      break;
+    }
+    ArtMethod* new_method = nullptr;
+    if (invoke_instruction->IsInvokeInterface()) {
+      new_method = ic.GetTypeAt(i)->FindVirtualMethodForInterface(
+          resolved_method, pointer_size);
+    } else {
+      DCHECK(invoke_instruction->IsInvokeVirtual());
+      new_method = ic.GetTypeAt(i)->FindVirtualMethodForVirtual(
+          resolved_method, pointer_size);
+    }
+    if (actual_method == nullptr) {
+      actual_method = new_method;
+    } else if (actual_method != new_method) {
+      // Different methods, bail out.
+      return false;
+    }
+  }
+
+  HInstruction* receiver = invoke_instruction->InputAt(0);
+  HInstruction* cursor = invoke_instruction->GetPrevious();
+  HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
+
+  if (!TryInline(invoke_instruction, actual_method, /* do_rtp */ false)) {
+    return false;
+  }
+
+  // We successfully inlined, now add a guard.
+  HInstanceFieldGet* receiver_class = BuildGetReceiverClass(
+      class_linker, receiver, invoke_instruction->GetDexPc());
+
+  size_t method_offset = invoke_instruction->IsInvokeVirtual()
+      ? actual_method->GetVtableIndex()
+      : invoke_instruction->AsInvokeInterface()->GetImtIndex();
+
+  Primitive::Type type = Is64BitInstructionSet(graph_->GetInstructionSet())
+      ? Primitive::kPrimLong
+      : Primitive::kPrimInt;
+  HClassTableGet* class_table_get = new (graph_->GetArena()) HClassTableGet(
+      receiver_class,
+      type,
+      invoke_instruction->IsInvokeVirtual() ? HClassTableGet::kVTable : HClassTableGet::kIMTable,
+      method_offset,
+      invoke_instruction->GetDexPc());
+
+  HConstant* constant;
+  if (type == Primitive::kPrimLong) {
+    constant = graph_->GetLongConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  } else {
+    constant = graph_->GetIntConstant(
+        reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc());
+  }
+
+  HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant);
+  HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
+      compare, invoke_instruction->GetDexPc());
+  // TODO: Extend reference type propagation to understand the guard.
+  if (cursor != nullptr) {
+    bb_cursor->InsertInstructionAfter(receiver_class, cursor);
+  } else {
+    bb_cursor->InsertInstructionBefore(receiver_class, bb_cursor->GetFirstInstruction());
+  }
+  bb_cursor->InsertInstructionAfter(class_table_get, receiver_class);
+  bb_cursor->InsertInstructionAfter(compare, class_table_get);
+  bb_cursor->InsertInstructionAfter(deoptimize, compare);
+  deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+
+  // Run type propagation to get the guard typed.
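// (Condensed shape of the polymorphic guard built above: it checks the
//  dispatch target rather than the receiver's exact class.
//      receiver_class  = receiver.shadow$_klass_             // BuildGetReceiverClass
//      class_table_get = vtable/IMT slot of receiver_class   // HClassTableGet
//      compare         = HNotEqual(class_table_get, actual_method)
//      HDeoptimize(compare)
//  Any receiver whose table slot still resolves to actual_method passes, so
//  subclasses that do not override the target keep the fast path; the
//  rtp_fixup run below then types the inserted receiver_class load.)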
+ ReferenceTypePropagation rtp_fixup(graph_, handles_); + rtp_fixup.Run(); + + MaybeRecordStat(kInlinedPolymorphicCall); + + return true; } bool HInliner::TryInline(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) { diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 8de510ea37..3c01751a70 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -85,6 +85,11 @@ class HInliner : public HOptimization { bool same_dex_file, bool do_rtp = true); + HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker, + HInstruction* receiver, + uint32_t dex_pc) const + SHARED_REQUIRES(Locks::mutator_lock_); + HGraph* const outermost_graph_; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 49fc8c71b3..7d3a7238dc 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -76,6 +76,8 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitSub(HSub* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; + void VisitSelect(HSelect* select) OVERRIDE; + void VisitIf(HIf* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitInvoke(HInvoke* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; @@ -559,14 +561,86 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) { } void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { - HInstruction* parent = bool_not->InputAt(0); - if (parent->IsBooleanNot()) { - HInstruction* value = parent->InputAt(0); - // Replace (!(!bool_value)) with bool_value - bool_not->ReplaceWith(value); + HInstruction* input = bool_not->InputAt(0); + HInstruction* replace_with = nullptr; + + if (input->IsIntConstant()) { + // Replace !(true/false) with false/true. + if (input->AsIntConstant()->IsOne()) { + replace_with = GetGraph()->GetIntConstant(0); + } else { + DCHECK(input->AsIntConstant()->IsZero()); + replace_with = GetGraph()->GetIntConstant(1); + } + } else if (input->IsBooleanNot()) { + // Replace (!(!bool_value)) with bool_value. + replace_with = input->InputAt(0); + } else if (input->IsCondition() && + // Don't change FP compares. The definition of compares involving + // NaNs forces the compares to be done as written by the user. + !Primitive::IsFloatingPointType(input->InputAt(0)->GetType())) { + // Replace condition with its opposite. + replace_with = GetGraph()->InsertOppositeCondition(input->AsCondition(), bool_not); + } + + if (replace_with != nullptr) { + bool_not->ReplaceWith(replace_with); bool_not->GetBlock()->RemoveInstruction(bool_not); - // It is possible that `parent` is dead at this point but we leave - // its removal to DCE for simplicity. + RecordSimplification(); + } +} + +void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { + HInstruction* replace_with = nullptr; + HInstruction* condition = select->GetCondition(); + HInstruction* true_value = select->GetTrueValue(); + HInstruction* false_value = select->GetFalseValue(); + + if (condition->IsBooleanNot()) { + // Change ((!cond) ? x : y) to (cond ? y : x). 
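// (The rewrite below is applied in place on the HSelect's inputs: afterwards
//  input 0 holds the old true value, input 1 the old false value, and input 2
//  the un-negated condition. This is sound because ((!c) ? x : y) computes
//  the same value as (c ? y : x) for every (c, x, y).)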
+ condition = condition->InputAt(0); + std::swap(true_value, false_value); + select->ReplaceInput(false_value, 0); + select->ReplaceInput(true_value, 1); + select->ReplaceInput(condition, 2); + RecordSimplification(); + } + + if (true_value == false_value) { + // Replace (cond ? x : x) with (x). + replace_with = true_value; + } else if (condition->IsIntConstant()) { + if (condition->AsIntConstant()->IsOne()) { + // Replace (true ? x : y) with (x). + replace_with = true_value; + } else { + // Replace (false ? x : y) with (y). + DCHECK(condition->AsIntConstant()->IsZero()); + replace_with = false_value; + } + } else if (true_value->IsIntConstant() && false_value->IsIntConstant()) { + if (true_value->AsIntConstant()->IsOne() && false_value->AsIntConstant()->IsZero()) { + // Replace (cond ? true : false) with (cond). + replace_with = condition; + } else if (true_value->AsIntConstant()->IsZero() && false_value->AsIntConstant()->IsOne()) { + // Replace (cond ? false : true) with (!cond). + replace_with = GetGraph()->InsertOppositeCondition(condition, select); + } + } + + if (replace_with != nullptr) { + select->ReplaceWith(replace_with); + select->GetBlock()->RemoveInstruction(select); + RecordSimplification(); + } +} + +void InstructionSimplifierVisitor::VisitIf(HIf* instruction) { + HInstruction* condition = instruction->InputAt(0); + if (condition->IsBooleanNot()) { + // Swap successors if input is negated. + instruction->ReplaceInput(condition->InputAt(0), 0); + instruction->GetBlock()->SwapSuccessors(); RecordSimplification(); } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 97fe5872bf..e8912b39ab 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -807,7 +807,8 @@ void IntrinsicCodeGeneratorARM::VisitUnsafePutLongVolatile(HInvoke* invoke) { } static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, - HInvoke* invoke) { + HInvoke* invoke, + Primitive::Type type) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -817,11 +818,15 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // If heap poisoning is enabled, we don't want the unpoisoning + // operations to potentially clobber the output. + Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap; + locations->SetOut(Location::RequiresRegister(), overlaps); locations->AddTemp(Location::RequiresRegister()); // Pointer. locations->AddTemp(Location::RequiresRegister()); // Temp 1. - locations->AddTemp(Location::RequiresRegister()); // Temp 2. } static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) { @@ -856,7 +861,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (kPoisonHeapReferences && type == Primitive::kPrimNot) { codegen->GetAssembler()->PoisonHeapReference(expected_lo); - codegen->GetAssembler()->PoisonHeapReference(value_lo); + if (value_lo == expected_lo) { + // Do not poison `value_lo`, as it is the same register as + // `expected_lo`, which has just been poisoned. 
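// (Poisoning negates the reference value, so poisoning one physical register
//  twice through its two aliases would cancel out and leave it unpoisoned
//  where the CAS loop expects poisoned data. This alias check, mirrored in
//  the unpoison sequence further down, is what fixes the heap-poisoning
//  breakage (b/26204023) that previously forced the intrinsic off.)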
+ } else { + codegen->GetAssembler()->PoisonHeapReference(value_lo); + } } // do { @@ -892,13 +902,18 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ mov(out, ShifterOperand(0), CC); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->UnpoisonHeapReference(value_lo); codegen->GetAssembler()->UnpoisonHeapReference(expected_lo); + if (value_lo == expected_lo) { + // Do not unpoison `value_lo`, as it is the same register as + // `expected_lo`, which has just been unpoisoned. + } else { + codegen->GetAssembler()->UnpoisonHeapReference(value_lo); + } } } void IntrinsicLocationsBuilderARM::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and @@ -906,16 +921,12 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // Turn it off temporarily as a quick fix, until the read barrier is // implemented (see TODO in GenCAS below). // - // Also, the UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix (b/26204023). - // - // TODO(rpl): Fix these two issues and re-enable this intrinsic. - if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { + // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers. + if (kEmitCompilerReadBarrier) { return; } - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index c888f01841..d5ed58530d 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -986,7 +986,9 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); @@ -996,7 +998,12 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) { locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // If heap poisoning is enabled, we don't want the unpoisoning + // operations to potentially clobber the output. + Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot) + ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap; + locations->SetOut(Location::RequiresRegister(), overlaps); } static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) { @@ -1027,7 +1034,12 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat if (kPoisonHeapReferences && type == Primitive::kPrimNot) { codegen->GetAssembler()->PoisonHeapReference(expected); - codegen->GetAssembler()->PoisonHeapReference(value); + if (value.Is(expected)) { + // Do not poison `value`, as it is the same register as + // `expected`, which has just been poisoned. + } else { + codegen->GetAssembler()->PoisonHeapReference(value); + } } // do { @@ -1077,16 +1089,21 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ Cset(out, eq); if (kPoisonHeapReferences && type == Primitive::kPrimNot) { - codegen->GetAssembler()->UnpoisonHeapReference(value); codegen->GetAssembler()->UnpoisonHeapReference(expected); + if (value.Is(expected)) { + // Do not unpoison `value`, as it is the same register as + // `expected`, which has just been unpoisoned. + } else { + codegen->GetAssembler()->UnpoisonHeapReference(value); + } } } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke); + CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke); + CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and @@ -1094,16 +1111,12 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // Turn it off temporarily as a quick fix, until the read barrier is // implemented (see TODO in GenCAS below). // - // Also, the UnsafeCASObject intrinsic does not always work when heap - // poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it - // off temporarily as a quick fix (b/26204023). - // - // TODO(rpl): Fix these two issues and re-enable this intrinsic. - if (kEmitCompilerReadBarrier || kPoisonHeapReferences) { + // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers. + if (kEmitCompilerReadBarrier) { return; } - CreateIntIntIntIntIntToInt(arena_, invoke); + CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index 926f9399a5..991f8f70ea 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -278,9 +278,9 @@ TEST_F(LiveRangesTest, Loop1) { // Test for the phi. interval = liveness.GetInstructionFromSsaIndex(3)->GetLiveInterval(); range = interval->GetFirstRange(); - // Instruction is consumed by the if. + // Instruction is input of non-materialized Equal and hence live until If. 
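// (Lifetime positions advance by 2 per instruction: the phi previously died
//  at the Equal, position 17, but with the Equal now emitted at its use site
//  its inputs must stay live until the If that consumes it, moving the range
//  end to position 19, as asserted below.)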
ASSERT_EQ(14u, range->GetStart()); - ASSERT_EQ(17u, range->GetEnd()); + ASSERT_EQ(19u, range->GetEnd()); ASSERT_TRUE(range->GetNext() == nullptr); } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 92f758d61d..c057eca434 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -722,6 +722,22 @@ void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, RemoveInstruction(initial); } +void HBasicBlock::MoveInstructionBefore(HInstruction* insn, HInstruction* cursor) { + DCHECK(!cursor->IsPhi()); + DCHECK(!insn->IsPhi()); + DCHECK(!insn->IsControlFlow()); + DCHECK(insn->CanBeMoved()); + DCHECK(!insn->HasSideEffects()); + + HBasicBlock* from_block = insn->GetBlock(); + HBasicBlock* to_block = cursor->GetBlock(); + DCHECK(from_block != to_block); + + from_block->RemoveInstruction(insn, /* ensure_safety */ false); + insn->SetBlock(to_block); + to_block->instructions_.InsertInstructionBefore(insn, cursor); +} + static void Add(HInstructionList* instruction_list, HBasicBlock* block, HInstruction* instruction) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 41c2f17cd9..d90c1fb335 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1011,6 +1011,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // Replace instruction `initial` with `replacement` within this block. void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); + void MoveInstructionBefore(HInstruction* insn, HInstruction* cursor); void AddPhi(HPhi* phi); void InsertPhiAfter(HPhi* instruction, HPhi* cursor); // RemoveInstruction and RemovePhi delete a given instruction from the respective @@ -1161,6 +1162,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(BoundsCheck, Instruction) \ M(BoundType, Instruction) \ M(CheckCast, Instruction) \ + M(ClassTableGet, Instruction) \ M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ @@ -1220,6 +1222,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UnresolvedInstanceFieldSet, Instruction) \ M(UnresolvedStaticFieldGet, Instruction) \ M(UnresolvedStaticFieldSet, Instruction) \ + M(Select, Instruction) \ M(StoreLocal, Instruction) \ M(Sub, BinaryOperation) \ M(SuspendCheck, Instruction) \ @@ -1819,6 +1822,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { dex_pc_(dex_pc), id_(-1), ssa_index_(-1), + emitted_at_use_site_(false), environment_(nullptr), locations_(nullptr), live_interval_(nullptr), @@ -2081,6 +2085,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // The caller must ensure that this is safe to do. void RemoveEnvironmentUsers(); + bool IsEmittedAtUseSite() const { return emitted_at_use_site_; } + void MarkEmittedAtUseSite() { emitted_at_use_site_ = true; } + protected: virtual const HUserRecord<HInstruction*> InputRecordAt(size_t i) const = 0; virtual void SetRawInputRecordAt(size_t index, const HUserRecord<HInstruction*>& input) = 0; @@ -2102,6 +2109,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // When doing liveness analysis, instructions that have uses get an SSA index. int ssa_index_; + // If set, the machine code for this instruction is assumed to be generated by + // its users. Used by liveness analysis to compute use positions accordingly. + bool emitted_at_use_site_; + // List of instructions that have this instruction as input. 
HUseList<HInstruction*> uses_; @@ -2542,6 +2553,44 @@ class HCurrentMethod : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HCurrentMethod); }; +// Fetches an ArtMethod from the virtual table or the interface method table +// of a class. +class HClassTableGet : public HExpression<1> { + public: + enum TableKind { + kVTable, + kIMTable, + }; + HClassTableGet(HInstruction* cls, + Primitive::Type type, + TableKind kind, + size_t index, + uint32_t dex_pc) + : HExpression(type, SideEffects::None(), dex_pc), + index_(index), + table_kind_(kind) { + SetRawInputAt(0, cls); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other) const OVERRIDE { + return other->AsClassTableGet()->GetIndex() == index_ && + other->AsClassTableGet()->GetTableKind() == table_kind_; + } + + TableKind GetTableKind() const { return table_kind_; } + size_t GetIndex() const { return index_; } + + DECLARE_INSTRUCTION(ClassTableGet); + + private: + // The index of the ArtMethod in the table. + const size_t index_; + const TableKind table_kind_; + + DISALLOW_COPY_AND_ASSIGN(HClassTableGet); +}; + // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will // have one successor for each entry in the switch table, and the final successor // will be the block containing the next Dex opcode. @@ -2711,12 +2760,8 @@ class HCondition : public HBinaryOperation { public: HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) : HBinaryOperation(Primitive::kPrimBoolean, first, second, SideEffects::None(), dex_pc), - needs_materialization_(true), bias_(ComparisonBias::kNoBias) {} - bool NeedsMaterialization() const { return needs_materialization_; } - void ClearNeedsMaterialization() { needs_materialization_ = false; } - // For code generation purposes, returns whether this instruction is just before // `instruction`, and disregard moves in between. bool IsBeforeWhenDisregardMoves(HInstruction* instruction) const; @@ -2748,10 +2793,6 @@ class HCondition : public HBinaryOperation { } private: - // For register allocation purposes, returns whether this instruction needs to be - // materialized (that is, not just be in the processor flags). - bool needs_materialization_; - // Needed if we merge a HCompare into a HCondition. ComparisonBias bias_; @@ -5586,6 +5627,41 @@ class HMonitorOperation : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; +class HSelect : public HExpression<3> { + public: + HSelect(HInstruction* condition, + HInstruction* true_value, + HInstruction* false_value, + uint32_t dex_pc) + : HExpression(HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { + DCHECK_EQ(HPhi::ToPhiType(true_value->GetType()), HPhi::ToPhiType(false_value->GetType())); + + // First input must be `true_value` or `false_value` to allow codegens to + // use the SameAsFirstInput allocation policy. We make it `false_value`, so + // that architectures which implement HSelect as a conditional move also + // will not need to invert the condition. 
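+ // (Sketch of the intended lowering, not the actual backend code: with the + // output allocated SameAsFirstInput, `out = false_value; if (condition) + // out = true_value;` maps to a single cmov/csel on `condition`, with no + // extra move and no condition inversion in the false case.)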
+ SetRawInputAt(0, false_value); + SetRawInputAt(1, true_value); + SetRawInputAt(2, condition); + } + + HInstruction* GetFalseValue() const { return InputAt(0); } + HInstruction* GetTrueValue() const { return InputAt(1); } + HInstruction* GetCondition() const { return InputAt(2); } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; } + + bool CanBeNull() const OVERRIDE { + return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull(); + } + + DECLARE_INSTRUCTION(Select); + + private: + DISALLOW_COPY_AND_ASSIGN(HSelect); +}; + class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { public: MoveOperands(Location source, diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 556217bf74..b1bf939b36 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -36,16 +36,12 @@ class HX86ComputeBaseMethodAddress : public HExpression<0> { class HX86LoadFromConstantTable : public HExpression<2> { public: HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base, - HConstant* constant, - bool needs_materialization = true) - : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc), - needs_materialization_(needs_materialization) { + HConstant* constant) + : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc) { SetRawInputAt(0, method_base); SetRawInputAt(1, constant); } - bool NeedsMaterialization() const { return needs_materialization_; } - HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const { return InputAt(0)->AsX86ComputeBaseMethodAddress(); } @@ -57,8 +53,6 @@ class HX86LoadFromConstantTable : public HExpression<2> { DECLARE_INSTRUCTION(X86LoadFromConstantTable); private: - const bool needs_materialization_; - DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable); }; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 3fac914017..bdc664b3eb 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -38,7 +38,6 @@ #include "base/dumpable.h" #include "base/macros.h" #include "base/timing_logger.h" -#include "boolean_simplifier.h" #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" @@ -73,6 +72,7 @@ #include "reference_type_propagation.h" #include "register_allocator.h" #include "oat_quick_method_header.h" +#include "select_generator.h" #include "sharpening.h" #include "side_effects_analysis.h" #include "ssa_builder.h" @@ -512,7 +512,7 @@ static void RunOptimizations(HGraph* graph, graph, stats, HDeadCodeElimination::kFinalDeadCodeEliminationPassName); HConstantFolding* fold1 = new (arena) HConstantFolding(graph); InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats); - HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph); + HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph); HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining"); HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce"); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); @@ -540,9 +540,9 @@ static void RunOptimizations(HGraph* graph, MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles); HOptimization* optimizations2[] = { - // BooleanSimplifier depends on the 
InstructionSimplifier removing + // SelectGenerator depends on the InstructionSimplifier removing // redundant suspend checks to recognize empty blocks. - boolean_simplify, + select_generator, fold2, // TODO: if we don't inline we can also skip fold2. side_effects, gvn, diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index f8035aae34..881beb49a6 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -51,6 +51,7 @@ enum MethodCompilationStat { kNotCompiledVerificationError, kNotCompiledVerifyAtRuntime, kInlinedMonomorphicCall, + kInlinedPolymorphicCall, kMonomorphicCall, kPolymorphicCall, kMegamorphicCall, @@ -118,6 +119,7 @@ class OptimizingCompilerStats { case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break; case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break; case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break; + case kInlinedPolymorphicCall: name = "InlinedPolymorphicCall"; break; case kMonomorphicCall: name = "MonomorphicCall"; break; case kPolymorphicCall: name = "PolymorphicCall"; break; case kMegamorphicCall: name = "kMegamorphicCall"; break; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 1394dfaf5d..a2180bc9d7 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -115,7 +115,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { InitializePCRelativeBasePointer(); HX86LoadFromConstantTable* load_constant = - new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize); + new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value); + if (!materialize) { + load_constant->MarkEmittedAtUseSite(); + } insn->GetBlock()->InsertInstructionBefore(load_constant, insn); insn->ReplaceInput(load_constant, input_index); } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 63ef600756..324d84f3db 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -127,24 +127,37 @@ void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) { } } -void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { - bool needs_materialization = false; - if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) { - needs_materialization = true; - } else { - HInstruction* user = condition->GetUses().GetFirst()->GetUser(); - if (!user->IsIf() && !user->IsDeoptimize()) { - needs_materialization = true; +bool PrepareForRegisterAllocation::CanEmitConditionAt(HCondition* condition, + HInstruction* user) const { + if (condition->GetNext() != user) { + return false; + } + + if (user->IsIf() || user->IsDeoptimize()) { + return true; + } + + if (user->IsSelect() && user->AsSelect()->GetCondition() == condition) { + if (GetGraph()->GetInstructionSet() == kX86) { + // Long values and long condition inputs result in 8 required core registers. + // We don't have that many on x86. Materialize the condition in such cases.
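+ // (The accounting: each long occupies a register pair, so the condition's + // two long inputs plus the select's true and false long values need four + // pairs, i.e. 8 core registers, and 32-bit x86 does not have 8 freely + // allocatable GPRs.)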
+ return user->GetType() != Primitive::kPrimLong || + condition->InputAt(1)->GetType() != Primitive::kPrimLong || + condition->InputAt(1)->IsConstant(); } else { - // TODO: if there is no intervening instructions with side-effect between this condition - // and the If instruction, we should move the condition just before the If. - if (condition->GetNext() != user) { - needs_materialization = true; - } + return true; } } - if (!needs_materialization) { - condition->ClearNeedsMaterialization(); + + return false; +} + +void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { + if (condition->HasOnlyOneNonEnvironmentUse()) { + HInstruction* user = condition->GetUses().GetFirst()->GetUser(); + if (CanEmitConditionAt(condition, user)) { + condition->MarkEmittedAtUseSite(); + } } } @@ -165,7 +178,8 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire } } -bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) { +bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, + HInstruction* user) const { // Determine if input and user come from the same dex instruction, so that we can move // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user) // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user). diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 9b2434250d..c8b8b0dcfa 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -42,7 +42,8 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; void VisitNewInstance(HNewInstance* instruction) OVERRIDE; - bool CanMoveClinitCheck(HInstruction* input, HInstruction* user); + bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; + bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation); }; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index d77639d608..5cd30adb45 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -179,7 +179,7 @@ void RegisterAllocator::AllocateRegistersInternal() { } if (block->IsCatchBlock() || - (block->GetLoopInformation() != nullptr && block->GetLoopInformation()->IsIrreducible())) { + (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { // By blocking all registers at the top of each catch block or irreducible loop, we force // intervals belonging to the live-in set of the catch/header block to be spilled. // TODO(ngeoffray): Phis in this block could be allocated in register. @@ -1749,8 +1749,10 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, } // Find the intervals that cover `from` and `to`. - LiveInterval* destination = interval->GetSiblingAt(to->GetLifetimeStart()); - LiveInterval* source = interval->GetSiblingAt(from->GetLifetimeEnd() - 1); + size_t destination_position = to->GetLifetimeStart(); + size_t source_position = from->GetLifetimeEnd() - 1; + LiveInterval* destination = interval->GetSiblingAt(destination_position); + LiveInterval* source = interval->GetSiblingAt(source_position); if (destination == source) { // Interval was not split. 
@@ -1759,7 +1761,8 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, LiveInterval* parent = interval->GetParent(); HInstruction* defined_by = parent->GetDefinedBy(); - if (destination == nullptr) { + if (codegen_->GetGraph()->HasIrreducibleLoops() && + (destination == nullptr || !destination->CoversSlow(destination_position))) { // Our live_in fixed point calculation has found that the instruction is live // in the `to` block because it will eventually enter an irreducible loop. Our // live interval computation however does not compute a fixed point, and @@ -1775,18 +1778,41 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval, return; } + Location location_source; + // `GetSiblingAt` returns the interval whose start and end cover `position`, + // but does not check whether the interval is inactive at that position. + // The only situation where the interval is inactive at that position is in the + // presence of irreducible loops for constants and ArtMethod. + if (codegen_->GetGraph()->HasIrreducibleLoops() && + (source == nullptr || !source->CoversSlow(source_position))) { + DCHECK(IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(defined_by)); + if (defined_by->IsConstant()) { + location_source = defined_by->GetLocations()->Out(); + } else { + DCHECK(defined_by->IsCurrentMethod()); + location_source = parent->NeedsTwoSpillSlots() + ? Location::DoubleStackSlot(parent->GetSpillSlot()) + : Location::StackSlot(parent->GetSpillSlot()); + } + } else { + DCHECK(source != nullptr); + DCHECK(source->CoversSlow(source_position)); + DCHECK(destination->CoversSlow(destination_position)); + location_source = source->ToLocation(); + } + // If `from` has only one successor, we can put the moves at the exit of it. Otherwise // we need to put the moves at the entry of `to`. if (from->GetNormalSuccessors().size() == 1) { InsertParallelMoveAtExitOf(from, defined_by, - source->ToLocation(), + location_source, destination->ToLocation()); } else { DCHECK_EQ(to->GetPredecessors().size(), 1u); InsertParallelMoveAtEntryOf(to, defined_by, - source->ToLocation(), + location_source, destination->ToLocation()); } } @@ -1890,7 +1916,7 @@ void RegisterAllocator::Resolve() { for (HLinearOrderIterator it(*codegen_->GetGraph()); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); if (block->IsCatchBlock() || - (block->GetLoopInformation() != nullptr && block->GetLoopInformation()->IsIrreducible())) { + (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { // Instructions live at the top of catch blocks or irreducible loop header // were forced to spill. if (kIsDebugBuild) { diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc new file mode 100644 index 0000000000..105b30ae5d --- /dev/null +++ b/compiler/optimizing/select_generator.cc @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "select_generator.h" + +namespace art { + +static constexpr size_t kMaxInstructionsInBranch = 1u; + +// Returns true if `block` has only one predecessor, ends with a Goto and +// contains at most `kMaxInstructionsInBranch` other movable instructions with +// no side effects. +static bool IsSimpleBlock(HBasicBlock* block) { + if (block->GetPredecessors().size() != 1u) { + return false; + } + DCHECK(block->GetPhis().IsEmpty()); + + size_t num_instructions = 0u; + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsControlFlow()) { + return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch; + } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { + num_instructions++; + } else { + return false; + } + } + + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +// Returns true if 'block1' and 'block2' merge into the same single successor; +// the successor may still have other predecessors. +static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { + return block1->GetSingleSuccessor() == block2->GetSingleSuccessor(); +} + +// Returns the single phi in `block` whose inputs at `index1` and `index2` differ, +// or nullptr if there is no such phi or more than one. +static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index2) { + DCHECK_NE(index1, index2); + + HPhi* select_phi = nullptr; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + if (phi->InputAt(index1) != phi->InputAt(index2)) { + if (select_phi == nullptr) { + // First phi found with different inputs for the two indices. + select_phi = phi; + } else { + // More than one phi has different inputs for the two indices. + return nullptr; + } + } + } + return select_phi; +} + +void HSelectGenerator::Run() { + // Iterate in post order in the unlikely case that removing one occurrence of + // the selection pattern empties a branch block of another occurrence. + // Otherwise the order does not matter. + for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) { + HBasicBlock* block = it.Current(); + if (!block->EndsWithIf()) continue; + + // Find elements of the diamond pattern. + HIf* if_instruction = block->GetLastInstruction()->AsIf(); + HBasicBlock* true_block = if_instruction->IfTrueSuccessor(); + HBasicBlock* false_block = if_instruction->IfFalseSuccessor(); + DCHECK_NE(true_block, false_block); + if (!IsSimpleBlock(true_block) || + !IsSimpleBlock(false_block) || + !BlocksMergeTogether(true_block, false_block)) { + continue; + } + HBasicBlock* merge_block = true_block->GetSingleSuccessor(); + + // If the branches are not empty, move instructions in front of the If. + // TODO(dbrazdil): This puts an instruction between If and its condition. + // Implement moving of conditions to first users if possible. + if (!true_block->IsSingleGoto()) { + true_block->MoveInstructionBefore(true_block->GetFirstInstruction(), if_instruction); + } + if (!false_block->IsSingleGoto()) { + false_block->MoveInstructionBefore(false_block->GetFirstInstruction(), if_instruction); + } + DCHECK(true_block->IsSingleGoto()); + DCHECK(false_block->IsSingleGoto()); + + // Find the resulting true/false values.
+ size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); + size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); + DCHECK_NE(predecessor_index_true, predecessor_index_false); + + HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false); + if (phi == nullptr) { + continue; + } + HInstruction* true_value = phi->InputAt(predecessor_index_true); + HInstruction* false_value = phi->InputAt(predecessor_index_false); + + // Create the Select instruction and insert it in front of the If. + HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0), + true_value, + false_value, + if_instruction->GetDexPc()); + if (phi->GetType() == Primitive::kPrimNot) { + select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo()); + } + block->InsertInstructionBefore(select, if_instruction); + + // Remove the true branch which removes the corresponding Phi input. + // If left only with the false branch, the Phi is automatically removed. + phi->ReplaceInput(select, predecessor_index_false); + bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); + true_block->DisconnectAndDelete(); + DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); + + // Merge remaining blocks which are now connected with Goto. + DCHECK_EQ(block->GetSingleSuccessor(), false_block); + block->MergeWith(false_block); + if (only_two_predecessors) { + DCHECK_EQ(block->GetSingleSuccessor(), merge_block); + block->MergeWith(merge_block); + } + + // No need to update dominance information, as we are simplifying + // a simple diamond shape, where the join block is merged with the + // entry block. Any following blocks would have had the join block + // as a dominator, and `MergeWith` handles changing that to the + // entry block. + } +} + +} // namespace art diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h new file mode 100644 index 0000000000..f9d6d4d8de --- /dev/null +++ b/compiler/optimizing/select_generator.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This optimization recognizes the common diamond selection pattern and + * replaces it with an instance of the HSelect instruction. + * + * Recognized pattern: + * + * If [ Condition ] + * / \ + * false branch true branch + * \ / + * Phi [FalseValue, TrueValue] + * + * The pattern will be simplified if `true_branch` and `false_branch` each + * contain at most one instruction without any side effects. + * + * Blocks are merged into one and Select replaces the If and the Phi: + * true branch + * false branch + * Select [FalseValue, TrueValue, Condition] + * + * Note: In order to recognize no side-effect blocks, this optimization must be + * run after the instruction simplifier has removed redundant suspend checks. 
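+ * + * Illustration (hypothetical source, not from this change): the expression + * `x > 0 ? x : -x` builds exactly this shape, with + * Condition = GreaterThan(x, 0), an empty true branch, Neg(x) in the false + * branch and Phi [ Neg(x), x ]; the pass rewrites it to + * Select [ Neg(x), x, GreaterThan(x, 0) ].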
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_ +#define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_ + +#include "optimization.h" + +namespace art { + +class HSelectGenerator : public HOptimization { + public: + explicit HSelectGenerator(HGraph* graph) + : HOptimization(graph, kSelectGeneratorPassName) {} + + void Run() OVERRIDE; + + static constexpr const char* kSelectGeneratorPassName = "select_generator"; + + private: + DISALLOW_COPY_AND_ASSIGN(HSelectGenerator); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_ diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index a5609fc466..7ed3c84f13 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -242,19 +242,53 @@ void SsaLivenessAnalysis::ComputeLiveRanges() { } if (instruction != nullptr) { instruction->GetLiveInterval()->AddUse( - current, environment, i, should_be_live); + current, environment, i, /* actual_user */ nullptr, should_be_live); } } } - // All inputs of an instruction must be live. - for (size_t i = 0, e = current->InputCount(); i < e; ++i) { - HInstruction* input = current->InputAt(i); - // Some instructions 'inline' their inputs, that is they do not need - // to be materialized. - if (input->HasSsaIndex() && current->GetLocations()->InAt(i).IsValid()) { - live_in->SetBit(input->GetSsaIndex()); - input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); + // Process inputs of instructions. + if (current->IsEmittedAtUseSite()) { + if (kIsDebugBuild) { + DCHECK(!current->GetLocations()->Out().IsValid()); + for (HUseIterator<HInstruction*> use_it(current->GetUses()); + !use_it.Done(); + use_it.Advance()) { + HInstruction* user = use_it.Current()->GetUser(); + size_t index = use_it.Current()->GetIndex(); + DCHECK(!user->GetLocations()->InAt(index).IsValid()); + } + DCHECK(!current->HasEnvironmentUses()); + } + } else { + for (size_t i = 0, e = current->InputCount(); i < e; ++i) { + HInstruction* input = current->InputAt(i); + bool has_in_location = current->GetLocations()->InAt(i).IsValid(); + bool has_out_location = input->GetLocations()->Out().IsValid(); + + if (has_in_location) { + DCHECK(has_out_location); + DCHECK(input->HasSsaIndex()); + // `Input` generates a result used by `current`. Add use and update + // the live-in set. + input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i); + live_in->SetBit(input->GetSsaIndex()); + } else if (has_out_location) { + // `Input` generates a result but it is not used by `current`. + } else { + // `Input` is inlined into `current`. Walk over its inputs and record + // uses at `current`. 
+ DCHECK(input->IsEmittedAtUseSite()); + for (size_t i2 = 0, e2 = input->InputCount(); i2 < e2; ++i2) { + HInstruction* inlined_input = input->InputAt(i2); + DCHECK(inlined_input->HasSsaIndex()) << "Recursive inlining not allowed."; + if (input->GetLocations()->InAt(i2).IsValid()) { + live_in->SetBit(inlined_input->GetSsaIndex()); + inlined_input->GetLiveInterval()->AddUse( + /* owner */ input, /* environment */ nullptr, i2, /* actual_user */ current); + } + } + } } } } diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 572a7b6a53..a78aedcff5 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -113,10 +113,6 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { input_index_(input_index), position_(position), next_(next) { - DCHECK((user == nullptr) - || user->IsPhi() - || (GetPosition() == user->GetLifetimePosition() + 1) - || (GetPosition() == user->GetLifetimePosition())); DCHECK(environment == nullptr || user == nullptr); DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } @@ -243,21 +239,30 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { AddRange(position, position + 1); } + // Record use of an input. The use will be recorded as an environment use if + // `environment` is not null and as a register use otherwise. If `actual_user` + // is specified, the use will be recorded at `actual_user`'s lifetime position. void AddUse(HInstruction* instruction, HEnvironment* environment, size_t input_index, + HInstruction* actual_user = nullptr, bool keep_alive = false) { - // Set the use within the instruction. bool is_environment = (environment != nullptr); - size_t position = instruction->GetLifetimePosition() + 1; LocationSummary* locations = instruction->GetLocations(); + if (actual_user == nullptr) { + actual_user = instruction; + } + + // Set the use within the instruction. + size_t position = actual_user->GetLifetimePosition() + 1; if (!is_environment) { if (locations->IsFixedInput(input_index) || locations->OutputUsesSameAs(input_index)) { // For fixed inputs and output same as input, the register allocator // requires to have inputs die at the instruction, so that input moves use the // location of the input just before that instruction (and not potential moves due // to splitting). + DCHECK_EQ(instruction, actual_user); + position = actual_user->GetLifetimePosition(); } else if (!locations->InAt(input_index).IsValid()) { return; } @@ -267,11 +272,8 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { AddBackEdgeUses(*instruction->GetBlock()); } - DCHECK(position == instruction->GetLifetimePosition() || position == instruction->GetLifetimePosition() + 1); - if ((first_use_ != nullptr) - && (first_use_->GetUser() == instruction) + && (first_use_->GetUser() == actual_user) && (first_use_->GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first.
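Tying the liveness hunks together, consider a hypothetical schedule consistent with the live_ranges_test change above (instructions are numbered two apart; a use is recorded at the user's lifetime position plus one):

  14: Phi                 // live range starts here
  16: Equal(Phi, 0)       // emitted at use site: no output location
  18: If(Equal)

With a materialized Equal, the Phi's use is recorded at 16 + 1 = 17, giving the old range [14, 17). With Equal emitted at its use site, ComputeLiveRanges() walks Equal's inputs while processing the If and calls AddUse(/* owner */ equal, /* environment */ nullptr, i, /* actual_user */ if_instruction), so the use lands at 18 + 1 = 19 and the range becomes [14, 19). This is also why the UsePosition constructor's position check had to be removed: the recorded position now follows `actual_user` rather than the owning instruction.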
diff --git a/compiler/utils/test_dex_file_builder.h b/compiler/utils/test_dex_file_builder.h index e57a540669..2958dc6e2a 100644 --- a/compiler/utils/test_dex_file_builder.h +++ b/compiler/utils/test_dex_file_builder.h @@ -89,11 +89,12 @@ class TestDexFileBuilder { DexFile::Header* header = reinterpret_cast<DexFile::Header*>(&header_data.data); std::copy_n(DexFile::kDexMagic, 4u, header->magic_); std::copy_n(DexFile::kDexMagicVersion, 4u, header->magic_ + 4u); - header->header_size_ = sizeof(header); + header->header_size_ = sizeof(DexFile::Header); header->endian_tag_ = DexFile::kDexEndianConstant; header->link_size_ = 0u; // Unused. header->link_off_ = 0u; // Unused. - header->map_off_ = 0u; // Unused. + header->map_off_ = 0u; // Unused. TODO: This is wrong. Dex files created by this builder + // cannot be verified. b/26808512 uint32_t data_section_size = 0u; @@ -213,13 +214,22 @@ // Leave signature as zeros. header->file_size_ = dex_file_data_.size(); + + // Write the complete header early, as part of it needs to be checksummed. + std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header)); + + // Checksum starts after the checksum field. size_t skip = sizeof(header->magic_) + sizeof(header->checksum_); - header->checksum_ = adler32(0u, dex_file_data_.data() + skip, dex_file_data_.size() - skip); + header->checksum_ = adler32(adler32(0L, Z_NULL, 0), + dex_file_data_.data() + skip, + dex_file_data_.size() - skip); + + // Write the complete header again, this time with the checksum filled in; + // rewriting the whole header is simpler than patching just the checksum field. std::memcpy(&dex_file_data_[0], header_data.data, sizeof(DexFile::Header)); std::string error_msg; std::unique_ptr<const DexFile> dex_file(DexFile::Open( - &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, &error_msg)); + &dex_file_data_[0], dex_file_data_.size(), dex_location, 0u, nullptr, false, &error_msg)); CHECK(dex_file != nullptr) << error_msg; return dex_file; }
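The checksum change above follows zlib's documented seeding idiom: adler32(0L, Z_NULL, 0) returns the initial accumulator (1) required by Adler-32, while seeding with a literal 0 starts the s1 term at 0 and yields a value that disagrees with the dex specification. A self-contained sketch; the buffer contents here are arbitrary:

  #include <cstdio>
  #include <zlib.h>

  int main() {
    static const unsigned char data[] = "dex\n035";
    uLong seed = adler32(0L, Z_NULL, 0);             // The Adler-32 start value, 1.
    uLong good = adler32(seed, data, sizeof(data));  // Spec-conformant checksum.
    uLong bad = adler32(0L, data, sizeof(data));     // s1 seeded with 0: differs.
    std::printf("seeded=%08lx unseeded=%08lx\n", good, bad);
    return 0;
  }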