diff options
Diffstat (limited to 'compiler')
52 files changed, 1785 insertions, 457 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index f6a4db49fb..f5589cd7a3 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -111,6 +111,7 @@ art_cc_defaults { "optimizing/instruction_simplifier_shared.cc", "optimizing/intrinsics_arm.cc", "optimizing/intrinsics_arm_vixl.cc", + "optimizing/nodes_shared.cc", "utils/arm/assembler_arm.cc", "utils/arm/assembler_arm_vixl.cc", "utils/arm/assembler_thumb2.cc", @@ -127,7 +128,6 @@ art_cc_defaults { "optimizing/scheduler_arm64.cc", "optimizing/instruction_simplifier_arm64.cc", "optimizing/intrinsics_arm64.cc", - "optimizing/nodes_arm64.cc", "utils/arm64/assembler_arm64.cc", "utils/arm64/jni_macro_assembler_arm64.cc", "utils/arm64/managed_register_arm64.cc", diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 2f9164c0e0..d89cdbabf8 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -175,6 +175,7 @@ void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa, size_t number_of_threads) { compiler_options_->boot_image_ = true; + compiler_options_->SetCompilerFilter(GetCompilerFilter()); compiler_driver_.reset(new CompilerDriver(compiler_options_.get(), verification_results_.get(), kind, diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h index 0d45a50053..98dcf20714 100644 --- a/compiler/common_compiler_test.h +++ b/compiler/common_compiler_test.h @@ -77,6 +77,10 @@ class CommonCompilerTest : public CommonRuntimeTest { virtual ProfileCompilationInfo* GetProfileCompilationInfo(); + virtual CompilerFilter::Filter GetCompilerFilter() const { + return CompilerFilter::kDefaultCompilerFilter; + } + virtual void TearDown(); void CompileClass(mirror::ClassLoader* class_loader, const char* class_name) diff --git a/compiler/compiler.h b/compiler/compiler.h index 2ca0b77a73..908d3669ed 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -27,6 +27,7 @@ namespace jit { class JitCodeCache; 
} namespace mirror { + class ClassLoader; class DexCache; } @@ -63,7 +64,7 @@ class Compiler { InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const = 0; diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index d4f6545c59..76aeaa55d7 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -284,16 +284,13 @@ void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, } uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(unit_.GetClassLoader()))); ClassLinker* class_linker = unit_.GetClassLinker(); ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::kForceICCECheck>( GetDexFile(), method_idx, unit_.GetDexCache(), - class_loader, + unit_.GetClassLoader(), /* referrer */ nullptr, kVirtual); @@ -330,7 +327,7 @@ CompiledMethod* ArtCompileDEX( InvokeType invoke_type ATTRIBUTE_UNUSED, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, DexToDexCompilationLevel dex_to_dex_compilation_level) { DCHECK(driver != nullptr); diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 0a00d45297..00c596d60e 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -18,6 +18,7 @@ #define ART_COMPILER_DEX_DEX_TO_DEX_COMPILER_H_ #include "dex_file.h" +#include "handle.h" #include "invoke_type.h" namespace art { @@ -25,6 +26,10 @@ namespace art { class CompiledMethod; class CompilerDriver; +namespace mirror { +class ClassLoader; +} // namespace mirror + namespace optimizer { enum class 
DexToDexCompilationLevel { @@ -40,7 +45,7 @@ CompiledMethod* ArtCompileDEX(CompilerDriver* driver, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, DexToDexCompilationLevel dex_to_dex_compilation_level); diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index f296851ebf..582330611d 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -31,17 +31,12 @@ namespace art { -inline mirror::ClassLoader* CompilerDriver::GetClassLoader(const ScopedObjectAccess& soa, - const DexCompilationUnit* mUnit) { - return soa.Decode<mirror::ClassLoader>(mUnit->GetClassLoader()).Ptr(); -} - inline mirror::Class* CompilerDriver::ResolveClass( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, dex::TypeIndex cls_index, const DexCompilationUnit* mUnit) { DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile()); - DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit)); + DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); mirror::Class* cls = mUnit->GetClassLinker()->ResolveType( *mUnit->GetDexFile(), cls_index, dex_cache, class_loader); DCHECK_EQ(cls == nullptr, soa.Self()->IsExceptionPending()); @@ -56,7 +51,7 @@ inline mirror::Class* CompilerDriver::ResolveCompilingMethodsClass( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit) { DCHECK_EQ(dex_cache->GetDexFile(), mUnit->GetDexFile()); - DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit)); + DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); const DexFile::MethodId& referrer_method_id = mUnit->GetDexFile()->GetMethodId(mUnit->GetDexMethodIndex()); return ResolveClass(soa, dex_cache, class_loader, referrer_method_id.class_idx_, mUnit); @@ -87,7 +82,7 @@ inline 
ArtField* CompilerDriver::ResolveField( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit, uint32_t field_idx, bool is_static) { - DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit)); + DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); return ResolveFieldWithDexFile(soa, dex_cache, class_loader, mUnit->GetDexFile(), field_idx, is_static); } @@ -139,7 +134,7 @@ inline ArtMethod* CompilerDriver::ResolveMethod( ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit, uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change) { - DCHECK_EQ(class_loader.Get(), GetClassLoader(soa, mUnit)); + DCHECK_EQ(class_loader.Get(), mUnit->GetClassLoader().Get()); ArtMethod* resolved_method = check_incompatible_class_change ? mUnit->GetClassLinker()->ResolveMethod<ClassLinker::kForceICCECheck>( diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 26c0818b85..52ffa55342 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -580,7 +580,7 @@ static void CompileMethod(Thread* self, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level, bool compilation_enabled, @@ -621,9 +621,6 @@ static void CompileMethod(Thread* self, // Look-up the ArtMethod associated with this code_item (if any) // -- It is later used to lookup any [optimization] annotations for this method. ScopedObjectAccess soa(self); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader_handle(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(class_loader))); // TODO: Lookup annotation from DexFile directly without resolving method. 
ArtMethod* method = @@ -631,7 +628,7 @@ static void CompileMethod(Thread* self, dex_file, method_idx, dex_cache, - class_loader_handle, + class_loader, /* referrer */ nullptr, invoke_type); @@ -678,9 +675,14 @@ static void CompileMethod(Thread* self, if (compile) { // NOTE: if compiler declines to compile this method, it will return null. - compiled_method = driver->GetCompiler()->Compile(code_item, access_flags, invoke_type, - class_def_idx, method_idx, class_loader, - dex_file, dex_cache); + compiled_method = driver->GetCompiler()->Compile(code_item, + access_flags, + invoke_type, + class_def_idx, + method_idx, + class_loader, + dex_file, + dex_cache); } if (compiled_method == nullptr && dex_to_dex_compilation_level != optimizer::DexToDexCompilationLevel::kDontDexToDexCompile) { @@ -727,12 +729,14 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t uint32_t method_idx = method->GetDexMethodIndex(); uint32_t access_flags = method->GetAccessFlags(); InvokeType invoke_type = method->GetInvokeType(); - StackHandleScope<1> hs(self); + StackHandleScope<2> hs(self); Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); + Handle<mirror::ClassLoader> class_loader( + hs.NewHandle(method->GetDeclaringClass()->GetClassLoader())); { ScopedObjectAccessUnchecked soa(self); ScopedLocalRef<jobject> local_class_loader( - soa.Env(), soa.AddLocalReference<jobject>(method->GetDeclaringClass()->GetClassLoader())); + soa.Env(), soa.AddLocalReference<jobject>(class_loader.Get())); jclass_loader = soa.Env()->NewGlobalRef(local_class_loader.get()); // Find the dex_file dex_file = method->GetDexFile(); @@ -766,7 +770,7 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t invoke_type, class_def_idx, method_idx, - jclass_loader, + class_loader, *dex_file, dex_to_dex_compilation_level, true, @@ -792,7 +796,7 @@ void CompilerDriver::CompileOne(Thread* self, ArtMethod* method, TimingLogger* t invoke_type, 
class_def_idx, method_idx, - jclass_loader, + class_loader, *dex_file, dex_to_dex_compilation_level, true, @@ -1050,9 +1054,9 @@ bool CompilerDriver::IsMethodToCompile(const MethodReference& method_ref) const } bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_ref) const { - if (profile_compilation_info_ == nullptr) { - // If we miss profile information it means that we don't do a profile guided compilation. - // Return true, and let the other filters decide if the method should be compiled. + if (!CompilerFilter::DependsOnProfile(compiler_options_->GetCompilerFilter())) { + // Use the compiler filter instead of the presence of profile_compilation_info_ since + // we may want to have full speed compilation along with profile based layout optimizations. return true; } bool result = profile_compilation_info_->ContainsMethod(method_ref); @@ -1067,22 +1071,30 @@ bool CompilerDriver::ShouldCompileBasedOnProfile(const MethodReference& method_r class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { public: - explicit ResolveCatchBlockExceptionsClassVisitor( - std::set<std::pair<dex::TypeIndex, const DexFile*>>& exceptions_to_resolve) - : exceptions_to_resolve_(exceptions_to_resolve) {} + ResolveCatchBlockExceptionsClassVisitor() : classes_() {} virtual bool operator()(ObjPtr<mirror::Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { + classes_.push_back(c); + return true; + } + + void FindExceptionTypesToResolve( + std::set<std::pair<dex::TypeIndex, const DexFile*>>* exceptions_to_resolve) + REQUIRES_SHARED(Locks::mutator_lock_) { const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); - for (auto& m : c->GetMethods(pointer_size)) { - ResolveExceptionsForMethod(&m); + for (ObjPtr<mirror::Class> klass : classes_) { + for (ArtMethod& method : klass->GetMethods(pointer_size)) { + FindExceptionTypesToResolveForMethod(&method, exceptions_to_resolve); + } } - return true; } private: - void 
ResolveExceptionsForMethod(ArtMethod* method_handle) + void FindExceptionTypesToResolveForMethod( + ArtMethod* method, + std::set<std::pair<dex::TypeIndex, const DexFile*>>* exceptions_to_resolve) REQUIRES_SHARED(Locks::mutator_lock_) { - const DexFile::CodeItem* code_item = method_handle->GetCodeItem(); + const DexFile::CodeItem* code_item = method->GetCodeItem(); if (code_item == nullptr) { return; // native or abstract method } @@ -1102,9 +1114,9 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { dex::TypeIndex encoded_catch_handler_handlers_type_idx = dex::TypeIndex(DecodeUnsignedLeb128(&encoded_catch_handler_list)); // Add to set of types to resolve if not already in the dex cache resolved types - if (!method_handle->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { - exceptions_to_resolve_.emplace(encoded_catch_handler_handlers_type_idx, - method_handle->GetDexFile()); + if (!method->IsResolvedTypeIdx(encoded_catch_handler_handlers_type_idx)) { + exceptions_to_resolve->emplace(encoded_catch_handler_handlers_type_idx, + method->GetDexFile()); } // ignore address associated with catch handler DecodeUnsignedLeb128(&encoded_catch_handler_list); @@ -1116,7 +1128,7 @@ class ResolveCatchBlockExceptionsClassVisitor : public ClassVisitor { } } - std::set<std::pair<dex::TypeIndex, const DexFile*>>& exceptions_to_resolve_; + std::vector<ObjPtr<mirror::Class>> classes_; }; class RecordImageClassesVisitor : public ClassVisitor { @@ -1170,8 +1182,14 @@ void CompilerDriver::LoadImageClasses(TimingLogger* timings) { hs.NewHandle(class_linker->FindSystemClass(self, "Ljava/lang/Throwable;"))); do { unresolved_exception_types.clear(); - ResolveCatchBlockExceptionsClassVisitor visitor(unresolved_exception_types); - class_linker->VisitClasses(&visitor); + { + // Thread suspension is not allowed while ResolveCatchBlockExceptionsClassVisitor + // is using a std::vector<ObjPtr<mirror::Class>>. 
+ ScopedAssertNoThreadSuspension ants(__FUNCTION__); + ResolveCatchBlockExceptionsClassVisitor visitor; + class_linker->VisitClasses(&visitor); + visitor.FindExceptionTypesToResolve(&unresolved_exception_types); + } for (const auto& exception_type : unresolved_exception_types) { dex::TypeIndex exception_type_idx = exception_type.first; const DexFile* dex_file = exception_type.second; @@ -1422,19 +1440,14 @@ void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodRefere dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.dex_method_index); } -bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, - Handle<mirror::DexCache> dex_cache, - dex::TypeIndex type_idx) { - // Get type from dex cache assuming it was populated by the verifier - mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); +bool CompilerDriver::CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, + ObjPtr<mirror::Class> resolved_class) { if (resolved_class == nullptr) { stats_->TypeNeedsAccessCheck(); return false; // Unknown class needs access checks. } - const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx); bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible. if (!is_accessible) { - mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_); if (referrer_class == nullptr) { stats_->TypeNeedsAccessCheck(); return false; // Incomplete referrer knowledge needs access check. 
@@ -1451,12 +1464,9 @@ bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, return is_accessible; } -bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, - Handle<mirror::DexCache> dex_cache, - dex::TypeIndex type_idx, +bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, + ObjPtr<mirror::Class> resolved_class, bool* finalizable) { - // Get type from dex cache assuming it was populated by the verifier. - mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); if (resolved_class == nullptr) { stats_->TypeNeedsAccessCheck(); // Be conservative. @@ -1464,10 +1474,8 @@ bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_id return false; // Unknown class needs access checks. } *finalizable = resolved_class->IsFinalizable(); - const DexFile::MethodId& method_id = dex_cache->GetDexFile()->GetMethodId(referrer_idx); bool is_accessible = resolved_class->IsPublic(); // Public classes are always accessible. if (!is_accessible) { - mirror::Class* referrer_class = dex_cache->GetResolvedType(method_id.class_idx_); if (referrer_class == nullptr) { stats_->TypeNeedsAccessCheck(); return false; // Incomplete referrer knowledge needs access check. @@ -1511,9 +1519,7 @@ ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, mirror::Class* referrer_class; Handle<mirror::DexCache> dex_cache(mUnit->GetDexCache()); { - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader_handle( - hs.NewHandle(soa.Decode<mirror::ClassLoader>(mUnit->GetClassLoader()))); + Handle<mirror::ClassLoader> class_loader_handle = mUnit->GetClassLoader(); resolved_field = ResolveField(soa, dex_cache, class_loader_handle, mUnit, field_idx, false); referrer_class = resolved_field != nullptr ? 
ResolveCompilingMethodsClass(soa, dex_cache, class_loader_handle, mUnit) : nullptr; @@ -2585,10 +2591,18 @@ class CompileClassVisitor : public CompilationVisitor { continue; } previous_direct_method_idx = method_idx; - CompileMethod(soa.Self(), driver, it.GetMethodCodeItem(), it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), class_def_index, - method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level, - compilation_enabled, dex_cache); + CompileMethod(soa.Self(), + driver, + it.GetMethodCodeItem(), + it.GetMethodAccessFlags(), + it.GetMethodInvokeType(class_def), + class_def_index, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache); it.Next(); } // Compile virtual methods @@ -2602,10 +2616,17 @@ class CompileClassVisitor : public CompilationVisitor { continue; } previous_virtual_method_idx = method_idx; - CompileMethod(soa.Self(), driver, it.GetMethodCodeItem(), it.GetMethodAccessFlags(), - it.GetMethodInvokeType(class_def), class_def_index, - method_idx, jclass_loader, dex_file, dex_to_dex_compilation_level, - compilation_enabled, dex_cache); + CompileMethod(soa.Self(), + driver, it.GetMethodCodeItem(), + it.GetMethodAccessFlags(), + it.GetMethodInvokeType(class_def), + class_def_index, + method_idx, + class_loader, + dex_file, + dex_to_dex_compilation_level, + compilation_enabled, + dex_cache); it.Next(); } DCHECK(!it.HasNext()); diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index 5b4c751c4a..1e5c43d833 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -187,16 +187,14 @@ class CompilerDriver { REQUIRES(!requires_constructor_barrier_lock_); // Are runtime access checks necessary in the compiled code? 
- bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, - Handle<mirror::DexCache> dex_cache, - dex::TypeIndex type_idx) + bool CanAccessTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, + ObjPtr<mirror::Class> resolved_class) REQUIRES_SHARED(Locks::mutator_lock_); // Are runtime access and instantiable checks necessary in the code? // out_is_finalizable is set to whether the type is finalizable. - bool CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_idx, - Handle<mirror::DexCache> dex_cache, - dex::TypeIndex type_idx, + bool CanAccessInstantiableTypeWithoutChecks(ObjPtr<mirror::Class> referrer_class, + ObjPtr<mirror::Class> resolved_class, bool* out_is_finalizable) REQUIRES_SHARED(Locks::mutator_lock_); @@ -370,10 +368,6 @@ class CompilerDriver { uint32_t field_idx) REQUIRES_SHARED(Locks::mutator_lock_); - mirror::ClassLoader* GetClassLoader(const ScopedObjectAccess& soa, - const DexCompilationUnit* mUnit) - REQUIRES_SHARED(Locks::mutator_lock_); - private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 1e4ca16844..97954f3c29 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -101,6 +101,7 @@ class CompilerDriverTest : public CommonCompilerTest { }; // Disabled due to 10 second runtime on host +// TODO: Update the test for hash-based dex cache arrays. Bug: 30627598 TEST_F(CompilerDriverTest, DISABLED_LARGE_CompileDexLibCore) { CompileAll(nullptr); @@ -246,6 +247,11 @@ class CompilerDriverProfileTest : public CompilerDriverTest { return &profile_info_; } + CompilerFilter::Filter GetCompilerFilter() const OVERRIDE { + // Use a profile based filter. 
+ return CompilerFilter::kSpeedProfile; + } + std::unordered_set<std::string> GetExpectedMethodsForClass(const std::string& clazz) { if (clazz == "Main") { return std::unordered_set<std::string>({ @@ -304,7 +310,6 @@ TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) { // Need to enable dex-file writability. Methods rejected to be compiled will run through the // dex-to-dex compiler. - ProfileCompilationInfo info; for (const DexFile* dex_file : GetDexFiles(class_loader)) { ASSERT_TRUE(dex_file->EnableWrite()); } diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc index 47b19297e5..7e8e812c4a 100644 --- a/compiler/driver/dex_compilation_unit.cc +++ b/compiler/driver/dex_compilation_unit.cc @@ -21,7 +21,7 @@ namespace art { -DexCompilationUnit::DexCompilationUnit(jobject class_loader, +DexCompilationUnit::DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, ClassLinker* class_linker, const DexFile& dex_file, const DexFile::CodeItem* code_item, diff --git a/compiler/driver/dex_compilation_unit.h b/compiler/driver/dex_compilation_unit.h index 854927d747..24a9a5b653 100644 --- a/compiler/driver/dex_compilation_unit.h +++ b/compiler/driver/dex_compilation_unit.h @@ -34,7 +34,7 @@ class VerifiedMethod; class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { public: - DexCompilationUnit(jobject class_loader, + DexCompilationUnit(Handle<mirror::ClassLoader> class_loader, ClassLinker* class_linker, const DexFile& dex_file, const DexFile::CodeItem* code_item, @@ -44,7 +44,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { const VerifiedMethod* verified_method, Handle<mirror::DexCache> dex_cache); - jobject GetClassLoader() const { + Handle<mirror::ClassLoader> GetClassLoader() const { return class_loader_; } @@ -113,7 +113,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { } private: - const jobject class_loader_; + const 
Handle<mirror::ClassLoader> class_loader_; ClassLinker* const class_linker_; @@ -125,7 +125,7 @@ class DexCompilationUnit : public DeletableArenaObject<kArenaAllocMisc> { const uint32_t access_flags_; const VerifiedMethod* verified_method_; - Handle<mirror::DexCache> dex_cache_; + const Handle<mirror::DexCache> dex_cache_; std::string symbol_; }; diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index d2dd30d8e6..117d1131b5 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -940,9 +940,11 @@ void ImageWriter::PruneNonImageClasses() { } ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache(); for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) { - Class* klass = dex_cache->GetResolvedType(dex::TypeIndex(i)); + mirror::TypeDexCachePair pair = + dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed); + mirror::Class* klass = pair.object.Read(); if (klass != nullptr && !KeepClass(klass)) { - dex_cache->SetResolvedType(dex::TypeIndex(i), nullptr); + dex_cache->ClearResolvedType(dex::TypeIndex(pair.index)); } } ArtMethod** resolved_methods = dex_cache->GetResolvedMethods(); @@ -1922,8 +1924,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { // above comment for intern tables. ClassTable temp_class_table; temp_class_table.ReadFromMemory(class_table_memory_ptr); - CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u); - mirror::ClassLoader* class_loader = compile_app_image_ ? 
*class_loaders_.begin() : nullptr; + ObjPtr<mirror::ClassLoader> class_loader = GetClassLoader(); CHECK_EQ(temp_class_table.NumZygoteClasses(class_loader), table->NumNonZygoteClasses(class_loader) + table->NumZygoteClasses(class_loader)); UnbufferedRootVisitor visitor(&root_visitor, RootInfo(kRootUnknown)); @@ -2213,7 +2214,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache), ImageAddressVisitor(this)); } - GcRoot<mirror::Class>* orig_types = orig_dex_cache->GetResolvedTypes(); + mirror::TypeDexCacheType* orig_types = orig_dex_cache->GetResolvedTypes(); if (orig_types != nullptr) { copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedTypesOffset(), NativeLocationInImage(orig_types), diff --git a/compiler/image_writer.h b/compiler/image_writer.h index cc7df1ce21..bdc7146632 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -51,8 +51,13 @@ class ImageSpace; } // namespace space } // namespace gc +namespace mirror { +class ClassLoader; +} // namespace mirror + class ClassLoaderVisitor; class ClassTable; +class ImtConflictTable; static constexpr int kInvalidFd = -1; @@ -79,6 +84,11 @@ class ImageWriter FINAL { return true; } + ObjPtr<mirror::ClassLoader> GetClassLoader() { + CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u); + return compile_app_image_ ? 
*class_loaders_.begin() : nullptr; + } + template <typename T> T* GetImageAddress(T* object) const REQUIRES_SHARED(Locks::mutator_lock_) { if (object == nullptr || IsInBootImage(object)) { diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 7c0cdbf270..0ea11255a8 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1060,6 +1060,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { WriteCodeMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset, size_t relative_offset) SHARED_LOCK_FUNCTION(Locks::mutator_lock_) : OatDexMethodVisitor(writer, relative_offset), + class_loader_(writer->HasImage() ? writer->image_writer_->GetClassLoader() : nullptr), out_(out), file_offset_(file_offset), soa_(Thread::Current()), @@ -1245,6 +1246,7 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } private: + ObjPtr<mirror::ClassLoader> class_loader_; OutputStream* const out_; const size_t file_offset_; const ScopedObjectAccess soa_; @@ -1303,10 +1305,12 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { } mirror::Class* GetTargetType(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(writer_->HasImage()); ObjPtr<mirror::DexCache> dex_cache = GetDexCache(patch.TargetTypeDexFile()); - mirror::Class* type = dex_cache->GetResolvedType(patch.TargetTypeIndex()); + ObjPtr<mirror::Class> type = + ClassLinker::LookupResolvedType(patch.TargetTypeIndex(), dex_cache, class_loader_); CHECK(type != nullptr); - return type; + return type.Ptr(); } mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) { diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index e4ad4222fb..3a4c9dbd16 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -54,7 +54,10 @@ class HGraphBuilder : public ValueObject { compiler_driver_(driver), compilation_stats_(compiler_stats), 
block_builder_(graph, dex_file, code_item), - ssa_builder_(graph, dex_compilation_unit->GetDexCache(), handles), + ssa_builder_(graph, + dex_compilation_unit->GetClassLoader(), + dex_compilation_unit->GetDexCache(), + handles), instruction_builder_(graph, &block_builder_, &ssa_builder_, @@ -80,10 +83,12 @@ class HGraphBuilder : public ValueObject { code_item_(code_item), dex_compilation_unit_(nullptr), compiler_driver_(nullptr), - null_dex_cache_(), compilation_stats_(nullptr), block_builder_(graph, nullptr, code_item), - ssa_builder_(graph, null_dex_cache_, handles), + ssa_builder_(graph, + handles->NewHandle<mirror::ClassLoader>(nullptr), + handles->NewHandle<mirror::DexCache>(nullptr), + handles), instruction_builder_(graph, &block_builder_, &ssa_builder_, @@ -96,7 +101,7 @@ class HGraphBuilder : public ValueObject { /* code_generator */ nullptr, /* interpreter_metadata */ nullptr, /* compiler_stats */ nullptr, - null_dex_cache_, + handles->NewHandle<mirror::DexCache>(nullptr), handles) {} GraphAnalysisResult BuildGraph(); @@ -117,8 +122,6 @@ class HGraphBuilder : public ValueObject { CompilerDriver* const compiler_driver_; - ScopedNullHandle<mirror::DexCache> null_dex_cache_; - OptimizingCompilerStats* compilation_stats_; HBasicBlockBuilder block_builder_; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 759a951d6b..7b84ef83cd 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -19,6 +19,7 @@ #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" +#include "common_arm.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" @@ -1132,10 +1133,6 @@ class ReadBarrierForRootSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM); }; -#undef __ -// NOLINT on __ macro to suppress wrong warning/fix 
(misc-macro-parentheses) from clang-tidy. -#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT - inline Condition ARMCondition(IfCondition cond) { switch (cond) { case kCondEQ: return EQ; @@ -1191,6 +1188,197 @@ inline Condition ARMFPCondition(IfCondition cond, bool gt_bias) { } } +inline Shift ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HDataProcWithShifterOp::kASR: return ASR; + case HDataProcWithShifterOp::kLSL: return LSL; + case HDataProcWithShifterOp::kLSR: return LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + } +} + +static void GenerateDataProcInstruction(HInstruction::InstructionKind kind, + Register out, + Register first, + const ShifterOperand& second, + CodeGeneratorARM* codegen) { + if (second.IsImmediate() && second.GetImmediate() == 0) { + const ShifterOperand in = kind == HInstruction::kAnd + ? ShifterOperand(0) + : ShifterOperand(first); + + __ mov(out, in); + } else { + switch (kind) { + case HInstruction::kAdd: + __ add(out, first, second); + break; + case HInstruction::kAnd: + __ and_(out, first, second); + break; + case HInstruction::kOr: + __ orr(out, first, second); + break; + case HInstruction::kSub: + __ sub(out, first, second); + break; + case HInstruction::kXor: + __ eor(out, first, second); + break; + default: + LOG(FATAL) << "Unexpected instruction kind: " << kind; + UNREACHABLE(); + } + } +} + +static void GenerateDataProc(HInstruction::InstructionKind kind, + const Location& out, + const Location& first, + const ShifterOperand& second_lo, + const ShifterOperand& second_hi, + CodeGeneratorARM* codegen) { + const Register first_hi = first.AsRegisterPairHigh<Register>(); + const Register first_lo = first.AsRegisterPairLow<Register>(); + const Register out_hi = out.AsRegisterPairHigh<Register>(); + const Register out_lo = out.AsRegisterPairLow<Register>(); + + if (kind == HInstruction::kAdd) { + __ adds(out_lo, first_lo, second_lo); + __ 
adc(out_hi, first_hi, second_hi); + } else if (kind == HInstruction::kSub) { + __ subs(out_lo, first_lo, second_lo); + __ sbc(out_hi, first_hi, second_hi); + } else { + GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen); + GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen); + } +} + +static ShifterOperand GetShifterOperand(Register rm, Shift shift, uint32_t shift_imm) { + return shift_imm == 0 ? ShifterOperand(rm) : ShifterOperand(rm, shift, shift_imm); +} + +static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, CodeGeneratorARM* codegen) { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); + + const LocationSummary* const locations = instruction->GetLocations(); + const uint32_t shift_value = instruction->GetShiftAmount(); + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + const Location first = locations->InAt(0); + const Location second = locations->InAt(1); + const Location out = locations->Out(); + const Register first_hi = first.AsRegisterPairHigh<Register>(); + const Register first_lo = first.AsRegisterPairLow<Register>(); + const Register out_hi = out.AsRegisterPairHigh<Register>(); + const Register out_lo = out.AsRegisterPairLow<Register>(); + const Register second_hi = second.AsRegisterPairHigh<Register>(); + const Register second_lo = second.AsRegisterPairLow<Register>(); + const Shift shift = ShiftFromOpKind(instruction->GetOpKind()); + + if (shift_value >= 32) { + if (shift == LSL) { + GenerateDataProcInstruction(kind, + out_hi, + first_hi, + ShifterOperand(second_lo, LSL, shift_value - 32), + codegen); + GenerateDataProcInstruction(kind, + out_lo, + first_lo, + ShifterOperand(0), + codegen); + } else if (shift == ASR) { + GenerateDataProc(kind, + out, + first, + GetShifterOperand(second_hi, ASR, shift_value - 32), + ShifterOperand(second_hi, ASR, 31), + codegen); + } else { + DCHECK_EQ(shift, 
LSR); + GenerateDataProc(kind, + out, + first, + GetShifterOperand(second_hi, LSR, shift_value - 32), + ShifterOperand(0), + codegen); + } + } else { + DCHECK_GT(shift_value, 1U); + DCHECK_LT(shift_value, 32U); + + if (shift == LSL) { + // We are not doing this for HInstruction::kAdd because the output will require + // Location::kOutputOverlap; not applicable to other cases. + if (kind == HInstruction::kOr || kind == HInstruction::kXor) { + GenerateDataProcInstruction(kind, + out_hi, + first_hi, + ShifterOperand(second_hi, LSL, shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_hi, + out_hi, + ShifterOperand(second_lo, LSR, 32 - shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_lo, + first_lo, + ShifterOperand(second_lo, LSL, shift_value), + codegen); + } else { + __ Lsl(IP, second_hi, shift_value); + __ orr(IP, IP, ShifterOperand(second_lo, LSR, 32 - shift_value)); + GenerateDataProc(kind, + out, + first, + ShifterOperand(second_lo, LSL, shift_value), + ShifterOperand(IP), + codegen); + } + } else { + DCHECK(shift == ASR || shift == LSR); + + // We are not doing this for HInstruction::kAdd because the output will require + // Location::kOutputOverlap; not applicable to other cases. 
+ if (kind == HInstruction::kOr || kind == HInstruction::kXor) { + GenerateDataProcInstruction(kind, + out_lo, + first_lo, + ShifterOperand(second_lo, LSR, shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_lo, + out_lo, + ShifterOperand(second_hi, LSL, 32 - shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_hi, + first_hi, + ShifterOperand(second_hi, shift, shift_value), + codegen); + } else { + __ Lsr(IP, second_lo, shift_value); + __ orr(IP, IP, ShifterOperand(second_hi, LSL, 32 - shift_value)); + GenerateDataProc(kind, + out, + first, + ShifterOperand(IP), + ShifterOperand(second_hi, shift, shift_value), + codegen); + } + } + } +} + +#undef __ +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT + void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const { stream << Register(reg); } @@ -6709,6 +6897,63 @@ void InstructionCodeGeneratorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* } } +void LocationsBuilderARM::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + const bool overlap = instruction->GetType() == Primitive::kPrimLong && + HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind()); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + overlap ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARM::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* instruction) { + const LocationSummary* const locations = instruction->GetLocations(); + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + const Location left = locations->InAt(0); + const Location right = locations->InAt(1); + const Location out = locations->Out(); + + if (instruction->GetType() == Primitive::kPrimInt) { + DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); + + const Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong + ? right.AsRegisterPairLow<Register>() + : right.AsRegister<Register>(); + + GenerateDataProcInstruction(kind, + out.AsRegister<Register>(), + left.AsRegister<Register>(), + ShifterOperand(second, + ShiftFromOpKind(op_kind), + instruction->GetShiftAmount()), + codegen_); + } else { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + const Register second = right.AsRegister<Register>(); + + DCHECK_NE(out.AsRegisterPairLow<Register>(), second); + GenerateDataProc(kind, + out, + left, + ShifterOperand(second), + ShifterOperand(second, ASR, 31), + codegen_); + } else { + GenerateLongDataProc(instruction, codegen_); + } + } +} + void InstructionCodeGeneratorARM::GenerateAndConst(Register out, Register first, uint32_t value) { // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier). 
if (value == 0xffffffffu) { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index e6032d2381..edccbd4904 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -2277,8 +2277,8 @@ void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRigh } } -void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( - HArm64DataProcWithShifterOp* instruction) { +void LocationsBuilderARM64::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* instruction) { DCHECK(instruction->GetType() == Primitive::kPrimInt || instruction->GetType() == Primitive::kPrimLong); LocationSummary* locations = @@ -2292,8 +2292,8 @@ void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp( locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( - HArm64DataProcWithShifterOp* instruction) { +void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* instruction) { Primitive::Type type = instruction->GetType(); HInstruction::InstructionKind kind = instruction->GetInstrKind(); DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); @@ -2302,21 +2302,20 @@ void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp( if (kind != HInstruction::kNeg) { left = InputRegisterAt(instruction, 0); } - // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion as the + // If this `HDataProcWithShifterOp` was created by merging a type conversion as the // shifter operand operation, the IR generating `right_reg` (input to the type // conversion) can have a different type from the current instruction's type, // so we manually indicate the type. Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); - int64_t shift_amount = instruction->GetShiftAmount() & - (type == Primitive::kPrimInt ? 
kMaxIntShiftDistance : kMaxLongShiftDistance); - Operand right_operand(0); - HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); - if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) { + HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); } else { - right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount); + right_operand = Operand(right_reg, + helpers::ShiftFromOpKind(op_kind), + instruction->GetShiftAmount()); } // Logical binary operations do not support extension operations in the diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 5c4ca5bc17..6bfbe4a9c9 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1216,6 +1216,17 @@ inline vixl32::Condition ARMFPCondition(IfCondition cond, bool gt_bias) { } } +inline ShiftType ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) { + switch (op_kind) { + case HDataProcWithShifterOp::kASR: return ShiftType::ASR; + case HDataProcWithShifterOp::kLSL: return ShiftType::LSL; + case HDataProcWithShifterOp::kLSR: return ShiftType::LSR; + default: + LOG(FATAL) << "Unexpected op kind " << op_kind; + UNREACHABLE(); + } +} + void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const { stream << vixl32::Register(reg); } @@ -1260,6 +1271,185 @@ size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATT return 0; } +static void GenerateDataProcInstruction(HInstruction::InstructionKind kind, + vixl32::Register out, + vixl32::Register first, + const Operand& second, + CodeGeneratorARMVIXL* codegen) { + if (second.IsImmediate() && second.GetImmediate() == 0) { + const Operand in = kind == HInstruction::kAnd + ? 
Operand(0) + : Operand(first); + + __ Mov(out, in); + } else { + switch (kind) { + case HInstruction::kAdd: + __ Add(out, first, second); + break; + case HInstruction::kAnd: + __ And(out, first, second); + break; + case HInstruction::kOr: + __ Orr(out, first, second); + break; + case HInstruction::kSub: + __ Sub(out, first, second); + break; + case HInstruction::kXor: + __ Eor(out, first, second); + break; + default: + LOG(FATAL) << "Unexpected instruction kind: " << kind; + UNREACHABLE(); + } + } +} + +static void GenerateDataProc(HInstruction::InstructionKind kind, + const Location& out, + const Location& first, + const Operand& second_lo, + const Operand& second_hi, + CodeGeneratorARMVIXL* codegen) { + const vixl32::Register first_hi = HighRegisterFrom(first); + const vixl32::Register first_lo = LowRegisterFrom(first); + const vixl32::Register out_hi = HighRegisterFrom(out); + const vixl32::Register out_lo = LowRegisterFrom(out); + + if (kind == HInstruction::kAdd) { + __ Adds(out_lo, first_lo, second_lo); + __ Adc(out_hi, first_hi, second_hi); + } else if (kind == HInstruction::kSub) { + __ Subs(out_lo, first_lo, second_lo); + __ Sbc(out_hi, first_hi, second_hi); + } else { + GenerateDataProcInstruction(kind, out_lo, first_lo, second_lo, codegen); + GenerateDataProcInstruction(kind, out_hi, first_hi, second_hi, codegen); + } +} + +static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t shift_imm) { + return shift_imm == 0 ? 
Operand(rm) : Operand(rm, shift, shift_imm); +} + +static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, + CodeGeneratorARMVIXL* codegen) { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); + + const LocationSummary* const locations = instruction->GetLocations(); + const uint32_t shift_value = instruction->GetShiftAmount(); + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + const Location first = locations->InAt(0); + const Location second = locations->InAt(1); + const Location out = locations->Out(); + const vixl32::Register first_hi = HighRegisterFrom(first); + const vixl32::Register first_lo = LowRegisterFrom(first); + const vixl32::Register out_hi = HighRegisterFrom(out); + const vixl32::Register out_lo = LowRegisterFrom(out); + const vixl32::Register second_hi = HighRegisterFrom(second); + const vixl32::Register second_lo = LowRegisterFrom(second); + const ShiftType shift = ShiftFromOpKind(instruction->GetOpKind()); + + if (shift_value >= 32) { + if (shift == ShiftType::LSL) { + GenerateDataProcInstruction(kind, + out_hi, + first_hi, + Operand(second_lo, ShiftType::LSL, shift_value - 32), + codegen); + GenerateDataProcInstruction(kind, out_lo, first_lo, 0, codegen); + } else if (shift == ShiftType::ASR) { + GenerateDataProc(kind, + out, + first, + GetShifterOperand(second_hi, ShiftType::ASR, shift_value - 32), + Operand(second_hi, ShiftType::ASR, 31), + codegen); + } else { + DCHECK_EQ(shift, ShiftType::LSR); + GenerateDataProc(kind, + out, + first, + GetShifterOperand(second_hi, ShiftType::LSR, shift_value - 32), + 0, + codegen); + } + } else { + DCHECK_GT(shift_value, 1U); + DCHECK_LT(shift_value, 32U); + + UseScratchRegisterScope temps(codegen->GetVIXLAssembler()); + + if (shift == ShiftType::LSL) { + // We are not doing this for HInstruction::kAdd because the output will require + // Location::kOutputOverlap; not applicable to other 
cases. + if (kind == HInstruction::kOr || kind == HInstruction::kXor) { + GenerateDataProcInstruction(kind, + out_hi, + first_hi, + Operand(second_hi, ShiftType::LSL, shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_hi, + out_hi, + Operand(second_lo, ShiftType::LSR, 32 - shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_lo, + first_lo, + Operand(second_lo, ShiftType::LSL, shift_value), + codegen); + } else { + const vixl32::Register temp = temps.Acquire(); + + __ Lsl(temp, second_hi, shift_value); + __ Orr(temp, temp, Operand(second_lo, ShiftType::LSR, 32 - shift_value)); + GenerateDataProc(kind, + out, + first, + Operand(second_lo, ShiftType::LSL, shift_value), + temp, + codegen); + } + } else { + DCHECK(shift == ShiftType::ASR || shift == ShiftType::LSR); + + // We are not doing this for HInstruction::kAdd because the output will require + // Location::kOutputOverlap; not applicable to other cases. + if (kind == HInstruction::kOr || kind == HInstruction::kXor) { + GenerateDataProcInstruction(kind, + out_lo, + first_lo, + Operand(second_lo, ShiftType::LSR, shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_lo, + out_lo, + Operand(second_hi, ShiftType::LSL, 32 - shift_value), + codegen); + GenerateDataProcInstruction(kind, + out_hi, + first_hi, + Operand(second_hi, shift, shift_value), + codegen); + } else { + const vixl32::Register temp = temps.Acquire(); + + __ Lsr(temp, second_lo, shift_value); + __ Orr(temp, temp, Operand(second_hi, ShiftType::LSL, 32 - shift_value)); + GenerateDataProc(kind, + out, + first, + temp, + Operand(second_hi, shift, shift_value), + codegen); + } + } + } +} + #undef __ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, @@ -6781,6 +6971,60 @@ void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRi } } +void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* instruction) { + DCHECK(instruction->GetType() == 
Primitive::kPrimInt || + instruction->GetType() == Primitive::kPrimLong); + LocationSummary* locations = + new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + const bool overlap = instruction->GetType() == Primitive::kPrimLong && + HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind()); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* instruction) { + const LocationSummary* const locations = instruction->GetLocations(); + const HInstruction::InstructionKind kind = instruction->GetInstrKind(); + const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); + + if (instruction->GetType() == Primitive::kPrimInt) { + DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); + + const vixl32::Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong + ? LowRegisterFrom(locations->InAt(1)) + : InputRegisterAt(instruction, 1); + + GenerateDataProcInstruction(kind, + OutputRegister(instruction), + InputRegisterAt(instruction, 0), + Operand(second, + ShiftFromOpKind(op_kind), + instruction->GetShiftAmount()), + codegen_); + } else { + DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + const vixl32::Register second = InputRegisterAt(instruction, 1); + + DCHECK(!LowRegisterFrom(locations->Out()).Is(second)); + GenerateDataProc(kind, + locations->Out(), + locations->InAt(0), + second, + Operand(second, ShiftType::ASR, 31), + codegen_); + } else { + GenerateLongDataProc(instruction, codegen_); + } + } +} + // TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl. 
void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out, vixl32::Register first, diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 09612c8dbf..b779aed763 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -5262,7 +5262,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { // Branch cases into compressed and uncompressed for each index's type. uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); NearLabel done, not_compressed; - __ testl(Address(obj, count_offset), Immediate(1)); + __ testb(Address(obj, count_offset), Immediate(1)); codegen_->MaybeRecordImplicitNullCheck(instruction); static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 0879992e32..179bf6d3d1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -4720,7 +4720,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { // Branch cases into compressed and uncompressed for each index's type. 
uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); NearLabel done, not_compressed; - __ testl(Address(obj, count_offset), Immediate(1)); + __ testb(Address(obj, count_offset), Immediate(1)); codegen_->MaybeRecordImplicitNullCheck(instruction); static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index ecb86875d6..e184745520 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ +#include "instruction_simplifier_shared.h" #include "debug/dwarf/register.h" #include "locations.h" #include "nodes.h" @@ -29,6 +30,9 @@ #pragma GCC diagnostic pop namespace art { + +using helpers::HasShifterOperand; + namespace arm { namespace helpers { @@ -218,6 +222,14 @@ inline Location LocationFrom(const vixl::aarch32::SRegister& low, return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode()); } +inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { + DCHECK(HasShifterOperand(instruction, kArm)); + // TODO: HAdd applied to the other integral types could make use of + // the SXTAB, SXTAH, UXTAB and UXTAH instructions. 
+ return instruction->GetType() == Primitive::kPrimLong && + (instruction->IsAdd() || instruction->IsSub()); +} + } // namespace helpers } // namespace arm } // namespace art diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 93ea090583..d3f431e327 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_ #include "code_generator.h" +#include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" #include "utils/arm64/assembler_arm64.h" @@ -31,6 +32,10 @@ #pragma GCC diagnostic pop namespace art { + +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; + namespace arm64 { namespace helpers { @@ -290,11 +295,11 @@ inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers, return true; } -inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { +inline vixl::aarch64::Shift ShiftFromOpKind(HDataProcWithShifterOp::OpKind op_kind) { switch (op_kind) { - case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR; - case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL; - case HArm64DataProcWithShifterOp::kLSR: return vixl::aarch64::LSR; + case HDataProcWithShifterOp::kASR: return vixl::aarch64::ASR; + case HDataProcWithShifterOp::kLSL: return vixl::aarch64::LSL; + case HDataProcWithShifterOp::kLSR: return vixl::aarch64::LSR; default: LOG(FATAL) << "Unexpected op kind " << op_kind; UNREACHABLE(); @@ -302,14 +307,14 @@ inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind } } -inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) { +inline vixl::aarch64::Extend ExtendFromOpKind(HDataProcWithShifterOp::OpKind op_kind) { switch (op_kind) { - case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB; - case HArm64DataProcWithShifterOp::kUXTH: return 
vixl::aarch64::UXTH; - case HArm64DataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW; - case HArm64DataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB; - case HArm64DataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH; - case HArm64DataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW; + case HDataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB; + case HDataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH; + case HDataProcWithShifterOp::kUXTW: return vixl::aarch64::UXTW; + case HDataProcWithShifterOp::kSXTB: return vixl::aarch64::SXTB; + case HDataProcWithShifterOp::kSXTH: return vixl::aarch64::SXTH; + case HDataProcWithShifterOp::kSXTW: return vixl::aarch64::SXTW; default: LOG(FATAL) << "Unexpected op kind " << op_kind; UNREACHABLE(); @@ -317,31 +322,8 @@ inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKin } } -inline bool CanFitInShifterOperand(HInstruction* instruction) { - if (instruction->IsTypeConversion()) { - HTypeConversion* conversion = instruction->AsTypeConversion(); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - // We don't expect to see the same type as input and result. - return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && - (result_type != input_type); - } else { - return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || - (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || - (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); - } -} - -inline bool HasShifterOperand(HInstruction* instr) { - // `neg` instructions are an alias of `sub` using the zero register as the - // first register input. 
- bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() || - instr->IsOr() || instr->IsSub() || instr->IsXor(); - return res; -} - inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { - DCHECK(HasShifterOperand(instruction)); + DCHECK(HasShifterOperand(instruction, kArm64)); // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` // does *not* support extension. This is because the `extended register` form // of the `sub` instruction interprets the left register with code 31 as the diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index f6fba883bd..2bf5c53e17 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -511,12 +511,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); } -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 - void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE { + void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind(); - if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { + if (HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) { StartAttributeStream("shift") << instruction->GetShiftAmount(); } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index e012a4287f..8c73f1d036 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -192,9 +192,9 @@ static uint32_t FindMethodIndexIn(ArtMethod* method, } static dex::TypeIndex FindClassIndexIn(mirror::Class* cls, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) + const DexCompilationUnit& compilation_unit) REQUIRES_SHARED(Locks::mutator_lock_) { + const DexFile& 
dex_file = *compilation_unit.GetDexFile(); dex::TypeIndex index; if (cls->GetDexCache() == nullptr) { DCHECK(cls->IsArrayClass()) << cls->PrettyClass(); @@ -203,22 +203,19 @@ static dex::TypeIndex FindClassIndexIn(mirror::Class* cls, DCHECK(cls->IsProxyClass()) << cls->PrettyClass(); // TODO: deal with proxy classes. } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) { - DCHECK_EQ(cls->GetDexCache(), dex_cache.Get()); + DCHECK_EQ(cls->GetDexCache(), compilation_unit.GetDexCache().Get()); index = cls->GetDexTypeIndex(); - // Update the dex cache to ensure the class is in. The generated code will - // consider it is. We make it safe by updating the dex cache, as other - // dex files might also load the class, and there is no guarantee the dex - // cache of the dex file of the class will be updated. - if (dex_cache->GetResolvedType(index) == nullptr) { - dex_cache->SetResolvedType(index, cls); - } } else { index = cls->FindTypeIndexInOtherDexFile(dex_file); - // We cannot guarantee the entry in the dex cache will resolve to the same class, + // We cannot guarantee the entry will resolve to the same class, // as there may be different class loaders. So only return the index if it's - // the right class in the dex cache already. - if (index.IsValid() && dex_cache->GetResolvedType(index) != cls) { - index = dex::TypeIndex::Invalid(); + // the right class already resolved with the class loader. 
+ if (index.IsValid()) { + ObjPtr<mirror::Class> resolved = ClassLinker::LookupResolvedType( + index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get()); + if (resolved != cls) { + index = dex::TypeIndex::Invalid(); + } } } @@ -445,9 +442,8 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, DCHECK(invoke_instruction->IsInvokeVirtual() || invoke_instruction->IsInvokeInterface()) << invoke_instruction->DebugName(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); dex::TypeIndex class_index = FindClassIndexIn( - GetMonomorphicType(classes), caller_dex_file, caller_compilation_unit_.GetDexCache()); + GetMonomorphicType(classes), caller_compilation_unit_); if (!class_index.IsValid()) { VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) << " from inline cache is not inlined because its class is not" @@ -490,6 +486,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, // Run type propagation to get the guard typed, and eventually propagate the // type of the receiver. 
ReferenceTypePropagation rtp_fixup(graph_, + outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, /* is_first_run */ false); @@ -583,7 +580,6 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); PointerSize pointer_size = class_linker->GetImagePointerSize(); - const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); bool all_targets_inlined = true; bool one_target_inlined = false; @@ -605,8 +601,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); - dex::TypeIndex class_index = FindClassIndexIn( - handle.Get(), caller_dex_file, caller_compilation_unit_.GetDexCache()); + dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_); HInstruction* return_replacement = nullptr; if (!class_index.IsValid() || !TryBuildAndInline(invoke_instruction, @@ -662,6 +657,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, // Run type propagation to get the guards typed. ReferenceTypePropagation rtp_fixup(graph_, + outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, /* is_first_run */ false); @@ -855,6 +851,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( // Run type propagation to get the guard typed. ReferenceTypePropagation rtp_fixup(graph_, + outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, /* is_first_run */ false); @@ -923,6 +920,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, // Actual return value has a more specific type than the method's declared // return type. Run RTP again on the outer graph to propagate it. 
ReferenceTypePropagation(graph_, + outer_compilation_unit_.GetClassLoader(), outer_compilation_unit_.GetDexCache(), handles_, /* is_first_run */ false).Run(); @@ -1175,7 +1173,11 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex /* dex_pc */ 0); if (iget->GetType() == Primitive::kPrimNot) { // Use the same dex_cache that we used for field lookup as the hint_dex_cache. - ReferenceTypePropagation rtp(graph_, dex_cache, handles_, /* is_first_run */ false); + ReferenceTypePropagation rtp(graph_, + outer_compilation_unit_.GetClassLoader(), + dex_cache, + handles_, + /* is_first_run */ false); rtp.Visit(iget); } return iget; @@ -1221,7 +1223,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, resolved_method->GetDeclaringClass()->GetClassLoader())); DexCompilationUnit dex_compilation_unit( - class_loader.ToJObject(), + class_loader, class_linker, callee_dex_file, code_item, @@ -1338,6 +1340,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, // are more specific than the declared ones, run RTP again on the inner graph. 
if (run_rtp || ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { ReferenceTypePropagation(callee_graph, + outer_compilation_unit_.GetClassLoader(), dex_compilation_unit.GetDexCache(), handles_, /* is_first_run */ false).Run(); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 3374e42955..c60f6e5393 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -669,11 +669,10 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) { ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<3> hs(soa.Self()); + StackHandleScope<2> hs(soa.Self()); ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); // We fetch the referenced class eagerly (that is, the class pointed by in the MethodId // at method_idx), as `CanAccessResolvedMethod` expects it be be in the dex cache. 
@@ -1260,9 +1259,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio static mirror::Class* GetClassFrom(CompilerDriver* driver, const DexCompilationUnit& compilation_unit) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(compilation_unit.GetClassLoader()))); + Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader(); Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache(); return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); @@ -1278,10 +1275,9 @@ mirror::Class* HInstructionBuilder::GetCompilingClass() const { bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) const { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<3> hs(soa.Self()); + StackHandleScope<2> hs(soa.Self()); Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass( soa, dex_cache, class_loader, type_index, dex_compilation_unit_))); Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass())); @@ -1317,8 +1313,7 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, StackHandleScope<2> hs(soa.Self()); ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker(); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); ArtField* resolved_field = 
class_linker->ResolveField(*dex_compilation_unit_->GetDexFile(), @@ -1635,10 +1630,8 @@ static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<2> hs(soa.Self()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); - Handle<mirror::ClassLoader> class_loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader()))); + Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass( soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_)); @@ -1722,17 +1715,9 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, } } -bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, - Handle<mirror::DexCache> dex_cache, - bool* finalizable) const { - return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( - dex_compilation_unit_->GetDexMethodIndex(), dex_cache, type_index, finalizable); -} - bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, bool* finalizable) const { - ScopedObjectAccess soa(Thread::Current()); - Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache(); - return NeedsAccessCheck(type_index, dex_cache, finalizable); + return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks( + LookupReferrerClass(), LookupResolvedType(type_index, *dex_compilation_unit_), finalizable); } bool HInstructionBuilder::CanDecodeQuickenedInfo() const { @@ -2772,4 +2757,18 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, return true; } // NOLINT(readability/fn_size) +ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType( + dex::TypeIndex type_index, + const DexCompilationUnit& compilation_unit) const { + return 
ClassLinker::LookupResolvedType( + type_index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get()); +} + +ObjPtr<mirror::Class> HInstructionBuilder::LookupReferrerClass() const { + // TODO: Cache the result in a Handle<mirror::Class>. + const DexFile::MethodId& method_id = + dex_compilation_unit_->GetDexFile()->GetMethodId(dex_compilation_unit_->GetDexMethodIndex()); + return LookupResolvedType(method_id.class_idx_, *dex_compilation_unit_); +} + } // namespace art diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 3bb680ce44..e735a0c46d 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -106,11 +106,8 @@ class HInstructionBuilder : public ValueObject { // Returns whether the current method needs access check for the type. // Output parameter finalizable is set to whether the type is finalizable. - bool NeedsAccessCheck(dex::TypeIndex type_index, - Handle<mirror::DexCache> dex_cache, - /*out*/bool* finalizable) const + bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const REQUIRES_SHARED(Locks::mutator_lock_); - bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const; template<typename T> void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); @@ -300,6 +297,12 @@ class HInstructionBuilder : public ValueObject { // be found. 
ArtField* ResolveField(uint16_t field_idx, bool is_static, bool is_put); + ObjPtr<mirror::Class> LookupResolvedType(dex::TypeIndex type_index, + const DexCompilationUnit& compilation_unit) const + REQUIRES_SHARED(Locks::mutator_lock_); + + ObjPtr<mirror::Class> LookupReferrerClass() const REQUIRES_SHARED(Locks::mutator_lock_); + ArenaAllocator* const arena_; HGraph* const graph_; VariableSizedHandleScope* handles_; diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index 56e4c7a9c2..5f5e29b024 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -15,23 +15,124 @@ */ #include "code_generator.h" +#include "common_arm.h" #include "instruction_simplifier_arm.h" #include "instruction_simplifier_shared.h" #include "mirror/array-inl.h" +#include "nodes.h" namespace art { + +using helpers::CanFitInShifterOperand; +using helpers::HasShifterOperand; + namespace arm { -void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) { - if (TryCombineMultiplyAccumulate(instruction, kArm)) { +using helpers::ShifterOperandSupportsExtension; + +bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge) { + DCHECK(HasShifterOperand(use, kArm)); + DCHECK(use->IsBinaryOperation()); + DCHECK(CanFitInShifterOperand(bitfield_op)); + DCHECK(!bitfield_op->HasEnvironmentUses()); + + Primitive::Type type = use->GetType(); + if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + return false; + } + + HInstruction* left = use->InputAt(0); + HInstruction* right = use->InputAt(1); + DCHECK(left == bitfield_op || right == bitfield_op); + + if (left == right) { + // TODO: Handle special transformations in this situation? + // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`? + // Or should this be part of a separate transformation logic? 
+ return false; + } + + bool is_commutative = use->AsBinaryOperation()->IsCommutative(); + HInstruction* other_input; + if (bitfield_op == right) { + other_input = left; + } else { + if (is_commutative) { + other_input = right; + } else { + return false; + } + } + + HDataProcWithShifterOp::OpKind op_kind; + int shift_amount = 0; + + HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + shift_amount &= use->GetType() == Primitive::kPrimInt + ? kMaxIntShiftDistance + : kMaxLongShiftDistance; + + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + if (!ShifterOperandSupportsExtension(use)) { + return false; + } + // Shift by 1 is a special case that results in the same number and type of instructions + // as this simplification, but potentially shorter code. + } else if (type == Primitive::kPrimLong && shift_amount == 1) { + return false; + } + + if (do_merge) { + HDataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HDataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); + use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); + if (bitfield_op->GetUses().empty()) { + bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); + } RecordSimplification(); } + + return true; } -void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) { - if (TryMergeNegatedInput(instruction)) { - RecordSimplification(); +// Merge a bitfield move instruction into its uses if it can be merged in all of them. +bool InstructionSimplifierArmVisitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) { + DCHECK(CanFitInShifterOperand(bitfield_op)); + + if (bitfield_op->HasEnvironmentUses()) { + return false; + } + + const HUseList<HInstruction*>& uses = bitfield_op->GetUses(); + + // Check whether we can merge the instruction in all its users' shifter operand. 
+ for (const HUseListNode<HInstruction*>& use : uses) { + HInstruction* user = use.GetUser(); + if (!HasShifterOperand(user, kArm)) { + return false; + } + if (!CanMergeIntoShifterOperand(user, bitfield_op)) { + return false; + } } + + // Merge the instruction into its uses. + for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) { + HInstruction* user = it->GetUser(); + // Increment `it` now because `*it` will disappear thanks to MergeIntoShifterOperand(). + ++it; + bool merged = MergeIntoShifterOperand(user, bitfield_op); + DCHECK(merged); + } + + return true; } void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) { @@ -89,5 +190,49 @@ void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) { } } +void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) { + if (TryCombineMultiplyAccumulate(instruction, kArm)) { + RecordSimplification(); + } +} + +void InstructionSimplifierArmVisitor::VisitOr(HOr* instruction) { + if (TryMergeNegatedInput(instruction)) { + RecordSimplification(); + } +} + +void InstructionSimplifierArmVisitor::VisitShl(HShl* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArmVisitor::VisitShr(HShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArmVisitor::VisitTypeConversion(HTypeConversion* instruction) { + Primitive::Type result_type = instruction->GetResultType(); + Primitive::Type input_type = instruction->GetInputType(); + + if (input_type == result_type) { + // We let the arch-independent code handle this. 
+ return; + } + + if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + +void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) { + if (instruction->InputAt(1)->IsConstant()) { + TryMergeIntoUsersShifterOperand(instruction); + } +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index 9b54511340..e2ed257777 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -35,11 +35,41 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor { } } - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + } + + /** + * This simplifier uses a special-purpose BB visitor. + * (1) No need to visit Phi nodes. + * (2) Since statements can be removed in a "forward" fashion, + * the visitor should test if each statement is still there. + */ + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // TODO: fragile iteration, provide more robust iterators? 
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + void VisitAnd(HAnd* instruction) OVERRIDE; void VisitArrayGet(HArrayGet* instruction) OVERRIDE; void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 6d107d571f..73b7b2bd95 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -22,16 +22,18 @@ #include "mirror/string.h" namespace art { -namespace arm64 { using helpers::CanFitInShifterOperand; using helpers::HasShifterOperand; + +namespace arm64 { + using helpers::ShifterOperandSupportsExtension; bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge) { - DCHECK(HasShifterOperand(use)); + DCHECK(HasShifterOperand(use, kArm64)); DCHECK(use->IsBinaryOperation() || use->IsNeg()); DCHECK(CanFitInShifterOperand(bitfield_op)); DCHECK(!bitfield_op->HasEnvironmentUses()); @@ -72,23 +74,22 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* } } - HArm64DataProcWithShifterOp::OpKind op_kind; + HDataProcWithShifterOp::OpKind op_kind; int shift_amount = 0; - HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); + HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); - if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) && - 
!ShifterOperandSupportsExtension(use)) { + if (HDataProcWithShifterOp::IsExtensionOp(op_kind) && !ShifterOperandSupportsExtension(use)) { return false; } if (do_merge) { - HArm64DataProcWithShifterOp* alu_with_op = - new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use, - other_input, - bitfield_op->InputAt(0), - op_kind, - shift_amount, - use->GetDexPc()); + HDataProcWithShifterOp* alu_with_op = + new (GetGraph()->GetArena()) HDataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); if (bitfield_op->GetUses().empty()) { bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); @@ -112,7 +113,7 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruc // Check whether we can merge the instruction in all its users' shifter operand. for (const HUseListNode<HInstruction*>& use : uses) { HInstruction* user = use.GetUser(); - if (!HasShifterOperand(user)) { + if (!HasShifterOperand(user, kArm64)) { return false; } if (!CanMergeIntoShifterOperand(user, bitfield_op)) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index d4cb1f14b7..65654f50f4 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -40,11 +40,11 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { HInstruction* bitfield_op, bool do_merge); bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, false); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); } bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, true); + return TryMergeIntoShifterOperand(use, bitfield_op, 
/* do_merge */ true); } /** diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 56804f5e90..83e3ffca57 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -21,6 +21,33 @@ namespace art { +namespace helpers { + +inline bool CanFitInShifterOperand(HInstruction* instruction) { + if (instruction->IsTypeConversion()) { + HTypeConversion* conversion = instruction->AsTypeConversion(); + Primitive::Type result_type = conversion->GetResultType(); + Primitive::Type input_type = conversion->GetInputType(); + // We don't expect to see the same type as input and result. + return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + (result_type != input_type); + } else { + return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || + (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) || + (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant()); + } +} + +inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) { + // On ARM64 `neg` instructions are an alias of `sub` using the zero register + // as the first register input. + bool res = instr->IsAdd() || instr->IsAnd() || (isa == kArm64 && instr->IsNeg()) || + instr->IsOr() || instr->IsSub() || instr->IsXor(); + return res; +} + +} // namespace helpers + bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa); // For bitwise operations (And/Or/Xor) with a negated input, try to use // a negated bitwise instruction. 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 95838380cc..26c9ab83c2 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -71,7 +71,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, void HLoopOptimization::Run() { // Well-behaved loops only. // TODO: make this less of a sledgehammer. - if (graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) { + if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) { return; } @@ -84,6 +84,10 @@ void HLoopOptimization::Run() { // Perform loop optimizations. LocalRun(); + if (top_loop_ == nullptr) { + graph_->SetHasLoops(false); + } + // Detach. loop_allocator_ = nullptr; last_loop_ = top_loop_ = nullptr; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 71a26ebe79..62c89100eb 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -688,6 +688,7 @@ void HLoopInformation::Populate() { contains_irreducible_loop_ = true; graph->SetHasIrreducibleLoops(true); } + graph->SetHasLoops(true); } HBasicBlock* HLoopInformation::GetPreHeader() const { @@ -2032,9 +2033,19 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { } } outer_graph->UpdateMaximumNumberOfOutVRegs(GetMaximumNumberOfOutVRegs()); + if (HasBoundsChecks()) { outer_graph->SetHasBoundsChecks(true); } + if (HasLoops()) { + outer_graph->SetHasLoops(true); + } + if (HasIrreducibleLoops()) { + outer_graph->SetHasIrreducibleLoops(true); + } + if (HasTryCatch()) { + outer_graph->SetHasTryCatch(true); + } HInstruction* return_value = nullptr; if (GetBlocks().size() == 3) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 96f9abafbf..8a9e61875a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { temporaries_vreg_slots_(0), 
has_bounds_checks_(false), has_try_catch_(false), + has_loops_(false), has_irreducible_loops_(false), debuggable_(debuggable), current_instruction_id_(start_instruction_id), @@ -559,6 +560,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + bool HasLoops() const { return has_loops_; } + void SetHasLoops(bool value) { has_loops_ = value; } + bool HasIrreducibleLoops() const { return has_irreducible_loops_; } void SetHasIrreducibleLoops(bool value) { has_irreducible_loops_ = value; } @@ -637,14 +641,26 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // Number of vreg size slots that the temporaries use (used in baseline compiler). size_t temporaries_vreg_slots_; - // Has bounds checks. We can totally skip BCE if it's false. + // Flag whether there are bounds checks in the graph. We can skip + // BCE if it's false. It's only best effort to keep it up to date in + // the presence of code elimination so there might be false positives. bool has_bounds_checks_; - // Flag whether there are any try/catch blocks in the graph. We will skip - // try/catch-related passes if false. + // Flag whether there are try/catch blocks in the graph. We will skip + // try/catch-related passes if it's false. It's only best effort to keep + // it up to date in the presence of code elimination so there might be + // false positives. bool has_try_catch_; - // Flag whether there are any irreducible loops in the graph. + // Flag whether there are any loops in the graph. We can skip loop + // optimization if it's false. It's only best effort to keep it up + // to date in the presence of code elimination so there might be false + // positives. + bool has_loops_; + + // Flag whether there are any irreducible loops in the graph. It's only + // best effort to keep it up to date in the presence of code elimination + // so there might be false positives. 
bool has_irreducible_loops_; // Indicates whether the graph should be compiled in a way that @@ -1346,6 +1362,7 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_SHARED(M) \ M(BitwiseNegatedRight, Instruction) \ + M(DataProcWithShifterOp, Instruction) \ M(MultiplyAccumulate, Instruction) \ M(IntermediateAddress, Instruction) #endif @@ -1357,12 +1374,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ArmDexCacheArraysBase, Instruction) #endif -#ifndef ART_ENABLE_CODEGEN_arm64 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) -#else -#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \ - M(Arm64DataProcWithShifterOp, Instruction) -#endif #ifndef ART_ENABLE_CODEGEN_mips #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) @@ -6603,9 +6615,6 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #ifdef ART_ENABLE_CODEGEN_arm #include "nodes_arm.h" #endif -#ifdef ART_ENABLE_CODEGEN_arm64 -#include "nodes_arm64.h" -#endif #ifdef ART_ENABLE_CODEGEN_mips #include "nodes_mips.h" #endif diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h deleted file mode 100644 index 3f88717c2a..0000000000 --- a/compiler/optimizing/nodes_arm64.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ -#define ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ - -#include "nodes.h" - -namespace art { - -class HArm64DataProcWithShifterOp FINAL : public HExpression<2> { - public: - enum OpKind { - kLSL, // Logical shift left. - kLSR, // Logical shift right. - kASR, // Arithmetic shift right. - kUXTB, // Unsigned extend byte. - kUXTH, // Unsigned extend half-word. - kUXTW, // Unsigned extend word. - kSXTB, // Signed extend byte. - kSXTH, // Signed extend half-word. - kSXTW, // Signed extend word. - - // Aliases. - kFirstShiftOp = kLSL, - kLastShiftOp = kASR, - kFirstExtensionOp = kUXTB, - kLastExtensionOp = kSXTW - }; - HArm64DataProcWithShifterOp(HInstruction* instr, - HInstruction* left, - HInstruction* right, - OpKind op, - // The shift argument is unused if the operation - // is an extension. - int shift = 0, - uint32_t dex_pc = kNoDexPc) - : HExpression(instr->GetType(), SideEffects::None(), dex_pc), - instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) { - DCHECK(!instr->HasSideEffects()); - SetRawInputAt(0, left); - SetRawInputAt(1, right); - } - - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE { - const HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp(); - return instr_kind_ == other->instr_kind_ && - op_kind_ == other->op_kind_ && - shift_amount_ == other->shift_amount_; - } - - static bool IsShiftOp(OpKind op_kind) { - return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; - } - - static bool IsExtensionOp(OpKind op_kind) { - return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; - } - - // Find the operation kind and shift amount from a bitfield move instruction. 
- static void GetOpInfoFromInstruction(HInstruction* bitfield_op, - /*out*/OpKind* op_kind, - /*out*/int* shift_amount); - - InstructionKind GetInstrKind() const { return instr_kind_; } - OpKind GetOpKind() const { return op_kind_; } - int GetShiftAmount() const { return shift_amount_; } - - DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp); - - private: - InstructionKind instr_kind_; - OpKind op_kind_; - int shift_amount_; - - friend std::ostream& operator<<(std::ostream& os, OpKind op); - - DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp); -}; - -std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op); - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_ diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_shared.cc index ac2f093847..f145bf9130 100644 --- a/compiler/optimizing/nodes_arm64.cc +++ b/compiler/optimizing/nodes_shared.cc @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 The Android Open Source Project + * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,15 +15,15 @@ */ #include "common_arm64.h" -#include "nodes.h" +#include "nodes_shared.h" namespace art { -using arm64::helpers::CanFitInShifterOperand; +using helpers::CanFitInShifterOperand; -void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, - /*out*/OpKind* op_kind, - /*out*/int* shift_amount) { +void HDataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount) { DCHECK(CanFitInShifterOperand(instruction)); if (instruction->IsShl()) { *op_kind = kLSL; @@ -41,12 +41,11 @@ void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruc int result_size = Primitive::ComponentSize(result_type); int input_size = Primitive::ComponentSize(input_type); int min_size = std::min(result_size, input_size); - // This follows the logic in - // `InstructionCodeGeneratorARM64::VisitTypeConversion()`. if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { - // There is actually nothing to do. The register will be used as a W - // register, discarding the top bits. This is represented by the default - // encoding 'LSL 0'. + // There is actually nothing to do. On ARM the high register from the + // pair will be ignored. On ARM64 the register will be used as a W + // register, discarding the top bits. This is represented by the + // default encoding 'LSL 0'. 
*op_kind = kLSL; *shift_amount = 0; } else if (result_type == Primitive::kPrimChar || @@ -64,17 +63,17 @@ void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruc } } -std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) { +std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op) { switch (op) { - case HArm64DataProcWithShifterOp::kLSL: return os << "LSL"; - case HArm64DataProcWithShifterOp::kLSR: return os << "LSR"; - case HArm64DataProcWithShifterOp::kASR: return os << "ASR"; - case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB"; - case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH"; - case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW"; - case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB"; - case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH"; - case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW"; + case HDataProcWithShifterOp::kLSL: return os << "LSL"; + case HDataProcWithShifterOp::kLSR: return os << "LSR"; + case HDataProcWithShifterOp::kASR: return os << "ASR"; + case HDataProcWithShifterOp::kUXTB: return os << "UXTB"; + case HDataProcWithShifterOp::kUXTH: return os << "UXTH"; + case HDataProcWithShifterOp::kUXTW: return os << "UXTW"; + case HDataProcWithShifterOp::kSXTB: return os << "SXTB"; + case HDataProcWithShifterOp::kSXTH: return os << "SXTH"; + case HDataProcWithShifterOp::kSXTW: return os << "SXTW"; default: LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op); UNREACHABLE(); diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 814202e97b..c6bfbcc7fb 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -150,6 +150,81 @@ class HIntermediateAddress FINAL : public HExpression<2> { DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress); }; +class HDataProcWithShifterOp FINAL : public HExpression<2> { + public: + enum OpKind { + kLSL, // 
Logical shift left. + kLSR, // Logical shift right. + kASR, // Arithmetic shift right. + kUXTB, // Unsigned extend byte. + kUXTH, // Unsigned extend half-word. + kUXTW, // Unsigned extend word. + kSXTB, // Signed extend byte. + kSXTH, // Signed extend half-word. + kSXTW, // Signed extend word. + + // Aliases. + kFirstShiftOp = kLSL, + kLastShiftOp = kASR, + kFirstExtensionOp = kUXTB, + kLastExtensionOp = kSXTW + }; + HDataProcWithShifterOp(HInstruction* instr, + HInstruction* left, + HInstruction* right, + OpKind op, + // The shift argument is unused if the operation + // is an extension. + int shift = 0, + uint32_t dex_pc = kNoDexPc) + : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + instr_kind_(instr->GetKind()), op_kind_(op), + shift_amount_(shift & (instr->GetType() == Primitive::kPrimInt + ? kMaxIntShiftDistance + : kMaxLongShiftDistance)) { + DCHECK(!instr->HasSideEffects()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE { + const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp(); + return instr_kind_ == other->instr_kind_ && + op_kind_ == other->op_kind_ && + shift_amount_ == other->shift_amount_; + } + + static bool IsShiftOp(OpKind op_kind) { + return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp; + } + + static bool IsExtensionOp(OpKind op_kind) { + return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp; + } + + // Find the operation kind and shift amount from a bitfield move instruction. 
+ static void GetOpInfoFromInstruction(HInstruction* bitfield_op, + /*out*/OpKind* op_kind, + /*out*/int* shift_amount); + + InstructionKind GetInstrKind() const { return instr_kind_; } + OpKind GetOpKind() const { return op_kind_; } + int GetShiftAmount() const { return shift_amount_; } + + DECLARE_INSTRUCTION(DataProcWithShifterOp); + + private: + InstructionKind instr_kind_; + OpKind op_kind_; + int shift_amount_; + + friend std::ostream& operator<<(std::ostream& os, OpKind op); + + DISALLOW_COPY_AND_ASSIGN(HDataProcWithShifterOp); +}; + +std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op); } // namespace art diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 8638e346fb..f72bd6a5a3 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -306,7 +306,7 @@ class OptimizingCompiler FINAL : public Compiler { InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const OVERRIDE; @@ -375,7 +375,7 @@ class OptimizingCompiler FINAL : public Compiler { InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache, ArtMethod* method, @@ -875,7 +875,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject class_loader, + Handle<mirror::ClassLoader> class_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache, ArtMethod* method, @@ -946,11 +946,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, const uint8_t* interpreter_metadata = nullptr; if (method == nullptr) { ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> 
hs(soa.Self()); - Handle<mirror::ClassLoader> loader(hs.NewHandle( - soa.Decode<mirror::ClassLoader>(class_loader))); method = compiler_driver->ResolveMethod( - soa, dex_cache, loader, &dex_compilation_unit, method_idx, invoke_type); + soa, dex_cache, class_loader, &dex_compilation_unit, method_idx, invoke_type); } // For AOT compilation, we may not get a method, for example if its class is erroneous. // JIT should always have a method. @@ -959,16 +956,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, graph->SetArtMethod(method); ScopedObjectAccess soa(Thread::Current()); interpreter_metadata = method->GetQuickenedInfo(class_linker->GetImagePointerSize()); - dex::TypeIndex type_index = method->GetDeclaringClass()->GetDexTypeIndex(); - - // Update the dex cache if the type is not in it yet. Note that under AOT, - // the verifier must have set it, but under JIT, there's no guarantee, as we - // don't necessarily run the verifier. - // The compiler and the compiler driver assume the compiling class is - // in the dex cache. 
- if (dex_cache->GetResolvedType(type_index) == nullptr) { - dex_cache->SetResolvedType(type_index, method->GetDeclaringClass()); - } } std::unique_ptr<CodeGenerator> codegen( @@ -1049,7 +1036,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, InvokeType invoke_type, uint16_t class_def_idx, uint32_t method_idx, - jobject jclass_loader, + Handle<mirror::ClassLoader> jclass_loader, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { CompilerDriver* compiler_driver = GetCompilerDriver(); @@ -1163,7 +1150,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache())); DCHECK(method->IsCompilable()); - jobject jclass_loader = class_loader.ToJObject(); const DexFile* dex_file = method->GetDexFile(); const uint16_t class_def_idx = method->GetClassDefIndex(); const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); @@ -1187,7 +1173,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, invoke_type, class_def_idx, method_idx, - jclass_loader, + class_loader, *dex_file, dex_cache, method, diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index c55fccc7d3..6e332ca59b 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -65,11 +65,13 @@ ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowabl class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { public: RTPVisitor(HGraph* graph, + Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, HandleCache* handle_cache, ArenaVector<HInstruction*>* worklist, bool is_first_run) : HGraphDelegateVisitor(graph), + class_loader_(class_loader), hint_dex_cache_(hint_dex_cache), handle_cache_(handle_cache), worklist_(worklist), @@ -101,6 +103,7 @@ class ReferenceTypePropagation::RTPVisitor : 
public HGraphDelegateVisitor { bool is_exact); private: + Handle<mirror::ClassLoader> class_loader_; Handle<mirror::DexCache> hint_dex_cache_; HandleCache* handle_cache_; ArenaVector<HInstruction*>* worklist_; @@ -108,11 +111,13 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { }; ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, + Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, VariableSizedHandleScope* handles, bool is_first_run, const char* name) : HOptimization(graph, name), + class_loader_(class_loader), hint_dex_cache_(hint_dex_cache), handle_cache_(handles), worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)), @@ -147,7 +152,12 @@ void ReferenceTypePropagation::ValidateTypes() { } void ReferenceTypePropagation::Visit(HInstruction* instruction) { - RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_); + RTPVisitor visitor(graph_, + class_loader_, + hint_dex_cache_, + &handle_cache_, + &worklist_, + is_first_run_); instruction->Accept(&visitor); } @@ -321,7 +331,12 @@ void ReferenceTypePropagation::Run() { } void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - RTPVisitor visitor(graph_, hint_dex_cache_, &handle_cache_, &worklist_, is_first_run_); + RTPVisitor visitor(graph_, + class_loader_, + hint_dex_cache_, + &handle_cache_, + &worklist_, + is_first_run_); // Handle Phis first as there might be instructions in the same block who depend on them. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { VisitPhi(it.Current()->AsPhi()); @@ -542,8 +557,9 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); - // Get type from dex cache assuming it was populated by the verifier. 
- SetClassAsTypeInfo(instr, dex_cache->GetResolvedType(type_idx), is_exact); + ObjPtr<mirror::Class> klass = + ClassLinker::LookupResolvedType(type_idx, dex_cache, class_loader_.Get()); + SetClassAsTypeInfo(instr, klass, is_exact); } void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) { @@ -556,25 +572,13 @@ void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) { SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true); } -static mirror::Class* GetClassFromDexCache(Thread* self, - const DexFile& dex_file, - dex::TypeIndex type_idx, - Handle<mirror::DexCache> hint_dex_cache) - REQUIRES_SHARED(Locks::mutator_lock_) { - ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(self, dex_file, hint_dex_cache); - // Get type from dex cache assuming it was populated by the verifier. - return dex_cache->GetResolvedType(type_idx); -} - void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) { // We check if the existing type is valid: the inliner may have set it. 
if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* resolved_class = GetClassFromDexCache(soa.Self(), - instr->GetDexFile(), - instr->GetTypeIndex(), - hint_dex_cache_); - SetClassAsTypeInfo(instr, resolved_class, /* is_exact */ false); + UpdateReferenceTypeInfo(instr, + instr->GetTypeIndex(), + instr->GetDexFile(), + /* is_exact */ false); } } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 4663471729..215e96786b 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -33,6 +33,7 @@ namespace art { class ReferenceTypePropagation : public HOptimization { public: ReferenceTypePropagation(HGraph* graph, + Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, VariableSizedHandleScope* handles, bool is_first_run, @@ -105,6 +106,8 @@ class ReferenceTypePropagation : public HOptimization { void ValidateTypes(); + Handle<mirror::ClassLoader> class_loader_; + // Note: hint_dex_cache_ is usually, but not necessarily, the dex cache associated with // graph_->GetDexFile(). Since we may look up also in other dex files, it's used only // as a hint, to reduce the number of calls to the costly ClassLinker::FindDexCache(). 
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index b061c871b0..84a4bab1a9 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -38,6 +38,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { void SetupPropagation(VariableSizedHandleScope* handles) { graph_->InitializeInexactObjectRTI(handles); propagation_ = new (&allocator_) ReferenceTypePropagation(graph_, + Handle<mirror::ClassLoader>(), Handle<mirror::DexCache>(), handles, true, diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index e3701fbcb1..558dcc4cbc 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -31,8 +31,8 @@ void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight( last_visited_latency_ = kArm64IntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitArm64DataProcWithShifterOp( - HArm64DataProcWithShifterOp* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp( + HDataProcWithShifterOp* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArm64DataProcWithShifterOpLatency; } diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 702027c535..7a33720655 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -74,7 +74,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ - M(IntermediateAddress, unused) + M(IntermediateAddress, unused) \ + M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ void Visit##type(H##type* instruction) OVERRIDE; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 487e4dd498..50ab11bc23 100644 
--- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -499,7 +499,11 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { // 4) Compute type of reference type instructions. The pass assumes that // NullConstant has been fixed up. - ReferenceTypePropagation(graph_, dex_cache_, handles_, /* is_first_run */ true).Run(); + ReferenceTypePropagation(graph_, + class_loader_, + dex_cache_, + handles_, + /* is_first_run */ true).Run(); // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type // (int/float or long/double) and marked ArraySets with ambiguous input type. diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 45dac54115..978f113ec4 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -48,9 +48,11 @@ namespace art { class SsaBuilder : public ValueObject { public: SsaBuilder(HGraph* graph, + Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> dex_cache, VariableSizedHandleScope* handles) : graph_(graph), + class_loader_(class_loader), dex_cache_(dex_cache), handles_(handles), agets_fixed_(false), @@ -115,6 +117,7 @@ class SsaBuilder : public ValueObject { void RemoveRedundantUninitializedStrings(); HGraph* graph_; + Handle<mirror::ClassLoader> class_loader_; Handle<mirror::DexCache> dex_cache_; VariableSizedHandleScope* const handles_; diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 5a466e1d5d..6eab302dab 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -642,39 +642,6 @@ void X86Assembler::movhpd(const Address& dst, XmmRegister src) { } -void X86Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) { - DCHECK(shift_count.is_uint8()); - - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x66); - EmitUint8(0x0F); - EmitUint8(0x73); - EmitXmmRegisterOperand(3, reg); - EmitUint8(shift_count.value()); -} - - -void 
X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { - DCHECK(shift_count.is_uint8()); - - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x66); - EmitUint8(0x0F); - EmitUint8(0x73); - EmitXmmRegisterOperand(2, reg); - EmitUint8(shift_count.value()); -} - - -void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { - AssemblerBuffer::EnsureCapacity ensured(&buffer_); - EmitUint8(0x66); - EmitUint8(0x0F); - EmitUint8(0x62); - EmitXmmRegisterOperand(dst, src); -} - - void X86Assembler::addsd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); @@ -828,6 +795,51 @@ void X86Assembler::movdqu(const Address& dst, XmmRegister src) { } +void X86Assembler::paddb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFC); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xF8); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::paddw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFD); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xF9); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pmullw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD5); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::paddd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -856,6 +868,24 @@ void X86Assembler::pmulld(XmmRegister dst, 
XmmRegister src) { } +void X86Assembler::paddq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xD4); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psubq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xFB); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtsi2ss(XmmRegister dst, Register src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); @@ -1186,6 +1216,141 @@ void X86Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm } +void X86Assembler::punpcklbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x60); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpcklwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x61); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x6C); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::pslld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + 
AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psllq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psraw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrad(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86Assembler::psrldq(XmmRegister reg, const 
Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(3, reg); + EmitUint8(shift_count.value()); +} + + void X86Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 4343e2e734..2999599fc5 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -408,14 +408,9 @@ class X86Assembler FINAL : public Assembler { void movsd(const Address& dst, XmmRegister src); void movsd(XmmRegister dst, XmmRegister src); - void psrlq(XmmRegister reg, const Immediate& shift_count); - void punpckldq(XmmRegister dst, XmmRegister src); - void movhpd(XmmRegister dst, const Address& src); void movhpd(const Address& dst, XmmRegister src); - void psrldq(XmmRegister reg, const Immediate& shift_count); - void addsd(XmmRegister dst, XmmRegister src); void addsd(XmmRegister dst, const Address& src); void subsd(XmmRegister dst, XmmRegister src); @@ -436,10 +431,20 @@ class X86Assembler FINAL : public Assembler { void movdqa(const Address& dst, XmmRegister src); // store aligned void movdqu(const Address& dst, XmmRegister src); // store unaligned - void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void psubb(XmmRegister dst, XmmRegister src); + + void paddw(XmmRegister dst, XmmRegister src); + void psubw(XmmRegister dst, XmmRegister src); + void pmullw(XmmRegister dst, XmmRegister src); + + void paddd(XmmRegister dst, XmmRegister src); void psubd(XmmRegister dst, XmmRegister src); void pmulld(XmmRegister dst, XmmRegister src); + void paddq(XmmRegister dst, XmmRegister src); + void psubq(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, Register src); void 
cvtsi2sd(XmmRegister dst, Register src); @@ -489,6 +494,24 @@ class X86Assembler FINAL : public Assembler { void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm); void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void punpcklbw(XmmRegister dst, XmmRegister src); + void punpcklwd(XmmRegister dst, XmmRegister src); + void punpckldq(XmmRegister dst, XmmRegister src); + void punpcklqdq(XmmRegister dst, XmmRegister src); + + void psllw(XmmRegister reg, const Immediate& shift_count); + void pslld(XmmRegister reg, const Immediate& shift_count); + void psllq(XmmRegister reg, const Immediate& shift_count); + + void psraw(XmmRegister reg, const Immediate& shift_count); + void psrad(XmmRegister reg, const Immediate& shift_count); + // no psraq + + void psrlw(XmmRegister reg, const Immediate& shift_count); + void psrld(XmmRegister reg, const Immediate& shift_count); + void psrlq(XmmRegister reg, const Immediate& shift_count); + void psrldq(XmmRegister reg, const Immediate& shift_count); + void flds(const Address& src); void fstps(const Address& dst); void fsts(const Address& dst); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index c6ab893aea..a74bea207e 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -122,18 +122,6 @@ TEST_F(AssemblerX86Test, Movntl) { DriverStr(expected, "movntl"); } -TEST_F(AssemblerX86Test, psrlq) { - GetAssembler()->psrlq(x86::XMM0, CreateImmediate(32)); - const char* expected = "psrlq $0x20, %xmm0\n"; - DriverStr(expected, "psrlq"); -} - -TEST_F(AssemblerX86Test, punpckldq) { - GetAssembler()->punpckldq(x86::XMM0, x86::XMM1); - const char* expected = "punpckldq %xmm1, %xmm0\n"; - DriverStr(expected, "punpckldq"); -} - TEST_F(AssemblerX86Test, LoadLongConstant) { GetAssembler()->LoadLongConstant(x86::XMM0, 51); const char* expected = @@ -521,6 +509,26 @@ TEST_F(AssemblerX86Test, DivPD) { 
DriverStr(RepeatFF(&x86::X86Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd"); } +TEST_F(AssemblerX86Test, PAddB) { + DriverStr(RepeatFF(&x86::X86Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb"); +} + +TEST_F(AssemblerX86Test, PSubB) { + DriverStr(RepeatFF(&x86::X86Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb"); +} + +TEST_F(AssemblerX86Test, PAddW) { + DriverStr(RepeatFF(&x86::X86Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw"); +} + +TEST_F(AssemblerX86Test, PSubW) { + DriverStr(RepeatFF(&x86::X86Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw"); +} + +TEST_F(AssemblerX86Test, PMullW) { + DriverStr(RepeatFF(&x86::X86Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw"); +} + TEST_F(AssemblerX86Test, PAddD) { DriverStr(RepeatFF(&x86::X86Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd"); } @@ -533,6 +541,14 @@ TEST_F(AssemblerX86Test, PMullD) { DriverStr(RepeatFF(&x86::X86Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld"); } +TEST_F(AssemblerX86Test, PAddQ) { + DriverStr(RepeatFF(&x86::X86Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq"); +} + +TEST_F(AssemblerX86Test, PSubQ) { + DriverStr(RepeatFF(&x86::X86Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); +} + TEST_F(AssemblerX86Test, XorPD) { DriverStr(RepeatFF(&x86::X86Assembler::xorpd, "xorpd %{reg2}, %{reg1}"), "xorpd"); } @@ -581,6 +597,67 @@ TEST_F(AssemblerX86Test, PShufD) { DriverStr(RepeatFFI(&x86::X86Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd"); } +TEST_F(AssemblerX86Test, Punpcklbw) { + DriverStr(RepeatFF(&x86::X86Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw"); +} + +TEST_F(AssemblerX86Test, Punpcklwd) { + DriverStr(RepeatFF(&x86::X86Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd"); +} + +TEST_F(AssemblerX86Test, Punpckldq) { + DriverStr(RepeatFF(&x86::X86Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq"); +} + +TEST_F(AssemblerX86Test, Punpcklqdq) { + 
DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); +} + +TEST_F(AssemblerX86Test, psllw) { + GetAssembler()->psllw(x86::XMM0, CreateImmediate(16)); + DriverStr("psllw $0x10, %xmm0\n", "psllwi"); +} + +TEST_F(AssemblerX86Test, pslld) { + GetAssembler()->pslld(x86::XMM0, CreateImmediate(16)); + DriverStr("pslld $0x10, %xmm0\n", "pslldi"); +} + +TEST_F(AssemblerX86Test, psllq) { + GetAssembler()->psllq(x86::XMM0, CreateImmediate(16)); + DriverStr("psllq $0x10, %xmm0\n", "psllqi"); +} + +TEST_F(AssemblerX86Test, psraw) { + GetAssembler()->psraw(x86::XMM0, CreateImmediate(16)); + DriverStr("psraw $0x10, %xmm0\n", "psrawi"); +} + +TEST_F(AssemblerX86Test, psrad) { + GetAssembler()->psrad(x86::XMM0, CreateImmediate(16)); + DriverStr("psrad $0x10, %xmm0\n", "psradi"); +} + +TEST_F(AssemblerX86Test, psrlw) { + GetAssembler()->psrlw(x86::XMM0, CreateImmediate(16)); + DriverStr("psrlw $0x10, %xmm0\n", "psrlwi"); +} + +TEST_F(AssemblerX86Test, psrld) { + GetAssembler()->psrld(x86::XMM0, CreateImmediate(16)); + DriverStr("psrld $0x10, %xmm0\n", "psrldi"); +} + +TEST_F(AssemblerX86Test, psrlq) { + GetAssembler()->psrlq(x86::XMM0, CreateImmediate(16)); + DriverStr("psrlq $0x10, %xmm0\n", "psrlqi"); +} + +TEST_F(AssemblerX86Test, psrldq) { + GetAssembler()->psrldq(x86::XMM0, CreateImmediate(16)); + DriverStr("psrldq $0x10, %xmm0\n", "psrldqi"); +} + ///////////////// // Near labels // ///////////////// diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index b41be80ae4..458204aca9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -882,6 +882,56 @@ void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) { } +void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFC); + 
EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xF8); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFD); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xF9); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD5); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); @@ -913,6 +963,26 @@ void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) { } +void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xD4); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xFB); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) { cvtsi2ss(dst, src, false); } @@ -1354,6 +1424,142 @@ void X86_64Assembler::pshufd(XmmRegister dst, 
XmmRegister src, const Immediate& } +void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x60); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x61); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x62); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x6C); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + +void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + 
EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(6, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(4, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x71); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x72); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + +void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) { + DCHECK(shift_count.is_uint8()); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex(false, false, false, false, reg.NeedsRex()); + EmitUint8(0x0F); + EmitUint8(0x73); + EmitXmmRegisterOperand(2, reg); + EmitUint8(shift_count.value()); +} + + void 
X86_64Assembler::fldl(const Address& src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xDD); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 43ea12a4cb..0dc11d840b 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -452,10 +452,20 @@ class X86_64Assembler FINAL : public Assembler { void movdqa(const Address& dst, XmmRegister src); // store aligned void movdqu(const Address& dst, XmmRegister src); // store unaligned - void paddd(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void paddb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void psubb(XmmRegister dst, XmmRegister src); + + void paddw(XmmRegister dst, XmmRegister src); + void psubw(XmmRegister dst, XmmRegister src); + void pmullw(XmmRegister dst, XmmRegister src); + + void paddd(XmmRegister dst, XmmRegister src); void psubd(XmmRegister dst, XmmRegister src); void pmulld(XmmRegister dst, XmmRegister src); + void paddq(XmmRegister dst, XmmRegister src); + void psubq(XmmRegister dst, XmmRegister src); + void cvtsi2ss(XmmRegister dst, CpuRegister src); // Note: this is the r/m32 version. 
void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit); void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit); @@ -512,6 +522,23 @@ class X86_64Assembler FINAL : public Assembler { void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm); void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm); + void punpcklbw(XmmRegister dst, XmmRegister src); + void punpcklwd(XmmRegister dst, XmmRegister src); + void punpckldq(XmmRegister dst, XmmRegister src); + void punpcklqdq(XmmRegister dst, XmmRegister src); + + void psllw(XmmRegister reg, const Immediate& shift_count); + void pslld(XmmRegister reg, const Immediate& shift_count); + void psllq(XmmRegister reg, const Immediate& shift_count); + + void psraw(XmmRegister reg, const Immediate& shift_count); + void psrad(XmmRegister reg, const Immediate& shift_count); + // no psraq + + void psrlw(XmmRegister reg, const Immediate& shift_count); + void psrld(XmmRegister reg, const Immediate& shift_count); + void psrlq(XmmRegister reg, const Immediate& shift_count); + void flds(const Address& src); void fstps(const Address& dst); void fsts(const Address& dst); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index aeb1911835..fe9449720f 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1128,6 +1128,26 @@ TEST_F(AssemblerX86_64Test, Divpd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::divpd, "divpd %{reg2}, %{reg1}"), "divpd"); } +TEST_F(AssemblerX86_64Test, Paddb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddb, "paddb %{reg2}, %{reg1}"), "paddb"); +} + +TEST_F(AssemblerX86_64Test, Psubb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubb, "psubb %{reg2}, %{reg1}"), "psubb"); +} + +TEST_F(AssemblerX86_64Test, Paddw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddw, "paddw %{reg2}, %{reg1}"), "paddw"); +} + +TEST_F(AssemblerX86_64Test, Psubw) { + 
DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubw, "psubw %{reg2}, %{reg1}"), "psubw"); +} + +TEST_F(AssemblerX86_64Test, Pmullw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmullw, "pmullw %{reg2}, %{reg1}"), "pmullw"); +} + TEST_F(AssemblerX86_64Test, Paddd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddd, "paddd %{reg2}, %{reg1}"), "paddd"); } @@ -1140,6 +1160,14 @@ TEST_F(AssemblerX86_64Test, Pmulld) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pmulld, "pmulld %{reg2}, %{reg1}"), "pmulld"); } +TEST_F(AssemblerX86_64Test, Paddq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::paddq, "paddq %{reg2}, %{reg1}"), "paddq"); +} + +TEST_F(AssemblerX86_64Test, Psubq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::psubq, "psubq %{reg2}, %{reg1}"), "psubq"); +} + TEST_F(AssemblerX86_64Test, Cvtsi2ss) { DriverStr(RepeatFr(&x86_64::X86_64Assembler::cvtsi2ss, "cvtsi2ss %{reg2}, %{reg1}"), "cvtsi2ss"); } @@ -1261,6 +1289,78 @@ TEST_F(AssemblerX86_64Test, PShufd) { DriverStr(RepeatFFI(&x86_64::X86_64Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd"); } +TEST_F(AssemblerX86_64Test, Punpcklbw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw"); +} + +TEST_F(AssemblerX86_64Test, Punpcklwd) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd"); +} + +TEST_F(AssemblerX86_64Test, Punpckldq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq"); +} + +TEST_F(AssemblerX86_64Test, Punpcklqdq) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq"); +} + +TEST_F(AssemblerX86_64Test, Psllw) { + GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psllw $1, %xmm0\n" + "psllw $2, %xmm15\n", "psllwi"); +} + +TEST_F(AssemblerX86_64Test, 
Pslld) { + GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->pslld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("pslld $1, %xmm0\n" + "pslld $2, %xmm15\n", "pslldi"); +} + +TEST_F(AssemblerX86_64Test, Psllq) { + GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psllq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psllq $1, %xmm0\n" + "psllq $2, %xmm15\n", "psllqi"); +} + +TEST_F(AssemblerX86_64Test, Psraw) { + GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psraw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psraw $1, %xmm0\n" + "psraw $2, %xmm15\n", "psrawi"); +} + +TEST_F(AssemblerX86_64Test, Psrad) { + GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrad(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrad $1, %xmm0\n" + "psrad $2, %xmm15\n", "psradi"); +} + +TEST_F(AssemblerX86_64Test, Psrlw) { + GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrlw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrlw $1, %xmm0\n" + "psrlw $2, %xmm15\n", "psrlwi"); +} + +TEST_F(AssemblerX86_64Test, Psrld) { + GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrld $1, %xmm0\n" + "psrld $2, %xmm15\n", "psrldi"); +} + +TEST_F(AssemblerX86_64Test, Psrlq) { + GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1)); + GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2)); + DriverStr("psrlq $1, %xmm0\n" + "psrlq $2, %xmm15\n", "psrlqi"); +} + TEST_F(AssemblerX86_64Test, UcomissAddress) { 
GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address( x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); |