Diffstat (limited to 'compiler')
36 files changed, 1155 insertions, 487 deletions
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 34ad1c5c08..a0c0a2acf6 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -27,7 +27,6 @@ CompilerOptions::CompilerOptions() small_method_threshold_(kDefaultSmallMethodThreshold), tiny_method_threshold_(kDefaultTinyMethodThreshold), num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold), - inline_depth_limit_(kUnsetInlineDepthLimit), inline_max_code_units_(kUnsetInlineMaxCodeUnits), no_inline_from_(nullptr), boot_image_(false), @@ -62,7 +61,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, - size_t inline_depth_limit, size_t inline_max_code_units, const std::vector<const DexFile*>* no_inline_from, double top_k_profile_threshold, @@ -86,7 +84,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, small_method_threshold_(small_method_threshold), tiny_method_threshold_(tiny_method_threshold), num_dex_methods_threshold_(num_dex_methods_threshold), - inline_depth_limit_(inline_depth_limit), inline_max_code_units_(inline_max_code_units), no_inline_from_(no_inline_from), boot_image_(false), @@ -130,10 +127,6 @@ void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usag ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage); } -void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) { - ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage); -} - void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) { ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage); } @@ -183,8 +176,6 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa ParseTinyMethodMax(option, Usage); } else if (option.starts_with("--num-dex-methods=")) { ParseNumDexMethods(option, Usage); - } else if (option.starts_with("--inline-depth-limit=")) { - ParseInlineDepthLimit(option, Usage); } else if (option.starts_with("--inline-max-code-units=")) { ParseInlineMaxCodeUnits(option, Usage); } else if (option == "--generate-debug-info" || option == "-g") { diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 2e3e55f6c6..2376fbf5f5 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -46,15 +46,9 @@ class CompilerOptions FINAL { static constexpr double kDefaultTopKProfileThreshold = 90.0; static const bool kDefaultGenerateDebugInfo = false; static const bool kDefaultGenerateMiniDebugInfo = false; - static const size_t kDefaultInlineDepthLimit = 3; static const size_t kDefaultInlineMaxCodeUnits = 32; - static constexpr size_t kUnsetInlineDepthLimit = -1; static constexpr size_t kUnsetInlineMaxCodeUnits = -1; - // Default inlining settings when the space filter is used. 
- static constexpr size_t kSpaceFilterInlineDepthLimit = 3; - static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10; - CompilerOptions(); ~CompilerOptions(); @@ -64,7 +58,6 @@ class CompilerOptions FINAL { size_t small_method_threshold, size_t tiny_method_threshold, size_t num_dex_methods_threshold, - size_t inline_depth_limit, size_t inline_max_code_units, const std::vector<const DexFile*>* no_inline_from, double top_k_profile_threshold, @@ -155,13 +148,6 @@ class CompilerOptions FINAL { return num_dex_methods_threshold_; } - size_t GetInlineDepthLimit() const { - return inline_depth_limit_; - } - void SetInlineDepthLimit(size_t limit) { - inline_depth_limit_ = limit; - } - size_t GetInlineMaxCodeUnits() const { return inline_max_code_units_; } @@ -275,7 +261,6 @@ class CompilerOptions FINAL { void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage); void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage); void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage); - void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage); void ParseNumDexMethods(const StringPiece& option, UsageFn Usage); void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage); void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage); @@ -289,7 +274,6 @@ class CompilerOptions FINAL { size_t small_method_threshold_; size_t tiny_method_threshold_; size_t num_dex_methods_threshold_; - size_t inline_depth_limit_; size_t inline_max_code_units_; // Dex files from which we should not inline code. diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 89e8a678b1..897d81993d 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -76,7 +76,7 @@ class ImageTest : public CommonCompilerTest { void Compile(ImageHeader::StorageMode storage_mode, CompilationHelper& out_helper, const std::string& extra_dex = "", - const std::string& image_class = ""); + const std::initializer_list<std::string>& image_classes = {}); void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE { CommonCompilerTest::SetUpRuntimeOptions(options); @@ -90,6 +90,18 @@ class ImageTest : public CommonCompilerTest { return new std::unordered_set<std::string>(image_classes_); } + ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass) + REQUIRES_SHARED(Locks::mutator_lock_) { + PointerSize pointer_size = class_linker_->GetImagePointerSize(); + for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) { + if (strcmp(origin->GetName(), m.GetName()) == 0 && + origin->GetSignature() == m.GetSignature()) { + return &m; + } + } + return nullptr; + } + private: std::unordered_set<std::string> image_classes_; }; @@ -345,26 +357,27 @@ void CompilationHelper::Compile(CompilerDriver* driver, void ImageTest::Compile(ImageHeader::StorageMode storage_mode, CompilationHelper& helper, const std::string& extra_dex, - const std::string& image_class) { - if (!image_class.empty()) { + const std::initializer_list<std::string>& image_classes) { + for (const std::string& image_class : image_classes) { image_classes_.insert(image_class); } CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); // Set inline filter values. 
- compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit); compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits); image_classes_.clear(); if (!extra_dex.empty()) { helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str()); } helper.Compile(compiler_driver_.get(), storage_mode); - if (!image_class.empty()) { + if (image_classes.begin() != image_classes.end()) { // Make sure the class got initialized. ScopedObjectAccess soa(Thread::Current()); ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); - EXPECT_TRUE(klass != nullptr); - EXPECT_TRUE(klass->IsInitialized()); + for (const std::string& image_class : image_classes) { + mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); + EXPECT_TRUE(klass != nullptr); + EXPECT_TRUE(klass->IsInitialized()); + } } } @@ -492,7 +505,7 @@ TEST_F(ImageTest, TestImageLayout) { // Compile multi-image with ImageLayoutA being the last image. { CompilationHelper helper; - Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", "LMyClass;"); + Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", {"LMyClass;"}); image_sizes = helper.GetImageObjectSectionSizes(); } TearDown(); @@ -501,7 +514,7 @@ TEST_F(ImageTest, TestImageLayout) { // Compile multi-image with ImageLayoutB being the last image. { CompilationHelper helper; - Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", "LMyClass;"); + Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", {"LMyClass;"}); image_sizes_extra = helper.GetImageObjectSectionSizes(); } // Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the @@ -553,4 +566,63 @@ TEST_F(ImageTest, ImageHeaderIsValid) { ASSERT_FALSE(image_header.IsValid()); } +// Test that the pointer to quick code is the same in +// a default method of an interface and in a copied method +// of a class which implements the interface. This should be true +// only if the copied method and the origin method are located in the +// same oat file. +TEST_F(ImageTest, TestDefaultMethods) { + CompilationHelper helper; + Compile(ImageHeader::kStorageModeUncompressed, + helper, + "DefaultMethods", + {"LIface;", "LImpl;", "LIterableBase;"}); + + PointerSize pointer_size = class_linker_->GetImagePointerSize(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + + // Test that the pointer to quick code is the same in the origin method + // and in the copied method from the same oat file.
+ mirror::Class* iface_klass = class_linker_->LookupClass( + self, "LIface;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iface_klass); + ArtMethod* origin = iface_klass->FindDeclaredVirtualMethod( + "defaultMethod", "()V", pointer_size); + ASSERT_NE(nullptr, origin); + const void* code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The origin method should have a pointer to quick code. + ASSERT_NE(nullptr, code); + ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code)); + mirror::Class* impl_klass = class_linker_->LookupClass( + self, "LImpl;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, impl_klass); + ArtMethod* copied = FindCopiedMethod(origin, impl_klass); + ASSERT_NE(nullptr, copied); + // The copied method should have a pointer to the same quick code as the origin method. + ASSERT_EQ(code, copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)); + + // Test that the origin method has a pointer to quick code + // but the copied method has a pointer to the interpreter + // because these methods are in different oat files. + mirror::Class* iterable_klass = class_linker_->LookupClass( + self, "Ljava/lang/Iterable;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iterable_klass); + origin = iterable_klass->FindDeclaredVirtualMethod( + "forEach", "(Ljava/util/function/Consumer;)V", pointer_size); + ASSERT_NE(nullptr, origin); + code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The origin method should have a pointer to quick code. + ASSERT_NE(nullptr, code); + ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code)); + mirror::Class* iterablebase_klass = class_linker_->LookupClass( + self, "LIterableBase;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iterablebase_klass); + copied = FindCopiedMethod(origin, iterablebase_klass); + ASSERT_NE(nullptr, copied); + code = copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The copied method should have a pointer to the interpreter. + ASSERT_TRUE(class_linker_->IsQuickToInterpreterBridge(code)); +} + } // namespace art diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index aefdb548ff..d156644484 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -714,7 +714,8 @@ void ImageWriter::ComputeLazyFieldsForImageClasses() { class_linker->VisitClassesWithoutClassesLock(&visitor); } -static bool IsBootClassLoaderClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) { +static bool IsBootClassLoaderClass(ObjPtr<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_) { return klass->GetClassLoader() == nullptr; } @@ -722,33 +723,33 @@ bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) { return IsBootClassLoaderClass(klass) && !IsInBootImage(klass); } -bool ImageWriter::PruneAppImageClass(mirror::Class* klass) { +bool ImageWriter::PruneAppImageClass(ObjPtr<mirror::Class> klass) { bool early_exit = false; std::unordered_set<mirror::Class*> visited; return PruneAppImageClassInternal(klass, &early_exit, &visited); } bool ImageWriter::PruneAppImageClassInternal( - mirror::Class* klass, + ObjPtr<mirror::Class> klass, bool* early_exit, std::unordered_set<mirror::Class*>* visited) { DCHECK(early_exit != nullptr); DCHECK(visited != nullptr); DCHECK(compile_app_image_); - if (klass == nullptr || IsInBootImage(klass)) { + if (klass == nullptr || IsInBootImage(klass.Ptr())) { return false; } - auto found = prune_class_memo_.find(klass); + auto found = prune_class_memo_.find(klass.Ptr()); if (found
!= prune_class_memo_.end()) { // Already computed, return the found value. return found->second; } // Circular dependencies, return false but do not store the result in the memoization table. - if (visited->find(klass) != visited->end()) { + if (visited->find(klass.Ptr()) != visited->end()) { *early_exit = true; return false; } - visited->emplace(klass); + visited->emplace(klass.Ptr()); bool result = IsBootClassLoaderClass(klass); std::string temp; // Prune if not an image class, this handles any broken sets of image classes such as having a @@ -812,20 +813,20 @@ bool ImageWriter::PruneAppImageClassInternal( dex_file_oat_index_map_.find(dex_cache->GetDexFile()) == dex_file_oat_index_map_.end(); } // Erase the element we stored earlier since we are exiting the function. - auto it = visited->find(klass); + auto it = visited->find(klass.Ptr()); DCHECK(it != visited->end()); visited->erase(it); // Only store result if it is true or none of the calls early exited due to circular // dependencies. If visited is empty then we are the root caller, in this case the cycle was in // a child call and we can remember the result. if (result == true || !my_early_exit || visited->empty()) { - prune_class_memo_[klass] = result; + prune_class_memo_[klass.Ptr()] = result; } *early_exit |= my_early_exit; return result; } -bool ImageWriter::KeepClass(Class* klass) { +bool ImageWriter::KeepClass(ObjPtr<mirror::Class> klass) { if (klass == nullptr) { return false; } @@ -896,15 +897,27 @@ class ImageWriter::PruneClassLoaderClassesVisitor : public ClassLoaderVisitor { Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(class_loader); class_table->Visit(classes_visitor); removed_class_count_ += classes_visitor.Prune(); + + // Record app image class loader. The fake boot class loader should not get registered + // and we should end up with only one class loader for an app and none for boot image. + if (class_loader != nullptr && class_table != nullptr) { + DCHECK(class_loader_ == nullptr); + class_loader_ = class_loader; + } } size_t GetRemovedClassCount() const { return removed_class_count_; } + ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) { + return class_loader_; + } + private: ImageWriter* const image_writer_; size_t removed_class_count_; + ObjPtr<mirror::ClassLoader> class_loader_; }; void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) { @@ -913,70 +926,150 @@ void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) { Runtime::Current()->GetClassLinker()->VisitClassLoaders(visitor); } +void ImageWriter::PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache, + ObjPtr<mirror::ClassLoader> class_loader) { + // To ensure deterministic contents of the hash-based arrays, each slot shall contain + // the candidate with the lowest index. As we're processing entries in increasing index + // order, this means trying to look up the entry for the current index if the slot is + // empty or if it contains a higher index. + + Runtime* runtime = Runtime::Current(); + ClassLinker* class_linker = runtime->GetClassLinker(); + ArtMethod* resolution_method = runtime->GetResolutionMethod(); + const DexFile& dex_file = *dex_cache->GetDexFile(); + // Prune methods. 
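The memoization scheme in PruneAppImageClassInternal above is worth seeing in isolation: a false computed while a cycle is still open on the visitation stack only reflects the truncated traversal, so it must not be cached. A minimal self-contained sketch of the same discipline, using ints for classes (hypothetical Graph type, not the ART code; the image_writer.cc hunk resumes below):

    #include <unordered_map>
    #include <unordered_set>
    #include <vector>

    struct Graph {
      std::unordered_map<int, std::vector<int>> edges;  // class -> dependencies
      std::unordered_map<int, bool> memo;               // cached prune results

      bool Prune(int node, bool* early_exit, std::unordered_set<int>* visited) {
        auto found = memo.find(node);
        if (found != memo.end()) {
          return found->second;              // Already computed.
        }
        if (visited->count(node) != 0u) {
          *early_exit = true;                // Back-edge: answer is provisional.
          return false;
        }
        visited->insert(node);
        bool result = false;
        bool my_early_exit = false;
        for (int succ : edges[node]) {
          result = Prune(succ, &my_early_exit, visited) || result;
        }
        visited->erase(node);
        // Cache if true, or if no cycle was seen, or at the root (visited now
        // empty), where any cycle was fully contained in a child call.
        if (result || !my_early_exit || visited->empty()) {
          memo[node] = result;
        }
        *early_exit |= my_early_exit;
        return result;
      }
    };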
+ ArtMethod** resolved_methods = dex_cache->GetResolvedMethods(); + for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) { + ArtMethod* method = + mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_); + DCHECK(method != nullptr) << "Expected resolution method instead of null method"; + mirror::Class* declaring_class = method->GetDeclaringClass(); + // Copied methods may be held live by a class which was not an image class but have a + // declaring class which is an image class. Set it to the resolution method to be safe and + // prevent dangling pointers. + if (method->IsCopied() || !KeepClass(declaring_class)) { + mirror::DexCache::SetElementPtrSize(resolved_methods, + i, + resolution_method, + target_ptr_size_); + } else if (kIsDebugBuild) { + // Check that the class is still in the classes table. + ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_); + CHECK(class_linker->ClassInClassTable(declaring_class)) << "Class " + << Class::PrettyClass(declaring_class) << " not in class linker table"; + } + } + // Prune fields and make the contents of the field array deterministic. + mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields(); + dex::TypeIndex last_class_idx; // Initialized to invalid index. + ObjPtr<mirror::Class> last_class = nullptr; + for (size_t i = 0, end = dex_file.NumFieldIds(); i < end; ++i) { + uint32_t slot_idx = dex_cache->FieldSlotIndex(i); + auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, slot_idx, target_ptr_size_); + uint32_t stored_index = pair.index; + ArtField* field = pair.object; + if (field != nullptr && i > stored_index) { + continue; // Already checked. + } + // Check if the referenced class is in the image. Note that we want to check the referenced + // class rather than the declaring class to preserve the semantics, i.e. using a FieldId + // results in resolving the referenced class and that can for example throw OOME. + const DexFile::FieldId& field_id = dex_file.GetFieldId(i); + if (field_id.class_idx_ != last_class_idx) { + last_class_idx = field_id.class_idx_; + last_class = class_linker->LookupResolvedType( + dex_file, last_class_idx, dex_cache, class_loader); + if (last_class != nullptr && !KeepClass(last_class)) { + last_class = nullptr; + } + } + if (field == nullptr || i < stored_index) { + if (last_class != nullptr) { + const char* name = dex_file.StringDataByIdx(field_id.name_idx_); + const char* type = dex_file.StringByTypeIdx(field_id.type_idx_); + field = mirror::Class::FindField(Thread::Current(), last_class, name, type); + if (field != nullptr) { + // If the referenced class is in the image, the defining class must also be there. + DCHECK(KeepClass(field->GetDeclaringClass())); + dex_cache->SetResolvedField(i, field, target_ptr_size_); + } + } + } else { + DCHECK_EQ(i, stored_index); + if (last_class == nullptr) { + dex_cache->ClearResolvedField(stored_index, target_ptr_size_); + } + } + } + // Prune types and make the contents of the type array deterministic. + // This is done after fields and methods as their lookup can touch the types array. 
+ for (size_t i = 0, end = dex_cache->GetDexFile()->NumTypeIds(); i < end; ++i) { + dex::TypeIndex type_idx(i); + uint32_t slot_idx = dex_cache->TypeSlotIndex(type_idx); + mirror::TypeDexCachePair pair = + dex_cache->GetResolvedTypes()[slot_idx].load(std::memory_order_relaxed); + uint32_t stored_index = pair.index; + ObjPtr<mirror::Class> klass = pair.object.Read(); + if (klass == nullptr || i < stored_index) { + klass = class_linker->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader); + if (klass != nullptr) { + DCHECK_EQ(dex_cache->GetResolvedType(type_idx), klass); + stored_index = i; // For correct clearing below if not keeping the `klass`. + } + } else if (i == stored_index && !KeepClass(klass)) { + dex_cache->ClearResolvedType(dex::TypeIndex(stored_index)); + } + } + // Strings do not need pruning, but the contents of the string array must be deterministic. + for (size_t i = 0, end = dex_cache->GetDexFile()->NumStringIds(); i < end; ++i) { + dex::StringIndex string_idx(i); + uint32_t slot_idx = dex_cache->StringSlotIndex(string_idx); + mirror::StringDexCachePair pair = + dex_cache->GetStrings()[slot_idx].load(std::memory_order_relaxed); + uint32_t stored_index = pair.index; + ObjPtr<mirror::String> string = pair.object.Read(); + if (string == nullptr || i < stored_index) { + string = class_linker->LookupString(dex_file, string_idx, dex_cache); + DCHECK(string == nullptr || dex_cache->GetResolvedString(string_idx) == string); + } + } +} + void ImageWriter::PruneNonImageClasses() { Runtime* runtime = Runtime::Current(); ClassLinker* class_linker = runtime->GetClassLinker(); Thread* self = Thread::Current(); + ScopedAssertNoThreadSuspension sa(__FUNCTION__); // Clear class table strong roots so that dex caches can get pruned. We require pruning the class // path dex caches. class_linker->ClearClassTableStrongRoots(); // Remove the undesired classes from the class roots. + ObjPtr<mirror::ClassLoader> class_loader; { PruneClassLoaderClassesVisitor class_loader_visitor(this); VisitClassLoaders(&class_loader_visitor); VLOG(compiler) << "Pruned " << class_loader_visitor.GetRemovedClassCount() << " classes"; + class_loader = class_loader_visitor.GetClassLoader(); + DCHECK_EQ(class_loader != nullptr, compile_app_image_); } // Clear references to removed classes from the DexCaches. 
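The determinism rule stated at the top of PruneAndPreloadDexCache ("each slot shall contain the candidate with the lowest index") governs all three hash-based arrays handled above. A minimal sketch of the slot discipline, with a hypothetical Entry type standing in for the real DexCache pair types (the PruneNonImageClasses hunk continues below):

    #include <cstdint>
    #include <vector>

    struct Entry {
      uint32_t index = 0u;
      const void* object = nullptr;  // Resolved method/field/type/string.
    };

    // Called for each dex index in increasing order. A slot is (re)filled only
    // if it is empty or currently holds a higher index, so the final contents
    // are independent of the order in which the runtime resolved entries.
    void Preload(std::vector<Entry>& slots, uint32_t index, const void* resolved) {
      Entry& slot = slots[index % slots.size()];  // Hash: index mod array size.
      if (slot.object == nullptr || index < slot.index) {
        slot = Entry{index, resolved};            // Lowest index wins the slot.
      }
    }

The actual code additionally prunes: an entry whose referenced class is not kept in the image is cleared rather than preloaded.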
- ArtMethod* resolution_method = runtime->GetResolutionMethod(); - - ScopedAssertNoThreadSuspension sa(__FUNCTION__); - ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); // For ClassInClassTable - ReaderMutexLock mu2(self, *Locks::dex_lock_); - for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { - if (self->IsJWeakCleared(data.weak_root)) { - continue; - } - ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache(); - for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) { - mirror::TypeDexCachePair pair = - dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed); - mirror::Class* klass = pair.object.Read(); - if (klass != nullptr && !KeepClass(klass)) { - dex_cache->ClearResolvedType(dex::TypeIndex(pair.index)); - } - } - ArtMethod** resolved_methods = dex_cache->GetResolvedMethods(); - for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) { - ArtMethod* method = - mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_); - DCHECK(method != nullptr) << "Expected resolution method instead of null method"; - mirror::Class* declaring_class = method->GetDeclaringClass(); - // Copied methods may be held live by a class which was not an image class but have a - // declaring class which is an image class. Set it to the resolution method to be safe and - // prevent dangling pointers. - if (method->IsCopied() || !KeepClass(declaring_class)) { - mirror::DexCache::SetElementPtrSize(resolved_methods, - i, - resolution_method, - target_ptr_size_); - } else { - // Check that the class is still in the classes table. - DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class " - << Class::PrettyClass(declaring_class) << " not in class linker table"; - } - } - mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields(); - for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) { - auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, i, target_ptr_size_); - ArtField* field = pair.object; - if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) { - dex_cache->ClearResolvedField(pair.index, target_ptr_size_); + std::vector<ObjPtr<mirror::DexCache>> dex_caches; + { + ReaderMutexLock mu2(self, *Locks::dex_lock_); + dex_caches.reserve(class_linker->GetDexCachesData().size()); + for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { + if (self->IsJWeakCleared(data.weak_root)) { + continue; } + dex_caches.push_back(self->DecodeJObject(data.weak_root)->AsDexCache()); } } + for (ObjPtr<mirror::DexCache> dex_cache : dex_caches) { + PruneAndPreloadDexCache(dex_cache, class_loader); + } // Drop the array class cache in the ClassLinker, as these are roots holding those classes live. class_linker->DropFindArrayClassCache(); diff --git a/compiler/image_writer.h b/compiler/image_writer.h index bdc7146632..16aff61dab 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -376,7 +376,7 @@ class ImageWriter FINAL { } // Returns true if the class was in the original requested image classes list. - bool KeepClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_); + bool KeepClass(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_); // Debug aid that list of requested image classes. void DumpImageClasses(); @@ -391,6 +391,12 @@ class ImageWriter FINAL { // Remove unwanted classes from various roots. 
void PruneNonImageClasses() REQUIRES_SHARED(Locks::mutator_lock_); + // Remove unwanted classes from the DexCache roots and preload deterministic DexCache contents. + void PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache, + ObjPtr<mirror::ClassLoader> class_loader) + REQUIRES_SHARED(Locks::mutator_lock_) + REQUIRES(!Locks::classlinker_classes_lock_); + // Verify unwanted classes removed. void CheckNonImageClassesRemoved() REQUIRES_SHARED(Locks::mutator_lock_); static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg) @@ -473,11 +479,11 @@ class ImageWriter FINAL { // we also cannot have any classes which refer to these boot class loader non image classes. // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler // driver. - bool PruneAppImageClass(mirror::Class* klass) + bool PruneAppImageClass(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_); // early_exit is true if we had a cyclic dependency anywhere down the chain. - bool PruneAppImageClassInternal(mirror::Class* klass, + bool PruneAppImageClassInternal(ObjPtr<mirror::Class> klass, bool* early_exit, std::unordered_set<mirror::Class*>* visited) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 3ae7974038..ad951bcc3f 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -97,7 +97,6 @@ JitCompiler::JitCompiler() { CompilerOptions::kDefaultSmallMethodThreshold, CompilerOptions::kDefaultTinyMethodThreshold, CompilerOptions::kDefaultNumDexMethodsThreshold, - CompilerOptions::kDefaultInlineDepthLimit, CompilerOptions::kDefaultInlineMaxCodeUnits, /* no_inline_from */ nullptr, CompilerOptions::kDefaultTopKProfileThreshold, @@ -177,10 +176,6 @@ JitCompiler::JitCompiler() { jit_logger_.reset(new JitLogger()); jit_logger_->OpenLog(); } - - size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit(); - DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max()) - << "ProfilingInfo's inline counter can potentially overflow"; } JitCompiler::~JitCompiler() { diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 5406ae72d1..105db1d2d0 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1034,18 +1034,63 @@ class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor { class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { public: - InitImageMethodVisitor(OatWriter* writer, size_t offset) + InitImageMethodVisitor(OatWriter* writer, + size_t offset, + const std::vector<const DexFile*>* dex_files) : OatDexMethodVisitor(writer, offset), - pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) { + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())), + dex_files_(dex_files), + class_linker_(Runtime::Current()->GetClassLinker()) { + } + + // Handle copied methods here. Copy the pointer to quick code from + // an origin method to a copied method only if they are + // in the same oat file. If the origin and the copied methods are + // in different oat files, don't touch the copied method. + // References to other oat files are not supported yet. + bool StartClass(const DexFile* dex_file, size_t class_def_index) + REQUIRES_SHARED(Locks::mutator_lock_) { + OatDexMethodVisitor::StartClass(dex_file, class_def_index); + // Skip classes that are not in the image.
+ if (!IsImageClass()) { + return true; + } + ScopedObjectAccessUnchecked soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache = hs.NewHandle( + class_linker_->FindDexCache(Thread::Current(), *dex_file)); + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + mirror::Class* klass = dex_cache->GetResolvedType(class_def.class_idx_); + if (klass != nullptr) { + for (ArtMethod& method : klass->GetCopiedMethods(pointer_size_)) { + // Find origin method. Declaring class and dex_method_idx + // in the copied method should be the same as in the origin + // method. + mirror::Class* declaring_class = method.GetDeclaringClass(); + ArtMethod* origin = declaring_class->FindDeclaredVirtualMethod( + declaring_class->GetDexCache(), + method.GetDexMethodIndex(), + pointer_size_); + CHECK(origin != nullptr); + if (IsInOatFile(&declaring_class->GetDexFile())) { + const void* code_ptr = + origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); + if (code_ptr == nullptr) { + methods_to_process_.push_back(std::make_pair(&method, origin)); + } else { + method.SetEntryPointFromQuickCompiledCodePtrSize( + code_ptr, pointer_size_); + } + } + } + } + return true; } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) REQUIRES_SHARED(Locks::mutator_lock_) { - const DexFile::TypeId& type_id = - dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); - const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); // Skip methods that are not in the image. - if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) { + if (!IsImageClass()) { return true; } @@ -1059,17 +1104,16 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { ++method_offsets_index_; } - ClassLinker* linker = Runtime::Current()->GetClassLinker(); // Unchecked as we hold mutator_lock_ on entry. ScopedObjectAccessUnchecked soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache( + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker_->FindDexCache( Thread::Current(), *dex_file_))); ArtMethod* method; if (writer_->HasBootImage()) { const InvokeType invoke_type = it.GetMethodInvokeType( dex_file_->GetClassDef(class_def_index_)); - method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>( + method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>( *dex_file_, it.GetMemberIndex(), dex_cache, @@ -1089,7 +1133,8 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { // Should already have been resolved by the compiler, just peek into the dex cache. // It may not be resolved if the class failed to verify, in this case, don't set the // entrypoint. This is not fatal since the dex cache will contain a resolution method. 
- method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize()); + method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), + class_linker_->GetImagePointerSize()); } if (method != nullptr && compiled_method != nullptr && @@ -1101,8 +1146,38 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { return true; } + // Check whether the current class is an image class. + bool IsImageClass() { + const DexFile::TypeId& type_id = + dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); + const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); + return writer_->GetCompilerDriver()->IsImageClass(class_descriptor); + } + + // Check whether the specified dex file is in the compiled oat file. + bool IsInOatFile(const DexFile* dex_file) { + return ContainsElement(*dex_files_, dex_file); + } + + // Assign a pointer to quick code for copied methods + // not handled in StartClass. + void Postprocess() { + for (std::pair<ArtMethod*, ArtMethod*>& p : methods_to_process_) { + ArtMethod* method = p.first; + ArtMethod* origin = p.second; + const void* code_ptr = + origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); + if (code_ptr != nullptr) { + method->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size_); + } + } + } + protected: const PointerSize pointer_size_; + const std::vector<const DexFile*>* dex_files_; + ClassLinker* const class_linker_; + std::vector<std::pair<ArtMethod*, ArtMethod*>> methods_to_process_; }; class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { @@ -1365,12 +1440,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedObjectAccessUnchecked soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); ClassLinker* linker = Runtime::Current()->GetClassLinker(); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile()))); mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(), patch.TargetStringIndex(), - dex_cache); + GetDexCache(patch.TargetStringDexFile())); DCHECK(string != nullptr); DCHECK(writer_->HasBootImage() || Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string)); @@ -1744,8 +1817,9 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { offset = code_visitor.GetOffset(); if (HasImage()) { - InitImageMethodVisitor image_visitor(this, offset); + InitImageMethodVisitor image_visitor(this, offset, dex_files_); success = VisitDexMethods(&image_visitor); + image_visitor.Postprocess(); DCHECK(success); offset = image_visitor.GetOffset(); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e34f116b75..caea250ab6 100--- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1875,6 +1875,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src) Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) { DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); + DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); const HBasicBlock* const block = instruction->GetBlock(); const HLoopInformation* const info = block->GetLoopInformation(); @@ -2901,16 +2902,20 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { // Convert the jumps into the result.
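One remark before the ARM hunks: the InitImageMethodVisitor changes above work in two phases. StartClass copies an origin method's quick-code pointer into each copied method when both live in the same oat file; pairs whose origin has no code yet are queued in methods_to_process_, and Postprocess() resolves them after every method has been visited. A self-contained sketch of that two-phase propagation (hypothetical Method type, not the ART classes; the code_generator_arm.cc hunk resumes right below):

    #include <utility>
    #include <vector>

    struct Method {
      const void* quick_code = nullptr;  // Entry point, if compiled.
    };

    struct CopiedMethodFixups {
      std::vector<std::pair<Method*, Method*>> pending;  // (copy, origin) pairs.

      // Phase 1 (StartClass analogue): propagate immediately when possible.
      void Visit(Method* copy, Method* origin, bool same_oat_file) {
        if (!same_oat_file) {
          return;  // Cross-oat references unsupported; leave the copy alone.
        }
        if (origin->quick_code != nullptr) {
          copy->quick_code = origin->quick_code;
        } else {
          pending.emplace_back(copy, origin);  // Origin not visited yet.
        }
      }

      // Phase 2 (Postprocess analogue): origins have their entry points by now.
      void Postprocess() {
        for (std::pair<Method*, Method*>& p : pending) {
          if (p.second->quick_code != nullptr) {
            p.first->quick_code = p.second->quick_code;
          }
        }
      }
    };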
Label done_label; + Label* final_label = codegen_->GetFinalLabel(cond, &done_label); // False case: result = 0. __ Bind(&false_label); __ LoadImmediate(out, 0); - __ b(&done_label); + __ b(final_label); // True case: result = 1. __ Bind(&true_label); __ LoadImmediate(out, 1); - __ Bind(&done_label); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -4441,7 +4446,8 @@ void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations // rotates by swapping input regs (effectively rotating by the first 32-bits of // a larger rotation) or flipping direction (thus treating larger right/left // rotations as sub-word sized rotations in the other direction) as appropriate. -void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { +void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Location rhs = locations->InAt(1); @@ -4474,6 +4480,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { Register shift_left = locations->GetTemp(1).AsRegister<Register>(); Label end; Label shift_by_32_plus_shift_right; + Label* final_label = codegen_->GetFinalLabel(ror, &end); __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F)); __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6); @@ -4488,7 +4495,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { __ Lsl(out_reg_lo, in_reg_lo, shift_left); __ Lsr(shift_left, in_reg_hi, shift_right); __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left)); - __ b(&end); + __ b(final_label); __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). @@ -4500,7 +4507,9 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { __ Lsl(shift_right, in_reg_hi, shift_left); __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right)); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } } @@ -4540,7 +4549,7 @@ void InstructionCodeGeneratorARM::VisitRor(HRor* ror) { break; } case Primitive::kPrimLong: { - HandleLongRotate(locations); + HandleLongRotate(ror); break; } default: @@ -4919,6 +4928,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); Label less, greater, done; + Label* final_label = codegen_->GetFinalLabel(compare, &done); Primitive::Type type = compare->InputAt(0)->GetType(); Condition less_cond; switch (type) { @@ -4958,17 +4968,19 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ b(&done, EQ); + __ b(final_label, EQ); __ b(&less, less_cond); __ Bind(&greater); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); __ Bind(&less); __ LoadImmediate(out, -1); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } void LocationsBuilderARM::VisitPhi(HPhi* instruction) { @@ -5746,6 +5758,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); if (maybe_compressed_char_at) { Label uncompressed_load, done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -5754,13 +5767,15 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { out_loc.AsRegister<Register>(), obj, data_offset + const_index); - __ b(&done); + __ b(final_label); __ Bind(&uncompressed_load); __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), out_loc.AsRegister<Register>(), obj, data_offset + (const_index << 1)); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -5784,17 +5799,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { Label uncompressed_load, done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); __ b(&uncompressed_load, CS); __ ldrb(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); - __ b(&done); + __ b(final_label); __ Bind(&uncompressed_load); __ ldrh(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 1)); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); } @@ -6019,6 +6037,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); Label done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { @@ -6040,7 +6059,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { index.AsRegister<Register>()); } codegen_->MaybeRecordImplicitNullCheck(instruction); - __ b(&done); + __ b(final_label); __ Bind(&non_zero); } @@ -7021,6 +7040,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); Label done, zero; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -7042,7 +7062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // Classes must be equal for the instanceof to succeed. __ b(&zero, NE); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); break; } @@ -7065,12 +7085,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. 
- __ CompareAndBranchIfZero(out, &done); + __ CompareAndBranchIfZero(out, final_label); __ cmp(out, ShifterOperand(cls)); __ b(&loop, NE); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7096,11 +7116,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. - __ b(&done); + __ b(final_label); __ Bind(&success); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7125,13 +7145,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. - __ CompareAndBranchIfZero(out, &done); + __ CompareAndBranchIfZero(out, final_label); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero); __ Bind(&exact_check); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); break; } @@ -7152,7 +7172,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7183,7 +7203,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7269,9 +7289,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { codegen_->AddSlowPath(type_check_slow_path); Label done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, final_label); } switch (type_check_kind) { @@ -7335,7 +7356,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); - __ b(&done, EQ); + __ b(final_label, EQ); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7363,7 +7384,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ cmp(temp, ShifterOperand(cls)); - __ b(&done, EQ); + __ b(final_label, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -7433,7 +7454,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { break; } } - __ Bind(&done); + + if (done.IsLinked()) { + __ Bind(&done); + } __ Bind(type_check_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 5b15902ccd..59a7f7c048 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -237,7 +237,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); void HandleIntegerRotate(LocationSummary* locations); - void HandleLongRotate(LocationSummary* locations); + void HandleLongRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); void GenerateWideAtomicStore(Register addr, uint32_t offset, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index d75779cef6..2d2d8109a3 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1950,6 +1950,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src) vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label) { DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); + DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); const HBasicBlock* const block = instruction->GetBlock(); const HLoopInformation* const info = block->GetLoopInformation(); @@ -2925,16 +2926,20 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { // Convert the jumps into the result. vixl32::Label done_label; + vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label); // False case: result = 0. __ Bind(&false_label); __ Mov(out, 0); - __ B(&done_label); + __ B(final_label); // True case: result = 1. __ Bind(&true_label); __ Mov(out, 1); - __ Bind(&done_label); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -4447,6 +4452,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1)); vixl32::Label end; vixl32::Label shift_by_32_plus_shift_right; + vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end); __ And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); @@ -4461,7 +4467,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ Lsl(out_reg_lo, in_reg_lo, shift_left); __ Lsr(shift_left, in_reg_hi, shift_right); __ Add(out_reg_lo, out_reg_lo, shift_left); - __ B(&end); + __ B(final_label); __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). 
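The ARM and VIXL hunks above and below all apply one pattern: branch to whatever GetFinalLabel(instruction, &done) returns instead of directly to a local done label, and bind done only when some branch actually linked it. When the instruction ends its basic block, GetFinalLabel can hand back a label shared with the block exit, so the local label goes unused. A toy self-contained analogue of the conditional bind (hypothetical Label/Assembler types, not the ART assemblers):

    #include <vector>

    struct Label {
      bool linked = false;
      int position = -1;
      bool IsLinked() const { return linked; }
    };

    struct Assembler {
      std::vector<int> code;
      void Branch(Label* target) { target->linked = true; code.push_back(-1); }
      void Bind(Label* label) { label->position = static_cast<int>(code.size()); }
    };

    void EmitTail(Assembler& assembler, Label* block_exit, bool ends_block) {
      Label done;
      // GetFinalLabel analogue: reuse the block's exit label when this
      // instruction is the last one in its basic block.
      Label* final_label = ends_block ? block_exit : &done;
      assembler.Branch(final_label);  // Skip over the alternative path.
      // ... alternative path would be emitted here ...
      if (done.IsLinked()) {          // Bind the local label only if used.
        assembler.Bind(&done);
      }
    }

When final_label is the block's exit label, done is never linked, so the bind is skipped and the branch targets the shared exit directly.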
@@ -4473,7 +4479,9 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ Lsl(shift_right, in_reg_hi, shift_left); __ Add(out_reg_lo, out_reg_lo, shift_right); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } } @@ -4906,6 +4914,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); vixl32::Label less, greater, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done); Primitive::Type type = compare->InputAt(0)->GetType(); vixl32::Condition less_cond = vixl32::Condition(kNone); switch (type) { @@ -4944,17 +4953,19 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); __ B(less_cond, &less, /* far_target */ false); __ Bind(&greater); __ Mov(out, 1); - __ B(&done); + __ B(final_label); __ Bind(&less); __ Mov(out, -1); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { @@ -5746,6 +5757,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { int32_t const_index = Int32ConstantFrom(index); if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -5754,13 +5766,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { RegisterFrom(out_loc), obj, data_offset + const_index); - __ B(&done); + __ B(final_label); __ Bind(&uncompressed_load); GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), RegisterFrom(out_loc), obj, data_offset + (const_index << 1)); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -5785,15 +5799,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); __ B(cs, &uncompressed_load, /* far_target */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); - __ B(&done); + __ B(final_label); __ Bind(&uncompressed_load); __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1)); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); } @@ -6032,6 +6049,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { @@ -6054,7 +6072,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding // store instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); - __ B(&done); + __ B(final_label); __ Bind(&non_zero); } @@ -7062,6 +7080,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); vixl32::Label done, zero; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; // Return 0 if `obj` is null. @@ -7083,7 +7102,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Classes must be equal for the instanceof to succeed. __ B(ne, &zero, /* far_target */ false); __ Mov(out, 1); - __ B(&done); + __ B(final_label); break; } @@ -7106,12 +7125,12 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. - __ CompareAndBranchIfZero(out, &done, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); __ Cmp(out, cls); __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7137,11 +7156,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) kCompilerReadBarrierOption); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. - __ B(&done); + __ B(final_label); __ Bind(&success); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7166,13 +7185,13 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. 
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false); __ Bind(&exact_check); __ Mov(out, 1); - __ B(&done); + __ B(final_label); break; } @@ -7193,7 +7212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7224,7 +7243,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7310,9 +7329,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { codegen_->AddSlowPath(type_check_slow_path); vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); } switch (type_check_kind) { @@ -7376,7 +7396,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7404,7 +7424,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -7472,7 +7492,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { break; } } - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } __ Bind(type_check_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0b50619a66..4db4796985 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id } size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); + if (GetGraph()->HasSIMD()) { + __ movups(Address(ESP, stack_index), XmmRegister(reg_id)); + } else { + __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); + } return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); + if (GetGraph()->HasSIMD()) { + __ movups(XmmRegister(reg_id), Address(ESP, stack_index)); + } else { + __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); + } return GetFloatingPointSpillSlotSize(); } @@ -5699,7 +5710,11 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In the suspend check slow path, there are usually no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores the lower part). + locations->SetCustomSlowPathCallerSaves( + GetGraph()->HasSIMD() ?
RegisterSet::AllFpu() : RegisterSet::Empty()); } void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5802,9 +5817,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movd(destination.AsRegisterPairHigh<Register>(), src_reg); } else if (destination.IsStackSlot()) { __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); - } else { - DCHECK(destination.IsDoubleStackSlot()); + } else if (destination.IsDoubleStackSlot()) { __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(destination.IsSIMDStackSlot()); + __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); } } else if (source.IsStackSlot()) { if (destination.IsRegister()) { @@ -5826,6 +5843,9 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { DCHECK(destination.IsDoubleStackSlot()) << destination; MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex()); } + } else if (source.IsSIMDStackSlot()) { + DCHECK(destination.IsFpuRegister()); + __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 65ee383b54..ca3a9eadd2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator { } size_t GetFloatingPointSpillSlotSize() const OVERRIDE { - // 8 bytes == 2 words for each spill. - return 2 * kX86WordSize; + return GetGraph()->HasSIMD() + ? 
4 * kX86WordSize // 16 bytes == 4 words for each spill : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } HGraphVisitor* GetLocationBuilder() OVERRIDE { } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 08f1adfcff..2ffc398287 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg } size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); - return kX86_64WordSize; + if (GetGraph()->HasSIMD()) { + __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + } else { + __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + } + return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); - return kX86_64WordSize; + if (GetGraph()->HasSIMD()) { + __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + } else { + __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + } + return GetFloatingPointSpillSlotSize(); } void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -5152,7 +5163,11 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In the suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores the lower part). + locations->SetCustomSlowPathCallerSaves( + GetGraph()->HasSIMD() ? 
RegisterSet::AllFpu() : RegisterSet::Empty()); } void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { @@ -5241,6 +5256,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } + } else if (source.IsSIMDStackSlot()) { + DCHECK(destination.IsFpuRegister()); + __ movups(destination.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), source.GetStackIndex())); } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { @@ -5291,10 +5310,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { } else if (destination.IsStackSlot()) { __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); - } else { - DCHECK(destination.IsDoubleStackSlot()) << destination; + } else if (destination.IsDoubleStackSlot()) { __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>()); + } else { + DCHECK(destination.IsSIMDStackSlot()); + __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()), + source.AsFpuRegister<XmmRegister>()); } } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 376c3ce381..c8336dabd9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { } size_t GetFloatingPointSpillSlotSize() const OVERRIDE { - return kX86_64WordSize; + return GetGraph()->HasSIMD() + ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill + : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill } HGraphVisitor* GetLocationBuilder() OVERRIDE { } diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 2bf5c53e17..0dfae11465 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -322,9 +322,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { codegen_.DumpCoreRegister(stream, location.high()); } else if (location.IsUnallocated()) { stream << "unallocated"; - } else { - DCHECK(location.IsDoubleStackSlot()); + } else if (location.IsDoubleStackSlot()) { stream << "2x" << location.GetStackIndex() << "(sp)"; + } else { + DCHECK(location.IsSIMDStackSlot()); + stream << "4x" << location.GetStackIndex() << "(sp)"; } } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 62f5114e59..eda26f1127 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -46,32 +46,100 @@ namespace art { -static constexpr size_t kMaximumNumberOfHInstructions = 32; +// Instruction limit to control memory growth during inlining. +static constexpr size_t kMaximumNumberOfTotalInstructions = 1024; + +// Maximum number of instructions for considering a method small, +// which we will always try to inline if the other non-instruction limits +// are not reached. +static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3; // Limit the number of dex registers that we accumulate while inlining // to avoid creating large amount of nested environments. static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; -// Avoid inlining within a huge method due to memory pressure. 
-static constexpr size_t kMaximumCodeUnitSize = 4096; +// Limit recursive call inlining, which does not benefit from too +// much inlining compared to code locality. +static constexpr size_t kMaximumNumberOfRecursiveCalls = 4; // Controls the use of inline caches in AOT mode. static constexpr bool kUseAOTInlineCaches = false; -void HInliner::Run() { - const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions(); - if ((compiler_options.GetInlineDepthLimit() == 0) - || (compiler_options.GetInlineMaxCodeUnits() == 0)) { - return; +// We check for line numbers to make sure the DepthString implementation +// aligns the output nicely. +#define LOG_INTERNAL(msg) \ + static_assert(__LINE__ > 10, "Unhandled line number"); \ + static_assert(__LINE__ < 10000, "Unhandled line number"); \ + VLOG(compiler) << DepthString(__LINE__) << msg + +#define LOG_TRY() LOG_INTERNAL("Try inlining call: ") +#define LOG_NOTE() LOG_INTERNAL("Note: ") +#define LOG_SUCCESS() LOG_INTERNAL("Success: ") +#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ") +#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ") + +std::string HInliner::DepthString(int line) const { + std::string value; + // Indent according to the inlining depth. + size_t count = depth_; + // Line numbers get printed in the log, so add a space if the log's line is less + // than 1000, and two if less than 100. 10 cannot be reached as it's the copyright. + if (!kIsTargetBuild) { + if (line < 100) { + value += " "; + } + if (line < 1000) { + value += " "; + } + // Safeguard if this file reaches more than 10000 lines. + DCHECK_LT(line, 10000); } - if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) { - return; + for (size_t i = 0; i < count; ++i) { + value += " "; + } + return value; +} + +static size_t CountNumberOfInstructions(HGraph* graph) { + size_t number_of_instructions = 0; + for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) { + for (HInstructionIterator instr_it(block->GetInstructions()); + !instr_it.Done(); + instr_it.Advance()) { + ++number_of_instructions; + } } + return number_of_instructions; +} + +void HInliner::UpdateInliningBudget() { + if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) { + // Always try to inline small methods. + inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod; + } else { + inlining_budget_ = std::max( + kMaximumNumberOfInstructionsForSmallMethod, + kMaximumNumberOfTotalInstructions - total_number_of_instructions_); + } +} + +void HInliner::Run() { if (graph_->IsDebuggable()) { // For simplicity, we currently never inline when the graph is debuggable. This avoids // doing some logic in the runtime to discover if a method could have been inlined. return; } + + // Initialize the number of instructions for the method being compiled. Recursive calls + // to HInliner::Run have already updated the instruction count. + if (outermost_graph_ == graph_) { + total_number_of_instructions_ = CountNumberOfInstructions(graph_); + } + + UpdateInliningBudget(); + DCHECK_NE(total_number_of_instructions_, 0u); + DCHECK_NE(inlining_budget_, 0u); + // Keep a copy of all blocks when starting the visit. 
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); DCHECK(!blocks.empty()); @@ -305,17 +373,18 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { ScopedObjectAccess soa(Thread::Current()); uint32_t method_index = invoke_instruction->GetDexMethodIndex(); const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index); + LOG_TRY() << caller_dex_file.PrettyMethod(method_index); - // We can query the dex cache directly. The verifier has populated it already. ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod(); - ArtMethod* actual_method = nullptr; if (resolved_method == nullptr) { DCHECK(invoke_instruction->IsInvokeStaticOrDirect()); DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()); - VLOG(compiler) << "Not inlining a String.<init> method"; + LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method"; return false; - } else if (invoke_instruction->IsInvokeStaticOrDirect()) { + } + ArtMethod* actual_method = nullptr; + + if (invoke_instruction->IsInvokeStaticOrDirect()) { actual_method = resolved_method; } else { // Check if we can statically find the method. @@ -328,6 +397,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { if (method != nullptr) { cha_devirtualize = true; actual_method = method; + LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod(); } } @@ -390,16 +460,23 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache); switch (inline_cache_type) { - case kInlineCacheNoData: - break; + case kInlineCacheNoData: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " could not be statically determined"; + return false; + } - case kInlineCacheUninitialized: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is not hit and not inlined"; + case kInlineCacheUninitialized: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is not hit and not inlined"; return false; + } - case kInlineCacheMonomorphic: + case kInlineCacheMonomorphic: { MaybeRecordStat(kMonomorphicCall); if (outermost_graph_->IsCompilingOsr()) { // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the @@ -408,23 +485,29 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, } else { return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache); } + } - case kInlineCachePolymorphic: + case kInlineCachePolymorphic: { MaybeRecordStat(kPolymorphicCall); return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); + } - case kInlineCacheMegamorphic: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) - << " is megamorphic and not inlined"; + case kInlineCacheMegamorphic: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is megamorphic and not inlined"; MaybeRecordStat(kMegamorphicCall); return false; + } - case kInlineCacheMissingTypes: - VLOG(compiler) << "Interface or virtual call to " - << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) 
- << " is missing types and not inlined"; + case kInlineCacheMissingTypes: { + LOG_FAIL_NO_STAT() + << "Interface or virtual call to " + << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) + << " is missing types and not inlined"; return false; + } } UNREACHABLE(); } @@ -587,9 +670,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn( GetMonomorphicType(classes), caller_compilation_unit_); if (!class_index.IsValid()) { - VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because its class is not" - << " accessible to the caller"; + LOG_FAIL(kNotInlinedDexCache) + << "Call to " << ArtMethod::PrettyMethod(resolved_method) + << " from inline cache is not inlined because its class is not" + << " accessible to the caller"; return false; } @@ -603,6 +687,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual( resolved_method, pointer_size); } + LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod(); DCHECK(resolved_method != nullptr); HInstruction* receiver = invoke_instruction->InputAt(0); HInstruction* cursor = invoke_instruction->GetPrevious(); @@ -752,6 +837,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_); HInstruction* return_replacement = nullptr; + LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod(); if (!class_index.IsValid() || !TryBuildAndInline(invoke_instruction, method, @@ -761,8 +847,8 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } else { one_target_inlined = true; - VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) - << " has inlined " << ArtMethod::PrettyMethod(method); + LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method) + << " has inlined " << ArtMethod::PrettyMethod(method); // If we have inlined all targets before, and this receiver is the last seen, // we deoptimize instead of keeping the original invoke instruction. @@ -796,9 +882,10 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } if (!one_target_inlined) { - VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because none" - << " of its targets could be inlined"; + LOG_FAIL_NO_STAT() + << "Call to " << ArtMethod::PrettyMethod(resolved_method) + << " from inline cache is not inlined because none" + << " of its targets could be inlined"; return false; } @@ -932,9 +1019,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( actual_method = new_method; } else if (actual_method != new_method) { // Different methods, bailout. 
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method) - << " from inline cache is not inlined because it resolves" - << " to different methods"; return false; } } @@ -1007,6 +1091,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( MaybeRecordStat(kInlinedPolymorphicCall); + LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod(); return true; } @@ -1076,13 +1161,34 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, return true; } +size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const { + const HInliner* current = this; + size_t count = 0; + do { + if (current->graph_->GetArtMethod() == method) { + ++count; + } + current = current->parent_; + } while (current != nullptr); + return count; +} + bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ArtMethod* method, ReferenceTypeInfo receiver_type, HInstruction** return_replacement) { if (method->IsProxyMethod()) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because of unimplemented inline support for proxy methods."; + LOG_FAIL(kNotInlinedProxy) + << "Method " << method->PrettyMethod() + << " is not inlined because of unimplemented inline support for proxy methods."; + return false; + } + + if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { + LOG_FAIL(kNotInlinedRecursiveBudget) + << "Method " + << method->PrettyMethod() + << " is not inlined because it has reached its recursive call budget."; return false; } @@ -1091,15 +1197,16 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (!compiler_driver_->MayInline(method->GetDexFile(), outer_compilation_unit_.GetDexFile())) { if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { - VLOG(compiler) << "Successfully replaced pattern of invoke " - << method->PrettyMethod(); + LOG_SUCCESS() << "Successfully replaced pattern of invoke " + << method->PrettyMethod(); MaybeRecordStat(kReplacedInvokeWithSimplePattern); return true; } - VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in " - << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" - << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " - << method->GetDexFile()->GetLocation(); + LOG_FAIL(kNotInlinedWont) + << "Won't inline " << method->PrettyMethod() << " in " + << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" + << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " + << method->GetDexFile()->GetLocation(); return false; } @@ -1108,30 +1215,32 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = method->GetCodeItem(); if (code_item == nullptr) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because it is native"; + LOG_FAIL_NO_STAT() + << "Method " << method->PrettyMethod() << " is not inlined because it is native"; return false; } size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); if (code_item->insns_size_in_code_units_ > inline_max_code_units) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is too big to inline: " - << code_item->insns_size_in_code_units_ - << " > " - << inline_max_code_units; + LOG_FAIL(kNotInlinedCodeItem) + << "Method " << method->PrettyMethod() + << " is not inlined because its code item is too big: " + << code_item->insns_size_in_code_units_ + << " > " + << inline_max_code_units; return false; } if (code_item->tries_size_ != 0) { - 
VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because of try block"; + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << method->PrettyMethod() << " is not inlined because of try block"; return false; } if (!method->IsCompilable()) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " has soft failures un-handled by the compiler, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " has soft failures un-handled by the compiler, so it cannot be inlined"; } if (!method->GetDeclaringClass()->IsVerified()) { @@ -1139,8 +1248,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (Runtime::Current()->UseJitCompilation() || !compiler_driver_->IsMethodVerifiedWithoutFailures( method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - VLOG(compiler) << "Method " << method->PrettyMethod() - << " couldn't be verified, so it cannot be inlined"; + LOG_FAIL(kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; return false; } } @@ -1149,9 +1259,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - VLOG(compiler) << "Method " << method->PrettyMethod() - << " is not inlined because it is static and requires a clinit" - << " check that cannot be emitted due to Dex cache limitations"; + LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod() + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; return false; } @@ -1160,7 +1270,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, return false; } - VLOG(compiler) << "Successfully inlined " << method->PrettyMethod(); + LOG_SUCCESS() << method->PrettyMethod(); MaybeRecordStat(kInlinedInvoke); return true; } @@ -1448,15 +1558,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, handles_); if (builder.BuildGraph() != kAnalysisSuccess) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be built, so cannot be inlined"; + LOG_FAIL(kNotInlinedCannotBuild) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be built, so cannot be inlined"; return false; } if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " cannot be inlined because of the register allocator"; + LOG_FAIL(kNotInlinedRegisterAllocator) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " cannot be inlined because of the register allocator"; return false; } @@ -1503,15 +1615,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, /* is_first_run */ false).Run(); } - size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; - size_t number_of_inlined_instructions = - RunOptimizations(callee_graph, code_item, dex_compilation_unit); - number_of_instructions_budget += number_of_inlined_instructions; + RunOptimizations(callee_graph, code_item, dex_compilation_unit); HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << 
" could not be inlined because it has an infinite loop"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it has an infinite loop"; return false; } @@ -1520,15 +1630,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (predecessor->GetLastInstruction()->IsThrow()) { if (invoke_instruction->GetBlock()->IsTryBlock()) { // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because one branch always throws and" - << " caller is in a try/catch block"; + LOG_FAIL(kNotInlinedTryCatch) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller is in a try/catch block"; return false; } else if (graph_->GetExitBlock() == nullptr) { // TODO(ngeoffray): Support adding HExit in the caller graph. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because one branch always throws and" - << " caller does not have an exit block"; + LOG_FAIL(kNotInlinedInfiniteLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because one branch always throws and" + << " caller does not have an exit block"; return false; } else if (graph_->HasIrreducibleLoops()) { // TODO(ngeoffray): Support re-computing loop information to graphs with @@ -1544,32 +1656,31 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } if (!has_one_return) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it always throws"; + LOG_FAIL(kNotInlinedAlwaysThrows) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it always throws"; return false; } size_t number_of_instructions = 0; - - bool can_inline_environment = - total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters; - // Skip the entry block, it does not contain instructions that prevent inlining. for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) { if (block->IsLoopHeader()) { if (block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it contains an irreducible loop"; + LOG_FAIL(kNotInlinedIrreducibleLoop) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains an irreducible loop"; return false; } if (!block->GetLoopInformation()->HasExitEdge()) { // Don't inline methods with loops without exit, since they cause the // loop information to be computed incorrectly when updating after // inlining. 
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it contains a loop with no exit"; + LOG_FAIL(kNotInlinedLoopWithoutExit) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it contains a loop with no exit"; return false; } } @@ -1577,34 +1688,39 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, for (HInstructionIterator instr_it(block->GetInstructions()); !instr_it.Done(); instr_it.Advance()) { - if (number_of_instructions++ == number_of_instructions_budget) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " is not inlined because its caller has reached" - << " its instruction budget limit."; + if (++number_of_instructions >= inlining_budget_) { + LOG_FAIL(kNotInlinedInstructionBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because the outer method has reached" + << " its instruction budget limit."; return false; } HInstruction* current = instr_it.Current(); - if (!can_inline_environment && current->NeedsEnvironment()) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " is not inlined because its caller has reached" - << " its environment budget limit."; + if (current->NeedsEnvironment() && + (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) { + LOG_FAIL(kNotInlinedEnvironmentBudget) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " is not inlined because its caller has reached" + << " its environment budget limit."; return false; } if (current->NeedsEnvironment() && !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(), resolved_method)) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because " << current->DebugName() - << " needs an environment, is in a different dex file" - << ", and cannot be encoded in the stack maps."; + LOG_FAIL(kNotInlinedStackMaps) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " needs an environment, is in a different dex file" + << ", and cannot be encoded in the stack maps."; return false; } if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) { - VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because " << current->DebugName() - << " it is in a different dex file and requires access to the dex cache"; + LOG_FAIL(kNotInlinedDexCache) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because " << current->DebugName() + << " it is in a different dex file and requires access to the dex cache"; return false; } @@ -1613,21 +1729,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, current->IsUnresolvedStaticFieldSet() || current->IsUnresolvedInstanceFieldSet()) { // Entrypoint for unresolved fields does not handle inlined frames. 
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index) - << " could not be inlined because it is using an unresolved" - << " entrypoint"; + LOG_FAIL(kNotInlinedUnresolvedEntrypoint) + << "Method " << callee_dex_file.PrettyMethod(method_index) + << " could not be inlined because it is using an unresolved" + << " entrypoint"; return false; } } } - number_of_inlined_instructions_ += number_of_instructions; - DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId()) << "No instructions can be added to the outer graph while inner graph is being built"; + // Inline the callee graph inside the caller graph. const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId(); graph_->SetCurrentInstructionId(callee_instruction_counter); *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction); + // Update our budget for other inlining attempts in `caller_graph`. + total_number_of_instructions_ += number_of_instructions; + UpdateInliningBudget(); DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId()) << "No instructions can be added to the inner graph during inlining into the outer graph"; @@ -1640,9 +1759,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, return true; } -size_t HInliner::RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit) { +void HInliner::RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); @@ -1664,23 +1783,37 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, optimization->Run(); } - size_t number_of_inlined_instructions = 0u; - if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) { - HInliner inliner(callee_graph, - outermost_graph_, - codegen_, - outer_compilation_unit_, - dex_compilation_unit, - compiler_driver_, - handles_, - inline_stats_, - total_number_of_dex_registers_ + code_item->registers_size_, - depth_ + 1); - inliner.Run(); - number_of_inlined_instructions += inliner.number_of_inlined_instructions_; + // Bail early for pathological cases on the environment (for example recursive calls, + // or too large environment). + if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its environment budget limit."; + return; + } + + // Bail early if we know we already are over the limit. + size_t number_of_instructions = CountNumberOfInstructions(callee_graph); + if (number_of_instructions > inlining_budget_) { + LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod() + << " will not be inlined because the outer method has reached" + << " its instruction budget limit. 
" << number_of_instructions; + return; } - return number_of_inlined_instructions; + HInliner inliner(callee_graph, + outermost_graph_, + codegen_, + outer_compilation_unit_, + dex_compilation_unit, + compiler_driver_, + handles_, + inline_stats_, + total_number_of_dex_registers_ + code_item->registers_size_, + total_number_of_instructions_ + number_of_instructions, + this, + depth_ + 1); + inliner.Run(); } static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index a032042c78..9e4685cbf4 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -42,7 +42,9 @@ class HInliner : public HOptimization { VariableSizedHandleScope* handles, OptimizingCompilerStats* stats, size_t total_number_of_dex_registers, - size_t depth) + size_t total_number_of_instructions, + HInliner* parent, + size_t depth = 0) : HOptimization(outer_graph, kInlinerPassName, stats), outermost_graph_(outermost_graph), outer_compilation_unit_(outer_compilation_unit), @@ -50,8 +52,10 @@ class HInliner : public HOptimization { codegen_(codegen), compiler_driver_(compiler_driver), total_number_of_dex_registers_(total_number_of_dex_registers), + total_number_of_instructions_(total_number_of_instructions), + parent_(parent), depth_(depth), - number_of_inlined_instructions_(0), + inlining_budget_(0), handles_(handles), inline_stats_(nullptr) {} @@ -95,10 +99,10 @@ class HInliner : public HOptimization { HInstruction** return_replacement); // Run simple optimizations on `callee_graph`. - // Returns the number of inlined instructions. - size_t RunOptimizations(HGraph* callee_graph, - const DexFile::CodeItem* code_item, - const DexCompilationUnit& dex_compilation_unit); + void RunOptimizations(HGraph* callee_graph, + const DexFile::CodeItem* code_item, + const DexCompilationUnit& dex_compilation_unit) + REQUIRES_SHARED(Locks::mutator_lock_); // Try to recognize known simple patterns and replace invoke call with appropriate instructions. bool TryPatternSubstitution(HInvoke* invoke_instruction, @@ -259,14 +263,30 @@ class HInliner : public HOptimization { HInstruction* return_replacement, HInstruction* invoke_instruction); + // Update the inlining budget based on `total_number_of_instructions_`. + void UpdateInliningBudget(); + + // Count the number of calls of `method` being inlined recursively. + size_t CountRecursiveCallsOf(ArtMethod* method) const; + + // Pretty-print for spaces during logging. + std::string DepthString(int line) const; + HGraph* const outermost_graph_; const DexCompilationUnit& outer_compilation_unit_; const DexCompilationUnit& caller_compilation_unit_; CodeGenerator* const codegen_; CompilerDriver* const compiler_driver_; const size_t total_number_of_dex_registers_; + size_t total_number_of_instructions_; + + // The 'parent' inliner, that means the inlinigng optimization that requested + // `graph_` to be inlined. + const HInliner* const parent_; const size_t depth_; - size_t number_of_inlined_instructions_; + + // The budget left for inlining, in number of instructions. + size_t inlining_budget_; VariableSizedHandleScope* const handles_; // Used to record stats about optimizations on the inlined graph. 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 98b80f5d3c..1006a776f0 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -270,9 +270,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void GenNumberOfLeadingZeros(LocationSummary* locations, +static void GenNumberOfLeadingZeros(HInvoke* invoke, Primitive::Type type, - ArmAssembler* assembler) { + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); Register out = locations->Out().AsRegister<Register>(); @@ -282,11 +284,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, Register in_reg_lo = in.AsRegisterPairLow<Register>(); Register in_reg_hi = in.AsRegisterPairHigh<Register>(); Label end; + Label* final_label = codegen->GetFinalLabel(invoke, &end); __ clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, &end); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label); __ clz(out, in_reg_lo); __ AddConstant(out, 32); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } else { __ clz(out, in.AsRegister<Register>()); } @@ -297,7 +302,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -309,27 +314,32 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke } void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); } -static void GenNumberOfTrailingZeros(LocationSummary* locations, +static void GenNumberOfTrailingZeros(HInvoke* invoke, Primitive::Type type, - ArmAssembler* assembler) { + CodeGeneratorARM* codegen) { DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Register out = locations->Out().AsRegister<Register>(); if (type == Primitive::kPrimLong) { Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Label end; + Label* final_label = codegen->GetFinalLabel(invoke, &end); __ rbit(out, in_reg_lo); __ clz(out, out); - __ CompareAndBranchIfNonZero(in_reg_lo, &end); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label); __ rbit(out, in_reg_hi); __ clz(out, out); __ AddConstant(out, 32); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } else { Register in = locations->InAt(0).AsRegister<Register>(); __ rbit(out, in); @@ -346,7 +356,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); } void 
IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -358,7 +368,7 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invok } void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); } static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { @@ -1355,6 +1365,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Label end; Label return_true; Label return_false; + Label* final_label = codegen_->GetFinalLabel(invoke, &end); // Get offsets of count, value, and class fields within a string object. const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1428,12 +1439,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ LoadImmediate(out, 1); - __ b(&end); + __ b(final_label); // Return false and exit the function. __ Bind(&return_false); __ LoadImmediate(out, 0); - __ Bind(&end); + + if (end.IsLinked()) { + __ Bind(&end); + } } static void GenerateVisitStringIndexOf(HInvoke* invoke, @@ -2491,13 +2505,14 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); Label done, compressed_string_loop; + Label* final_label = codegen_->GetFinalLabel(invoke, &done); // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); // Early out for valid zero-length retrievals. - __ b(&done, EQ); + __ b(final_label, EQ); // src range to copy. __ add(src_ptr, srcObj, ShifterOperand(value_offset)); @@ -2534,7 +2549,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&loop, GE); __ adds(num_chr, num_chr, ShifterOperand(4)); - __ b(&done, EQ); + __ b(final_label, EQ); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. 
@@ -2545,7 +2560,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&remainder, GT); if (mirror::kUseStringCompression) { - __ b(&done); + __ b(final_label); const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); @@ -2559,7 +2574,9 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&compressed_string_loop, GT); } - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 19ff49c6ce..b25bad7170 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -333,9 +333,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void GenNumberOfLeadingZeros(LocationSummary* locations, +static void GenNumberOfLeadingZeros(HInvoke* invoke, Primitive::Type type, - ArmVIXLAssembler* assembler) { + CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); vixl32::Register out = RegisterFrom(locations->Out()); @@ -345,11 +347,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, vixl32::Register in_reg_lo = LowRegisterFrom(in); vixl32::Register in_reg_hi = HighRegisterFrom(in); vixl32::Label end; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false); __ Clz(out, in_reg_lo); __ Add(out, out, 32); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } else { __ Clz(out, RegisterFrom(in)); } @@ -360,7 +365,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -372,27 +377,32 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); } -static void GenNumberOfTrailingZeros(LocationSummary* locations, +static void GenNumberOfTrailingZeros(HInvoke* invoke, Primitive::Type type, - ArmVIXLAssembler* assembler) { + CodeGeneratorARMVIXL* codegen) { DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); vixl32::Register out = RegisterFrom(locations->Out()); if (type == Primitive::kPrimLong) { vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); vixl32::Label end; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Rbit(out, in_reg_lo); __ Clz(out, out); - __ 
CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false); __ Rbit(out, in_reg_hi); __ Clz(out, out); __ Add(out, out, 32); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } else { vixl32::Register in = RegisterFrom(locations->InAt(0)); __ Rbit(out, in); @@ -409,7 +419,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -421,7 +431,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); } static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { @@ -502,7 +512,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { +static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); Location op1_loc = invoke->GetLocations()->InAt(0); Location op2_loc = invoke->GetLocations()->InAt(1); Location out_loc = invoke->GetLocations()->Out(); @@ -520,6 +531,7 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem const vixl32::Register temp1 = temps.Acquire(); vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); vixl32::Label nan, done; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); DCHECK(op1.Is(out)); @@ -536,7 +548,8 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem __ it(cond); __ vmov(cond, F32, out, op2); } - __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation. + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). __ Vmov(temp1, op1); @@ -547,14 +560,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem __ And(temp1, temp1, temp2); } __ Vmov(out, temp1); - __ B(&done); + __ B(final_label); // handle NaN input. __ Bind(&nan); __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
__ Vmov(out, temp1); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -572,7 +587,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler()); + GenMinMaxFloat(invoke, /* is_min */ true, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -581,10 +596,11 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler()); + GenMinMaxFloat(invoke, /* is_min */ false, codegen_); } -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { +static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); Location op1_loc = invoke->GetLocations()->InAt(0); Location op2_loc = invoke->GetLocations()->InAt(1); Location out_loc = invoke->GetLocations()->Out(); @@ -599,6 +615,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse vixl32::DRegister op2 = DRegisterFrom(op2_loc); vixl32::DRegister out = OutputDRegister(invoke); vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); DCHECK(op1.Is(out)); @@ -615,19 +632,22 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse __ it(cond); __ vmov(cond, F64, out, op2); } - __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation. + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); // handle op1 == op2, max(+0.0,-0.0). if (!is_min) { __ Vand(F64, out, op1, op2); - __ B(&done); + __ B(final_label); } // handle op1 == op2, min(+0.0,-0.0), NaN input. __ Bind(&handle_nan_eq); __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -635,7 +655,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler()); + GenMinMaxDouble(invoke, /* is_min */ true , codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -643,7 +663,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler()); + GenMinMaxDouble(invoke, /* is_min */ false, codegen_); } static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { @@ -1670,6 +1690,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { vixl32::Label end; vixl32::Label return_true; vixl32::Label return_false; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end); // Get offsets of count, value, and class fields within a string object. 
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1746,12 +1767,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ Mov(out, 1); - __ B(&end); + __ B(final_label); // Return false and exit the function. __ Bind(&return_false); __ Mov(out, 0); - __ Bind(&end); + + if (end.IsReferenced()) { + __ Bind(&end); + } } static void GenerateVisitStringIndexOf(HInvoke* invoke, @@ -2789,13 +2813,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2)); vixl32::Label done, compressed_string_loop; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); // dst to be copied. __ Add(dst_ptr, dstObj, data_offset); __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1)); __ Subs(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // src range to copy. __ Add(src_ptr, srcObj, value_offset); @@ -2839,7 +2864,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ B(ge, &loop, /* far_target */ false); __ Adds(num_chr, num_chr, 4); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. @@ -2852,7 +2877,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ B(gt, &remainder, /* far_target */ false); if (mirror::kUseStringCompression) { - __ B(&done); + __ B(final_label); const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); @@ -2868,7 +2893,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ B(gt, &compressed_string_loop, /* far_target */ false); } - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index ba006edfa2..bf85b1989e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2559,7 +2559,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -2567,17 +2567,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // We will call memcpy() to do the actual work. Allocate the temporary - // registers to use the correct input registers, and output register. - // memcpy() uses the normal MIPS calling convention. 
- InvokeRuntimeCallingConvention calling_convention; - - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); - locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>())); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -2596,16 +2588,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dstBegin = locations->InAt(4).AsRegister<Register>(); Register dstPtr = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(dstPtr, A0); Register srcPtr = locations->GetTemp(1).AsRegister<Register>(); - DCHECK_EQ(srcPtr, A1); Register numChrs = locations->GetTemp(2).AsRegister<Register>(); - DCHECK_EQ(numChrs, A2); - - Register dstReturn = locations->GetTemp(3).AsRegister<Register>(); - DCHECK_EQ(dstReturn, V0); MipsLabel done; + MipsLabel loop; // Location of data in char array buffer. const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -2634,7 +2621,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset); __ Sll(TMP, TMP, 31); - // If string is uncompressed, use memcpy() path. + // If string is uncompressed, use uncompressed path. __ Bnez(TMP, &uncompressed_copy); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. @@ -2660,10 +2647,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Addu(srcPtr, srcPtr, AT); } - // Calculate number of bytes to copy from number of characters. - __ Sll(numChrs, numChrs, char_shift); - - codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr); + __ Bind(&loop); + __ Lh(AT, srcPtr, 0); + __ Addiu(numChrs, numChrs, -1); + __ Addiu(srcPtr, srcPtr, char_size); + __ Sh(AT, dstPtr, 0); + __ Addiu(dstPtr, dstPtr, char_size); + __ Bnez(numChrs, &loop); __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 21c5074a1c..1ee89cf127 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1895,7 +1895,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1903,17 +1903,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // We will call memcpy() to do the actual work. Allocate the temporary - // registers to use the correct input registers, and output register. - // memcpy() uses the normal MIPS calling conventions. 
- InvokeRuntimeCallingConvention calling_convention; - - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong); - locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1932,16 +1924,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>(); GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>(); - DCHECK_EQ(dstPtr, A0); GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>(); - DCHECK_EQ(srcPtr, A1); GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>(); - DCHECK_EQ(numChrs, A2); - - GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>(); - DCHECK_EQ(dstReturn, V0); Mips64Label done; + Mips64Label loop; // Location of data in char array buffer. const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -1965,7 +1952,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset); __ Dext(TMP, TMP, 0, 1); - // If string is uncompressed, use memcpy() path. + // If string is uncompressed, use uncompressed path. __ Bnezc(TMP, &uncompressed_copy); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. @@ -1986,10 +1973,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Daddiu(srcPtr, srcObj, value_offset); __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift); - // Calculate number of bytes to copy from number of characters. - __ Dsll(numChrs, numChrs, char_shift); - - codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr); + __ Bind(&loop); + __ Lh(AT, srcPtr, 0); + __ Daddiu(numChrs, numChrs, -1); + __ Daddiu(srcPtr, srcPtr, char_size); + __ Sh(AT, dstPtr, 0); + __ Daddiu(dstPtr, dstPtr, char_size); + __ Bnezc(numChrs, &loop); __ Bind(&done); } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 091b58a63d..6f0dbce2df 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -69,11 +69,13 @@ class Location : public ValueObject { // We do not use the value 9 because it conflicts with kLocationConstantMask. kDoNotUse9 = 9, + kSIMDStackSlot = 10, // 128bit stack slot. TODO: generalize with encoded #bytes? + // Unallocated location represents a location that is not fixed and can be // allocated by a register allocator. Each unallocated location has // a policy that specifies what kind of location is suitable. Payload // contains register allocation policy. 
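// [Editorial note] The new kSIMDStackSlot kind above reuses the same payload
// scheme as the narrower stack-slot kinds: a signed stack index is stored with a
// bias so that negative indices survive the round trip through the unsigned
// payload. A standalone sketch of that encoding (the bias value here is
// illustrative only, not the actual kStackIndexBias in locations.h):
#include <cstdint>
namespace sketch {
constexpr intptr_t kStackIndexBias = intptr_t{1} << 28;  // assumed for illustration
inline uintptr_t EncodeStackIndex(intptr_t stack_index) {
  return static_cast<uintptr_t>(stack_index + kStackIndexBias);  // negatives map to valid payloads
}
inline intptr_t DecodeStackIndex(uintptr_t payload) {
  return static_cast<intptr_t>(payload) - kStackIndexBias;  // decoded manually to preserve sign
}
}  // namespace sketch
// The SIMDStackSlot() factory below round-trips this encoding and DCHECKs that
// the decoded index equals the input, exactly like StackSlot/DoubleStackSlot.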
- kUnallocated = 10, + kUnallocated = 11, }; Location() : ValueObject(), value_(kInvalid) { @@ -82,6 +84,7 @@ class Location : public ValueObject { static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError"); static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError"); static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError"); + static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError"); static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError"); static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError"); static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError"); @@ -266,8 +269,20 @@ class Location : public ValueObject { return GetKind() == kDoubleStackSlot; } + static Location SIMDStackSlot(intptr_t stack_index) { + uintptr_t payload = EncodeStackIndex(stack_index); + Location loc(kSIMDStackSlot, payload); + // Ensure that sign is preserved. + DCHECK_EQ(loc.GetStackIndex(), stack_index); + return loc; + } + + bool IsSIMDStackSlot() const { + return GetKind() == kSIMDStackSlot; + } + intptr_t GetStackIndex() const { - DCHECK(IsStackSlot() || IsDoubleStackSlot()); + DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot()); // Decode stack index manually to preserve sign. return GetPayload() - kStackIndexBias; } @@ -315,6 +330,7 @@ class Location : public ValueObject { case kRegister: return "R"; case kStackSlot: return "S"; case kDoubleStackSlot: return "DS"; + case kSIMDStackSlot: return "SIMD"; case kUnallocated: return "U"; case kConstant: return "C"; case kFpuRegister: return "F"; @@ -417,6 +433,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs); class RegisterSet : public ValueObject { public: static RegisterSet Empty() { return RegisterSet(); } + static RegisterSet AllFpu() { return RegisterSet(0, -1); } void Add(Location loc) { if (loc.IsRegister()) { @@ -462,6 +479,7 @@ class RegisterSet : public ValueObject { private: RegisterSet() : core_registers_(0), floating_point_registers_(0) {} + RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {} uint32_t core_registers_; uint32_t floating_point_registers_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 020e4463d4..ec706e6694 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasTryCatch()) { outer_graph->SetHasTryCatch(true); } + if (HasSIMD()) { + outer_graph->SetHasSIMD(true); + } HInstruction* return_value = nullptr; if (GetBlocks().size() == 3) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 542b218cf8..6881d8f6ae 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { temporaries_vreg_slots_(0), has_bounds_checks_(false), has_try_catch_(false), + has_simd_(false), has_loops_(false), has_irreducible_loops_(false), debuggable_(debuggable), @@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + bool HasSIMD() const { return has_simd_; } + void SetHasSIMD(bool value) { has_simd_ = value; } + bool HasLoops() const { return has_loops_; } void SetHasLoops(bool value) { has_loops_ = value; } @@ -652,6 +656,11 @@ class 
HGraph : public ArenaObject<kArenaAllocGraph> { // false positives. bool has_try_catch_; + // Flag whether SIMD instructions appear in the graph. If true, the + // code generators may have to be more careful spilling the wider + // contents of SIMD registers. + bool has_simd_; + // Flag whether there are any loops in the graph. We can skip loop // optimization if it's false. It's only best effort to keep it up // to date in the presence of code elimination so there might be false diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 23ccd9e953..3c6d2d64a9 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -499,7 +499,8 @@ static HOptimization* BuildOptimization( handles, stats, number_of_dex_registers, - /* depth */ 0); + /* total_number_of_instructions */ 0, + /* parent */ nullptr); } else if (opt_name == HSharpening::kSharpeningPassName) { return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles); } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) { @@ -607,8 +608,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, VariableSizedHandleScope* handles) const { OptimizingCompilerStats* stats = compilation_stats_.get(); const CompilerOptions& compiler_options = driver->GetCompilerOptions(); - bool should_inline = (compiler_options.GetInlineDepthLimit() > 0) - && (compiler_options.GetInlineMaxCodeUnits() > 0); + bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0); if (!should_inline) { return; } @@ -623,7 +623,8 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph, handles, stats, number_of_dex_registers, - /* depth */ 0); + /* total_number_of_instructions */ 0, + /* parent */ nullptr); HOptimization* optimizations[] = { inliner }; RunOptimizations(optimizations, arraysize(optimizations), pass_observer); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index ae9a8119a7..a211c5472a 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -69,6 +69,23 @@ enum MethodCompilationStat { kExplicitNullCheckGenerated, kSimplifyIf, kInstructionSunk, + kNotInlinedUnresolvedEntrypoint, + kNotInlinedDexCache, + kNotInlinedStackMaps, + kNotInlinedEnvironmentBudget, + kNotInlinedInstructionBudget, + kNotInlinedLoopWithoutExit, + kNotInlinedIrreducibleLoop, + kNotInlinedAlwaysThrows, + kNotInlinedInfiniteLoop, + kNotInlinedTryCatch, + kNotInlinedRegisterAllocator, + kNotInlinedCannotBuild, + kNotInlinedNotVerified, + kNotInlinedCodeItem, + kNotInlinedWont, + kNotInlinedRecursiveBudget, + kNotInlinedProxy, kLastStat }; @@ -168,6 +185,23 @@ class OptimizingCompilerStats { case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break; case kSimplifyIf: name = "SimplifyIf"; break; case kInstructionSunk: name = "InstructionSunk"; break; + case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break; + case kNotInlinedDexCache: name = "NotInlinedDexCache"; break; + case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break; + case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break; + case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break; + case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break; + case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break; + case kNotInlinedAlwaysThrows: 
name = "NotInlinedAlwaysThrows"; break; + case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break; + case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break; + case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break; + case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break; + case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break; + case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break; + case kNotInlinedWont: name = "NotInlinedWont"; break; + case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break; + case kNotInlinedProxy: name = "NotInlinedProxy"; break; case kLastStat: LOG(FATAL) << "invalid stat " diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 8a9c1ccaff..c6a0b6a0d2 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -299,11 +299,14 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { // Currently, we spill unconditionnally the current method in the code generators. && !interval->GetDefinedBy()->IsCurrentMethod()) { // We spill eagerly, so move must be at definition. - InsertMoveAfter(interval->GetDefinedBy(), - interval->ToLocation(), - interval->NeedsTwoSpillSlots() - ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) - : Location::StackSlot(interval->GetParent()->GetSpillSlot())); + Location loc; + switch (interval->NumberOfSpillSlotsNeeded()) { + case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break; + case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break; + case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break; + default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE(); + } + InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc); } UsePosition* use = current->GetFirstUse(); EnvUsePosition* env_use = current->GetFirstEnvironmentUse(); @@ -459,9 +462,12 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval, location_source = defined_by->GetLocations()->Out(); } else { DCHECK(defined_by->IsCurrentMethod()); - location_source = parent->NeedsTwoSpillSlots() - ? 
Location::DoubleStackSlot(parent->GetSpillSlot()) - : Location::StackSlot(parent->GetSpillSlot()); + switch (parent->NumberOfSpillSlotsNeeded()) { + case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break; + case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break; + case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break; + default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE(); + } } } else { DCHECK(source != nullptr); @@ -492,7 +498,8 @@ static bool IsValidDestination(Location destination) { || destination.IsFpuRegister() || destination.IsFpuRegisterPair() || destination.IsStackSlot() - || destination.IsDoubleStackSlot(); + || destination.IsDoubleStackSlot() + || destination.IsSIMDStackSlot(); } void RegisterAllocationResolver::AddMove(HParallelMove* move, diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 9064f865c3..87f709f63d 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); } else { interval->SetSpillSlot(catch_phi_spill_slot_counter_); - catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1; + catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); } } } @@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in bool is_interval_beginning; size_t position; std::tie(position, is_interval_beginning, parent_interval) = *it; - - bool needs_two_slots = parent_interval->NeedsTwoSpillSlots(); + size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); if (is_interval_beginning) { DCHECK(!parent_interval->HasSpillSlot()); DCHECK_EQ(position, parent_interval->GetStart()); - // Find a free stack slot. + // Find first available free stack slot(s). size_t slot = 0; - for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) { - // Skip taken slots. + for (; ; ++slot) { + bool found = true; + for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { + if (taken.IsBitSet(s)) { + found = false; + break; // failure + } + } + if (found) { + break; // success + } } + parent_interval->SetSpillSlot(slot); - *num_stack_slots_used = std::max(*num_stack_slots_used, - needs_two_slots ? slot + 1 : slot + 2); - if (needs_two_slots && *num_stack_slots_used % 2 != 0) { + *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); + if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { // The parallel move resolver requires that there be an even number of spill slots // allocated for pair value types. ++(*num_stack_slots_used); } - taken.SetBit(slot); - if (needs_two_slots) { - taken.SetBit(slot + 1); + for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { + taken.SetBit(s); } } else { DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); DCHECK(parent_interval->HasSpillSlot()); - // Free up the stack slot used by this interval. + // Free up the stack slot(s) used by this interval. 
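// [Editorial note] With SIMD values needing four slots, the graph-color
// allocator above now searches for the first run of N contiguous free slots
// instead of special-casing "one or two". A standalone equivalent of that
// search, with std::vector<bool> standing in for the bit set (assumption:
// out-of-range bits count as free, as with a growable bit vector):
#include <cstddef>
#include <vector>
namespace sketch {
inline size_t FindFirstFreeRun(const std::vector<bool>& taken, size_t needed) {
  for (size_t slot = 0; ; ++slot) {
    bool found = true;
    for (size_t s = slot, u = slot + needed; s < u; ++s) {
      if (s < taken.size() && taken[s]) {
        found = false;  // run blocked; retry starting one slot further on
        break;
      }
    }
    if (found) {
      return slot;  // first slot of `needed` contiguous free slots
    }
  }
}
}  // namespace sketch
// The teardown path below clears the same run of `needed` bits, mirroring the
// multi-bit SetBit loop in the allocation path above.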
size_t slot = parent_interval->GetSpillSlot(); - DCHECK(taken.IsBitSet(slot)); - DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1)); - taken.ClearBit(slot); - if (needs_two_slots) { - taken.ClearBit(slot + 1); + for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { + DCHECK(taken.IsBitSet(s)); + taken.ClearBit(s); } } } diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 6354e76ec8..ab8d540359 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) { LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); } - // Find an available spill slot. + // Find first available spill slots. + size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded(); size_t slot = 0; for (size_t e = spill_slots->size(); slot < e; ++slot) { - if ((*spill_slots)[slot] <= parent->GetStart()) { - if (!parent->NeedsTwoSpillSlots()) { - // One spill slot is sufficient. - break; - } - if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) { - // Two spill slots are available. + bool found = true; + for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) { + if ((*spill_slots)[s] > parent->GetStart()) { + found = false; // failure break; } } + if (found) { + break; // success + } } + // Need new spill slots? + size_t upper = slot + number_of_spill_slots_needed; + if (upper > spill_slots->size()) { + spill_slots->resize(upper); + } + // Set slots to end. size_t end = interval->GetLastSibling()->GetEnd(); - if (parent->NeedsTwoSpillSlots()) { - if (slot + 2u > spill_slots->size()) { - // We need a new spill slot. - spill_slots->resize(slot + 2u, end); - } - (*spill_slots)[slot] = end; - (*spill_slots)[slot + 1] = end; - } else { - if (slot == spill_slots->size()) { - // We need a new spill slot. - spill_slots->push_back(end); - } else { - (*spill_slots)[slot] = end; - } + for (size_t s = slot; s < upper; s++) { + (*spill_slots)[s] = end; } // Note that the exact spill slot location will be computed when we resolve, @@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) { // TODO: Reuse spill slots when intervals of phis from different catch // blocks do not overlap. interval->SetSpillSlot(catch_phi_spill_slots_); - catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1; + catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded(); } } diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 7bd38c7a8c..eedaf6e67e 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -259,7 +259,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { } else if (runtime->UseJitCompilation()) { // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus. // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); - string = class_linker->LookupString(dex_file, string_index, dex_cache); + string = class_linker->LookupString(dex_file, string_index, dex_cache.Get()); if (string != nullptr) { if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; @@ -271,7 +271,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { } } else { // AOT app compilation. 
Try to lookup the string without allocating if not found. - string = class_linker->LookupString(dex_file, string_index, dex_cache); + string = class_linker->LookupString(dex_file, string_index, dex_cache.Get()); if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string) && !codegen_->GetCompilerOptions().GetCompilePic()) { diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index e8e12e1a55..36ee5a903a 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -469,8 +469,10 @@ bool LiveInterval::SameRegisterKind(Location other) const { } } -bool LiveInterval::NeedsTwoSpillSlots() const { - return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble; +size_t LiveInterval::NumberOfSpillSlotsNeeded() const { + // TODO: detect vector operation. + // Return number of needed spill slots based on type. + return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1; } Location LiveInterval::ToLocation() const { @@ -494,10 +496,11 @@ Location LiveInterval::ToLocation() const { if (defined_by->IsConstant()) { return defined_by->GetLocations()->Out(); } else if (GetParent()->HasSpillSlot()) { - if (NeedsTwoSpillSlots()) { - return Location::DoubleStackSlot(GetParent()->GetSpillSlot()); - } else { - return Location::StackSlot(GetParent()->GetSpillSlot()); + switch (NumberOfSpillSlotsNeeded()) { + case 1: return Location::StackSlot(GetParent()->GetSpillSlot()); + case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot()); + case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot()); + default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE(); } } else { return Location(); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 340d0ccefe..e9dffc1fac 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Returns kNoRegister otherwise. int FindHintAtDefinition() const; - // Returns whether the interval needs two (Dex virtual register size `kVRegSize`) - // slots for spilling. - bool NeedsTwoSpillSlots() const; + // Returns the number of required spilling slots (measured as a multiple of the + // Dex virtual register size `kVRegSize`). 
+ size_t NumberOfSpillSlotsNeeded() const; bool IsFloatingPoint() const { return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble; diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 2f154fb862..3ac6c3ca7a 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -84,7 +84,11 @@ template <> MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( ArenaAllocator* arena, InstructionSet instruction_set, - const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) { + const InstructionSetFeatures* instruction_set_features) { +#ifndef ART_ENABLE_CODEGEN_mips64 + UNUSED(instruction_set_features); +#endif + switch (instruction_set) { #ifdef ART_ENABLE_CODEGEN_arm64 case kArm64: @@ -92,7 +96,11 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( #endif #ifdef ART_ENABLE_CODEGEN_mips64 case kMips64: - return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena)); + return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler( + arena, + instruction_set_features != nullptr + ? instruction_set_features->AsMips64InstructionSetFeatures() + : nullptr)); #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case kX86_64: diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 4e7f635246..8a5ae754df 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -1180,373 +1180,456 @@ void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) { Nor(rd, rs, ZERO); } -// TODO: Check for MSA presence in Mips64InstructionSetFeatures for each MSA instruction. - void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e); } void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e); } void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e); } void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e); } void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe); } void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe); } void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe); } void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe); } void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe); } void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe); } void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe); } void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe); } void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); 
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12); } void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12); } void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12); } void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12); } void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12); } void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12); } void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12); } void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12); } void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12); } void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12); } void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12); } void Mips64Assembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12); } void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); } void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); } void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b); } void Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b); } void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b); } void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); 
EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b); } void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b); } void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b); } void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b); } void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e); } void Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e); } void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e); } void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e); } void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd); } void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd); } void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd); } void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd); } void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd); } void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd); } void Mips64Assembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd); } void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd); } void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd); } void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd); } void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd); } void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd); } void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); CHECK(IsUint<3>(shamt3)) << shamt3; EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); } void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); CHECK(IsUint<4>(shamt4)) << shamt4; EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); } void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); CHECK(IsUint<5>(shamt5)) << shamt5; EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); } void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); CHECK(IsUint<6>(shamt6)) << shamt6; EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); } void 
Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); CHECK(IsUint<3>(shamt3)) << shamt3; EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); } void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); CHECK(IsUint<4>(shamt4)) << shamt4; EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); } void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); CHECK(IsUint<5>(shamt5)) << shamt5; EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); } void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); CHECK(IsUint<6>(shamt6)) << shamt6; EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); } void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(HasMsa()); CHECK(IsUint<3>(shamt3)) << shamt3; EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); } void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(HasMsa()); CHECK(IsUint<4>(shamt4)) << shamt4; EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); } void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(HasMsa()); CHECK(IsUint<5>(shamt5)) << shamt5; EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); } void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(HasMsa()); CHECK(IsUint<6>(shamt6)) << shamt6; EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); } void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) { + CHECK(HasMsa()); EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19); } void Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) { + CHECK(HasMsa()); CHECK(IsUint<4>(n4)) << n4; EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19); } void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) { + CHECK(HasMsa()); CHECK(IsUint<3>(n3)) << n3; EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19); } void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) { + CHECK(HasMsa()); CHECK(IsUint<2>(n2)) << n2; EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19); } void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) { + CHECK(HasMsa()); CHECK(IsUint<1>(n1)) << n1; EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19); } void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e); } void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e); } void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e); } void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) { + CHECK(HasMsa()); EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e); } void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<10>(offset)) << offset; EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0); } void Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<11>(offset)) << offset; CHECK_ALIGNED(offset, kMips64HalfwordSize); EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1); } void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); 
CHECK(IsInt<12>(offset)) << offset; CHECK_ALIGNED(offset, kMips64WordSize); EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2); } void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<13>(offset)) << offset; CHECK_ALIGNED(offset, kMips64DoublewordSize); EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3); } void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<10>(offset)) << offset; EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0); } void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<11>(offset)) << offset; CHECK_ALIGNED(offset, kMips64HalfwordSize); EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1); } void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<12>(offset)) << offset; CHECK_ALIGNED(offset, kMips64WordSize); EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2); } void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(HasMsa()); CHECK(IsInt<13>(offset)) << offset; CHECK_ALIGNED(offset, kMips64DoublewordSize); EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index f42c1626df..a8035b6da4 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -21,6 +21,7 @@ #include <utility> #include <vector> +#include "arch/mips64/instruction_set_features_mips64.h" #include "base/arena_containers.h" #include "base/enums.h" #include "base/macros.h" @@ -413,7 +414,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer public: using JNIBase = JNIMacroAssembler<PointerSize::k64>; - explicit Mips64Assembler(ArenaAllocator* arena) + explicit Mips64Assembler(ArenaAllocator* arena, + const Mips64InstructionSetFeatures* instruction_set_features = nullptr) : Assembler(arena), overwriting_(false), overwrite_location_(0), @@ -422,7 +424,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer jump_tables_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), - last_branch_id_(0) { + last_branch_id_(0), + has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) { cfi().DelayEmittingAdvancePCs(); } @@ -1479,6 +1482,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer // Emits exception block. void EmitExceptionPoll(Mips64ExceptionSlowPath* exception); + bool HasMsa() const { + return has_msa_; + } + // List of exception blocks to generate at the end of the code cache. 
std::vector<Mips64ExceptionSlowPath> exception_blocks_; @@ -1502,6 +1509,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer uint32_t last_old_position_; uint32_t last_branch_id_; + const bool has_msa_; + DISALLOW_COPY_AND_ASSIGN(Mips64Assembler); }; diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 12660ce85d..cadbe27819 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -46,6 +46,9 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, uint32_t, mips64::VectorRegister> Base; + AssemblerMIPS64Test() + : instruction_set_features_(Mips64InstructionSetFeatures::FromVariant("default", nullptr)) {} + protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... std::string GetArchitectureString() OVERRIDE { @@ -78,6 +81,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return " -D -bbinary -mmips:isa64r6"; } + mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE { + return new (arena) mips64::Mips64Assembler(arena, instruction_set_features_.get()); + } + void SetUpHelpers() OVERRIDE { if (registers_.size() == 0) { registers_.push_back(new mips64::GpuRegister(mips64::ZERO)); @@ -313,8 +320,9 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, std::vector<mips64::FpuRegister*> fp_registers_; std::vector<mips64::VectorRegister*> vec_registers_; -}; + std::unique_ptr<const Mips64InstructionSetFeatures> instruction_set_features_; +}; TEST_F(AssemblerMIPS64Test, Toolchain) { EXPECT_TRUE(CheckTools());
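A closing note on the MSA plumbing: the assembler latches a single boolean off the instruction set features at construction time, and every MSA emitter CHECKs it, so vector encodings can never be produced for a core without the extension. A minimal sketch of that pattern with simplified stand-ins (Features and the CHECK macro below are assumptions, not the real Mips64InstructionSetFeatures API):

#include <cassert>
#define CHECK(cond) assert(cond)  // stand-in for ART's CHECK macro
namespace sketch {
class Features {
 public:
  explicit Features(bool has_msa) : has_msa_(has_msa) {}
  bool HasMsa() const { return has_msa_; }
 private:
  const bool has_msa_;
};
class Assembler {
 public:
  explicit Assembler(const Features* features = nullptr)
      : has_msa_(features != nullptr && features->HasMsa()) {}
  void AndV(/* wd, ws, wt */) {
    CHECK(HasMsa());  // reject MSA instructions on cores without the extension
    // EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e);
  }
 private:
  bool HasMsa() const { return has_msa_; }
  const bool has_msa_;  // latched once at construction, as in Mips64Assembler
};
}  // namespace sketch

The CreateAssembler override in the test harness exists for the same reason: the test assembler is constructed with explicit instruction set features rather than the null default, so the gated emitters are exercised under a known feature set.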