Diffstat (limited to 'compiler')
-rw-r--r--  compiler/driver/compiler_options.cc                    |   9
-rw-r--r--  compiler/driver/compiler_options.h                     |  16
-rw-r--r--  compiler/image_test.cc                                 |  92
-rw-r--r--  compiler/image_writer.cc                               | 201
-rw-r--r--  compiler/image_writer.h                                |  12
-rw-r--r--  compiler/jit/jit_compiler.cc                           |   5
-rw-r--r--  compiler/oat_writer.cc                                 | 102
-rw-r--r--  compiler/optimizing/code_generator_arm.cc              |  78
-rw-r--r--  compiler/optimizing/code_generator_arm.h               |   2
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc         |  72
-rw-r--r--  compiler/optimizing/code_generator_x86.cc              |  30
-rw-r--r--  compiler/optimizing/code_generator_x86.h               |   5
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc           |  36
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h            |   4
-rw-r--r--  compiler/optimizing/graph_visualizer.cc                |   6
-rw-r--r--  compiler/optimizing/inliner.cc                         | 395
-rw-r--r--  compiler/optimizing/inliner.h                          |  34
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc                  |  53
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc             |  87
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc                 |  36
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc               |  36
-rw-r--r--  compiler/optimizing/locations.h                        |  22
-rw-r--r--  compiler/optimizing/nodes.cc                           |   3
-rw-r--r--  compiler/optimizing/nodes.h                            |   9
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc             |   9
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h        |  34
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc    |  25
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc  |  41
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc  |  41
-rw-r--r--  compiler/optimizing/sharpening.cc                      |   4
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc           |  15
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h            |   6
-rw-r--r--  compiler/utils/jni_macro_assembler.cc                  |  12
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc              |  87
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h               |  13
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc         |  10
36 files changed, 1155 insertions(+), 487 deletions(-)
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 34ad1c5c08..a0c0a2acf6 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -27,7 +27,6 @@ CompilerOptions::CompilerOptions()
small_method_threshold_(kDefaultSmallMethodThreshold),
tiny_method_threshold_(kDefaultTinyMethodThreshold),
num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
- inline_depth_limit_(kUnsetInlineDepthLimit),
inline_max_code_units_(kUnsetInlineMaxCodeUnits),
no_inline_from_(nullptr),
boot_image_(false),
@@ -62,7 +61,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter,
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- size_t inline_depth_limit,
size_t inline_max_code_units,
const std::vector<const DexFile*>* no_inline_from,
double top_k_profile_threshold,
@@ -86,7 +84,6 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter,
small_method_threshold_(small_method_threshold),
tiny_method_threshold_(tiny_method_threshold),
num_dex_methods_threshold_(num_dex_methods_threshold),
- inline_depth_limit_(inline_depth_limit),
inline_max_code_units_(inline_max_code_units),
no_inline_from_(no_inline_from),
boot_image_(false),
@@ -130,10 +127,6 @@ void CompilerOptions::ParseNumDexMethods(const StringPiece& option, UsageFn Usag
ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage);
}
-void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) {
- ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage);
-}
-
void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
}
@@ -183,8 +176,6 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
ParseTinyMethodMax(option, Usage);
} else if (option.starts_with("--num-dex-methods=")) {
ParseNumDexMethods(option, Usage);
- } else if (option.starts_with("--inline-depth-limit=")) {
- ParseInlineDepthLimit(option, Usage);
} else if (option.starts_with("--inline-max-code-units=")) {
ParseInlineMaxCodeUnits(option, Usage);
} else if (option == "--generate-debug-info" || option == "-g") {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 2e3e55f6c6..2376fbf5f5 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -46,15 +46,9 @@ class CompilerOptions FINAL {
static constexpr double kDefaultTopKProfileThreshold = 90.0;
static const bool kDefaultGenerateDebugInfo = false;
static const bool kDefaultGenerateMiniDebugInfo = false;
- static const size_t kDefaultInlineDepthLimit = 3;
static const size_t kDefaultInlineMaxCodeUnits = 32;
- static constexpr size_t kUnsetInlineDepthLimit = -1;
static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
- // Default inlining settings when the space filter is used.
- static constexpr size_t kSpaceFilterInlineDepthLimit = 3;
- static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10;
-
CompilerOptions();
~CompilerOptions();
@@ -64,7 +58,6 @@ class CompilerOptions FINAL {
size_t small_method_threshold,
size_t tiny_method_threshold,
size_t num_dex_methods_threshold,
- size_t inline_depth_limit,
size_t inline_max_code_units,
const std::vector<const DexFile*>* no_inline_from,
double top_k_profile_threshold,
@@ -155,13 +148,6 @@ class CompilerOptions FINAL {
return num_dex_methods_threshold_;
}
- size_t GetInlineDepthLimit() const {
- return inline_depth_limit_;
- }
- void SetInlineDepthLimit(size_t limit) {
- inline_depth_limit_ = limit;
- }
-
size_t GetInlineMaxCodeUnits() const {
return inline_max_code_units_;
}
@@ -275,7 +261,6 @@ class CompilerOptions FINAL {
void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage);
- void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage);
void ParseNumDexMethods(const StringPiece& option, UsageFn Usage);
void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage);
void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
@@ -289,7 +274,6 @@ class CompilerOptions FINAL {
size_t small_method_threshold_;
size_t tiny_method_threshold_;
size_t num_dex_methods_threshold_;
- size_t inline_depth_limit_;
size_t inline_max_code_units_;
// Dex files from which we should not inline code.
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 89e8a678b1..897d81993d 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -76,7 +76,7 @@ class ImageTest : public CommonCompilerTest {
void Compile(ImageHeader::StorageMode storage_mode,
CompilationHelper& out_helper,
const std::string& extra_dex = "",
- const std::string& image_class = "");
+ const std::initializer_list<std::string>& image_classes = {});
void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
CommonCompilerTest::SetUpRuntimeOptions(options);
@@ -90,6 +90,18 @@ class ImageTest : public CommonCompilerTest {
return new std::unordered_set<std::string>(image_classes_);
}
+ ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ PointerSize pointer_size = class_linker_->GetImagePointerSize();
+ for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+ if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+ origin->GetSignature() == m.GetSignature()) {
+ return &m;
+ }
+ }
+ return nullptr;
+ }
+
private:
std::unordered_set<std::string> image_classes_;
};
@@ -345,26 +357,27 @@ void CompilationHelper::Compile(CompilerDriver* driver,
void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
CompilationHelper& helper,
const std::string& extra_dex,
- const std::string& image_class) {
- if (!image_class.empty()) {
+ const std::initializer_list<std::string>& image_classes) {
+ for (const std::string& image_class : image_classes) {
image_classes_.insert(image_class);
}
CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
// Set inline filter values.
- compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit);
compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
image_classes_.clear();
if (!extra_dex.empty()) {
helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
}
helper.Compile(compiler_driver_.get(), storage_mode);
- if (!image_class.empty()) {
+ if (image_classes.begin() != image_classes.end()) {
// Make sure the class got initialized.
ScopedObjectAccess soa(Thread::Current());
ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
- mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
- EXPECT_TRUE(klass != nullptr);
- EXPECT_TRUE(klass->IsInitialized());
+ for (const std::string& image_class : image_classes) {
+ mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+ EXPECT_TRUE(klass != nullptr);
+ EXPECT_TRUE(klass->IsInitialized());
+ }
}
}
@@ -492,7 +505,7 @@ TEST_F(ImageTest, TestImageLayout) {
// Compile multi-image with ImageLayoutA being the last image.
{
CompilationHelper helper;
- Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", "LMyClass;");
+ Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", {"LMyClass;"});
image_sizes = helper.GetImageObjectSectionSizes();
}
TearDown();
@@ -501,7 +514,7 @@ TEST_F(ImageTest, TestImageLayout) {
// Compile multi-image with ImageLayoutB being the last image.
{
CompilationHelper helper;
- Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", "LMyClass;");
+ Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", {"LMyClass;"});
image_sizes_extra = helper.GetImageObjectSectionSizes();
}
// Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the
@@ -553,4 +566,63 @@ TEST_F(ImageTest, ImageHeaderIsValid) {
ASSERT_FALSE(image_header.IsValid());
}
+// Test that the pointer to quick code is the same in
+// a default method of an interface and in a copied method
+// of a class which implements the interface. This should be true
+// only if the copied method and the origin method are located in the
+// same oat file.
+TEST_F(ImageTest, TestDefaultMethods) {
+ CompilationHelper helper;
+ Compile(ImageHeader::kStorageModeUncompressed,
+ helper,
+ "DefaultMethods",
+ {"LIface;", "LImpl;", "LIterableBase;"});
+
+ PointerSize pointer_size = class_linker_->GetImagePointerSize();
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+
+ // Test that the pointer to quick code is the same in the origin method
+ // and in the copied method from the same oat file.
+ mirror::Class* iface_klass = class_linker_->LookupClass(
+ self, "LIface;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, iface_klass);
+ ArtMethod* origin = iface_klass->FindDeclaredVirtualMethod(
+ "defaultMethod", "()V", pointer_size);
+ ASSERT_NE(nullptr, origin);
+ const void* code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ // The origin method should have a pointer to quick code
+ ASSERT_NE(nullptr, code);
+ ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code));
+ mirror::Class* impl_klass = class_linker_->LookupClass(
+ self, "LImpl;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, impl_klass);
+ ArtMethod* copied = FindCopiedMethod(origin, impl_klass);
+ ASSERT_NE(nullptr, copied);
+ // The copied method should have a pointer to the same quick code as the origin method.
+ ASSERT_EQ(code, copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size));
+
+ // Test that the origin method has a pointer to quick code
+ // but the copied method has a pointer to the interpreter
+ // because these methods are in different oat files.
+ mirror::Class* iterable_klass = class_linker_->LookupClass(
+ self, "Ljava/lang/Iterable;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, iterable_klass);
+ origin = iterable_klass->FindDeclaredVirtualMethod(
+ "forEach", "(Ljava/util/function/Consumer;)V", pointer_size);
+ ASSERT_NE(nullptr, origin);
+ code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ // The origin method should have a pointer to quick code.
+ ASSERT_NE(nullptr, code);
+ ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code));
+ mirror::Class* iterablebase_klass = class_linker_->LookupClass(
+ self, "LIterableBase;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, iterablebase_klass);
+ copied = FindCopiedMethod(origin, iterablebase_klass);
+ ASSERT_NE(nullptr, copied);
+ code = copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ // The copied method should have a pointer to the interpreter.
+ ASSERT_TRUE(class_linker_->IsQuickToInterpreterBridge(code));
+}
+
} // namespace art
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index aefdb548ff..d156644484 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -714,7 +714,8 @@ void ImageWriter::ComputeLazyFieldsForImageClasses() {
class_linker->VisitClassesWithoutClassesLock(&visitor);
}
-static bool IsBootClassLoaderClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+static bool IsBootClassLoaderClass(ObjPtr<mirror::Class> klass)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
return klass->GetClassLoader() == nullptr;
}
@@ -722,33 +723,33 @@ bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) {
return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
}
-bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(ObjPtr<mirror::Class> klass) {
bool early_exit = false;
std::unordered_set<mirror::Class*> visited;
return PruneAppImageClassInternal(klass, &early_exit, &visited);
}
bool ImageWriter::PruneAppImageClassInternal(
- mirror::Class* klass,
+ ObjPtr<mirror::Class> klass,
bool* early_exit,
std::unordered_set<mirror::Class*>* visited) {
DCHECK(early_exit != nullptr);
DCHECK(visited != nullptr);
DCHECK(compile_app_image_);
- if (klass == nullptr || IsInBootImage(klass)) {
+ if (klass == nullptr || IsInBootImage(klass.Ptr())) {
return false;
}
- auto found = prune_class_memo_.find(klass);
+ auto found = prune_class_memo_.find(klass.Ptr());
if (found != prune_class_memo_.end()) {
// Already computed, return the found value.
return found->second;
}
// Circular dependencies, return false but do not store the result in the memoization table.
- if (visited->find(klass) != visited->end()) {
+ if (visited->find(klass.Ptr()) != visited->end()) {
*early_exit = true;
return false;
}
- visited->emplace(klass);
+ visited->emplace(klass.Ptr());
bool result = IsBootClassLoaderClass(klass);
std::string temp;
// Prune if not an image class, this handles any broken sets of image classes such as having a
@@ -812,20 +813,20 @@ bool ImageWriter::PruneAppImageClassInternal(
dex_file_oat_index_map_.find(dex_cache->GetDexFile()) == dex_file_oat_index_map_.end();
}
// Erase the element we stored earlier since we are exiting the function.
- auto it = visited->find(klass);
+ auto it = visited->find(klass.Ptr());
DCHECK(it != visited->end());
visited->erase(it);
// Only store result if it is true or none of the calls early exited due to circular
// dependencies. If visited is empty then we are the root caller, in this case the cycle was in
// a child call and we can remember the result.
if (result == true || !my_early_exit || visited->empty()) {
- prune_class_memo_[klass] = result;
+ prune_class_memo_[klass.Ptr()] = result;
}
*early_exit |= my_early_exit;
return result;
}
-bool ImageWriter::KeepClass(Class* klass) {
+bool ImageWriter::KeepClass(ObjPtr<mirror::Class> klass) {
if (klass == nullptr) {
return false;
}
@@ -896,15 +897,27 @@ class ImageWriter::PruneClassLoaderClassesVisitor : public ClassLoaderVisitor {
Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(class_loader);
class_table->Visit(classes_visitor);
removed_class_count_ += classes_visitor.Prune();
+
+ // Record the app image class loader. The fake boot class loader should not get registered,
+ // and we should end up with only one class loader for an app and none for the boot image.
+ if (class_loader != nullptr && class_table != nullptr) {
+ DCHECK(class_loader_ == nullptr);
+ class_loader_ = class_loader;
+ }
}
size_t GetRemovedClassCount() const {
return removed_class_count_;
}
+ ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) {
+ return class_loader_;
+ }
+
private:
ImageWriter* const image_writer_;
size_t removed_class_count_;
+ ObjPtr<mirror::ClassLoader> class_loader_;
};
void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
@@ -913,70 +926,150 @@ void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
Runtime::Current()->GetClassLinker()->VisitClassLoaders(visitor);
}
+void ImageWriter::PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+ ObjPtr<mirror::ClassLoader> class_loader) {
+ // To ensure deterministic contents of the hash-based arrays, each slot shall contain
+ // the candidate with the lowest index. As we're processing entries in increasing index
+ // order, this means trying to look up the entry for the current index if the slot is
+ // empty or if it contains a higher index.
+
+ Runtime* runtime = Runtime::Current();
+ ClassLinker* class_linker = runtime->GetClassLinker();
+ ArtMethod* resolution_method = runtime->GetResolutionMethod();
+ const DexFile& dex_file = *dex_cache->GetDexFile();
+ // Prune methods.
+ ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
+ for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
+ ArtMethod* method =
+ mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
+ DCHECK(method != nullptr) << "Expected resolution method instead of null method";
+ mirror::Class* declaring_class = method->GetDeclaringClass();
+ // Copied methods may be held live by a class which was not an image class but have a
+ // declaring class which is an image class. Set it to the resolution method to be safe and
+ // prevent dangling pointers.
+ if (method->IsCopied() || !KeepClass(declaring_class)) {
+ mirror::DexCache::SetElementPtrSize(resolved_methods,
+ i,
+ resolution_method,
+ target_ptr_size_);
+ } else if (kIsDebugBuild) {
+ // Check that the class is still in the classes table.
+ ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+ CHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
+ << Class::PrettyClass(declaring_class) << " not in class linker table";
+ }
+ }
+ // Prune fields and make the contents of the field array deterministic.
+ mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
+ dex::TypeIndex last_class_idx; // Initialized to invalid index.
+ ObjPtr<mirror::Class> last_class = nullptr;
+ for (size_t i = 0, end = dex_file.NumFieldIds(); i < end; ++i) {
+ uint32_t slot_idx = dex_cache->FieldSlotIndex(i);
+ auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, slot_idx, target_ptr_size_);
+ uint32_t stored_index = pair.index;
+ ArtField* field = pair.object;
+ if (field != nullptr && i > stored_index) {
+ continue; // Already checked.
+ }
+ // Check if the referenced class is in the image. Note that we want to check the referenced
+ // class rather than the declaring class to preserve the semantics, i.e. using a FieldId
+ // results in resolving the referenced class and that can for example throw OOME.
+ const DexFile::FieldId& field_id = dex_file.GetFieldId(i);
+ if (field_id.class_idx_ != last_class_idx) {
+ last_class_idx = field_id.class_idx_;
+ last_class = class_linker->LookupResolvedType(
+ dex_file, last_class_idx, dex_cache, class_loader);
+ if (last_class != nullptr && !KeepClass(last_class)) {
+ last_class = nullptr;
+ }
+ }
+ if (field == nullptr || i < stored_index) {
+ if (last_class != nullptr) {
+ const char* name = dex_file.StringDataByIdx(field_id.name_idx_);
+ const char* type = dex_file.StringByTypeIdx(field_id.type_idx_);
+ field = mirror::Class::FindField(Thread::Current(), last_class, name, type);
+ if (field != nullptr) {
+ // If the referenced class is in the image, the defining class must also be there.
+ DCHECK(KeepClass(field->GetDeclaringClass()));
+ dex_cache->SetResolvedField(i, field, target_ptr_size_);
+ }
+ }
+ } else {
+ DCHECK_EQ(i, stored_index);
+ if (last_class == nullptr) {
+ dex_cache->ClearResolvedField(stored_index, target_ptr_size_);
+ }
+ }
+ }
+ // Prune types and make the contents of the type array deterministic.
+ // This is done after fields and methods as their lookup can touch the types array.
+ for (size_t i = 0, end = dex_cache->GetDexFile()->NumTypeIds(); i < end; ++i) {
+ dex::TypeIndex type_idx(i);
+ uint32_t slot_idx = dex_cache->TypeSlotIndex(type_idx);
+ mirror::TypeDexCachePair pair =
+ dex_cache->GetResolvedTypes()[slot_idx].load(std::memory_order_relaxed);
+ uint32_t stored_index = pair.index;
+ ObjPtr<mirror::Class> klass = pair.object.Read();
+ if (klass == nullptr || i < stored_index) {
+ klass = class_linker->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader);
+ if (klass != nullptr) {
+ DCHECK_EQ(dex_cache->GetResolvedType(type_idx), klass);
+ stored_index = i; // For correct clearing below if not keeping the `klass`.
+ }
+ } else if (i == stored_index && !KeepClass(klass)) {
+ dex_cache->ClearResolvedType(dex::TypeIndex(stored_index));
+ }
+ }
+ // Strings do not need pruning, but the contents of the string array must be deterministic.
+ for (size_t i = 0, end = dex_cache->GetDexFile()->NumStringIds(); i < end; ++i) {
+ dex::StringIndex string_idx(i);
+ uint32_t slot_idx = dex_cache->StringSlotIndex(string_idx);
+ mirror::StringDexCachePair pair =
+ dex_cache->GetStrings()[slot_idx].load(std::memory_order_relaxed);
+ uint32_t stored_index = pair.index;
+ ObjPtr<mirror::String> string = pair.object.Read();
+ if (string == nullptr || i < stored_index) {
+ string = class_linker->LookupString(dex_file, string_idx, dex_cache);
+ DCHECK(string == nullptr || dex_cache->GetResolvedString(string_idx) == string);
+ }
+ }
+}
+
void ImageWriter::PruneNonImageClasses() {
Runtime* runtime = Runtime::Current();
ClassLinker* class_linker = runtime->GetClassLinker();
Thread* self = Thread::Current();
+ ScopedAssertNoThreadSuspension sa(__FUNCTION__);
// Clear class table strong roots so that dex caches can get pruned. We require pruning the class
// path dex caches.
class_linker->ClearClassTableStrongRoots();
// Remove the undesired classes from the class roots.
+ ObjPtr<mirror::ClassLoader> class_loader;
{
PruneClassLoaderClassesVisitor class_loader_visitor(this);
VisitClassLoaders(&class_loader_visitor);
VLOG(compiler) << "Pruned " << class_loader_visitor.GetRemovedClassCount() << " classes";
+ class_loader = class_loader_visitor.GetClassLoader();
+ DCHECK_EQ(class_loader != nullptr, compile_app_image_);
}
// Clear references to removed classes from the DexCaches.
- ArtMethod* resolution_method = runtime->GetResolutionMethod();
-
- ScopedAssertNoThreadSuspension sa(__FUNCTION__);
- ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); // For ClassInClassTable
- ReaderMutexLock mu2(self, *Locks::dex_lock_);
- for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
- if (self->IsJWeakCleared(data.weak_root)) {
- continue;
- }
- ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
- for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
- mirror::TypeDexCachePair pair =
- dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed);
- mirror::Class* klass = pair.object.Read();
- if (klass != nullptr && !KeepClass(klass)) {
- dex_cache->ClearResolvedType(dex::TypeIndex(pair.index));
- }
- }
- ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
- for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
- ArtMethod* method =
- mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
- DCHECK(method != nullptr) << "Expected resolution method instead of null method";
- mirror::Class* declaring_class = method->GetDeclaringClass();
- // Copied methods may be held live by a class which was not an image class but have a
- // declaring class which is an image class. Set it to the resolution method to be safe and
- // prevent dangling pointers.
- if (method->IsCopied() || !KeepClass(declaring_class)) {
- mirror::DexCache::SetElementPtrSize(resolved_methods,
- i,
- resolution_method,
- target_ptr_size_);
- } else {
- // Check that the class is still in the classes table.
- DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
- << Class::PrettyClass(declaring_class) << " not in class linker table";
- }
- }
- mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
- for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
- auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, i, target_ptr_size_);
- ArtField* field = pair.object;
- if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) {
- dex_cache->ClearResolvedField(pair.index, target_ptr_size_);
+ std::vector<ObjPtr<mirror::DexCache>> dex_caches;
+ {
+ ReaderMutexLock mu2(self, *Locks::dex_lock_);
+ dex_caches.reserve(class_linker->GetDexCachesData().size());
+ for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+ if (self->IsJWeakCleared(data.weak_root)) {
+ continue;
}
+ dex_caches.push_back(self->DecodeJObject(data.weak_root)->AsDexCache());
}
}
+ for (ObjPtr<mirror::DexCache> dex_cache : dex_caches) {
+ PruneAndPreloadDexCache(dex_cache, class_loader);
+ }
// Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
class_linker->DropFindArrayClassCache();
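The PruneAndPreloadDexCache hunk above enforces a "lowest index wins" rule for the hash-based DexCache arrays: processing ids in increasing order, a slot is (re)filled only when it is empty or holds a higher-index entry. A minimal sketch of that rule, assuming hypothetical Slot/MakeDeterministic/lookup names rather than the actual ART API:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Slot {
      uint32_t index = 0u;
      void* object = nullptr;  // Stand-in for a DexCachePair payload.
    };

    // lookup(i) models LookupResolvedType and friends: it only finds entries
    // that were already resolved, never triggering new resolution.
    template <typename Lookup>
    void MakeDeterministic(std::vector<Slot>& slots, size_t num_ids, Lookup lookup) {
      for (uint32_t i = 0; i < num_ids; ++i) {
        Slot& slot = slots[i % slots.size()];  // Hash is id modulo array size.
        if (slot.object != nullptr && i > slot.index) {
          continue;  // Slot already holds the lowest-index candidate.
        }
        if (void* obj = lookup(i)) {
          slot.index = i;  // Fill the empty slot or replace a higher index.
          slot.object = obj;
        }
        // The real code additionally clears entries whose class gets pruned.
      }
    }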
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index bdc7146632..16aff61dab 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -376,7 +376,7 @@ class ImageWriter FINAL {
}
// Returns true if the class was in the original requested image classes list.
- bool KeepClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+ bool KeepClass(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
// Debug aid that lists the requested image classes.
void DumpImageClasses();
@@ -391,6 +391,12 @@ class ImageWriter FINAL {
// Remove unwanted classes from various roots.
void PruneNonImageClasses() REQUIRES_SHARED(Locks::mutator_lock_);
+ // Remove unwanted classes from the DexCache roots and preload deterministic DexCache contents.
+ void PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+ ObjPtr<mirror::ClassLoader> class_loader)
+ REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(!Locks::classlinker_classes_lock_);
+
// Verify unwanted classes removed.
void CheckNonImageClassesRemoved() REQUIRES_SHARED(Locks::mutator_lock_);
static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
@@ -473,11 +479,11 @@ class ImageWriter FINAL {
// we also cannot have any classes which refer to these boot class loader non image classes.
// PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
// driver.
- bool PruneAppImageClass(mirror::Class* klass)
+ bool PruneAppImageClass(ObjPtr<mirror::Class> klass)
REQUIRES_SHARED(Locks::mutator_lock_);
// early_exit is true if we had a cyclic dependency anywhere down the chain.
- bool PruneAppImageClassInternal(mirror::Class* klass,
+ bool PruneAppImageClassInternal(ObjPtr<mirror::Class> klass,
bool* early_exit,
std::unordered_set<mirror::Class*>* visited)
REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 3ae7974038..ad951bcc3f 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -97,7 +97,6 @@ JitCompiler::JitCompiler() {
CompilerOptions::kDefaultSmallMethodThreshold,
CompilerOptions::kDefaultTinyMethodThreshold,
CompilerOptions::kDefaultNumDexMethodsThreshold,
- CompilerOptions::kDefaultInlineDepthLimit,
CompilerOptions::kDefaultInlineMaxCodeUnits,
/* no_inline_from */ nullptr,
CompilerOptions::kDefaultTopKProfileThreshold,
@@ -177,10 +176,6 @@ JitCompiler::JitCompiler() {
jit_logger_.reset(new JitLogger());
jit_logger_->OpenLog();
}
-
- size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
- DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max())
- << "ProfilingInfo's inline counter can potentially overflow";
}
JitCompiler::~JitCompiler() {
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 5406ae72d1..105db1d2d0 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1034,18 +1034,63 @@ class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor {
class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
public:
- InitImageMethodVisitor(OatWriter* writer, size_t offset)
+ InitImageMethodVisitor(OatWriter* writer,
+ size_t offset,
+ const std::vector<const DexFile*>* dex_files)
: OatDexMethodVisitor(writer, offset),
- pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) {
+ pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())),
+ dex_files_(dex_files),
+ class_linker_(Runtime::Current()->GetClassLinker()) {
+ }
+
+ // Handle copied methods here. Copy the pointer to quick code from
+ // an origin method to a copied method only if they are
+ // in the same oat file. If the origin and the copied methods are
+ // in different oat files, don't touch the copied method.
+ // References to other oat files are not supported yet.
+ bool StartClass(const DexFile* dex_file, size_t class_def_index)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ OatDexMethodVisitor::StartClass(dex_file, class_def_index);
+ // Skip classes that are not in the image.
+ if (!IsImageClass()) {
+ return true;
+ }
+ ScopedObjectAccessUnchecked soa(Thread::Current());
+ StackHandleScope<1> hs(soa.Self());
+ Handle<mirror::DexCache> dex_cache = hs.NewHandle(
+ class_linker_->FindDexCache(Thread::Current(), *dex_file));
+ const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+ mirror::Class* klass = dex_cache->GetResolvedType(class_def.class_idx_);
+ if (klass != nullptr) {
+ for (ArtMethod& method : klass->GetCopiedMethods(pointer_size_)) {
+ // Find the origin method. The declaring class and dex_method_idx
+ // in the copied method should be the same as in the origin
+ // method.
+ mirror::Class* declaring_class = method.GetDeclaringClass();
+ ArtMethod* origin = declaring_class->FindDeclaredVirtualMethod(
+ declaring_class->GetDexCache(),
+ method.GetDexMethodIndex(),
+ pointer_size_);
+ CHECK(origin != nullptr);
+ if (IsInOatFile(&declaring_class->GetDexFile())) {
+ const void* code_ptr =
+ origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+ if (code_ptr == nullptr) {
+ methods_to_process_.push_back(std::make_pair(&method, origin));
+ } else {
+ method.SetEntryPointFromQuickCompiledCodePtrSize(
+ code_ptr, pointer_size_);
+ }
+ }
+ }
+ }
+ return true;
}
bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
REQUIRES_SHARED(Locks::mutator_lock_) {
- const DexFile::TypeId& type_id =
- dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
- const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
// Skip methods that are not in the image.
- if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) {
+ if (!IsImageClass()) {
return true;
}
@@ -1059,17 +1104,16 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
++method_offsets_index_;
}
- ClassLinker* linker = Runtime::Current()->GetClassLinker();
// Unchecked as we hold mutator_lock_ on entry.
ScopedObjectAccessUnchecked soa(Thread::Current());
StackHandleScope<1> hs(soa.Self());
- Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
+ Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker_->FindDexCache(
Thread::Current(), *dex_file_)));
ArtMethod* method;
if (writer_->HasBootImage()) {
const InvokeType invoke_type = it.GetMethodInvokeType(
dex_file_->GetClassDef(class_def_index_));
- method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+ method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
*dex_file_,
it.GetMemberIndex(),
dex_cache,
@@ -1089,7 +1133,8 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
// Should already have been resolved by the compiler, just peek into the dex cache.
// It may not be resolved if the class failed to verify, in this case, don't set the
// entrypoint. This is not fatal since the dex cache will contain a resolution method.
- method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize());
+ method = dex_cache->GetResolvedMethod(it.GetMemberIndex(),
+ class_linker_->GetImagePointerSize());
}
if (method != nullptr &&
compiled_method != nullptr &&
@@ -1101,8 +1146,38 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
return true;
}
+ // Check whether the current class is an image class.
+ bool IsImageClass() {
+ const DexFile::TypeId& type_id =
+ dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
+ const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
+ return writer_->GetCompilerDriver()->IsImageClass(class_descriptor);
+ }
+
+ // Check whether the specified dex file is in the compiled oat file.
+ bool IsInOatFile(const DexFile* dex_file) {
+ return ContainsElement(*dex_files_, dex_file);
+ }
+
+ // Assign a pointer to quick code for copied methods
+ // that were not handled in StartClass().
+ void Postprocess() {
+ for (std::pair<ArtMethod*, ArtMethod*>& p : methods_to_process_) {
+ ArtMethod* method = p.first;
+ ArtMethod* origin = p.second;
+ const void* code_ptr =
+ origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+ if (code_ptr != nullptr) {
+ method->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size_);
+ }
+ }
+ }
+
protected:
const PointerSize pointer_size_;
+ const std::vector<const DexFile*>* dex_files_;
+ ClassLinker* const class_linker_;
+ std::vector<std::pair<ArtMethod*, ArtMethod*>> methods_to_process_;
};
class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
@@ -1365,12 +1440,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
ScopedObjectAccessUnchecked soa(Thread::Current());
- StackHandleScope<1> hs(soa.Self());
ClassLinker* linker = Runtime::Current()->GetClassLinker();
- Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile())));
mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
patch.TargetStringIndex(),
- dex_cache);
+ GetDexCache(patch.TargetStringDexFile()));
DCHECK(string != nullptr);
DCHECK(writer_->HasBootImage() ||
Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
@@ -1744,8 +1817,9 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) {
offset = code_visitor.GetOffset();
if (HasImage()) {
- InitImageMethodVisitor image_visitor(this, offset);
+ InitImageMethodVisitor image_visitor(this, offset, dex_files_);
success = VisitDexMethods(&image_visitor);
+ image_visitor.Postprocess();
DCHECK(success);
offset = image_visitor.GetOffset();
}
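The InitImageMethodVisitor changes above propagate quick-code entrypoints to copied methods in two phases: StartClass copies the pointer immediately when the origin method already has code, and Postprocess patches the deferred pairs once all methods have been visited. A hedged sketch of that scheme, using illustrative Method/PropagateEntrypoints names rather than the actual ART types:

    #include <vector>

    struct Method {
      const void* quick_code = nullptr;
      Method* origin = nullptr;  // The interface's default method.
    };

    // Phase 1 (per class): copy now if the origin is initialized; defer otherwise.
    void PropagateEntrypoints(const std::vector<Method*>& copied,
                              std::vector<Method*>* deferred) {
      for (Method* m : copied) {
        const void* code = m->origin->quick_code;
        if (code != nullptr) {
          m->quick_code = code;
        } else {
          deferred->push_back(m);  // Origin is visited later in this oat file.
        }
      }
    }

    // Phase 2 (after visiting everything): patch the deferred copies.
    void Postprocess(const std::vector<Method*>& deferred) {
      for (Method* m : deferred) {
        if (m->origin->quick_code != nullptr) {
          m->quick_code = m->origin->quick_code;
        }
      }
    }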
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e34f116b75..caea250ab6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1875,6 +1875,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src)
Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) {
DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+ DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
const HBasicBlock* const block = instruction->GetBlock();
const HLoopInformation* const info = block->GetLoopInformation();
@@ -2901,16 +2902,20 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
// Convert the jumps into the result.
Label done_label;
+ Label* final_label = codegen_->GetFinalLabel(cond, &done_label);
// False case: result = 0.
__ Bind(&false_label);
__ LoadImmediate(out, 0);
- __ b(&done_label);
+ __ b(final_label);
// True case: result = 1.
__ Bind(&true_label);
__ LoadImmediate(out, 1);
- __ Bind(&done_label);
+
+ if (done_label.IsLinked()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -4441,7 +4446,8 @@ void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations
// rotates by swapping input regs (effectively rotating by the first 32-bits of
// a larger rotation) or flipping direction (thus treating larger right/left
// rotations as sub-word sized rotations in the other direction) as appropriate.
-void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
Location rhs = locations->InAt(1);
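The comment above describes decomposing a 64-bit rotation into 32-bit register operations, swapping halves for rotations of 32 or more. A worked sketch of the rotate-right case in plain C++, independent of the ARM emitter:

    #include <cstdint>

    // Rotate the 64-bit value hi:lo right by n using only 32-bit operations.
    uint64_t RotateRight64(uint32_t lo, uint32_t hi, unsigned n) {
      n &= 63u;
      if (n >= 32u) {
        // Rotating by 32 swaps the halves; reduce to a sub-word rotation.
        uint32_t tmp = lo;
        lo = hi;
        hi = tmp;
        n -= 32u;
      }
      if (n != 0u) {
        // Each half receives the bits shifted out of the other half.
        uint32_t new_lo = (lo >> n) | (hi << (32u - n));
        uint32_t new_hi = (hi >> n) | (lo << (32u - n));
        lo = new_lo;
        hi = new_hi;
      }
      return (static_cast<uint64_t>(hi) << 32) | lo;
    }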
@@ -4474,6 +4480,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
Register shift_left = locations->GetTemp(1).AsRegister<Register>();
Label end;
Label shift_by_32_plus_shift_right;
+ Label* final_label = codegen_->GetFinalLabel(ror, &end);
__ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
__ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
@@ -4488,7 +4495,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
__ Lsl(out_reg_lo, in_reg_lo, shift_left);
__ Lsr(shift_left, in_reg_hi, shift_right);
__ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
- __ b(&end);
+ __ b(final_label);
__ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
// out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4500,7 +4507,9 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
__ Lsl(shift_right, in_reg_hi, shift_left);
__ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
}
}
@@ -4540,7 +4549,7 @@ void InstructionCodeGeneratorARM::VisitRor(HRor* ror) {
break;
}
case Primitive::kPrimLong: {
- HandleLongRotate(locations);
+ HandleLongRotate(ror);
break;
}
default:
@@ -4919,6 +4928,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
Label less, greater, done;
+ Label* final_label = codegen_->GetFinalLabel(compare, &done);
Primitive::Type type = compare->InputAt(0)->GetType();
Condition less_cond;
switch (type) {
@@ -4958,17 +4968,19 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ b(&done, EQ);
+ __ b(final_label, EQ);
__ b(&less, less_cond);
__ Bind(&greater);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
__ Bind(&less);
__ LoadImmediate(out, -1);
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
@@ -5746,6 +5758,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
if (maybe_compressed_char_at) {
Label uncompressed_load, done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -5754,13 +5767,15 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
out_loc.AsRegister<Register>(),
obj,
data_offset + const_index);
- __ b(&done);
+ __ b(final_label);
__ Bind(&uncompressed_load);
__ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
out_loc.AsRegister<Register>(),
obj,
data_offset + (const_index << 1));
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
} else {
uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
@@ -5784,17 +5799,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
}
if (maybe_compressed_char_at) {
Label uncompressed_load, done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
__ b(&uncompressed_load, CS);
__ ldrb(out_loc.AsRegister<Register>(),
Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
- __ b(&done);
+ __ b(final_label);
__ Bind(&uncompressed_load);
__ ldrh(out_loc.AsRegister<Register>(),
Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
} else {
codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
}
@@ -6019,6 +6037,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
Label done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
@@ -6040,7 +6059,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ b(&done);
+ __ b(final_label);
__ Bind(&non_zero);
}
@@ -7021,6 +7040,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
Label done, zero;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
@@ -7042,7 +7062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
// Classes must be equal for the instanceof to succeed.
__ b(&zero, NE);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
break;
}
@@ -7065,12 +7085,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done);
+ __ CompareAndBranchIfZero(out, final_label);
__ cmp(out, ShifterOperand(cls));
__ b(&loop, NE);
__ LoadImmediate(out, 1);
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7096,11 +7116,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
kCompilerReadBarrierOption);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
- __ b(&done);
+ __ b(final_label);
__ Bind(&success);
__ LoadImmediate(out, 1);
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7125,13 +7145,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done);
+ __ CompareAndBranchIfZero(out, final_label);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
__ CompareAndBranchIfNonZero(out, &zero);
__ Bind(&exact_check);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
break;
}
@@ -7152,7 +7172,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ b(slow_path->GetEntryLabel(), NE);
__ LoadImmediate(out, 1);
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7183,7 +7203,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7269,9 +7289,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
codegen_->AddSlowPath(type_check_slow_path);
Label done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &done);
+ __ CompareAndBranchIfZero(obj, final_label);
}
switch (type_check_kind) {
@@ -7335,7 +7356,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
Label loop;
__ Bind(&loop);
__ cmp(temp, ShifterOperand(cls));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -7363,7 +7384,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ cmp(temp, ShifterOperand(cls));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -7433,7 +7454,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
break;
}
}
- __ Bind(&done);
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
__ Bind(type_check_slow_path->GetExitLabel());
}
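A recurring pattern in the hunks above (mirrored in the VIXL file below): rather than unconditionally binding a local `done` label, the generator asks GetFinalLabel for a target, which may be an already-existing block-ending label, and binds `done` only if some branch actually used it. A simplified sketch with stand-in Label/Assembler types (the real heuristic also inspects loop structure):

    struct Label {
      bool linked = false;
      bool IsLinked() const { return linked; }
    };

    struct Assembler {
      void B(Label* target) { target->linked = true; }  // Record the branch use.
      void Bind(Label* /*target*/) {}                   // Emit the label here.
    };

    // Redirect "skip to the end" branches to the block's own end label when
    // one exists, eliding the local label and a redundant jump target.
    Label* GetFinalLabel(Label* block_end_label, Label* local_done) {
      return (block_end_label != nullptr) ? block_end_label : local_done;
    }

    void EmitWithMergedExit(Assembler& a, Label* block_end) {
      Label done;
      Label* final_label = GetFinalLabel(block_end, &done);
      a.B(final_label);        // All exits branch to the merged target.
      if (done.IsLinked()) {   // Bind the local label only if it was targeted.
        a.Bind(&done);
      }
    }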
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 5b15902ccd..59a7f7c048 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -237,7 +237,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleCondition(HCondition* condition);
void HandleIntegerRotate(LocationSummary* locations);
- void HandleLongRotate(LocationSummary* locations);
+ void HandleLongRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
void GenerateWideAtomicStore(Register addr, uint32_t offset,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d75779cef6..2d2d8109a3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1950,6 +1950,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src)
vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
vixl32::Label* final_label) {
DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+ DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
const HBasicBlock* const block = instruction->GetBlock();
const HLoopInformation* const info = block->GetLoopInformation();
@@ -2925,16 +2926,20 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
// Convert the jumps into the result.
vixl32::Label done_label;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label);
// False case: result = 0.
__ Bind(&false_label);
__ Mov(out, 0);
- __ B(&done_label);
+ __ B(final_label);
// True case: result = 1.
__ Bind(&true_label);
__ Mov(out, 1);
- __ Bind(&done_label);
+
+ if (done_label.IsReferenced()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -4447,6 +4452,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
vixl32::Label end;
vixl32::Label shift_by_32_plus_shift_right;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
__ And(shift_right, RegisterFrom(rhs), 0x1F);
__ Lsrs(shift_left, RegisterFrom(rhs), 6);
@@ -4461,7 +4467,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ Lsl(out_reg_lo, in_reg_lo, shift_left);
__ Lsr(shift_left, in_reg_hi, shift_right);
__ Add(out_reg_lo, out_reg_lo, shift_left);
- __ B(&end);
+ __ B(final_label);
__ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
// out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4473,7 +4479,9 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ Lsl(shift_right, in_reg_hi, shift_left);
__ Add(out_reg_lo, out_reg_lo, shift_right);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
}
}
@@ -4906,6 +4914,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
vixl32::Label less, greater, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
Primitive::Type type = compare->InputAt(0)->GetType();
vixl32::Condition less_cond = vixl32::Condition(kNone);
switch (type) {
@@ -4944,17 +4953,19 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
__ B(less_cond, &less, /* far_target */ false);
__ Bind(&greater);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
__ Bind(&less);
__ Mov(out, -1);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
@@ -5746,6 +5757,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
int32_t const_index = Int32ConstantFrom(index);
if (maybe_compressed_char_at) {
vixl32::Label uncompressed_load, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -5754,13 +5766,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
RegisterFrom(out_loc),
obj,
data_offset + const_index);
- __ B(&done);
+ __ B(final_label);
__ Bind(&uncompressed_load);
GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
RegisterFrom(out_loc),
obj,
data_offset + (const_index << 1));
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
} else {
uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
@@ -5785,15 +5799,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
}
if (maybe_compressed_char_at) {
vixl32::Label uncompressed_load, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
__ B(cs, &uncompressed_load, /* far_target */ false);
__ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
- __ B(&done);
+ __ B(final_label);
__ Bind(&uncompressed_load);
__ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
} else {
codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
}
@@ -6032,6 +6049,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
@@ -6054,7 +6072,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
// TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
// store instruction.
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ B(&done);
+ __ B(final_label);
__ Bind(&non_zero);
}
@@ -7062,6 +7080,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
vixl32::Label done, zero;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
// Return 0 if `obj` is null.
@@ -7083,7 +7102,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
// Classes must be equal for the instanceof to succeed.
__ B(ne, &zero, /* far_target */ false);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
break;
}
@@ -7106,12 +7125,12 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
__ Cmp(out, cls);
__ B(ne, &loop, /* far_target */ false);
__ Mov(out, 1);
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7137,11 +7156,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
kCompilerReadBarrierOption);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
- __ B(&done);
+ __ B(final_label);
__ Bind(&success);
__ Mov(out, 1);
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7166,13 +7185,13 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
__ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);
__ Bind(&exact_check);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
break;
}
@@ -7193,7 +7212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7224,7 +7243,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7310,9 +7329,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
codegen_->AddSlowPath(type_check_slow_path);
vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
}
switch (type_check_kind) {
@@ -7376,7 +7396,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
vixl32::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -7404,7 +7424,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ Cmp(temp, cls);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -7472,7 +7492,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
break;
}
}
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
__ Bind(type_check_slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b50619a66..4db4796985 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
}
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ if (GetGraph()->HasSIMD()) {
+ __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ }
return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ if (GetGraph()->HasSIMD()) {
+ __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ }
return GetFloatingPointSpillSlotSize();
}
@@ -5699,7 +5710,11 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5802,9 +5817,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
} else if (destination.IsStackSlot()) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot());
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
}
} else if (source.IsStackSlot()) {
if (destination.IsRegister()) {
@@ -5826,6 +5843,9 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
DCHECK(destination.IsDoubleStackSlot()) << destination;
MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
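
The slow-path and EmitMove changes above all hinge on one decision: once the graph contains SIMD values, XMM registers must be spilled at full 128-bit width (movups) rather than with the 64-bit movsd, and each spill slot doubles in size. A compilable sketch of that dispatch, with illustrative types (Graph and SpillOp are stand-ins, not ART API):

#include <cstddef>

enum class SpillOp { kMovsd, kMovups };  // 8-byte vs. 16-byte XMM store

struct Graph { bool has_simd; };

// Mirrors Save/RestoreFloatingPointRegister: full-width stores are required
// once any live value may occupy the upper lanes of an XMM register.
SpillOp ChooseFpSpillOp(const Graph& g) {
  return g.has_simd ? SpillOp::kMovups : SpillOp::kMovsd;
}

size_t FpSpillSlotSize(const Graph& g, size_t word_size) {
  return g.has_simd ? 4 * word_size    // 16 bytes on x86, where word_size == 4
                    : 2 * word_size;   // 8 bytes
}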
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 65ee383b54..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- // 8 bytes == 2 words for each spill.
- return 2 * kX86WordSize;
+ return GetGraph()->HasSIMD()
+ ? 4 * kX86WordSize // 16 bytes == 4 words for each spill
+ : 2 * kX86WordSize; // 8 bytes == 2 words for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 08f1adfcff..2ffc398287 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ }
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ }
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5152,7 +5163,11 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5241,6 +5256,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5291,10 +5310,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
} else if (destination.IsStackSlot()) {
__ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.AsFpuRegister<XmmRegister>());
}
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 376c3ce381..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- return kX86_64WordSize;
+ return GetGraph()->HasSIMD()
+ ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill
+ : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
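
Both headers encode the same 16-versus-8-byte slot, expressed in each architecture's word size. A quick arithmetic check (the word-size values are assumptions matching the usual ART definitions):

#include <cstddef>

constexpr size_t kX86WordSize = 4;     // 32-bit word
constexpr size_t kX86_64WordSize = 8;  // 64-bit word

static_assert(4 * kX86WordSize == 16 && 2 * kX86WordSize == 8,
              "x86: SIMD spill slots are 16 bytes, plain FP slots 8 bytes");
static_assert(2 * kX86_64WordSize == 16 && 1 * kX86_64WordSize == 8,
              "x86_64: the same byte sizes, in 8-byte words");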
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2bf5c53e17..0dfae11465 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -322,9 +322,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
codegen_.DumpCoreRegister(stream, location.high());
} else if (location.IsUnallocated()) {
stream << "unallocated";
- } else {
- DCHECK(location.IsDoubleStackSlot());
+ } else if (location.IsDoubleStackSlot()) {
stream << "2x" << location.GetStackIndex() << "(sp)";
+ } else {
+ DCHECK(location.IsSIMDStackSlot());
+ stream << "4x" << location.GetStackIndex() << "(sp)";
}
}
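
With this change the visualizer names a spilled location by its width in 4-byte stack-slot units; hypothetical dump fragments (illustrative only, not captured output):

  2x16(sp)   // double-width spill: 8 bytes at sp+16
  4x32(sp)   // SIMD spill: 16 bytes at sp+32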
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 62f5114e59..eda26f1127 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -46,32 +46,100 @@
namespace art {
-static constexpr size_t kMaximumNumberOfHInstructions = 32;
+// Limit on the total number of instructions, to control memory usage.
+static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
+
+// Maximum number of instructions for considering a method small,
+// which we will always try to inline if the other non-instruction limits
+// are not reached.
+static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
// Limit the number of dex registers that we accumulate while inlining
// to avoid creating a large number of nested environments.
static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
-// Avoid inlining within a huge method due to memory pressure.
-static constexpr size_t kMaximumCodeUnitSize = 4096;
+// Limit recursive call inlining, which does not benefit from too
+// much inlining compared to code locality.
+static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
// Controls the use of inline caches in AOT mode.
static constexpr bool kUseAOTInlineCaches = false;
-void HInliner::Run() {
- const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
- if ((compiler_options.GetInlineDepthLimit() == 0)
- || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
- return;
+// We check for line numbers to make sure the DepthString implementation
+// aligns the output nicely.
+#define LOG_INTERNAL(msg) \
+ static_assert(__LINE__ > 10, "Unhandled line number"); \
+ static_assert(__LINE__ < 10000, "Unhandled line number"); \
+ VLOG(compiler) << DepthString(__LINE__) << msg
+
+#define LOG_TRY() LOG_INTERNAL("Try inlining call: ")
+#define LOG_NOTE() LOG_INTERNAL("Note: ")
+#define LOG_SUCCESS() LOG_INTERNAL("Success: ")
+#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
+
+std::string HInliner::DepthString(int line) const {
+ std::string value;
+ // Indent according to the inlining depth.
+ size_t count = depth_;
+ // Line numbers get printed in the log, so pad with one space if the line number is
+ // less than 1000, and with two if it is less than 100. Line numbers below 10 cannot
+ // occur, as the copyright header occupies the first lines of this file.
+ if (!kIsTargetBuild) {
+ if (line < 100) {
+ value += " ";
+ }
+ if (line < 1000) {
+ value += " ";
+ }
+ // Safeguard if this file reaches more than 10000 lines.
+ DCHECK_LT(line, 10000);
}
- if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
- return;
+ for (size_t i = 0; i < count; ++i) {
+ value += " ";
+ }
+ return value;
+}
+
+static size_t CountNumberOfInstructions(HGraph* graph) {
+ size_t number_of_instructions = 0;
+ for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) {
+ for (HInstructionIterator instr_it(block->GetInstructions());
+ !instr_it.Done();
+ instr_it.Advance()) {
+ ++number_of_instructions;
+ }
}
+ return number_of_instructions;
+}
+
+void HInliner::UpdateInliningBudget() {
+ if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
+ // Always try to inline small methods.
+ inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod;
+ } else {
+ inlining_budget_ = std::max(
+ kMaximumNumberOfInstructionsForSmallMethod,
+ kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
+ }
+}
+
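
Worked through with the constants above: the budget is whatever remains of the 1024-instruction cap, floored at 3 so that tiny methods always stay inlinable. A standalone restatement with a few sample values (same constants, illustrative only):

#include <algorithm>
#include <cstddef>

constexpr size_t kMaxTotal = 1024;  // kMaximumNumberOfTotalInstructions
constexpr size_t kSmall = 3;        // kMaximumNumberOfInstructionsForSmallMethod

size_t InliningBudget(size_t total_instructions) {
  if (total_instructions >= kMaxTotal) return kSmall;
  return std::max(kSmall, kMaxTotal - total_instructions);
}
// InliningBudget(200) == 824; InliningBudget(1022) == 3 (max(3, 2));
// InliningBudget(5000) == 3.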
+void HInliner::Run() {
if (graph_->IsDebuggable()) {
// For simplicity, we currently never inline when the graph is debuggable. This avoids
// doing some logic in the runtime to discover if a method could have been inlined.
return;
}
+
+ // Initialize the number of instructions for the method being compiled. Recursive calls
+ // to HInliner::Run have already updated the instruction count.
+ if (outermost_graph_ == graph_) {
+ total_number_of_instructions_ = CountNumberOfInstructions(graph_);
+ }
+
+ UpdateInliningBudget();
+ DCHECK_NE(total_number_of_instructions_, 0u);
+ DCHECK_NE(inlining_budget_, 0u);
+
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
DCHECK(!blocks.empty());
@@ -305,17 +373,18 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
ScopedObjectAccess soa(Thread::Current());
uint32_t method_index = invoke_instruction->GetDexMethodIndex();
const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
- VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index);
+ LOG_TRY() << caller_dex_file.PrettyMethod(method_index);
- // We can query the dex cache directly. The verifier has populated it already.
ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
- ArtMethod* actual_method = nullptr;
if (resolved_method == nullptr) {
DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
- VLOG(compiler) << "Not inlining a String.<init> method";
+ LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method";
return false;
- } else if (invoke_instruction->IsInvokeStaticOrDirect()) {
+ }
+ ArtMethod* actual_method = nullptr;
+
+ if (invoke_instruction->IsInvokeStaticOrDirect()) {
actual_method = resolved_method;
} else {
// Check if we can statically find the method.
@@ -328,6 +397,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
if (method != nullptr) {
cha_devirtualize = true;
actual_method = method;
+ LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod();
}
}
@@ -390,16 +460,23 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
: GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
switch (inline_cache_type) {
- case kInlineCacheNoData:
- break;
+ case kInlineCacheNoData: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " could not be statically determined";
+ return false;
+ }
- case kInlineCacheUninitialized:
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
- << " is not hit and not inlined";
+ case kInlineCacheUninitialized: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is not hit and not inlined";
return false;
+ }
- case kInlineCacheMonomorphic:
+ case kInlineCacheMonomorphic: {
MaybeRecordStat(kMonomorphicCall);
if (outermost_graph_->IsCompilingOsr()) {
// If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
@@ -408,23 +485,29 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
} else {
return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
}
+ }
- case kInlineCachePolymorphic:
+ case kInlineCachePolymorphic: {
MaybeRecordStat(kPolymorphicCall);
return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+ }
- case kInlineCacheMegamorphic:
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
- << " is megamorphic and not inlined";
+ case kInlineCacheMegamorphic: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is megamorphic and not inlined";
MaybeRecordStat(kMegamorphicCall);
return false;
+ }
- case kInlineCacheMissingTypes:
- VLOG(compiler) << "Interface or virtual call to "
- << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
- << " is missing types and not inlined";
+ case kInlineCacheMissingTypes: {
+ LOG_FAIL_NO_STAT()
+ << "Interface or virtual call to "
+ << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+ << " is missing types and not inlined";
return false;
+ }
}
UNREACHABLE();
}
@@ -587,9 +670,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
dex::TypeIndex class_index = FindClassIndexIn(
GetMonomorphicType(classes), caller_compilation_unit_);
if (!class_index.IsValid()) {
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because its class is not"
- << " accessible to the caller";
+ LOG_FAIL(kNotInlinedDexCache)
+ << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " from inline cache is not inlined because its class is not"
+ << " accessible to the caller";
return false;
}
@@ -603,6 +687,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
resolved_method, pointer_size);
}
+ LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
DCHECK(resolved_method != nullptr);
HInstruction* receiver = invoke_instruction->InputAt(0);
HInstruction* cursor = invoke_instruction->GetPrevious();
@@ -752,6 +837,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
HInstruction* return_replacement = nullptr;
+ LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod();
if (!class_index.IsValid() ||
!TryBuildAndInline(invoke_instruction,
method,
@@ -761,8 +847,8 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
} else {
one_target_inlined = true;
- VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
- << " has inlined " << ArtMethod::PrettyMethod(method);
+ LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " has inlined " << ArtMethod::PrettyMethod(method);
// If we have inlined all targets before, and this receiver is the last seen,
// we deoptimize instead of keeping the original invoke instruction.
@@ -796,9 +882,10 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
if (!one_target_inlined) {
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because none"
- << " of its targets could be inlined";
+ LOG_FAIL_NO_STAT()
+ << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+ << " from inline cache is not inlined because none"
+ << " of its targets could be inlined";
return false;
}
@@ -932,9 +1019,6 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
actual_method = new_method;
} else if (actual_method != new_method) {
// Different methods, bailout.
- VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
- << " from inline cache is not inlined because it resolves"
- << " to different methods";
return false;
}
}
@@ -1007,6 +1091,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
MaybeRecordStat(kInlinedPolymorphicCall);
+ LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
return true;
}
@@ -1076,13 +1161,34 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
return true;
}
+size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
+ const HInliner* current = this;
+ size_t count = 0;
+ do {
+ if (current->graph_->GetArtMethod() == method) {
+ ++count;
+ }
+ current = current->parent_;
+ } while (current != nullptr);
+ return count;
+}
+
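CountRecursiveCallsOf walks the chain of parent_ inliners and counts how many enclosing inlining frames are already compiling `method`; with kMaximumNumberOfRecursiveCalls == 4, a self-recursive method is inlined into itself at most four levels deep. A standalone model of the walk (Frame is an illustrative stand-in for the HInliner/HGraph pair):

#include <cstddef>

struct Frame {
  const void* method;   // stands in for graph_->GetArtMethod()
  const Frame* parent;  // stands in for parent_
};

size_t CountRecursiveCallsOf(const Frame* frame, const void* method) {
  size_t count = 0;
  for (const Frame* cur = frame; cur != nullptr; cur = cur->parent) {
    if (cur->method == method) ++count;
  }
  return count;
}
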
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
HInstruction** return_replacement) {
if (method->IsProxyMethod()) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because of unimplemented inline support for proxy methods.";
+ LOG_FAIL(kNotInlinedProxy)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because of unimplemented inline support for proxy methods.";
+ return false;
+ }
+
+ if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
+ LOG_FAIL(kNotInlinedRecursiveBudget)
+ << "Method "
+ << method->PrettyMethod()
+ << " is not inlined because it has reached its recursive call budget.";
return false;
}
@@ -1091,15 +1197,16 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (!compiler_driver_->MayInline(method->GetDexFile(),
outer_compilation_unit_.GetDexFile())) {
if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
- VLOG(compiler) << "Successfully replaced pattern of invoke "
- << method->PrettyMethod();
+ LOG_SUCCESS() << "Successfully replaced pattern of invoke "
+ << method->PrettyMethod();
MaybeRecordStat(kReplacedInvokeWithSimplePattern);
return true;
}
- VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in "
- << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
- << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
- << method->GetDexFile()->GetLocation();
+ LOG_FAIL(kNotInlinedWont)
+ << "Won't inline " << method->PrettyMethod() << " in "
+ << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+ << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+ << method->GetDexFile()->GetLocation();
return false;
}
@@ -1108,30 +1215,32 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
const DexFile::CodeItem* code_item = method->GetCodeItem();
if (code_item == nullptr) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because it is native";
+ LOG_FAIL_NO_STAT()
+ << "Method " << method->PrettyMethod() << " is not inlined because it is native";
return false;
}
size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is too big to inline: "
- << code_item->insns_size_in_code_units_
- << " > "
- << inline_max_code_units;
+ LOG_FAIL(kNotInlinedCodeItem)
+ << "Method " << method->PrettyMethod()
+ << " is not inlined because its code item is too big: "
+ << code_item->insns_size_in_code_units_
+ << " > "
+ << inline_max_code_units;
return false;
}
if (code_item->tries_size_ != 0) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because of try block";
+ LOG_FAIL(kNotInlinedTryCatch)
+ << "Method " << method->PrettyMethod() << " is not inlined because of try block";
return false;
}
if (!method->IsCompilable()) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " has soft failures un-handled by the compiler, so it cannot be inlined";
+ LOG_FAIL(kNotInlinedNotVerified)
+ << "Method " << method->PrettyMethod()
+ << " has soft failures un-handled by the compiler, so it cannot be inlined";
}
if (!method->GetDeclaringClass()->IsVerified()) {
@@ -1139,8 +1248,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
if (Runtime::Current()->UseJitCompilation() ||
!compiler_driver_->IsMethodVerifiedWithoutFailures(
method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " couldn't be verified, so it cannot be inlined";
+ LOG_FAIL(kNotInlinedNotVerified)
+ << "Method " << method->PrettyMethod()
+ << " couldn't be verified, so it cannot be inlined";
return false;
}
}
@@ -1149,9 +1259,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
// Case of a static method that cannot be inlined because it implicitly
// requires an initialization check of its declaring class.
- VLOG(compiler) << "Method " << method->PrettyMethod()
- << " is not inlined because it is static and requires a clinit"
- << " check that cannot be emitted due to Dex cache limitations";
+ LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod()
+ << " is not inlined because it is static and requires a clinit"
+ << " check that cannot be emitted due to Dex cache limitations";
return false;
}
@@ -1160,7 +1270,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- VLOG(compiler) << "Successfully inlined " << method->PrettyMethod();
+ LOG_SUCCESS() << method->PrettyMethod();
MaybeRecordStat(kInlinedInvoke);
return true;
}
@@ -1448,15 +1558,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
handles_);
if (builder.BuildGraph() != kAnalysisSuccess) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be built, so cannot be inlined";
+ LOG_FAIL(kNotInlinedCannotBuild)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be built, so cannot be inlined";
return false;
}
if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
compiler_driver_->GetInstructionSet())) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " cannot be inlined because of the register allocator";
+ LOG_FAIL(kNotInlinedRegisterAllocator)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " cannot be inlined because of the register allocator";
return false;
}
@@ -1503,15 +1615,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
/* is_first_run */ false).Run();
}
- size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
- size_t number_of_inlined_instructions =
- RunOptimizations(callee_graph, code_item, dex_compilation_unit);
- number_of_instructions_budget += number_of_inlined_instructions;
+ RunOptimizations(callee_graph, code_item, dex_compilation_unit);
HBasicBlock* exit_block = callee_graph->GetExitBlock();
if (exit_block == nullptr) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it has an infinite loop";
+ LOG_FAIL(kNotInlinedInfiniteLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it has an infinite loop";
return false;
}
@@ -1520,15 +1630,17 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
if (predecessor->GetLastInstruction()->IsThrow()) {
if (invoke_instruction->GetBlock()->IsTryBlock()) {
// TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because one branch always throws and"
- << " caller is in a try/catch block";
+ LOG_FAIL(kNotInlinedTryCatch)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because one branch always throws and"
+ << " caller is in a try/catch block";
return false;
} else if (graph_->GetExitBlock() == nullptr) {
// TODO(ngeoffray): Support adding HExit in the caller graph.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because one branch always throws and"
- << " caller does not have an exit block";
+ LOG_FAIL(kNotInlinedInfiniteLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because one branch always throws and"
+ << " caller does not have an exit block";
return false;
} else if (graph_->HasIrreducibleLoops()) {
// TODO(ngeoffray): Support re-computing loop information to graphs with
@@ -1544,32 +1656,31 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
}
if (!has_one_return) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it always throws";
+ LOG_FAIL(kNotInlinedAlwaysThrows)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it always throws";
return false;
}
size_t number_of_instructions = 0;
-
- bool can_inline_environment =
- total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
-
// Skip the entry block, it does not contain instructions that prevent inlining.
for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
if (block->IsLoopHeader()) {
if (block->GetLoopInformation()->IsIrreducible()) {
// Don't inline methods with irreducible loops, they could prevent some
// optimizations to run.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it contains an irreducible loop";
+ LOG_FAIL(kNotInlinedIrreducibleLoop)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it contains an irreducible loop";
return false;
}
if (!block->GetLoopInformation()->HasExitEdge()) {
// Don't inline methods with loops without exit, since they cause the
// loop information to be computed incorrectly when updating after
// inlining.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it contains a loop with no exit";
+ LOG_FAIL(kNotInlinedLoopWithoutExit)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it contains a loop with no exit";
return false;
}
}
@@ -1577,34 +1688,39 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
for (HInstructionIterator instr_it(block->GetInstructions());
!instr_it.Done();
instr_it.Advance()) {
- if (number_of_instructions++ == number_of_instructions_budget) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " is not inlined because its caller has reached"
- << " its instruction budget limit.";
+ if (++number_of_instructions >= inlining_budget_) {
+ LOG_FAIL(kNotInlinedInstructionBudget)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " is not inlined because the outer method has reached"
+ << " its instruction budget limit.";
return false;
}
HInstruction* current = instr_it.Current();
- if (!can_inline_environment && current->NeedsEnvironment()) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " is not inlined because its caller has reached"
- << " its environment budget limit.";
+ if (current->NeedsEnvironment() &&
+ (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
+ LOG_FAIL(kNotInlinedEnvironmentBudget)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " is not inlined because its caller has reached"
+ << " its environment budget limit.";
return false;
}
if (current->NeedsEnvironment() &&
!CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
resolved_method)) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because " << current->DebugName()
- << " needs an environment, is in a different dex file"
- << ", and cannot be encoded in the stack maps.";
+ LOG_FAIL(kNotInlinedStackMaps)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because " << current->DebugName()
+ << " needs an environment, is in a different dex file"
+ << ", and cannot be encoded in the stack maps.";
return false;
}
if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because " << current->DebugName()
- << " it is in a different dex file and requires access to the dex cache";
+ LOG_FAIL(kNotInlinedDexCache)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because " << current->DebugName()
+ << " it is in a different dex file and requires access to the dex cache";
return false;
}
@@ -1613,21 +1729,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
current->IsUnresolvedStaticFieldSet() ||
current->IsUnresolvedInstanceFieldSet()) {
// Entrypoint for unresolved fields does not handle inlined frames.
- VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
- << " could not be inlined because it is using an unresolved"
- << " entrypoint";
+ LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+ << "Method " << callee_dex_file.PrettyMethod(method_index)
+ << " could not be inlined because it is using an unresolved"
+ << " entrypoint";
return false;
}
}
}
- number_of_inlined_instructions_ += number_of_instructions;
-
DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
<< "No instructions can be added to the outer graph while inner graph is being built";
+ // Inline the callee graph into the caller graph.
const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId();
graph_->SetCurrentInstructionId(callee_instruction_counter);
*return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+ // Update our budget for other inlining attempts in the caller graph.
+ total_number_of_instructions_ += number_of_instructions;
+ UpdateInliningBudget();
DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
<< "No instructions can be added to the inner graph during inlining into the outer graph";
@@ -1640,9 +1759,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
return true;
}
-size_t HInliner::RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit) {
+void HInliner::RunOptimizations(HGraph* callee_graph,
+ const DexFile::CodeItem* code_item,
+ const DexCompilationUnit& dex_compilation_unit) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
@@ -1664,23 +1783,37 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
optimization->Run();
}
- size_t number_of_inlined_instructions = 0u;
- if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
- HInliner inliner(callee_graph,
- outermost_graph_,
- codegen_,
- outer_compilation_unit_,
- dex_compilation_unit,
- compiler_driver_,
- handles_,
- inline_stats_,
- total_number_of_dex_registers_ + code_item->registers_size_,
- depth_ + 1);
- inliner.Run();
- number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
+ // Bail early in pathological cases affecting the environment (for example recursive
+ // calls, or an already too-large accumulated environment).
+ if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) {
+ LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+ << " will not be inlined because the outer method has reached"
+ << " its environment budget limit.";
+ return;
+ }
+
+ // Bail early if we know we already are over the limit.
+ size_t number_of_instructions = CountNumberOfInstructions(callee_graph);
+ if (number_of_instructions > inlining_budget_) {
+ LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+ << " will not be inlined because the outer method has reached"
+ << " its instruction budget limit. " << number_of_instructions;
+ return;
}
- return number_of_inlined_instructions;
+ HInliner inliner(callee_graph,
+ outermost_graph_,
+ codegen_,
+ outer_compilation_unit_,
+ dex_compilation_unit,
+ compiler_driver_,
+ handles_,
+ inline_stats_,
+ total_number_of_dex_registers_ + code_item->registers_size_,
+ total_number_of_instructions_ + number_of_instructions,
+ this,
+ depth_ + 1);
+ inliner.Run();
}
static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
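
Instead of returning instruction counts back up the call chain, the recursion now hands accumulated totals down to the child inliner, so budgets are global to the outermost compilation. A simplified model of the state passed to each nested level (illustrative types, not the ART constructor signature):

#include <cstddef>

struct InlinerState {
  size_t total_dex_registers;
  size_t total_instructions;
  const InlinerState* parent;
  size_t depth;
};

// Mirrors the HInliner construction above: every budget-relevant counter in
// the child starts from the parent's running totals.
InlinerState MakeChild(const InlinerState& parent,
                       size_t callee_dex_registers,
                       size_t callee_instructions) {
  return InlinerState{parent.total_dex_registers + callee_dex_registers,
                      parent.total_instructions + callee_instructions,
                      &parent,
                      parent.depth + 1};
}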
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a032042c78..9e4685cbf4 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -42,7 +42,9 @@ class HInliner : public HOptimization {
VariableSizedHandleScope* handles,
OptimizingCompilerStats* stats,
size_t total_number_of_dex_registers,
- size_t depth)
+ size_t total_number_of_instructions,
+ HInliner* parent,
+ size_t depth = 0)
: HOptimization(outer_graph, kInlinerPassName, stats),
outermost_graph_(outermost_graph),
outer_compilation_unit_(outer_compilation_unit),
@@ -50,8 +52,10 @@ class HInliner : public HOptimization {
codegen_(codegen),
compiler_driver_(compiler_driver),
total_number_of_dex_registers_(total_number_of_dex_registers),
+ total_number_of_instructions_(total_number_of_instructions),
+ parent_(parent),
depth_(depth),
- number_of_inlined_instructions_(0),
+ inlining_budget_(0),
handles_(handles),
inline_stats_(nullptr) {}
@@ -95,10 +99,10 @@ class HInliner : public HOptimization {
HInstruction** return_replacement);
// Run simple optimizations on `callee_graph`.
- // Returns the number of inlined instructions.
- size_t RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
- const DexCompilationUnit& dex_compilation_unit);
+ void RunOptimizations(HGraph* callee_graph,
+ const DexFile::CodeItem* code_item,
+ const DexCompilationUnit& dex_compilation_unit)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Try to recognize known simple patterns and replace invoke call with appropriate instructions.
bool TryPatternSubstitution(HInvoke* invoke_instruction,
@@ -259,14 +263,30 @@ class HInliner : public HOptimization {
HInstruction* return_replacement,
HInstruction* invoke_instruction);
+ // Update the inlining budget based on `total_number_of_instructions_`.
+ void UpdateInliningBudget();
+
+ // Count the number of calls of `method` being inlined recursively.
+ size_t CountRecursiveCallsOf(ArtMethod* method) const;
+
+ // Pretty-print the inlining depth as leading spaces during logging.
+ std::string DepthString(int line) const;
+
HGraph* const outermost_graph_;
const DexCompilationUnit& outer_compilation_unit_;
const DexCompilationUnit& caller_compilation_unit_;
CodeGenerator* const codegen_;
CompilerDriver* const compiler_driver_;
const size_t total_number_of_dex_registers_;
+ size_t total_number_of_instructions_;
+
+ // The 'parent' inliner, that is, the inlining optimization that requested
+ // `graph_` to be inlined.
+ const HInliner* const parent_;
const size_t depth_;
- size_t number_of_inlined_instructions_;
+
+ // The budget left for inlining, in number of instructions.
+ size_t inlining_budget_;
VariableSizedHandleScope* const handles_;
// Used to record stats about optimizations on the inlined graph.
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 98b80f5d3c..1006a776f0 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -270,9 +270,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmAssembler* assembler) {
+ CodeGeneratorARM* codegen) {
+ ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Location in = locations->InAt(0);
Register out = locations->Out().AsRegister<Register>();
@@ -282,11 +284,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations,
Register in_reg_lo = in.AsRegisterPairLow<Register>();
Register in_reg_hi = in.AsRegisterPairHigh<Register>();
Label end;
+ Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, &end);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
__ clz(out, in_reg_lo);
__ AddConstant(out, 32);
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
} else {
__ clz(out, in.AsRegister<Register>());
}
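
For the long case, the emitted sequence computes the leading-zero count of a 64-bit value held as a register pair: the high word decides whenever it is non-zero, otherwise the count continues into the low word. The same logic in portable C++ (note that ARM's CLZ of zero yields 32, so the emitted code needs no zero guard, whereas __builtin_clz(0) is undefined and is guarded here):

#include <cstdint>

int NumberOfLeadingZeros64(uint32_t hi, uint32_t lo) {
  if (hi != 0) return __builtin_clz(hi);         // clz(out, in_reg_hi)
  return lo != 0 ? 32 + __builtin_clz(lo) : 64;  // clz(out, in_reg_lo); out += 32
}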
@@ -297,7 +302,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* inv
}
void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -309,27 +314,32 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke
}
void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmAssembler* assembler) {
+ CodeGeneratorARM* codegen) {
DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+ ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Register out = locations->Out().AsRegister<Register>();
if (type == Primitive::kPrimLong) {
Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
Label end;
+ Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ rbit(out, in_reg_lo);
__ clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
__ rbit(out, in_reg_hi);
__ clz(out, out);
__ AddConstant(out, 32);
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
} else {
Register in = locations->InAt(0).AsRegister<Register>();
__ rbit(out, in);
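
The trailing-zero intrinsic uses the classic rbit+clz idiom: reversing the bit order turns trailing zeros into leading zeros, and CLZ-of-zero being 32 lets the register-pair case chain without guards. A portable sketch of the 32-bit case (software bit reversal stands in for the RBIT instruction):

#include <cstdint>

uint32_t ReverseBits(uint32_t x) {
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);  // swap adjacent bits
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);  // swap bit pairs
  x = ((x & 0x0F0F0F0Fu) << 4) | ((x >> 4) & 0x0F0F0F0Fu);  // swap nibbles
  x = (x << 24) | ((x & 0xFF00u) << 8) |                    // swap bytes
      ((x >> 8) & 0xFF00u) | (x >> 24);
  return x;
}

int NumberOfTrailingZeros32(uint32_t x) {
  return x != 0 ? __builtin_clz(ReverseBits(x)) : 32;  // rbit; clz
}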
@@ -346,7 +356,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -358,7 +368,7 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invok
}
void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}
static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
@@ -1355,6 +1365,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
Label end;
Label return_true;
Label return_false;
+ Label* final_label = codegen_->GetFinalLabel(invoke, &end);
// Get offsets of count, value, and class fields within a string object.
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1428,12 +1439,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
// If loop does not result in returning false, we return true.
__ Bind(&return_true);
__ LoadImmediate(out, 1);
- __ b(&end);
+ __ b(final_label);
// Return false and exit the function.
__ Bind(&return_false);
__ LoadImmediate(out, 0);
- __ Bind(&end);
+
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2491,13 +2505,14 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
Label done, compressed_string_loop;
+ Label* final_label = codegen_->GetFinalLabel(invoke, &done);
// dst to be copied.
__ add(dst_ptr, dstObj, ShifterOperand(data_offset));
__ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
__ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
// Early out for valid zero-length retrievals.
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// src range to copy.
__ add(src_ptr, srcObj, ShifterOperand(value_offset));
@@ -2534,7 +2549,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&loop, GE);
__ adds(num_chr, num_chr, ShifterOperand(4));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2545,7 +2560,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&remainder, GT);
if (mirror::kUseStringCompression) {
- __ b(&done);
+ __ b(final_label);
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
@@ -2559,7 +2574,9 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&compressed_string_loop, GT);
}
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 19ff49c6ce..b25bad7170 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -333,9 +333,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmVIXLAssembler* assembler) {
+ CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Location in = locations->InAt(0);
vixl32::Register out = RegisterFrom(locations->Out());
@@ -345,11 +347,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations,
vixl32::Register in_reg_lo = LowRegisterFrom(in);
vixl32::Register in_reg_hi = HighRegisterFrom(in);
vixl32::Label end;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
__ Clz(out, in_reg_lo);
__ Add(out, out, 32);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
} else {
__ Clz(out, RegisterFrom(in));
}
@@ -360,7 +365,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke*
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -372,27 +377,32 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmVIXLAssembler* assembler) {
+ CodeGeneratorARMVIXL* codegen) {
DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
vixl32::Register out = RegisterFrom(locations->Out());
if (type == Primitive::kPrimLong) {
vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
vixl32::Label end;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Rbit(out, in_reg_lo);
__ Clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
__ Rbit(out, in_reg_hi);
__ Clz(out, out);
__ Add(out, out, 32);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
} else {
vixl32::Register in = RegisterFrom(locations->InAt(0));
__ Rbit(out, in);
@@ -409,7 +419,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -421,7 +431,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* i
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}
static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
@@ -502,7 +512,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location op1_loc = invoke->GetLocations()->InAt(0);
Location op2_loc = invoke->GetLocations()->InAt(1);
Location out_loc = invoke->GetLocations()->Out();
@@ -520,6 +531,7 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
const vixl32::Register temp1 = temps.Acquire();
vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
DCHECK(op1.Is(out));
@@ -536,7 +548,8 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
__ it(cond);
__ vmov(cond, F32, out, op2);
}
- __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
// handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
__ Vmov(temp1, op1);
@@ -547,14 +560,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
__ And(temp1, temp1, temp2);
}
__ Vmov(out, temp1);
- __ B(&done);
+ __ B(final_label);
// handle NaN input.
__ Bind(&nan);
__ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
__ Vmov(out, temp1);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -572,7 +587,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler());
+ GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -581,10 +596,11 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler());
+ GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location op1_loc = invoke->GetLocations()->InAt(0);
Location op2_loc = invoke->GetLocations()->InAt(1);
Location out_loc = invoke->GetLocations()->Out();
@@ -599,6 +615,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse
vixl32::DRegister op2 = DRegisterFrom(op2_loc);
vixl32::DRegister out = OutputDRegister(invoke);
vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
DCHECK(op1.Is(out));
@@ -615,19 +632,22 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse
__ it(cond);
__ vmov(cond, F64, out, op2);
}
- __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+ // for <>(not equal), we've done min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
// handle op1 == op2, max(+0.0,-0.0).
if (!is_min) {
__ Vand(F64, out, op1, op2);
- __ B(&done);
+ __ B(final_label);
}
// handle op1 == op2, min(+0.0,-0.0), NaN input.
__ Bind(&handle_nan_eq);
__ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
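
The equal-operands path in both the float and double variants relies on a bit trick: for bitwise-equal inputs AND and OR are identities, while for +0.0 versus -0.0 they select the correctly signed zero (AND clears the sign bit, giving +0.0 for max; OR sets it, giving -0.0 for min). NaN inputs are handled separately: the float variant materializes a canonical NaN, the double variant Vorrs the inputs. A bit-level model of the float case (illustrative, assuming IEEE-754 binary32):

#include <cstdint>
#include <cstring>

float MinMaxOfEqualBits(float a, float b, bool is_min) {
  uint32_t ua, ub;
  std::memcpy(&ua, &a, sizeof(ua));
  std::memcpy(&ub, &b, sizeof(ub));
  // Vorr keeps the sign bit (min of +/-0.0 is -0.0); Vand clears it unless
  // both inputs are -0.0 (max of +/-0.0 is +0.0).
  uint32_t r = is_min ? (ua | ub) : (ua & ub);
  float out;
  std::memcpy(&out, &r, sizeof(out));
  return out;
}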
void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -635,7 +655,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler());
+ GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -643,7 +663,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler());
+ GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}
static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
@@ -1670,6 +1690,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
vixl32::Label end;
vixl32::Label return_true;
vixl32::Label return_false;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
// Get offsets of count, value, and class fields within a string object.
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1746,12 +1767,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
// If loop does not result in returning false, we return true.
__ Bind(&return_true);
__ Mov(out, 1);
- __ B(&end);
+ __ B(final_label);
// Return false and exit the function.
__ Bind(&return_false);
__ Mov(out, 0);
- __ Bind(&end);
+
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2789,13 +2813,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
vixl32::Label done, compressed_string_loop;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
// dst to be copied.
__ Add(dst_ptr, dstObj, data_offset);
__ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
__ Subs(num_chr, srcEnd, srcBegin);
// Early out for valid zero-length retrievals.
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// src range to copy.
__ Add(src_ptr, srcObj, value_offset);
@@ -2839,7 +2864,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(ge, &loop, /* far_target */ false);
__ Adds(num_chr, num_chr, 4);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2852,7 +2877,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(gt, &remainder, /* far_target */ false);
if (mirror::kUseStringCompression) {
- __ B(&done);
+ __ B(final_label);
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
@@ -2868,7 +2893,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(gt, &compressed_string_loop, /* far_target */ false);
}
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
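Each hunk above follows the same exit-merging pattern: the intrinsic asks the code generator for a "final" label via GetFinalLabel(), branches there instead of to its locally bound `done` label, and binds `done` only if some branch actually targeted it. A minimal sketch of the pattern, assuming GetFinalLabel() returns either `&done` or a label the code generator binds at the block end (which is what lets a trailing branch be elided):

    vixl32::Label done;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
    // ... fast-path code ...
    __ B(final_label);           // was: __ B(&done)
    // ... remaining cases ...
    if (done.IsReferenced()) {
      // Bind only if a branch targeted it; a VIXL label must not be
      // destroyed while referenced but unbound.
      __ Bind(&done);
    }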
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index ba006edfa2..bf85b1989e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2559,7 +2559,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -2567,17 +2567,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke)
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // We will call memcpy() to do the actual work. Allocate the temporary
- // registers to use the correct input registers, and output register.
- // memcpy() uses the normal MIPS calling convention.
- InvokeRuntimeCallingConvention calling_convention;
-
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
- Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
- locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -2596,16 +2588,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register dstBegin = locations->InAt(4).AsRegister<Register>();
Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
- DCHECK_EQ(dstPtr, A0);
Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
- DCHECK_EQ(srcPtr, A1);
Register numChrs = locations->GetTemp(2).AsRegister<Register>();
- DCHECK_EQ(numChrs, A2);
-
- Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
- DCHECK_EQ(dstReturn, V0);
MipsLabel done;
+ MipsLabel loop;
// Location of data in char array buffer.
const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -2634,7 +2621,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
__ Sll(TMP, TMP, 31);
- // If string is uncompressed, use memcpy() path.
+ // If the string is uncompressed, take the uncompressed copy path.
__ Bnez(TMP, &uncompressed_copy);
// Copy loop for compressed src, expanding one 8-bit character to 16 bits at a time.
@@ -2660,10 +2647,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Addu(srcPtr, srcPtr, AT);
}
- // Calculate number of bytes to copy from number of characters.
- __ Sll(numChrs, numChrs, char_shift);
-
- codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+ __ Bind(&loop);
+ __ Lh(AT, srcPtr, 0);
+ __ Addiu(numChrs, numChrs, -1);
+ __ Addiu(srcPtr, srcPtr, char_size);
+ __ Sh(AT, dstPtr, 0);
+ __ Addiu(dstPtr, dstPtr, char_size);
+ __ Bnez(numChrs, &loop);
__ Bind(&done);
}
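This hunk (and the identical MIPS64 hunk below) replaces the kQuickMemcpy runtime call with an inline halfword copy, so the intrinsic no longer needs fixed argument registers or a call on the main path. In C terms the emitted loop is roughly the following, assuming numChrs != 0 on entry, which the surrounding bounds computation is expected to guarantee:

    // C-level sketch of the emitted do-while loop: one 16-bit Java char
    // per iteration; names follow the registers above.
    uint16_t* src = src_ptr;
    uint16_t* dst = dst_ptr;
    int32_t n = num_chrs;
    do {
      *dst++ = *src++;
    } while (--n != 0);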
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 21c5074a1c..1ee89cf127 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1895,7 +1895,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -1903,17 +1903,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // We will call memcpy() to do the actual work. Allocate the temporary
- // registers to use the correct input registers, and output register.
- // memcpy() uses the normal MIPS calling conventions.
- InvokeRuntimeCallingConvention calling_convention;
-
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
- Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong);
- locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1932,16 +1924,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>();
GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>();
- DCHECK_EQ(dstPtr, A0);
GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>();
- DCHECK_EQ(srcPtr, A1);
GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>();
- DCHECK_EQ(numChrs, A2);
-
- GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>();
- DCHECK_EQ(dstReturn, V0);
Mips64Label done;
+ Mips64Label loop;
// Location of data in char array buffer.
const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -1965,7 +1952,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
__ Dext(TMP, TMP, 0, 1);
- // If string is uncompressed, use memcpy() path.
+ // If the string is uncompressed, take the uncompressed copy path.
__ Bnezc(TMP, &uncompressed_copy);
// Copy loop for compressed src, expanding one 8-bit character to 16 bits at a time.
@@ -1986,10 +1973,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Daddiu(srcPtr, srcObj, value_offset);
__ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
- // Calculate number of bytes to copy from number of characters.
- __ Dsll(numChrs, numChrs, char_shift);
-
- codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+ __ Bind(&loop);
+ __ Lh(AT, srcPtr, 0);
+ __ Daddiu(numChrs, numChrs, -1);
+ __ Daddiu(srcPtr, srcPtr, char_size);
+ __ Sh(AT, dstPtr, 0);
+ __ Daddiu(dstPtr, dstPtr, char_size);
+ __ Bnezc(numChrs, &loop);
__ Bind(&done);
}
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..6f0dbce2df 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -69,11 +69,13 @@ class Location : public ValueObject {
// We do not use the value 9 because it conflicts with kLocationConstantMask.
kDoNotUse9 = 9,
+ kSIMDStackSlot = 10, // 128-bit stack slot. TODO: generalize with an encoded byte count?
+
// Unallocated location represents a location that is not fixed and can be
// allocated by a register allocator. Each unallocated location has
// a policy that specifies what kind of location is suitable. Payload
// contains register allocation policy.
- kUnallocated = 10,
+ kUnallocated = 11,
};
Location() : ValueObject(), value_(kInvalid) {
@@ -82,6 +84,7 @@ class Location : public ValueObject {
static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
+ static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -266,8 +269,20 @@ class Location : public ValueObject {
return GetKind() == kDoubleStackSlot;
}
+ static Location SIMDStackSlot(intptr_t stack_index) {
+ uintptr_t payload = EncodeStackIndex(stack_index);
+ Location loc(kSIMDStackSlot, payload);
+ // Ensure that sign is preserved.
+ DCHECK_EQ(loc.GetStackIndex(), stack_index);
+ return loc;
+ }
+
+ bool IsSIMDStackSlot() const {
+ return GetKind() == kSIMDStackSlot;
+ }
+
intptr_t GetStackIndex() const {
- DCHECK(IsStackSlot() || IsDoubleStackSlot());
+ DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot());
// Decode stack index manually to preserve sign.
return GetPayload() - kStackIndexBias;
}
@@ -315,6 +330,7 @@ class Location : public ValueObject {
case kRegister: return "R";
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
+ case kSIMDStackSlot: return "SIMD";
case kUnallocated: return "U";
case kConstant: return "C";
case kFpuRegister: return "F";
@@ -417,6 +433,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
class RegisterSet : public ValueObject {
public:
static RegisterSet Empty() { return RegisterSet(); }
+ static RegisterSet AllFpu() { return RegisterSet(0, -1); }
void Add(Location loc) {
if (loc.IsRegister()) {
@@ -462,6 +479,7 @@ class RegisterSet : public ValueObject {
private:
RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+ RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
uint32_t core_registers_;
uint32_t floating_point_registers_;
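Like the existing stack-slot kinds, kSIMDStackSlot keeps the slot index in the location payload with a bias applied, so negative indices survive the round trip through an unsigned payload. A minimal illustration (the -8 index is arbitrary):

    Location loc = Location::SIMDStackSlot(-8);
    CHECK(loc.IsSIMDStackSlot());
    CHECK_EQ(loc.GetStackIndex(), -8);  // the bias preserves the sign

RegisterSet::AllFpu() complements this, presumably so a slow path can conservatively treat every floating-point register as live when 128-bit SIMD contents are in flight and cannot be described per-register.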
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasSIMD()) {
+ outer_graph->SetHasSIMD(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
temporaries_vreg_slots_(0),
has_bounds_checks_(false),
has_try_catch_(false),
+ has_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ bool HasSIMD() const { return has_simd_; }
+ void SetHasSIMD(bool value) { has_simd_ = value; }
+
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// false positives.
bool has_try_catch_;
+ // Flag whether SIMD instructions appear in the graph. If true, the
+ // code generators may have to be more careful when spilling the wider
+ // contents of SIMD registers.
+ bool has_simd_;
+
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false. It's only best effort to keep it up
// to date in the presence of code elimination so there might be false
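The new flag is propagated into the outer graph when a SIMD-containing method is inlined (see the nodes.cc hunk above), keeping spill decisions conservative after inlining. A hedged sketch of a consumer, assuming 128-bit vector registers; the helper name is illustrative, not part of this change:

    // If the graph contains SIMD, floating-point spills must cover the
    // full vector width rather than the usual 64 bits.
    size_t BytesPerFpuSpill(const HGraph* graph) {
      return graph->HasSIMD() ? 16u : 8u;
    }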
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 23ccd9e953..3c6d2d64a9 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -499,7 +499,8 @@ static HOptimization* BuildOptimization(
handles,
stats,
number_of_dex_registers,
- /* depth */ 0);
+ /* total_number_of_instructions */ 0,
+ /* parent */ nullptr);
} else if (opt_name == HSharpening::kSharpeningPassName) {
return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
} else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
@@ -607,8 +608,7 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
VariableSizedHandleScope* handles) const {
OptimizingCompilerStats* stats = compilation_stats_.get();
const CompilerOptions& compiler_options = driver->GetCompilerOptions();
- bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
- && (compiler_options.GetInlineMaxCodeUnits() > 0);
+ bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0);
if (!should_inline) {
return;
}
@@ -623,7 +623,8 @@ void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
handles,
stats,
number_of_dex_registers,
- /* depth */ 0);
+ /* total_number_of_instructions */ 0,
+ /* parent */ nullptr);
HOptimization* optimizations[] = { inliner };
RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
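Both call sites now construct the inliner with a running instruction total and a pointer to the calling inliner instead of a depth counter; the depth, and recursion budgets such as kNotInlinedRecursiveBudget below, can be recovered by walking the parent chain. A hedged sketch, with the parent_ field name inferred from the /* parent */ argument:

    // Depth is no longer stored; it is derived on demand from the chain
    // of nested inliners, which also enables per-caller budgets.
    size_t InliningDepth(const HInliner* inliner) {
      size_t depth = 0;
      for (const HInliner* p = inliner->parent_; p != nullptr; p = p->parent_) {
        ++depth;
      }
      return depth;
    }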
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index ae9a8119a7..a211c5472a 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -69,6 +69,23 @@ enum MethodCompilationStat {
kExplicitNullCheckGenerated,
kSimplifyIf,
kInstructionSunk,
+ kNotInlinedUnresolvedEntrypoint,
+ kNotInlinedDexCache,
+ kNotInlinedStackMaps,
+ kNotInlinedEnvironmentBudget,
+ kNotInlinedInstructionBudget,
+ kNotInlinedLoopWithoutExit,
+ kNotInlinedIrreducibleLoop,
+ kNotInlinedAlwaysThrows,
+ kNotInlinedInfiniteLoop,
+ kNotInlinedTryCatch,
+ kNotInlinedRegisterAllocator,
+ kNotInlinedCannotBuild,
+ kNotInlinedNotVerified,
+ kNotInlinedCodeItem,
+ kNotInlinedWont,
+ kNotInlinedRecursiveBudget,
+ kNotInlinedProxy,
kLastStat
};
@@ -168,6 +185,23 @@ class OptimizingCompilerStats {
case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
case kSimplifyIf: name = "SimplifyIf"; break;
case kInstructionSunk: name = "InstructionSunk"; break;
+ case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break;
+ case kNotInlinedDexCache: name = "NotInlinedDexCache"; break;
+ case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break;
+ case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break;
+ case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break;
+ case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break;
+ case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break;
+ case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break;
+ case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break;
+ case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break;
+ case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break;
+ case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break;
+ case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break;
+ case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break;
+ case kNotInlinedWont: name = "NotInlinedWont"; break;
+ case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
+ case kNotInlinedProxy: name = "NotInlinedProxy"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "
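Each new counter names one bail-out reason, so a stats dump shows why inlining failed rather than just how often. A hedged sketch of how the inliner is expected to bump them; MaybeRecordStat is assumed from the existing HOptimization helpers, and the condition is illustrative:

    if (callee_graph_has_try_catch) {      // illustrative bail-out condition
      MaybeRecordStat(kNotInlinedTryCatch);
      return false;                        // refuse to inline, but record why
    }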
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 8a9c1ccaff..c6a0b6a0d2 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,11 +299,14 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
// Currently, we unconditionally spill the current method in the code generators.
&& !interval->GetDefinedBy()->IsCurrentMethod()) {
// We spill eagerly, so move must be at definition.
- InsertMoveAfter(interval->GetDefinedBy(),
- interval->ToLocation(),
- interval->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
- : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+ Location loc;
+ switch (interval->NumberOfSpillSlotsNeeded()) {
+ case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
+ InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
}
UsePosition* use = current->GetFirstUse();
EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
@@ -459,9 +462,12 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
location_source = defined_by->GetLocations()->Out();
} else {
DCHECK(defined_by->IsCurrentMethod());
- location_source = parent->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(parent->GetSpillSlot())
- : Location::StackSlot(parent->GetSpillSlot());
+ switch (parent->NumberOfSpillSlotsNeeded()) {
+ case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+ case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+ case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
}
} else {
DCHECK(source != nullptr);
@@ -492,7 +498,8 @@ static bool IsValidDestination(Location destination) {
|| destination.IsFpuRegister()
|| destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
- || destination.IsDoubleStackSlot();
+ || destination.IsDoubleStackSlot()
+ || destination.IsSIMDStackSlot();
}
void RegisterAllocationResolver::AddMove(HParallelMove* move,
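The same 1/2/4-slot switch now appears in ConnectSiblings(), ConnectSplitSiblings(), and LiveInterval::ToLocation(); a small helper could centralize it. A purely illustrative sketch, not part of the change:

    static Location ToStackSlotLocation(size_t num_slots, size_t spill_slot) {
      switch (num_slots) {
        case 1: return Location::StackSlot(spill_slot);
        case 2: return Location::DoubleStackSlot(spill_slot);
        case 4: return Location::SIMDStackSlot(spill_slot);
        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
      }
    }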
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
} else {
interval->SetSpillSlot(catch_phi_spill_slot_counter_);
- catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
}
}
}
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
bool is_interval_beginning;
size_t position;
std::tie(position, is_interval_beginning, parent_interval) = *it;
-
- bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+ size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
if (is_interval_beginning) {
DCHECK(!parent_interval->HasSpillSlot());
DCHECK_EQ(position, parent_interval->GetStart());
- // Find a free stack slot.
+ // Find the first run of free stack slots.
size_t slot = 0;
- for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
- // Skip taken slots.
+ for (; ; ++slot) {
+ bool found = true;
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ if (taken.IsBitSet(s)) {
+ found = false;
+ break; // failure
+ }
+ }
+ if (found) {
+ break; // success
+ }
}
+
parent_interval->SetSpillSlot(slot);
- *num_stack_slots_used = std::max(*num_stack_slots_used,
- needs_two_slots ? slot + 1 : slot + 2);
- if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+ if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
// The parallel move resolver requires that there be an even number of spill slots
// allocated for pair value types.
++(*num_stack_slots_used);
}
- taken.SetBit(slot);
- if (needs_two_slots) {
- taken.SetBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ taken.SetBit(s);
}
} else {
DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
DCHECK(parent_interval->HasSpillSlot());
- // Free up the stack slot used by this interval.
+ // Free up the stack slot(s) used by this interval.
size_t slot = parent_interval->GetSpillSlot();
- DCHECK(taken.IsBitSet(slot));
- DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
- taken.ClearBit(slot);
- if (needs_two_slots) {
- taken.ClearBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ DCHECK(taken.IsBitSet(s));
+ taken.ClearBit(s);
}
}
}
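The pair special case becomes a general first-fit search for a run of free bits. Standalone, the search reads as below; like the loop above, it assumes queries past the last slot used so far read as free, so the run may extend the high-water mark:

    // Find the lowest index with `n` consecutive clear bits
    // (BitVector-like API assumed).
    size_t FindFirstFreeRun(const BitVector& taken, size_t n) {
      for (size_t slot = 0; ; ++slot) {
        bool found = true;
        for (size_t s = slot; s < slot + n; ++s) {
          if (taken.IsBitSet(s)) { found = false; break; }
        }
        if (found) return slot;  // n consecutive free slots at `slot`
      }
    }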
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 6354e76ec8..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
}
- // Find an available spill slot.
+ // Find the first run of available spill slots.
+ size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
size_t slot = 0;
for (size_t e = spill_slots->size(); slot < e; ++slot) {
- if ((*spill_slots)[slot] <= parent->GetStart()) {
- if (!parent->NeedsTwoSpillSlots()) {
- // One spill slot is sufficient.
- break;
- }
- if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
- // Two spill slots are available.
+ bool found = true;
+ for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+ if ((*spill_slots)[s] > parent->GetStart()) {
+ found = false; // failure
break;
}
}
+ if (found) {
+ break; // success
+ }
}
+ // Need new spill slots?
+ size_t upper = slot + number_of_spill_slots_needed;
+ if (upper > spill_slots->size()) {
+ spill_slots->resize(upper);
+ }
+ // Stamp the allocated slots with the interval's end position.
size_t end = interval->GetLastSibling()->GetEnd();
- if (parent->NeedsTwoSpillSlots()) {
- if (slot + 2u > spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->resize(slot + 2u, end);
- }
- (*spill_slots)[slot] = end;
- (*spill_slots)[slot + 1] = end;
- } else {
- if (slot == spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->push_back(end);
- } else {
- (*spill_slots)[slot] = end;
- }
+ for (size_t s = slot; s < upper; s++) {
+ (*spill_slots)[s] = end;
}
// Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
// TODO: Reuse spill slots when intervals of phis from different catch
// blocks do not overlap.
interval->SetSpillSlot(catch_phi_spill_slots_);
- catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
}
}
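The linear-scan variant does the same first-fit search over a vector of "free after this position" values, where slots at or past the current size are implicitly free. A worked micro-example with illustrative values:

    // spill_slots = {10, 25, 12}, interval start = 20, n = 2 slots needed:
    //   slot 0: 10 <= 20, but slot 1 has 25 > 20  -> run blocked
    //   slot 1: 25 > 20                           -> still live
    //   slot 2: 12 <= 20, slot 3 is past the end  -> fits (implicitly free)
    // The vector is then resized to 4 and slots 2..3 are stamped with the
    // interval's end position, marking them occupied until then.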
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 7bd38c7a8c..eedaf6e67e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -259,7 +259,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
- string = class_linker->LookupString(dex_file, string_index, dex_cache);
+ string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr) {
if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -271,7 +271,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
- string = class_linker->LookupString(dex_file, string_index, dex_cache);
+ string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr &&
runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
!codegen_->GetCompilerOptions().GetCompilePic()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..36ee5a903a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,10 @@ bool LiveInterval::SameRegisterKind(Location other) const {
}
}
-bool LiveInterval::NeedsTwoSpillSlots() const {
- return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+ // TODO: detect vector operation.
+ // Return number of needed spill slots based on type.
+ return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
}
Location LiveInterval::ToLocation() const {
@@ -494,10 +496,11 @@ Location LiveInterval::ToLocation() const {
if (defined_by->IsConstant()) {
return defined_by->GetLocations()->Out();
} else if (GetParent()->HasSpillSlot()) {
- if (NeedsTwoSpillSlots()) {
- return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
- } else {
- return Location::StackSlot(GetParent()->GetSpillSlot());
+ switch (NumberOfSpillSlotsNeeded()) {
+ case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+ case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+ case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot());
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
} else {
return Location();
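NumberOfSpillSlotsNeeded() still keys off the primitive type; per the TODO, SIMD-producing intervals should eventually report four slots. A hedged sketch of that extension; the IsVecOperation() test is an assumption, not part of this change:

    size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
      if (GetDefinedBy() != nullptr && GetDefinedBy()->IsVecOperation()) {
        return 4;  // a 128-bit vector spans four vreg-sized slots
      }
      return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
    }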
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 340d0ccefe..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
// Returns kNoRegister otherwise.
int FindHintAtDefinition() const;
- // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
- // slots for spilling.
- bool NeedsTwoSpillSlots() const;
+ // Returns the number of required spill slots (measured as a multiple of the
+ // Dex virtual register size `kVRegSize`).
+ size_t NumberOfSpillSlotsNeeded() const;
bool IsFloatingPoint() const {
return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
index 2f154fb862..3ac6c3ca7a 100644
--- a/compiler/utils/jni_macro_assembler.cc
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -84,7 +84,11 @@ template <>
MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
ArenaAllocator* arena,
InstructionSet instruction_set,
- const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) {
+ const InstructionSetFeatures* instruction_set_features) {
+#ifndef ART_ENABLE_CODEGEN_mips64
+ UNUSED(instruction_set_features);
+#endif
+
switch (instruction_set) {
#ifdef ART_ENABLE_CODEGEN_arm64
case kArm64:
@@ -92,7 +96,11 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
#endif
#ifdef ART_ENABLE_CODEGEN_mips64
case kMips64:
- return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena));
+ return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(
+ arena,
+ instruction_set_features != nullptr
+ ? instruction_set_features->AsMips64InstructionSetFeatures()
+ : nullptr));
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case kX86_64:
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 4e7f635246..8a5ae754df 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1180,373 +1180,456 @@ void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) {
Nor(rd, rs, ZERO);
}
-// TODO: Check for MSA presence in Mips64InstructionSetFeatures for each MSA instruction.
-
void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e);
}
void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e);
}
void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e);
}
void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e);
}
void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe);
}
void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe);
}
void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe);
}
void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe);
}
void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe);
}
void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe);
}
void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe);
}
void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe);
}
void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12);
}
void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12);
}
void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12);
}
void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12);
}
void Mips64Assembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12);
}
void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12);
}
void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b);
}
void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b);
}
void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e);
}
void Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e);
}
void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e);
}
void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e);
}
void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd);
}
void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd);
}
void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd);
}
void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd);
}
void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd);
}
void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd);
}
void Mips64Assembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd);
}
void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd);
}
void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd);
}
void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd);
}
void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd);
}
void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+ CHECK(HasMsa());
EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd);
}
void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+ CHECK(HasMsa());
CHECK(IsUint<3>(shamt3)) << shamt3;
EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
}
void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+ CHECK(HasMsa());
CHECK(IsUint<4>(shamt4)) << shamt4;
EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
}
void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+ CHECK(HasMsa());
CHECK(IsUint<5>(shamt5)) << shamt5;
EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
}
void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+ CHECK(HasMsa());
CHECK(IsUint<6>(shamt6)) << shamt6;
EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
}
void Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) {
+ CHECK(HasMsa());
CHECK(IsUint<3>(shamt3)) << shamt3;
EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
}
void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) {
+ CHECK(HasMsa());
CHECK(IsUint<4>(shamt4)) << shamt4;
EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
}
void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) {
+ CHECK(HasMsa());
CHECK(IsUint<5>(shamt5)) << shamt5;
EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
}
void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) {
+ CHECK(HasMsa());
CHECK(IsUint<6>(shamt6)) << shamt6;
EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
}
void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+ CHECK(HasMsa());
CHECK(IsUint<3>(shamt3)) << shamt3;
EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
}
void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+ CHECK(HasMsa());
CHECK(IsUint<4>(shamt4)) << shamt4;
EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
}
void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+ CHECK(HasMsa());
CHECK(IsUint<5>(shamt5)) << shamt5;
EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
}
void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+ CHECK(HasMsa());
CHECK(IsUint<6>(shamt6)) << shamt6;
EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
}
void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) {
+ CHECK(HasMsa());
EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19);
}
void Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) {
+ CHECK(HasMsa());
CHECK(IsUint<4>(n4)) << n4;
EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19);
}
void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) {
+ CHECK(HasMsa());
CHECK(IsUint<3>(n3)) << n3;
EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19);
}
void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) {
+ CHECK(HasMsa());
CHECK(IsUint<2>(n2)) << n2;
EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19);
}
void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) {
+ CHECK(HasMsa());
CHECK(IsUint<1>(n1)) << n1;
EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19);
}
void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) {
+ CHECK(HasMsa());
EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e);
}
void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) {
+ CHECK(HasMsa());
EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e);
}
void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) {
+ CHECK(HasMsa());
EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e);
}
void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) {
+ CHECK(HasMsa());
EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e);
}
void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<10>(offset)) << offset;
EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0);
}
void Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<11>(offset)) << offset;
CHECK_ALIGNED(offset, kMips64HalfwordSize);
EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1);
}
void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<12>(offset)) << offset;
CHECK_ALIGNED(offset, kMips64WordSize);
EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2);
}
void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<13>(offset)) << offset;
CHECK_ALIGNED(offset, kMips64DoublewordSize);
EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3);
}
void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<10>(offset)) << offset;
EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0);
}
void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<11>(offset)) << offset;
CHECK_ALIGNED(offset, kMips64HalfwordSize);
EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1);
}
void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<12>(offset)) << offset;
CHECK_ALIGNED(offset, kMips64WordSize);
EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2);
}
void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) {
+ CHECK(HasMsa());
CHECK(IsInt<13>(offset)) << offset;
CHECK_ALIGNED(offset, kMips64DoublewordSize);
EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index f42c1626df..a8035b6da4 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -21,6 +21,7 @@
#include <utility>
#include <vector>
+#include "arch/mips64/instruction_set_features_mips64.h"
#include "base/arena_containers.h"
#include "base/enums.h"
#include "base/macros.h"
@@ -413,7 +414,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
public:
using JNIBase = JNIMacroAssembler<PointerSize::k64>;
- explicit Mips64Assembler(ArenaAllocator* arena)
+ explicit Mips64Assembler(ArenaAllocator* arena,
+ const Mips64InstructionSetFeatures* instruction_set_features = nullptr)
: Assembler(arena),
overwriting_(false),
overwrite_location_(0),
@@ -422,7 +424,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
jump_tables_(arena->Adapter(kArenaAllocAssembler)),
last_position_adjustment_(0),
last_old_position_(0),
- last_branch_id_(0) {
+ last_branch_id_(0),
+ has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) {
cfi().DelayEmittingAdvancePCs();
}
@@ -1479,6 +1482,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
// Emits exception block.
void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
+ bool HasMsa() const {
+ return has_msa_;
+ }
+
// List of exception blocks to generate at the end of the code cache.
std::vector<Mips64ExceptionSlowPath> exception_blocks_;
@@ -1502,6 +1509,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
uint32_t last_old_position_;
uint32_t last_branch_id_;
+ const bool has_msa_;
+
DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
};
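With the defaulted argument, existing callers compile unchanged and conservatively get has_msa_ == false, in which case every MSA emitter fails its CHECK(HasMsa()) instead of silently encoding an unsupported instruction. MSA-aware callers pass the features explicitly, mirroring the test fixture below:

    // Assumes an ArenaAllocator* `arena` from the surrounding compiler
    // setup; the test fixture below passes nullptr for error_msg.
    std::string error_msg;
    std::unique_ptr<const Mips64InstructionSetFeatures> features(
        Mips64InstructionSetFeatures::FromVariant("default", &error_msg));
    mips64::Mips64Assembler assembler(arena, features.get());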
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 12660ce85d..cadbe27819 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -46,6 +46,9 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
uint32_t,
mips64::VectorRegister> Base;
+ AssemblerMIPS64Test()
+ : instruction_set_features_(Mips64InstructionSetFeatures::FromVariant("default", nullptr)) {}
+
protected:
// Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
std::string GetArchitectureString() OVERRIDE {
@@ -78,6 +81,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
return " -D -bbinary -mmips:isa64r6";
}
+ mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE {
+ return new (arena) mips64::Mips64Assembler(arena, instruction_set_features_.get());
+ }
+
void SetUpHelpers() OVERRIDE {
if (registers_.size() == 0) {
registers_.push_back(new mips64::GpuRegister(mips64::ZERO));
@@ -313,8 +320,9 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
std::vector<mips64::FpuRegister*> fp_registers_;
std::vector<mips64::VectorRegister*> vec_registers_;
-};
+ std::unique_ptr<const Mips64InstructionSetFeatures> instruction_set_features_;
+};
TEST_F(AssemblerMIPS64Test, Toolchain) {
EXPECT_TRUE(CheckTools());