73 files changed, 3556 insertions, 2004 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index c27f8dbe4a..ed34a8df5f 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -25,6 +25,7 @@ include art/build/Android.common_build.mk GTEST_DEX_DIRECTORIES := \ AbstractMethod \ AllFields \ + DefaultMethods \ DexToDexDecompiler \ ErroneousA \ ErroneousB \ @@ -104,7 +105,7 @@ ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested MultiDex ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics VerifierDeps ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle -ART_GTEST_image_test_DEX_DEPS := ImageLayoutA ImageLayoutB +ART_GTEST_image_test_DEX_DEPS := ImageLayoutA ImageLayoutB DefaultMethods ART_GTEST_imtable_test_DEX_DEPS := IMTA IMTB ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc index 550e8c4605..5b331bcbec 100644 --- a/cmdline/cmdline_parser_test.cc +++ b/cmdline/cmdline_parser_test.cc @@ -476,7 +476,7 @@ TEST_F(CmdlineParserTest, TestJitOptions) { * -Xps-* */ TEST_F(CmdlineParserTest, ProfileSaverOptions) { - ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7); + ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7, "abc"); EXPECT_SINGLE_PARSE_VALUE(opt, "-Xjitsaveprofilinginfo " @@ -486,7 +486,8 @@ TEST_F(CmdlineParserTest, ProfileSaverOptions) { "-Xps-min-methods-to-save:4 " "-Xps-min-classes-to-save:5 " "-Xps-min-notification-before-wake:6 " - "-Xps-max-notification-before-wake:7", + "-Xps-max-notification-before-wake:7 " + "-Xps-profile-path:abc", M::ProfileSaverOpts); } // TEST_F diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h index f1123eb692..71c4e95921 100644 --- a/cmdline/cmdline_types.h +++ b/cmdline/cmdline_types.h @@ -752,9 +752,13 @@ struct CmdlineType<ProfileSaverOptions> : CmdlineTypeParser<ProfileSaverOptions> return ParseInto(existing, &ProfileSaverOptions::max_notification_before_wake_, type_parser.Parse(suffix)); - } else { - return Result::Failure(std::string("Invalid suboption '") + option + "'"); } + if (android::base::StartsWith(option, "profile-path:")) { + existing.profile_path_ = suffix; + return Result::SuccessNoValue(); + } + + return Result::Failure(std::string("Invalid suboption '") + option + "'"); } static const char* Name() { return "ProfileSaverOptions"; } @@ -774,6 +778,5 @@ struct CmdlineType<ExperimentalFlags> : CmdlineTypeParser<ExperimentalFlags> { static const char* Name() { return "ExperimentalFlags"; } }; - } // namespace art #endif // ART_CMDLINE_CMDLINE_TYPES_H_ diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 89e8a678b1..7ee494a131 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -76,7 +76,7 @@ class ImageTest : public CommonCompilerTest { void Compile(ImageHeader::StorageMode storage_mode, CompilationHelper& out_helper, const std::string& extra_dex = "", - const std::string& image_class = ""); + const std::initializer_list<std::string>& image_classes = {}); void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE { CommonCompilerTest::SetUpRuntimeOptions(options); @@ -90,6 +90,18 @@ class ImageTest : public CommonCompilerTest { return new std::unordered_set<std::string>(image_classes_); } + ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass) + REQUIRES_SHARED(Locks::mutator_lock_) { + PointerSize 
pointer_size = class_linker_->GetImagePointerSize(); + for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) { + if (strcmp(origin->GetName(), m.GetName()) == 0 && + origin->GetSignature() == m.GetSignature()) { + return &m; + } + } + return nullptr; + } + private: std::unordered_set<std::string> image_classes_; }; @@ -345,8 +357,8 @@ void CompilationHelper::Compile(CompilerDriver* driver, void ImageTest::Compile(ImageHeader::StorageMode storage_mode, CompilationHelper& helper, const std::string& extra_dex, - const std::string& image_class) { - if (!image_class.empty()) { + const std::initializer_list<std::string>& image_classes) { + for (const std::string& image_class : image_classes) { image_classes_.insert(image_class); } CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); @@ -358,13 +370,15 @@ void ImageTest::Compile(ImageHeader::StorageMode storage_mode, helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str()); } helper.Compile(compiler_driver_.get(), storage_mode); - if (!image_class.empty()) { + if (image_classes.begin() != image_classes.end()) { // Make sure the class got initialized. ScopedObjectAccess soa(Thread::Current()); ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); - EXPECT_TRUE(klass != nullptr); - EXPECT_TRUE(klass->IsInitialized()); + for (const std::string& image_class : image_classes) { + mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); + EXPECT_TRUE(klass != nullptr); + EXPECT_TRUE(klass->IsInitialized()); + } } } @@ -492,7 +506,7 @@ TEST_F(ImageTest, TestImageLayout) { // Compile multi-image with ImageLayoutA being the last image. { CompilationHelper helper; - Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", "LMyClass;"); + Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", {"LMyClass;"}); image_sizes = helper.GetImageObjectSectionSizes(); } TearDown(); @@ -501,7 +515,7 @@ TEST_F(ImageTest, TestImageLayout) { // Compile multi-image with ImageLayoutB being the last image. { CompilationHelper helper; - Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", "LMyClass;"); + Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", {"LMyClass;"}); image_sizes_extra = helper.GetImageObjectSectionSizes(); } // Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the @@ -553,4 +567,63 @@ TEST_F(ImageTest, ImageHeaderIsValid) { ASSERT_FALSE(image_header.IsValid()); } +// Test that pointer to quick code is the same in +// a default method of an interface and in a copied method +// of a class which implements the interface. This should be true +// only if the copied method and the origin method are located in the +// same oat file. +TEST_F(ImageTest, TestDefaultMethods) { + CompilationHelper helper; + Compile(ImageHeader::kStorageModeUncompressed, + helper, + "DefaultMethods", + {"LIface;", "LImpl;", "LIterableBase;"}); + + PointerSize pointer_size = class_linker_->GetImagePointerSize(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + + // Test the pointer to quick code is the same in origin method + // and in the copied method form the same oat file. 
+ mirror::Class* iface_klass = class_linker_->LookupClass( + self, "LIface;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iface_klass); + ArtMethod* origin = iface_klass->FindDeclaredVirtualMethod( + "defaultMethod", "()V", pointer_size); + ASSERT_NE(nullptr, origin); + const void* code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The origin method should have a pointer to quick code + ASSERT_NE(nullptr, code); + ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code)); + mirror::Class* impl_klass = class_linker_->LookupClass( + self, "LImpl;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, impl_klass); + ArtMethod* copied = FindCopiedMethod(origin, impl_klass); + ASSERT_NE(nullptr, copied); + // the copied method should have pointer to the same quick code as the origin method + ASSERT_EQ(code, copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)); + + // Test the origin method has pointer to quick code + // but the copied method has pointer to interpreter + // because these methods are in different oat files. + mirror::Class* iterable_klass = class_linker_->LookupClass( + self, "Ljava/lang/Iterable;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iterable_klass); + origin = iterable_klass->FindDeclaredVirtualMethod( + "forEach", "(Ljava/util/function/Consumer;)V", pointer_size); + ASSERT_NE(nullptr, origin); + code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // the origin method should have a pointer to quick code + ASSERT_NE(nullptr, code); + ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code)); + mirror::Class* iterablebase_klass = class_linker_->LookupClass( + self, "LIterableBase;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iterablebase_klass); + copied = FindCopiedMethod(origin, iterablebase_klass); + ASSERT_NE(nullptr, copied); + code = copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // the copied method should have a pointer to interpreter + ASSERT_TRUE(class_linker_->IsQuickToInterpreterBridge(code)); +} + } // namespace art diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 3ddb3f4ea5..105db1d2d0 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1034,18 +1034,63 @@ class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor { class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { public: - InitImageMethodVisitor(OatWriter* writer, size_t offset) + InitImageMethodVisitor(OatWriter* writer, + size_t offset, + const std::vector<const DexFile*>* dex_files) : OatDexMethodVisitor(writer, offset), - pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) { + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())), + dex_files_(dex_files), + class_linker_(Runtime::Current()->GetClassLinker()) { + } + + // Handle copied methods here. Copy pointer to quick code from + // an origin method to a copied method only if they are + // in the same oat file. If the origin and the copied methods are + // in different oat files don't touch the copied method. + // References to other oat files are not supported yet. + bool StartClass(const DexFile* dex_file, size_t class_def_index) + REQUIRES_SHARED(Locks::mutator_lock_) { + OatDexMethodVisitor::StartClass(dex_file, class_def_index); + // Skip classes that are not in the image. 
+ if (!IsImageClass()) { + return true; + } + ScopedObjectAccessUnchecked soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache = hs.NewHandle( + class_linker_->FindDexCache(Thread::Current(), *dex_file)); + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + mirror::Class* klass = dex_cache->GetResolvedType(class_def.class_idx_); + if (klass != nullptr) { + for (ArtMethod& method : klass->GetCopiedMethods(pointer_size_)) { + // Find origin method. Declaring class and dex_method_idx + // in the copied method should be the same as in the origin + // method. + mirror::Class* declaring_class = method.GetDeclaringClass(); + ArtMethod* origin = declaring_class->FindDeclaredVirtualMethod( + declaring_class->GetDexCache(), + method.GetDexMethodIndex(), + pointer_size_); + CHECK(origin != nullptr); + if (IsInOatFile(&declaring_class->GetDexFile())) { + const void* code_ptr = + origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); + if (code_ptr == nullptr) { + methods_to_process_.push_back(std::make_pair(&method, origin)); + } else { + method.SetEntryPointFromQuickCompiledCodePtrSize( + code_ptr, pointer_size_); + } + } + } + } + return true; } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) REQUIRES_SHARED(Locks::mutator_lock_) { - const DexFile::TypeId& type_id = - dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); - const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); // Skip methods that are not in the image. - if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) { + if (!IsImageClass()) { return true; } @@ -1059,17 +1104,16 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { ++method_offsets_index_; } - ClassLinker* linker = Runtime::Current()->GetClassLinker(); // Unchecked as we hold mutator_lock_ on entry. ScopedObjectAccessUnchecked soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache( + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker_->FindDexCache( Thread::Current(), *dex_file_))); ArtMethod* method; if (writer_->HasBootImage()) { const InvokeType invoke_type = it.GetMethodInvokeType( dex_file_->GetClassDef(class_def_index_)); - method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>( + method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>( *dex_file_, it.GetMemberIndex(), dex_cache, @@ -1089,7 +1133,8 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { // Should already have been resolved by the compiler, just peek into the dex cache. // It may not be resolved if the class failed to verify, in this case, don't set the // entrypoint. This is not fatal since the dex cache will contain a resolution method. 
- method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize()); + method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), + class_linker_->GetImagePointerSize()); } if (method != nullptr && compiled_method != nullptr && @@ -1101,8 +1146,38 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { return true; } + // Check whether current class is image class + bool IsImageClass() { + const DexFile::TypeId& type_id = + dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); + const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); + return writer_->GetCompilerDriver()->IsImageClass(class_descriptor); + } + + // Check whether specified dex file is in the compiled oat file. + bool IsInOatFile(const DexFile* dex_file) { + return ContainsElement(*dex_files_, dex_file); + } + + // Assign a pointer to quick code for copied methods + // not handled in the method StartClass + void Postprocess() { + for (std::pair<ArtMethod*, ArtMethod*>& p : methods_to_process_) { + ArtMethod* method = p.first; + ArtMethod* origin = p.second; + const void* code_ptr = + origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); + if (code_ptr != nullptr) { + method->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size_); + } + } + } + protected: const PointerSize pointer_size_; + const std::vector<const DexFile*>* dex_files_; + ClassLinker* const class_linker_; + std::vector<std::pair<ArtMethod*, ArtMethod*>> methods_to_process_; }; class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { @@ -1742,8 +1817,9 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { offset = code_visitor.GetOffset(); if (HasImage()) { - InitImageMethodVisitor image_visitor(this, offset); + InitImageMethodVisitor image_visitor(this, offset, dex_files_); success = VisitDexMethods(&image_visitor); + image_visitor.Postprocess(); DCHECK(success); offset = image_visitor.GetOffset(); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e34f116b75..caea250ab6 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1875,6 +1875,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src) Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) { DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); + DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); const HBasicBlock* const block = instruction->GetBlock(); const HLoopInformation* const info = block->GetLoopInformation(); @@ -2901,16 +2902,20 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { // Convert the jumps into the result. Label done_label; + Label* final_label = codegen_->GetFinalLabel(cond, &done_label); // False case: result = 0. __ Bind(&false_label); __ LoadImmediate(out, 0); - __ b(&done_label); + __ b(final_label); // True case: result = 1. 
__ Bind(&true_label); __ LoadImmediate(out, 1); - __ Bind(&done_label); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -4441,7 +4446,8 @@ void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations // rotates by swapping input regs (effectively rotating by the first 32-bits of // a larger rotation) or flipping direction (thus treating larger right/left // rotations as sub-word sized rotations in the other direction) as appropriate. -void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { +void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Location rhs = locations->InAt(1); @@ -4474,6 +4480,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { Register shift_left = locations->GetTemp(1).AsRegister<Register>(); Label end; Label shift_by_32_plus_shift_right; + Label* final_label = codegen_->GetFinalLabel(ror, &end); __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F)); __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6); @@ -4488,7 +4495,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { __ Lsl(out_reg_lo, in_reg_lo, shift_left); __ Lsr(shift_left, in_reg_hi, shift_right); __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left)); - __ b(&end); + __ b(final_label); __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). @@ -4500,7 +4507,9 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { __ Lsl(shift_right, in_reg_hi, shift_left); __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right)); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } } @@ -4540,7 +4549,7 @@ void InstructionCodeGeneratorARM::VisitRor(HRor* ror) { break; } case Primitive::kPrimLong: { - HandleLongRotate(locations); + HandleLongRotate(ror); break; } default: @@ -4919,6 +4928,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); Label less, greater, done; + Label* final_label = codegen_->GetFinalLabel(compare, &done); Primitive::Type type = compare->InputAt(0)->GetType(); Condition less_cond; switch (type) { @@ -4958,17 +4968,19 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ b(&done, EQ); + __ b(final_label, EQ); __ b(&less, less_cond); __ Bind(&greater); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); __ Bind(&less); __ LoadImmediate(out, -1); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } void LocationsBuilderARM::VisitPhi(HPhi* instruction) { @@ -5746,6 +5758,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); if (maybe_compressed_char_at) { Label uncompressed_load, done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -5754,13 +5767,15 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { out_loc.AsRegister<Register>(), obj, data_offset + const_index); - __ b(&done); + __ b(final_label); __ Bind(&uncompressed_load); __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), out_loc.AsRegister<Register>(), obj, data_offset + (const_index << 1)); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -5784,17 +5799,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { Label uncompressed_load, done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); __ b(&uncompressed_load, CS); __ ldrb(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); - __ b(&done); + __ b(final_label); __ Bind(&uncompressed_load); __ ldrh(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 1)); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); } @@ -6019,6 +6037,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); Label done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { @@ -6040,7 +6059,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { index.AsRegister<Register>()); } codegen_->MaybeRecordImplicitNullCheck(instruction); - __ b(&done); + __ b(final_label); __ Bind(&non_zero); } @@ -7021,6 +7040,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); Label done, zero; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -7042,7 +7062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // Classes must be equal for the instanceof to succeed. __ b(&zero, NE); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); break; } @@ -7065,12 +7085,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. 
- __ CompareAndBranchIfZero(out, &done); + __ CompareAndBranchIfZero(out, final_label); __ cmp(out, ShifterOperand(cls)); __ b(&loop, NE); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7096,11 +7116,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. - __ b(&done); + __ b(final_label); __ Bind(&success); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7125,13 +7145,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. - __ CompareAndBranchIfZero(out, &done); + __ CompareAndBranchIfZero(out, final_label); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero); __ Bind(&exact_check); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); break; } @@ -7152,7 +7172,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7183,7 +7203,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7269,9 +7289,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { codegen_->AddSlowPath(type_check_slow_path); Label done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, final_label); } switch (type_check_kind) { @@ -7335,7 +7356,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); - __ b(&done, EQ); + __ b(final_label, EQ); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7363,7 +7384,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ cmp(temp, ShifterOperand(cls)); - __ b(&done, EQ); + __ b(final_label, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -7433,7 +7454,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { break; } } - __ Bind(&done); + + if (done.IsLinked()) { + __ Bind(&done); + } __ Bind(type_check_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 5b15902ccd..59a7f7c048 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -237,7 +237,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); void HandleIntegerRotate(LocationSummary* locations); - void HandleLongRotate(LocationSummary* locations); + void HandleLongRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); void GenerateWideAtomicStore(Register addr, uint32_t offset, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index d75779cef6..2d2d8109a3 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1950,6 +1950,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src) vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label) { DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); + DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); const HBasicBlock* const block = instruction->GetBlock(); const HLoopInformation* const info = block->GetLoopInformation(); @@ -2925,16 +2926,20 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { // Convert the jumps into the result. vixl32::Label done_label; + vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label); // False case: result = 0. __ Bind(&false_label); __ Mov(out, 0); - __ B(&done_label); + __ B(final_label); // True case: result = 1. __ Bind(&true_label); __ Mov(out, 1); - __ Bind(&done_label); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -4447,6 +4452,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1)); vixl32::Label end; vixl32::Label shift_by_32_plus_shift_right; + vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end); __ And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); @@ -4461,7 +4467,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ Lsl(out_reg_lo, in_reg_lo, shift_left); __ Lsr(shift_left, in_reg_hi, shift_right); __ Add(out_reg_lo, out_reg_lo, shift_left); - __ B(&end); + __ B(final_label); __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). 
@@ -4473,7 +4479,9 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ Lsl(shift_right, in_reg_hi, shift_left); __ Add(out_reg_lo, out_reg_lo, shift_right); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } } @@ -4906,6 +4914,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); vixl32::Label less, greater, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done); Primitive::Type type = compare->InputAt(0)->GetType(); vixl32::Condition less_cond = vixl32::Condition(kNone); switch (type) { @@ -4944,17 +4953,19 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); __ B(less_cond, &less, /* far_target */ false); __ Bind(&greater); __ Mov(out, 1); - __ B(&done); + __ B(final_label); __ Bind(&less); __ Mov(out, -1); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { @@ -5746,6 +5757,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { int32_t const_index = Int32ConstantFrom(index); if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -5754,13 +5766,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { RegisterFrom(out_loc), obj, data_offset + const_index); - __ B(&done); + __ B(final_label); __ Bind(&uncompressed_load); GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), RegisterFrom(out_loc), obj, data_offset + (const_index << 1)); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -5785,15 +5799,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); __ B(cs, &uncompressed_load, /* far_target */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); - __ B(&done); + __ B(final_label); __ Bind(&uncompressed_load); __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1)); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); } @@ -6032,6 +6049,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { @@ -6054,7 +6072,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding // store instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); - __ B(&done); + __ B(final_label); __ Bind(&non_zero); } @@ -7062,6 +7080,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); vixl32::Label done, zero; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; // Return 0 if `obj` is null. @@ -7083,7 +7102,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Classes must be equal for the instanceof to succeed. __ B(ne, &zero, /* far_target */ false); __ Mov(out, 1); - __ B(&done); + __ B(final_label); break; } @@ -7106,12 +7125,12 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. - __ CompareAndBranchIfZero(out, &done, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); __ Cmp(out, cls); __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7137,11 +7156,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) kCompilerReadBarrierOption); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. - __ B(&done); + __ B(final_label); __ Bind(&success); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7166,13 +7185,13 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. 
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false); __ Bind(&exact_check); __ Mov(out, 1); - __ B(&done); + __ B(final_label); break; } @@ -7193,7 +7212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7224,7 +7243,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7310,9 +7329,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { codegen_->AddSlowPath(type_check_slow_path); vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); } switch (type_check_kind) { @@ -7376,7 +7396,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7404,7 +7424,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -7472,7 +7492,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { break; } } - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } __ Bind(type_check_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0b50619a66..958c1a6fdb 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // only saves full width XMM for SIMD if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id } size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); + if (GetGraph()->HasSIMD()) { + __ movupd(Address(ESP, stack_index), XmmRegister(reg_id)); + } else { + __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); + } return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); + if (GetGraph()->HasSIMD()) { + __ movupd(XmmRegister(reg_id), Address(ESP, stack_index)); + } else { + __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); + } return GetFloatingPointSpillSlotSize(); } @@ -5699,7 +5710,12 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() + ? RegisterSet::AllFpu() + : RegisterSet::Empty()); } void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 65ee383b54..ca3a9eadd2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator { } size_t GetFloatingPointSpillSlotSize() const OVERRIDE { - // 8 bytes == 2 words for each spill. - return 2 * kX86WordSize; + return GetGraph()->HasSIMD() + ? 
4 * kX86WordSize // 16 bytes == 4 words for each spill + : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 08f1adfcff..c106d9b06e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // only saves full width XMM for SIMD if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg } size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); - return kX86_64WordSize; + if (GetGraph()->HasSIMD()) { + __ movupd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + } else { + __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + } + return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); - return kX86_64WordSize; + if (GetGraph()->HasSIMD()) { + __ movupd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + } else { + __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + } + return GetFloatingPointSpillSlotSize(); } void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -5152,7 +5163,12 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() + ? RegisterSet::AllFpu() + : RegisterSet::Empty()); } void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 376c3ce381..c8336dabd9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { } size_t GetFloatingPointSpillSlotSize() const OVERRIDE { - return kX86_64WordSize; + return GetGraph()->HasSIMD() + ? 
2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill + : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 words for each spill } HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 8a813bd54c..62f5114e59 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -55,6 +55,9 @@ static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; // Avoid inlining within a huge method due to memory pressure. static constexpr size_t kMaximumCodeUnitSize = 4096; +// Controls the use of inline caches in AOT mode. +static constexpr bool kUseAOTInlineCaches = false; + void HInliner::Run() { const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions(); if ((compiler_options.GetInlineDepthLimit() == 0) @@ -376,6 +379,10 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, HInvoke* invoke_instruction, ArtMethod* resolved_method) REQUIRES_SHARED(Locks::mutator_lock_) { + if (Runtime::Current()->IsAotCompiler() && !kUseAOTInlineCaches) { + return false; + } + StackHandleScope<1> hs(Thread::Current()); Handle<mirror::ObjectArray<mirror::Class>> inline_cache; InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler() diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 28095c4d3f..1006a776f0 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -41,6 +41,54 @@ ArenaAllocator* IntrinsicCodeGeneratorARM::GetAllocator() { using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>; +#define __ assembler-> + +// Compute base address for the System.arraycopy intrinsic in `base`. +static void GenSystemArrayCopyBaseAddress(ArmAssembler* assembler, + Primitive::Type type, + const Register& array, + const Location& pos, + const Register& base) { + // This routine is only used by the SystemArrayCopy intrinsic at the + // moment. We can allow Primitive::kPrimNot as `type` to implement + // the SystemArrayCopyChar intrinsic. + DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const uint32_t element_size_shift = Primitive::ComponentSizeShift(type); + const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + if (pos.IsConstant()) { + int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(base, array, element_size * constant + data_offset); + } else { + __ add(base, array, ShifterOperand(pos.AsRegister<Register>(), LSL, element_size_shift)); + __ AddConstant(base, data_offset); + } +} + +// Compute end address for the System.arraycopy intrinsic in `end`. +static void GenSystemArrayCopyEndAddress(ArmAssembler* assembler, + Primitive::Type type, + const Location& copy_length, + const Register& base, + const Register& end) { + // This routine is only used by the SystemArrayCopy intrinsic at the + // moment. We can allow Primitive::kPrimNot as `type` to implement + // the SystemArrayCopyChar intrinsic. 
+ DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const uint32_t element_size_shift = Primitive::ComponentSizeShift(type); + + if (copy_length.IsConstant()) { + int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); + __ AddConstant(end, base, element_size * constant); + } else { + __ add(end, base, ShifterOperand(copy_length.AsRegister<Register>(), LSL, element_size_shift)); + } +} + +#undef __ + // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT @@ -55,6 +103,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + ArmAssembler* assembler = arm_codegen->GetAssembler(); LocationSummary* locations = instruction_->GetLocations(); DCHECK(locations->CanCall()); DCHECK(instruction_->IsInvokeStaticOrDirect()) @@ -63,9 +112,8 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); - uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); Register dest = locations->InAt(2).AsRegister<Register>(); Location dest_pos = locations->InAt(3); @@ -76,15 +124,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { __ Bind(GetEntryLabel()); // Compute the base destination address in `dst_curr_addr`. - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(dst_curr_addr, dest, element_size * constant + offset); - } else { - __ add(dst_curr_addr, - dest, - ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); - __ AddConstant(dst_curr_addr, offset); - } + GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr); Label loop; __ Bind(&loop); @@ -108,6 +148,8 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { DCHECK_NE(src_stop_addr, IP); DCHECK_NE(tmp, IP); DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp; + // TODO: Load the entrypoint once before the loop, instead of + // loading it at every iteration. int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp); // This runtime call does not require a stack map. 
@@ -228,9 +270,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void GenNumberOfLeadingZeros(LocationSummary* locations, +static void GenNumberOfLeadingZeros(HInvoke* invoke, Primitive::Type type, - ArmAssembler* assembler) { + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); Register out = locations->Out().AsRegister<Register>(); @@ -240,11 +284,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, Register in_reg_lo = in.AsRegisterPairLow<Register>(); Register in_reg_hi = in.AsRegisterPairHigh<Register>(); Label end; + Label* final_label = codegen->GetFinalLabel(invoke, &end); __ clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, &end); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label); __ clz(out, in_reg_lo); __ AddConstant(out, 32); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } else { __ clz(out, in.AsRegister<Register>()); } @@ -255,7 +302,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -267,27 +314,32 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke } void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); } -static void GenNumberOfTrailingZeros(LocationSummary* locations, +static void GenNumberOfTrailingZeros(HInvoke* invoke, Primitive::Type type, - ArmAssembler* assembler) { + CodeGeneratorARM* codegen) { DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Register out = locations->Out().AsRegister<Register>(); if (type == Primitive::kPrimLong) { Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Label end; + Label* final_label = codegen->GetFinalLabel(invoke, &end); __ rbit(out, in_reg_lo); __ clz(out, out); - __ CompareAndBranchIfNonZero(in_reg_lo, &end); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label); __ rbit(out, in_reg_hi); __ clz(out, out); __ AddConstant(out, 32); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } else { Register in = locations->InAt(0).AsRegister<Register>(); __ rbit(out, in); @@ -304,7 +356,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -316,7 +368,7 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invok } void 
IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); } static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { @@ -1313,6 +1365,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Label end; Label return_true; Label return_false; + Label* final_label = codegen_->GetFinalLabel(invoke, &end); // Get offsets of count, value, and class fields within a string object. const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1386,12 +1439,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ LoadImmediate(out, 1); - __ b(&end); + __ b(final_label); // Return false and exit the function. __ Bind(&return_false); __ LoadImmediate(out, 0); - __ Bind(&end); + + if (end.IsLinked()) { + __ Bind(&end); + } } static void GenerateVisitStringIndexOf(HInvoke* invoke, @@ -1925,138 +1981,113 @@ void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); } - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); - uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); - - // Compute the base source address in `temp1`. - if (src_pos.IsConstant()) { - int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp1, src, element_size * constant + offset); - } else { - __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift)); - __ AddConstant(temp1, offset); - } - - // Compute the end source address in `temp3`. - if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp3, temp1, element_size * constant); + if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) { + // Null constant length: not need to emit the loop code at all. } else { - __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift)); - } + Label done; + const Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // TODO: Also convert this intrinsic to the IsGcMarking strategy? - - // The base destination address is computed later, as `temp2` is - // used for intermediate computations. - - // SystemArrayCopy implementation for Baker read barriers (see - // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): - // - // if (src_ptr != end_ptr) { - // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // // Slow-path copy. - // do { - // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); - // } while (src_ptr != end_ptr) - // } else { - // // Fast-path copy. - // do { - // *dest_ptr++ = *src_ptr++; - // } while (src_ptr != end_ptr) - // } - // } - - Label loop, done; - - // Don't enter copy loop if `length == 0`. 
- __ cmp(temp1, ShifterOperand(temp3)); - __ b(&done, EQ); - - // /* int32_t */ monitor = src->monitor_ - __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset); - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `src` is unchanged by this operation, but its value now depends - // on `temp2`. - __ add(src, src, ShifterOperand(temp2, LSR, 32)); - - // Slow path used to copy array when `src` is gray. - SlowPathCode* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke); - codegen_->AddSlowPath(read_barrier_slow_path); - - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); - // Carry flag is the last bit shifted out by LSRS. - __ b(read_barrier_slow_path->GetEntryLabel(), CS); - - // Fast-path copy. - - // Compute the base destination address in `temp2`. - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp2, dest, element_size * constant + offset); - } else { - __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); - __ AddConstant(temp2, offset); + if (length.IsRegister()) { + // Don't enter the copy loop if the length is null. + __ CompareAndBranchIfZero(length.AsRegister<Register>(), &done); } - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - __ Bind(&loop); - __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); - __ str(IP, Address(temp2, element_size, Address::PostIndex)); - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&loop, NE); - - __ Bind(read_barrier_slow_path->GetExitLabel()); - __ Bind(&done); - } else { - // Non read barrier code. - - // Compute the base destination address in `temp2`. - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ AddConstant(temp2, dest, element_size * constant + offset); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // TODO: Also convert this intrinsic to the IsGcMarking strategy? + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. 
+ // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + + // /* int32_t */ monitor = src->monitor_ + __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ add(src, src, ShifterOperand(temp2, LSR, 32)); + + // Compute the base source address in `temp1`. + // Note that `temp1` (the base source address) is computed from + // `src` (and `src_pos`) here, and thus honors the artificial + // dependency of `src` on `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); + // Compute the end source address in `temp3`. + GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // Slow path used to copy array when `src` is gray. + // Note that the base destination address is computed in `temp2` + // by the slow path code. + SlowPathCode* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ b(read_barrier_slow_path->GetEntryLabel(), CS); + + // Fast-path copy. + // Compute the base destination address in `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + Label loop; + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); + + __ Bind(read_barrier_slow_path->GetExitLabel()); } else { - __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift)); - __ AddConstant(temp2, offset); + // Non read barrier code. + // Compute the base source address in `temp1`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); + // Compute the base destination address in `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); + // Compute the end source address in `temp3`. + GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + Label loop; + __ Bind(&loop); + __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); + __ str(IP, Address(temp2, element_size, Address::PostIndex)); + __ cmp(temp1, ShifterOperand(temp3)); + __ b(&loop, NE); } - - // Iterate over the arrays and do a raw copy of the objects. 
We don't need to - // poison/unpoison. - Label loop, done; - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&done, EQ); - __ Bind(&loop); - __ ldr(IP, Address(temp1, element_size, Address::PostIndex)); - __ str(IP, Address(temp2, element_size, Address::PostIndex)); - __ cmp(temp1, ShifterOperand(temp3)); - __ b(&loop, NE); __ Bind(&done); } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, - temp2, - dest, - Register(kNoRegister), - /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } @@ -2474,13 +2505,14 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); Label done, compressed_string_loop; + Label* final_label = codegen_->GetFinalLabel(invoke, &done); // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); // Early out for valid zero-length retrievals. - __ b(&done, EQ); + __ b(final_label, EQ); // src range to copy. __ add(src_ptr, srcObj, ShifterOperand(value_offset)); @@ -2517,7 +2549,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&loop, GE); __ adds(num_chr, num_chr, ShifterOperand(4)); - __ b(&done, EQ); + __ b(final_label, EQ); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. @@ -2528,7 +2560,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&remainder, GT); if (mirror::kUseStringCompression) { - __ b(&done); + __ b(final_label); const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); @@ -2542,7 +2574,9 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&compressed_string_loop, GT); } - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 807d6cf54f..423fd3c6ae 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -198,6 +198,8 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0); DCHECK_NE(tmp_.reg(), IP0); DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg(); + // TODO: Load the entrypoint once before the loop, instead of + // loading it at every iteration. int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); // This runtime call does not require a stack map. @@ -2191,8 +2193,9 @@ static void CheckSystemArrayCopyPosition(MacroAssembler* masm, } } -// Compute base source address, base destination address, and end source address -// for System.arraycopy* intrinsics. +// Compute base source address, base destination address, and end +// source address for System.arraycopy* intrinsics in `src_base`, +// `dst_base` and `src_end` respectively. 
static void GenSystemArrayCopyAddresses(MacroAssembler* masm, Primitive::Type type, const Register& src, @@ -2203,12 +2206,13 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm, const Register& src_base, const Register& dst_base, const Register& src_end) { + // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics. DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar) << "Unexpected element type: " << type; const int32_t element_size = Primitive::ComponentSize(type); const int32_t element_size_shift = Primitive::ComponentSizeShift(type); + const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); - uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (src_pos.IsConstant()) { int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); __ Add(src_base, src, element_size * constant + data_offset); @@ -2712,111 +2716,131 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); } - Register src_curr_addr = temp1.X(); - Register dst_curr_addr = temp2.X(); - Register src_stop_addr = temp3.X(); - - GenSystemArrayCopyAddresses(masm, - Primitive::kPrimNot, - src, - src_pos, - dest, - dest_pos, - length, - src_curr_addr, - dst_curr_addr, - src_stop_addr); - - const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // TODO: Also convert this intrinsic to the IsGcMarking strategy? - - // SystemArrayCopy implementation for Baker read barriers (see - // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): - // - // if (src_ptr != end_ptr) { - // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // // Slow-path copy. - // do { - // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); - // } while (src_ptr != end_ptr) - // } else { - // // Fast-path copy. - // do { - // *dest_ptr++ = *src_ptr++; - // } while (src_ptr != end_ptr) - // } - // } - - vixl::aarch64::Label loop, done; - - // Don't enter copy loop if `length == 0`. - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&done, eq); - - // Make sure `tmp` is not IP0, as it is clobbered by - // ReadBarrierMarkRegX entry points in - // ReadBarrierSystemArrayCopySlowPathARM64. - temps.Exclude(ip0); - Register tmp = temps.AcquireW(); - DCHECK_NE(LocationFrom(tmp).reg(), IP0); - - // /* int32_t */ monitor = src->monitor_ - __ Ldr(tmp, HeapOperand(src.W(), monitor_offset)); - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including rb_state, - // to prevent load-load reordering, and without using - // a memory barrier (which would be more expensive). - // `src` is unchanged by this operation, but its value now depends - // on `tmp`. - __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32)); - - // Slow path used to copy array when `src` is gray. - SlowPathCodeARM64* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp)); - codegen_->AddSlowPath(read_barrier_slow_path); - - // Given the numeric representation, it's enough to check the low bit of the rb_state. 
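
The gray-object check above works because the read-barrier state occupies fixed bits of the lock word, with white encoded as 0 and gray as 1, so inspecting a single bit is enough. A host-side sketch of that test; the shift value below is an assumption standing in for LockWord::kReadBarrierStateShift:

#include <cassert>
#include <cstdint>

// Illustrative layout only: assume the read-barrier state sits at bit 28 of
// the 32-bit lock word. The real position is defined by art::LockWord.
constexpr uint32_t kReadBarrierStateShift = 28;
constexpr uint32_t kWhiteState = 0;
constexpr uint32_t kGrayState = 1;

bool IsGray(uint32_t lock_word) {
  // White == 0 and gray == 1, so the low bit of the rb_state decides.
  return ((lock_word >> kReadBarrierStateShift) & 1u) == kGrayState;
}

int main() {
  static_assert(kWhiteState == 0, "white must be 0 for the single-bit test");
  static_assert(kGrayState == 1, "gray must be 1 for the single-bit test");
  uint32_t white_word = 0;
  uint32_t gray_word = kGrayState << kReadBarrierStateShift;
  assert(!IsGray(white_word));
  assert(IsGray(gray_word));
  return 0;
}
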
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); - - // Fast-path copy. - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - __ Bind(&loop); - __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); - __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&loop, ne); - - __ Bind(read_barrier_slow_path->GetExitLabel()); - __ Bind(&done); + if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) { + // Null constant length: not need to emit the loop code at all. } else { - // Non read barrier code. - - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - vixl::aarch64::Label loop, done; - __ Bind(&loop); - __ Cmp(src_curr_addr, src_stop_addr); - __ B(&done, eq); - { + Register src_curr_addr = temp1.X(); + Register dst_curr_addr = temp2.X(); + Register src_stop_addr = temp3.X(); + vixl::aarch64::Label done; + const Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); + + if (length.IsRegister()) { + // Don't enter the copy loop if the length is null. + __ Cbz(WRegisterFrom(length), &done); + } + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // TODO: Also convert this intrinsic to the IsGcMarking strategy? + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + + // Make sure `tmp` is not IP0, as it is clobbered by + // ReadBarrierMarkRegX entry points in + // ReadBarrierSystemArrayCopySlowPathARM64. + temps.Exclude(ip0); Register tmp = temps.AcquireW(); + DCHECK_NE(LocationFrom(tmp).reg(), IP0); + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(tmp, HeapOperand(src.W(), monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `tmp`. + __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32)); + + // Compute base source address, base destination address, and end + // source address for System.arraycopy* intrinsics in `src_base`, + // `dst_base` and `src_end` respectively. + // Note that `src_curr_addr` is computed from from `src` (and + // `src_pos`) here, and thus honors the artificial dependency + // of `src` on `tmp`. 
+ GenSystemArrayCopyAddresses(masm, + type, + src, + src_pos, + dest, + dest_pos, + length, + src_curr_addr, + dst_curr_addr, + src_stop_addr); + + // Slow path used to copy array when `src` is gray. + SlowPathCodeARM64* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp)); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl::aarch64::Label loop; + __ Bind(&loop); __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&loop, ne); + + __ Bind(read_barrier_slow_path->GetExitLabel()); + } else { + // Non read barrier code. + // Compute base source address, base destination address, and end + // source address for System.arraycopy* intrinsics in `src_base`, + // `dst_base` and `src_end` respectively. + GenSystemArrayCopyAddresses(masm, + type, + src, + src_pos, + dest, + dest_pos, + length, + src_curr_addr, + dst_curr_addr, + src_stop_addr); + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl::aarch64::Label loop; + __ Bind(&loop); + { + Register tmp = temps.AcquireW(); + __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); + __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); + } + __ Cmp(src_curr_addr, src_stop_addr); + __ B(&loop, ne); } - __ B(&loop); __ Bind(&done); } } + // We only need one card marking on the destination array. codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 60bcf2cfd5..b25bad7170 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -117,6 +117,50 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL); }; +// Compute base address for the System.arraycopy intrinsic in `base`. +static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler, + Primitive::Type type, + const vixl32::Register& array, + const Location& pos, + const vixl32::Register& base) { + // This routine is only used by the SystemArrayCopy intrinsic at the + // moment. We can allow Primitive::kPrimNot as `type` to implement + // the SystemArrayCopyChar intrinsic. + DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const uint32_t element_size_shift = Primitive::ComponentSizeShift(type); + const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + if (pos.IsConstant()) { + int32_t constant = Int32ConstantFrom(pos); + __ Add(base, array, element_size * constant + data_offset); + } else { + __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift)); + __ Add(base, base, data_offset); + } +} + +// Compute end address for the System.arraycopy intrinsic in `end`. 
+static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler, + Primitive::Type type, + const Location& copy_length, + const vixl32::Register& base, + const vixl32::Register& end) { + // This routine is only used by the SystemArrayCopy intrinsic at the + // moment. We can allow Primitive::kPrimNot as `type` to implement + // the SystemArrayCopyChar intrinsic. + DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const uint32_t element_size_shift = Primitive::ComponentSizeShift(type); + + if (copy_length.IsConstant()) { + int32_t constant = Int32ConstantFrom(copy_length); + __ Add(end, base, element_size * constant); + } else { + __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift)); + } +} + // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { public: @@ -137,9 +181,8 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); - uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); + Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); vixl32::Register dest = InputRegisterAt(instruction_, 2); Location dest_pos = locations->InAt(3); @@ -150,15 +193,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { __ Bind(GetEntryLabel()); // Compute the base destination address in `dst_curr_addr`. - if (dest_pos.IsConstant()) { - int32_t constant = Int32ConstantFrom(dest_pos); - __ Add(dst_curr_addr, dest, element_size * constant + offset); - } else { - __ Add(dst_curr_addr, - dest, - Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); - __ Add(dst_curr_addr, dst_curr_addr, offset); - } + GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr); vixl32::Label loop; __ Bind(&loop); @@ -182,6 +217,8 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(!src_stop_addr.Is(ip)); DCHECK(!tmp.Is(ip)); DCHECK(tmp.IsRegister()) << tmp; + // TODO: Load the entrypoint once before the loop, instead of + // loading it at every iteration. int32_t entry_point_offset = CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode()); // This runtime call does not require a stack map. 
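
The two helpers added above (and their ARM, x86 and x86-64 counterparts elsewhere in this patch) all encode the same arithmetic: the first element to copy lives at array + data_offset + pos * element_size, and copying stops at base + length * element_size. A self-contained sketch of that computation; the element size and data offset are illustrative stand-ins for Primitive::ComponentSize() and mirror::Array::DataOffset():

#include <cassert>
#include <cstdint>

// Illustrative values for a 32-bit object array; the real ones come from
// Primitive::ComponentSize() and mirror::Array::DataOffset().
constexpr int32_t kElementSize = 4;
constexpr uint32_t kDataOffset = 12;

uint32_t BaseAddress(uint32_t array, int32_t pos) {
  return array + kDataOffset + kElementSize * pos;
}

uint32_t EndAddress(uint32_t base, int32_t length) {
  return base + kElementSize * length;
}

int main() {
  uint32_t src = 0x1000;
  uint32_t src_base = BaseAddress(src, /* src_pos */ 2);     // e.g. temp1
  uint32_t src_end  = EndAddress(src_base, /* length */ 5);  // e.g. temp3
  assert(src_base == 0x1000 + 12 + 8);
  assert(src_end == src_base + 20);
  return 0;
}
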
@@ -296,9 +333,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void GenNumberOfLeadingZeros(LocationSummary* locations, +static void GenNumberOfLeadingZeros(HInvoke* invoke, Primitive::Type type, - ArmVIXLAssembler* assembler) { + CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); vixl32::Register out = RegisterFrom(locations->Out()); @@ -308,11 +347,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, vixl32::Register in_reg_lo = LowRegisterFrom(in); vixl32::Register in_reg_hi = HighRegisterFrom(in); vixl32::Label end; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false); __ Clz(out, in_reg_lo); __ Add(out, out, 32); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } else { __ Clz(out, RegisterFrom(in)); } @@ -323,7 +365,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -335,27 +377,32 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); } -static void GenNumberOfTrailingZeros(LocationSummary* locations, +static void GenNumberOfTrailingZeros(HInvoke* invoke, Primitive::Type type, - ArmVIXLAssembler* assembler) { + CodeGeneratorARMVIXL* codegen) { DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); vixl32::Register out = RegisterFrom(locations->Out()); if (type == Primitive::kPrimLong) { vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); vixl32::Label end; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Rbit(out, in_reg_lo); __ Clz(out, out); - __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false); __ Rbit(out, in_reg_hi); __ Clz(out, out); __ Add(out, out, 32); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } else { vixl32::Register in = RegisterFrom(locations->InAt(0)); __ Rbit(out, in); @@ -372,7 +419,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -384,7 +431,7 @@ 
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); } static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { @@ -465,7 +512,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { +static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); Location op1_loc = invoke->GetLocations()->InAt(0); Location op2_loc = invoke->GetLocations()->InAt(1); Location out_loc = invoke->GetLocations()->Out(); @@ -483,6 +531,7 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem const vixl32::Register temp1 = temps.Acquire(); vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); vixl32::Label nan, done; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); DCHECK(op1.Is(out)); @@ -499,7 +548,8 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem __ it(cond); __ vmov(cond, F32, out, op2); } - __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation. + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). __ Vmov(temp1, op1); @@ -510,14 +560,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem __ And(temp1, temp1, temp2); } __ Vmov(out, temp1); - __ B(&done); + __ B(final_label); // handle NaN input. __ Bind(&nan); __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
__ Vmov(out, temp1); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -535,7 +587,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler()); + GenMinMaxFloat(invoke, /* is_min */ true, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -544,10 +596,11 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler()); + GenMinMaxFloat(invoke, /* is_min */ false, codegen_); } -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { +static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); Location op1_loc = invoke->GetLocations()->InAt(0); Location op2_loc = invoke->GetLocations()->InAt(1); Location out_loc = invoke->GetLocations()->Out(); @@ -562,6 +615,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse vixl32::DRegister op2 = DRegisterFrom(op2_loc); vixl32::DRegister out = OutputDRegister(invoke); vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); DCHECK(op1.Is(out)); @@ -578,19 +632,22 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse __ it(cond); __ vmov(cond, F64, out, op2); } - __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation. + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); // handle op1 == op2, max(+0.0,-0.0). if (!is_min) { __ Vand(F64, out, op1, op2); - __ B(&done); + __ B(final_label); } // handle op1 == op2, min(+0.0,-0.0), NaN input. __ Bind(&handle_nan_eq); __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -598,7 +655,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler()); + GenMinMaxDouble(invoke, /* is_min */ true , codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -606,7 +663,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler()); + GenMinMaxDouble(invoke, /* is_min */ false, codegen_); } static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { @@ -1633,6 +1690,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { vixl32::Label end; vixl32::Label return_true; vixl32::Label return_false; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end); // Get offsets of count, value, and class fields within a string object. 
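
Back in the GenMinMaxFloat/GenMinMaxDouble changes above, the equal-operands path ORs the raw bit patterns for min and ANDs them for max. That handles the signed-zero corner case: +0.0 and -0.0 compare equal, yet Math.min must return -0.0 and Math.max must return +0.0, and only the sign bit differs between the two encodings. A host-side illustration:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t Bits(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));
  return u;
}

static float FromBits(uint32_t u) {
  float f;
  std::memcpy(&f, &u, sizeof(f));
  return f;
}

int main() {
  float pos_zero = 0.0f;
  float neg_zero = -0.0f;
  assert(pos_zero == neg_zero);  // bitwise different, but compare equal

  float min_result = FromBits(Bits(pos_zero) | Bits(neg_zero));  // OR for min
  float max_result = FromBits(Bits(pos_zero) & Bits(neg_zero));  // AND for max

  assert(std::signbit(min_result));   // min(+0.0, -0.0) == -0.0
  assert(!std::signbit(max_result));  // max(+0.0, -0.0) == +0.0
  return 0;
}
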
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1709,12 +1767,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ Mov(out, 1); - __ B(&end); + __ B(final_label); // Return false and exit the function. __ Bind(&return_false); __ Mov(out, 0); - __ Bind(&end); + + if (end.IsReferenced()) { + __ Bind(&end); + } } static void GenerateVisitStringIndexOf(HInvoke* invoke, @@ -2243,143 +2304,116 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel()); } - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot); - uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); - - // Compute the base source address in `temp1`. - if (src_pos.IsConstant()) { - int32_t constant = Int32ConstantFrom(src_pos); - __ Add(temp1, src, element_size * constant + offset); + if (length.IsConstant() && Int32ConstantFrom(length) == 0) { + // Null constant length: not need to emit the loop code at all. } else { - __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift)); - __ Add(temp1, temp1, offset); - } + vixl32::Label done; + const Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); - // Compute the end source address in `temp3`. - if (length.IsConstant()) { - int32_t constant = Int32ConstantFrom(length); - __ Add(temp3, temp1, element_size * constant); - } else { - __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift)); - } - - if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - // TODO: Also convert this intrinsic to the IsGcMarking strategy? - - // The base destination address is computed later, as `temp2` is - // used for intermediate computations. - - // SystemArrayCopy implementation for Baker read barriers (see - // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): - // - // if (src_ptr != end_ptr) { - // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); - // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // bool is_gray = (rb_state == ReadBarrier::GrayState()); - // if (is_gray) { - // // Slow-path copy. - // do { - // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); - // } while (src_ptr != end_ptr) - // } else { - // // Fast-path copy. - // do { - // *dest_ptr++ = *src_ptr++; - // } while (src_ptr != end_ptr) - // } - // } - - vixl32::Label loop, done; - - // Don't enter copy loop if `length == 0`. - __ Cmp(temp1, temp3); - __ B(eq, &done, /* far_target */ false); - - // /* int32_t */ monitor = src->monitor_ - __ Ldr(temp2, MemOperand(src, monitor_offset)); - // /* LockWord */ lock_word = LockWord(monitor) - static_assert(sizeof(LockWord) == sizeof(int32_t), - "art::LockWord and int32_t have different sizes."); - - // Introduce a dependency on the lock_word including the rb_state, - // which shall prevent load-load reordering without using - // a memory barrier (which would be more expensive). - // `src` is unchanged by this operation, but its value now depends - // on `temp2`. - __ Add(src, src, Operand(temp2, vixl32::LSR, 32)); - - // Slow path used to copy array when `src` is gray. 
- SlowPathCodeARMVIXL* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); - codegen_->AddSlowPath(read_barrier_slow_path); - - // Given the numeric representation, it's enough to check the low bit of the - // rb_state. We do that by shifting the bit out of the lock word with LSRS - // which can be a 16-bit instruction unlike the TST immediate. - static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); - static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); - __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); - // Carry flag is the last bit shifted out by LSRS. - __ B(cs, read_barrier_slow_path->GetEntryLabel()); - - // Fast-path copy. - - // Compute the base destination address in `temp2`. - if (dest_pos.IsConstant()) { - int32_t constant = Int32ConstantFrom(dest_pos); - __ Add(temp2, dest, element_size * constant + offset); - } else { - __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); - __ Add(temp2, temp2, offset); - } - - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - __ Bind(&loop); - - { - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp_reg = temps.Acquire(); - - __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); - __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + if (length.IsRegister()) { + // Don't enter the copy loop if the length is null. + __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false); } - __ Cmp(temp1, temp3); - __ B(ne, &loop, /* far_target */ false); - - __ Bind(read_barrier_slow_path->GetExitLabel()); - __ Bind(&done); - } else { - // Non read barrier code. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // TODO: Also convert this intrinsic to the IsGcMarking strategy? + + // SystemArrayCopy implementation for Baker read barriers (see + // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier): + // + // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // // Slow-path copy. + // do { + // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); + // } while (src_ptr != end_ptr) + // } else { + // // Fast-path copy. + // do { + // *dest_ptr++ = *src_ptr++; + // } while (src_ptr != end_ptr) + // } + + // /* int32_t */ monitor = src->monitor_ + __ Ldr(temp2, MemOperand(src, monitor_offset)); + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + // Introduce a dependency on the lock_word including the rb_state, + // which shall prevent load-load reordering without using + // a memory barrier (which would be more expensive). + // `src` is unchanged by this operation, but its value now depends + // on `temp2`. + __ Add(src, src, Operand(temp2, vixl32::LSR, 32)); + + // Compute the base source address in `temp1`. + // Note that `temp1` (the base source address) is computed from + // `src` (and `src_pos`) here, and thus honors the artificial + // dependency of `src` on `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); + // Compute the end source address in `temp3`. 
+ GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); + // The base destination address is computed later, as `temp2` is + // used for intermediate computations. + + // Slow path used to copy array when `src` is gray. + // Note that the base destination address is computed in `temp2` + // by the slow path code. + SlowPathCodeARMVIXL* read_barrier_slow_path = + new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); + codegen_->AddSlowPath(read_barrier_slow_path); + + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit out of the lock word with LSRS + // which can be a 16-bit instruction unlike the TST immediate. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1); + // Carry flag is the last bit shifted out by LSRS. + __ B(cs, read_barrier_slow_path->GetEntryLabel()); + + // Fast-path copy. + // Compute the base destination address in `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. + vixl32::Label loop; + __ Bind(&loop); + { + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); + __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + } + __ Cmp(temp1, temp3); + __ B(ne, &loop, /* far_target */ false); - // Compute the base destination address in `temp2`. - if (dest_pos.IsConstant()) { - int32_t constant = Int32ConstantFrom(dest_pos); - __ Add(temp2, dest, element_size * constant + offset); + __ Bind(read_barrier_slow_path->GetExitLabel()); } else { - __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift)); - __ Add(temp2, temp2, offset); - } - - // Iterate over the arrays and do a raw copy of the objects. We don't need to - // poison/unpoison. - vixl32::Label loop, done; - __ Cmp(temp1, temp3); - __ B(eq, &done, /* far_target */ false); - __ Bind(&loop); - - { - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - const vixl32::Register temp_reg = temps.Acquire(); - - __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); - __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + // Non read barrier code. + // Compute the base source address in `temp1`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); + // Compute the base destination address in `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); + // Compute the end source address in `temp3`. + GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); + // Iterate over the arrays and do a raw copy of the objects. We don't need to + // poison/unpoison. 
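
The copy loops above and below acquire their temporary through a UseScratchRegisterScope, which hands out a free scratch register and gives it back automatically when the scope closes. A rough model of that RAII pattern (the types here are invented for the sketch, not VIXL's API):

#include <bitset>
#include <cassert>
#include <vector>

class ScratchPool {
 public:
  int Acquire() {
    for (int reg = 0; reg < kNumRegs; ++reg) {
      if (!in_use_[reg]) {
        in_use_[reg] = true;
        return reg;
      }
    }
    assert(false && "no scratch register available");
    return -1;
  }
  void Release(int reg) { in_use_[reg] = false; }
  bool InUse(int reg) const { return in_use_[reg]; }

 private:
  static constexpr int kNumRegs = 2;
  std::bitset<kNumRegs> in_use_;
};

class UseScratchRegisterScope {
 public:
  explicit UseScratchRegisterScope(ScratchPool* pool) : pool_(pool) {}
  ~UseScratchRegisterScope() {
    for (int reg : acquired_) pool_->Release(reg);
  }
  int Acquire() {
    int reg = pool_->Acquire();
    acquired_.push_back(reg);
    return reg;
  }

 private:
  ScratchPool* pool_;
  std::vector<int> acquired_;
};

int main() {
  ScratchPool pool;
  {
    UseScratchRegisterScope temps(&pool);
    int temp_reg = temps.Acquire();  // holds one element during the load/store pair
    assert(pool.InUse(temp_reg));
  }                                  // scope ends: the register is free again
  assert(!pool.InUse(0));
  return 0;
}
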
+ vixl32::Label loop; + __ Bind(&loop); + { + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp_reg = temps.Acquire(); + __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex)); + __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex)); + } + __ Cmp(temp1, temp3); + __ B(ne, &loop, /* far_target */ false); } - - __ Cmp(temp1, temp3); - __ B(ne, &loop, /* far_target */ false); __ Bind(&done); } @@ -2779,13 +2813,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2)); vixl32::Label done, compressed_string_loop; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); // dst to be copied. __ Add(dst_ptr, dstObj, data_offset); __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1)); __ Subs(num_chr, srcEnd, srcBegin); // Early out for valid zero-length retrievals. - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // src range to copy. __ Add(src_ptr, srcObj, value_offset); @@ -2829,7 +2864,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ B(ge, &loop, /* far_target */ false); __ Adds(num_chr, num_chr, 4); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. @@ -2842,7 +2877,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ B(gt, &remainder, /* far_target */ false); if (mirror::kUseStringCompression) { - __ B(&done); + __ B(final_label); const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); @@ -2858,7 +2893,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) __ B(gt, &compressed_string_loop, /* far_target */ false); } - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index ba006edfa2..bf85b1989e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2559,7 +2559,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -2567,17 +2567,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // We will call memcpy() to do the actual work. Allocate the temporary - // registers to use the correct input registers, and output register. - // memcpy() uses the normal MIPS calling convention. 
- InvokeRuntimeCallingConvention calling_convention; - - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); - locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>())); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -2596,16 +2588,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dstBegin = locations->InAt(4).AsRegister<Register>(); Register dstPtr = locations->GetTemp(0).AsRegister<Register>(); - DCHECK_EQ(dstPtr, A0); Register srcPtr = locations->GetTemp(1).AsRegister<Register>(); - DCHECK_EQ(srcPtr, A1); Register numChrs = locations->GetTemp(2).AsRegister<Register>(); - DCHECK_EQ(numChrs, A2); - - Register dstReturn = locations->GetTemp(3).AsRegister<Register>(); - DCHECK_EQ(dstReturn, V0); MipsLabel done; + MipsLabel loop; // Location of data in char array buffer. const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -2634,7 +2621,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset); __ Sll(TMP, TMP, 31); - // If string is uncompressed, use memcpy() path. + // If string is uncompressed, use uncompressed path. __ Bnez(TMP, &uncompressed_copy); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. @@ -2660,10 +2647,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Addu(srcPtr, srcPtr, AT); } - // Calculate number of bytes to copy from number of characters. - __ Sll(numChrs, numChrs, char_shift); - - codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr); + __ Bind(&loop); + __ Lh(AT, srcPtr, 0); + __ Addiu(numChrs, numChrs, -1); + __ Addiu(srcPtr, srcPtr, char_size); + __ Sh(AT, dstPtr, 0); + __ Addiu(dstPtr, dstPtr, char_size); + __ Bnez(numChrs, &loop); __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 21c5074a1c..1ee89cf127 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1895,7 +1895,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, + LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1903,17 +1903,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - // We will call memcpy() to do the actual work. Allocate the temporary - // registers to use the correct input registers, and output register. - // memcpy() uses the normal MIPS calling conventions. 
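
The MIPS hunk above (and the analogous MIPS64 hunk below) drops the kQuickMemcpy runtime call in favour of a short inline loop that moves one 16-bit Java char per iteration. Ignoring register assignments and the compressed-string path, the emitted loop behaves like this host C++:

#include <cassert>
#include <cstdint>

// Copy `num_chrs` UTF-16 code units from `src_ptr` to `dst_ptr`, one 16-bit
// unit per iteration. The intrinsic only falls into this loop when
// num_chrs > 0 (a zero count takes the early exit to `done`).
void CopyChars(const uint16_t* src_ptr, uint16_t* dst_ptr, int32_t num_chrs) {
  do {
    *dst_ptr++ = *src_ptr++;
    --num_chrs;
  } while (num_chrs != 0);
}

int main() {
  const uint16_t src[] = {0x61, 0x62, 0x63, 0x64};  // "abcd"
  uint16_t dst[4] = {};
  CopyChars(src, dst, 4);
  assert(dst[0] == 0x61 && dst[3] == 0x64);
  return 0;
}
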
- InvokeRuntimeCallingConvention calling_convention; - - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong); - locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); } void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { @@ -1932,16 +1924,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>(); GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>(); - DCHECK_EQ(dstPtr, A0); GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>(); - DCHECK_EQ(srcPtr, A1); GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>(); - DCHECK_EQ(numChrs, A2); - - GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>(); - DCHECK_EQ(dstReturn, V0); Mips64Label done; + Mips64Label loop; // Location of data in char array buffer. const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -1965,7 +1952,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset); __ Dext(TMP, TMP, 0, 1); - // If string is uncompressed, use memcpy() path. + // If string is uncompressed, use uncompressed path. __ Bnezc(TMP, &uncompressed_copy); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. @@ -1986,10 +1973,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Daddiu(srcPtr, srcObj, value_offset); __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift); - // Calculate number of bytes to copy from number of characters. - __ Dsll(numChrs, numChrs, char_shift); - - codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr); + __ Bind(&loop); + __ Lh(AT, srcPtr, 0); + __ Daddiu(numChrs, numChrs, -1); + __ Daddiu(srcPtr, srcPtr, char_size); + __ Sh(AT, dstPtr, 0); + __ Daddiu(dstPtr, dstPtr, char_size); + __ Bnezc(numChrs, &loop); __ Bind(&done); } diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index a671788ff5..ecf919bceb 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2878,6 +2878,49 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) return instruction->InputAt(input0) == instruction->InputAt(input1); } +// Compute base address for the System.arraycopy intrinsic in `base`. +static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler, + Primitive::Type type, + const Register& array, + const Location& pos, + const Register& base) { + // This routine is only used by the SystemArrayCopy intrinsic at the + // moment. We can allow Primitive::kPrimNot as `type` to implement + // the SystemArrayCopyChar intrinsic. 
+ DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type)); + const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + if (pos.IsConstant()) { + int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(base, Address(array, element_size * constant + data_offset)); + } else { + __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset)); + } +} + +// Compute end source address for the System.arraycopy intrinsic in `end`. +static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, + Primitive::Type type, + const Location& copy_length, + const Register& base, + const Register& end) { + // This routine is only used by the SystemArrayCopy intrinsic at the + // moment. We can allow Primitive::kPrimNot as `type` to implement + // the SystemArrayCopyChar intrinsic. + DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type)); + + if (copy_length.IsConstant()) { + int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(end, Address(base, element_size * constant)); + } else { + __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0)); + } +} + void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. @@ -3182,16 +3225,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } + const Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); + // Compute the base source address in `temp1`. - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - DCHECK_EQ(element_size, 4); - uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); - if (src_pos.IsConstant()) { - int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp1, Address(src, element_size * constant + offset)); - } else { - __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); - } + GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // If it is needed (in the case of the fast-path loop), the base @@ -3199,20 +3237,15 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // intermediate computations. // Compute the end source address in `temp3`. 
- if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp3, Address(temp1, element_size * constant)); - } else { - if (length.IsStackSlot()) { - // Location `length` is again pointing at a stack slot, as - // register `temp3` (which was containing the length parameter - // earlier) has been overwritten; restore it now - DCHECK(length.Equals(length_arg)); - __ movl(temp3, Address(ESP, length.GetStackIndex())); - length = Location::RegisterLocation(temp3); - } - __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); + if (length.IsStackSlot()) { + // Location `length` is again pointing at a stack slot, as + // register `temp3` (which was containing the length parameter + // earlier) has been overwritten; restore it now + DCHECK(length.Equals(length_arg)); + __ movl(temp3, Address(ESP, length.GetStackIndex())); + length = Location::RegisterLocation(temp3); } + GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); // SystemArrayCopy implementation for Baker read barriers (see // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier): @@ -3266,15 +3299,8 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotZero, read_barrier_slow_path->GetEntryLabel()); // Fast-path copy. - - // Set the base destination address in `temp2`. - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp2, Address(dest, element_size * constant + offset)); - } else { - __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); - } - + // Compute the base destination address in `temp2`. + GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); // Iterate over the arrays and do a raw copy of the objects. We don't need to // poison/unpoison. __ Bind(&loop); @@ -3291,23 +3317,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ Bind(&done); } else { // Non read barrier code. - // Compute the base destination address in `temp2`. - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp2, Address(dest, element_size * constant + offset)); - } else { - __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset)); - } - + GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2); // Compute the end source address in `temp3`. - if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp3, Address(temp1, element_size * constant)); - } else { - __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0)); - } - + GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3); // Iterate over the arrays and do a raw copy of the objects. We don't need to // poison/unpoison. NearLabel loop, done; @@ -3326,11 +3339,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. 
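
A single card mark for the destination array is enough because card marking is conservative: dirtying the destination's card makes the GC re-examine it, regardless of how many elements the copy wrote. A rough sketch of what MarkGCCard amounts to; the card shift, dirty value, and zero bias are assumptions for illustration, not ART's exact constants:

#include <cassert>
#include <cstdint>

// Assumed card-table parameters, for illustration only.
constexpr uintptr_t kCardShift = 7;   // one card per 128 bytes (assumed)
constexpr uint8_t kCardDirty = 0x70;  // "dirty" marker value (assumed)

// `biased_card_table` is assumed to be pre-biased so that indexing by
// (address >> kCardShift) lands on the right byte.
void MarkGCCard(uint8_t* biased_card_table, uintptr_t dest_object_address) {
  biased_card_table[dest_object_address >> kCardShift] = kCardDirty;
}

int main() {
  uint8_t cards[16] = {};
  // Pretend the heap starts at address 0 so the bias is zero in this sketch.
  uintptr_t dest = 3 * (1u << kCardShift) + 8;  // somewhere inside card #3
  MarkGCCard(cards, dest);
  assert(cards[3] == kCardDirty);
  assert(cards[2] == 0);  // only the destination's card is dirtied
  return 0;
}
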
- codegen_->MarkGCCard(temp1, - temp2, - dest, - Register(kNoRegister), - /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 1e17c6ebc5..13956dfb8e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -1118,6 +1118,47 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); } +// Compute base source address, base destination address, and end +// source address for the System.arraycopy intrinsic in `src_base`, +// `dst_base` and `src_end` respectively. +static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler, + Primitive::Type type, + const CpuRegister& src, + const Location& src_pos, + const CpuRegister& dst, + const Location& dst_pos, + const Location& copy_length, + const CpuRegister& src_base, + const CpuRegister& dst_base, + const CpuRegister& src_end) { + // This routine is only used by the SystemArrayCopy intrinsic. + DCHECK_EQ(type, Primitive::kPrimNot); + const int32_t element_size = Primitive::ComponentSize(type); + const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type)); + const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); + + if (src_pos.IsConstant()) { + int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(src_base, Address(src, element_size * constant + data_offset)); + } else { + __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset)); + } + + if (dst_pos.IsConstant()) { + int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue(); + __ leal(dst_base, Address(dst, element_size * constant + data_offset)); + } else { + __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset)); + } + + if (copy_length.IsConstant()) { + int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); + __ leal(src_end, Address(src_base, element_size * constant)); + } else { + __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0)); + } +} + void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // The only read barrier implementation supporting the // SystemArrayCopy intrinsic is the Baker-style read barriers. @@ -1366,30 +1407,13 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - // Compute base source address, base destination address, and end source address. 
+ const Primitive::Type type = Primitive::kPrimNot; + const int32_t element_size = Primitive::ComponentSize(type); - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); - uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); - if (src_pos.IsConstant()) { - int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp1, Address(src, element_size * constant + offset)); - } else { - __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset)); - } - - if (dest_pos.IsConstant()) { - int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp2, Address(dest, element_size * constant + offset)); - } else { - __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset)); - } - - if (length.IsConstant()) { - int32_t constant = length.GetConstant()->AsIntConstant()->GetValue(); - __ leal(temp3, Address(temp1, element_size * constant)); - } else { - __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0)); - } + // Compute base source address, base destination address, and end + // source address in `temp1`, `temp2` and `temp3` respectively. + GenSystemArrayCopyAddresses( + GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3); if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // SystemArrayCopy implementation for Baker read barriers (see @@ -1474,11 +1498,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } // We only need one card marking on the destination array. - codegen_->MarkGCCard(temp1, - temp2, - dest, - CpuRegister(kNoRegister), - /* value_can_be_null */ false); + codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false); __ Bind(intrinsic_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 091b58a63d..d391f6913c 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -417,6 +417,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs); class RegisterSet : public ValueObject { public: static RegisterSet Empty() { return RegisterSet(); } + static RegisterSet AllFpu() { return RegisterSet(0, -1); } void Add(Location loc) { if (loc.IsRegister()) { @@ -462,6 +463,7 @@ class RegisterSet : public ValueObject { private: RegisterSet() : core_registers_(0), floating_point_registers_(0) {} + RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {} uint32_t core_registers_; uint32_t floating_point_registers_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 020e4463d4..ec706e6694 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasTryCatch()) { outer_graph->SetHasTryCatch(true); } + if (HasSIMD()) { + outer_graph->SetHasSIMD(true); + } HInstruction* return_value = nullptr; if (GetBlocks().size() == 3) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 542b218cf8..6881d8f6ae 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { temporaries_vreg_slots_(0), has_bounds_checks_(false), has_try_catch_(false), + has_simd_(false), has_loops_(false), has_irreducible_loops_(false), 
debuggable_(debuggable), @@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } + bool HasSIMD() const { return has_simd_; } + void SetHasSIMD(bool value) { has_simd_ = value; } + bool HasLoops() const { return has_loops_; } void SetHasLoops(bool value) { has_loops_ = value; } @@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // false positives. bool has_try_catch_; + // Flag whether SIMD instructions appear in the graph. If true, the + // code generators may have to be more careful spilling the wider + // contents of SIMD registers. + bool has_simd_; + // Flag whether there are any loops in the graph. We can skip loop // optimization if it's false. It's only best effort to keep it up // to date in the presence of code elimination so there might be false diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 8a9c1ccaff..0d33b49fdb 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -299,11 +299,13 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { // Currently, we spill unconditionnally the current method in the code generators. && !interval->GetDefinedBy()->IsCurrentMethod()) { // We spill eagerly, so move must be at definition. - InsertMoveAfter(interval->GetDefinedBy(), - interval->ToLocation(), - interval->NeedsTwoSpillSlots() - ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()) - : Location::StackSlot(interval->GetParent()->GetSpillSlot())); + Location loc; + switch (interval->NumberOfSpillSlotsNeeded()) { + case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break; + case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break; + default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE(); + } + InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc); } UsePosition* use = current->GetFirstUse(); EnvUsePosition* env_use = current->GetFirstEnvironmentUse(); @@ -459,9 +461,11 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval, location_source = defined_by->GetLocations()->Out(); } else { DCHECK(defined_by->IsCurrentMethod()); - location_source = parent->NeedsTwoSpillSlots() - ? Location::DoubleStackSlot(parent->GetSpillSlot()) - : Location::StackSlot(parent->GetSpillSlot()); + switch (parent->NumberOfSpillSlotsNeeded()) { + case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break; + case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break; + default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE(); + } } } else { DCHECK(source != nullptr); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 9064f865c3..87f709f63d 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); } else { interval->SetSpillSlot(catch_phi_spill_slot_counter_); - catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 
2 : 1; + catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); } } } @@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in bool is_interval_beginning; size_t position; std::tie(position, is_interval_beginning, parent_interval) = *it; - - bool needs_two_slots = parent_interval->NeedsTwoSpillSlots(); + size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); if (is_interval_beginning) { DCHECK(!parent_interval->HasSpillSlot()); DCHECK_EQ(position, parent_interval->GetStart()); - // Find a free stack slot. + // Find first available free stack slot(s). size_t slot = 0; - for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) { - // Skip taken slots. + for (; ; ++slot) { + bool found = true; + for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { + if (taken.IsBitSet(s)) { + found = false; + break; // failure + } + } + if (found) { + break; // success + } } + parent_interval->SetSpillSlot(slot); - *num_stack_slots_used = std::max(*num_stack_slots_used, - needs_two_slots ? slot + 1 : slot + 2); - if (needs_two_slots && *num_stack_slots_used % 2 != 0) { + *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); + if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { // The parallel move resolver requires that there be an even number of spill slots // allocated for pair value types. ++(*num_stack_slots_used); } - taken.SetBit(slot); - if (needs_two_slots) { - taken.SetBit(slot + 1); + for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { + taken.SetBit(s); } } else { DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); DCHECK(parent_interval->HasSpillSlot()); - // Free up the stack slot used by this interval. + // Free up the stack slot(s) used by this interval. size_t slot = parent_interval->GetSpillSlot(); - DCHECK(taken.IsBitSet(slot)); - DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1)); - taken.ClearBit(slot); - if (needs_two_slots) { - taken.ClearBit(slot + 1); + for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { + DCHECK(taken.IsBitSet(s)); + taken.ClearBit(s); } } } diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index 6354e76ec8..ab8d540359 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) { LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); } - // Find an available spill slot. + // Find first available spill slots. + size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded(); size_t slot = 0; for (size_t e = spill_slots->size(); slot < e; ++slot) { - if ((*spill_slots)[slot] <= parent->GetStart()) { - if (!parent->NeedsTwoSpillSlots()) { - // One spill slot is sufficient. - break; - } - if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) { - // Two spill slots are available. + bool found = true; + for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) { + if ((*spill_slots)[s] > parent->GetStart()) { + found = false; // failure break; } } + if (found) { + break; // success + } } + // Need new spill slots? 
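Before the resize step that follows: both allocators above now look for the first run of `number_of_spill_slots_needed` contiguous free slots instead of special-casing one or two. A standalone sketch of that first-fit scan, with hypothetical names in place of ART's bit vector and spill-slot vector:

#include <cstddef>
#include <vector>

// Returns the lowest index at which `needed` consecutive slots are all free.
// Slots past the end of `taken` are treated as free, mirroring the fact that
// both allocators can grow the slot pool when no existing run is available.
size_t FindFirstFreeRun(const std::vector<bool>& taken, size_t needed) {
  for (size_t slot = 0; ; ++slot) {
    bool found = true;
    for (size_t s = slot; s < slot + needed; ++s) {
      if (s < taken.size() && taken[s]) {
        found = false;
        break;
      }
    }
    if (found) {
      return slot;
    }
  }
}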
+ size_t upper = slot + number_of_spill_slots_needed; + if (upper > spill_slots->size()) { + spill_slots->resize(upper); + } + // Set slots to end. size_t end = interval->GetLastSibling()->GetEnd(); - if (parent->NeedsTwoSpillSlots()) { - if (slot + 2u > spill_slots->size()) { - // We need a new spill slot. - spill_slots->resize(slot + 2u, end); - } - (*spill_slots)[slot] = end; - (*spill_slots)[slot + 1] = end; - } else { - if (slot == spill_slots->size()) { - // We need a new spill slot. - spill_slots->push_back(end); - } else { - (*spill_slots)[slot] = end; - } + for (size_t s = slot; s < upper; s++) { + (*spill_slots)[s] = end; } // Note that the exact spill slot location will be computed when we resolve, @@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) { // TODO: Reuse spill slots when intervals of phis from different catch // blocks do not overlap. interval->SetSpillSlot(catch_phi_spill_slots_); - catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1; + catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded(); } } diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index e8e12e1a55..c0a045c33e 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -469,8 +469,8 @@ bool LiveInterval::SameRegisterKind(Location other) const { } } -bool LiveInterval::NeedsTwoSpillSlots() const { - return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble; +size_t LiveInterval::NumberOfSpillSlotsNeeded() const { + return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1; } Location LiveInterval::ToLocation() const { @@ -494,10 +494,10 @@ Location LiveInterval::ToLocation() const { if (defined_by->IsConstant()) { return defined_by->GetLocations()->Out(); } else if (GetParent()->HasSpillSlot()) { - if (NeedsTwoSpillSlots()) { - return Location::DoubleStackSlot(GetParent()->GetSpillSlot()); - } else { - return Location::StackSlot(GetParent()->GetSpillSlot()); + switch (NumberOfSpillSlotsNeeded()) { + case 1: return Location::StackSlot(GetParent()->GetSpillSlot()); + case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot()); + default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE(); } } else { return Location(); diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index 340d0ccefe..e9dffc1fac 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Returns kNoRegister otherwise. int FindHintAtDefinition() const; - // Returns whether the interval needs two (Dex virtual register size `kVRegSize`) - // slots for spilling. - bool NeedsTwoSpillSlots() const; + // Returns the number of required spilling slots (measured as a multiple of the + // Dex virtual register size `kVRegSize`). 
+ size_t NumberOfSpillSlotsNeeded() const; bool IsFloatingPoint() const { return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index 5c4875951b..d265a44092 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -42,7 +42,10 @@ enum class RegisterView { // private kUseQuaternaryName, }; -template<typename Ass, typename Reg, typename FPReg, typename Imm> +// For use in the template as the default type to get a nonvector registers version. +struct NoVectorRegs {}; + +template<typename Ass, typename Reg, typename FPReg, typename Imm, typename VecReg = NoVectorRegs> class AssemblerTest : public testing::Test { public: Ass* GetAssembler() { @@ -146,7 +149,8 @@ class AssemblerTest : public testing::Test { std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, - int bias = 0) { + int bias = 0, + int multiplier = 1) { std::string str; std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0)); @@ -154,7 +158,7 @@ class AssemblerTest : public testing::Test { for (auto reg2 : reg2_registers) { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); - (assembler_.get()->*f)(*reg1, *reg2, new_imm + bias); + (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); std::string base = fmt; std::string reg1_string = (this->*GetName1)(*reg1); @@ -172,7 +176,7 @@ class AssemblerTest : public testing::Test { size_t imm_index = base.find(IMM_TOKEN); if (imm_index != std::string::npos) { std::ostringstream sreg; - sreg << imm + bias; + sreg << imm * multiplier + bias; std::string imm_string = sreg.str(); base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); } @@ -538,6 +542,69 @@ class AssemblerTest : public testing::Test { return str; } + std::string RepeatVV(void (Ass::*f)(VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVVV(void (Ass::*f)(VecReg, VecReg, VecReg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, VecReg, VecReg>(f, + GetVectorRegisters(), + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetVecRegName, + fmt); + } + + std::string RepeatVR(void (Ass::*f)(VecReg, Reg), const std::string& fmt) { + return RepeatTemplatedRegisters<VecReg, Reg>( + f, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt); + } + + template <typename ImmType> + std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + return RepeatTemplatedRegistersImmBits<VecReg, Reg, ImmType>( + f, + imm_bits, + GetVectorRegisters(), + GetRegisters(), + &AssemblerTest::GetVecRegName, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias, + multiplier); + } + + template <typename ImmType> + std::string RepeatVVIb(void (Ass::*f)(VecReg, VecReg, ImmType), + int imm_bits, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBits<VecReg, VecReg, ImmType>(f, + imm_bits, + GetVectorRegisters(), + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + 
&AssemblerTest::GetVecRegName, + fmt, + bias); + } + // This is intended to be run as a test. bool CheckTools() { return test_helper_->CheckTools(); @@ -552,6 +619,11 @@ class AssemblerTest : public testing::Test { UNREACHABLE(); } + virtual std::vector<VecReg*> GetVectorRegisters() { + UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; + UNREACHABLE(); + } + // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; @@ -971,6 +1043,12 @@ class AssemblerTest : public testing::Test { return sreg.str(); } + std::string GetVecRegName(const VecReg& reg) { + std::ostringstream sreg; + sreg << reg; + return sreg.str(); + } + // If the assembly file needs a header, return it in a sub-class. virtual const char* GetAssemblyHeader() { return nullptr; diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index c410365a8c..4e7f635246 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -184,6 +184,106 @@ void Mips64Assembler::EmitFI(int opcode, int fmt, FpuRegister ft, uint16_t imm) Emit(encoding); } +void Mips64Assembler::EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wt, kNoVectorRegister); + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + static_cast<uint32_t>(wt) << kWtShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaBIT(int operation, + int df_m, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df_m << kDfMShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaELM(int operation, + int df_n, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaELMOperationShift | + df_n << kDfNShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsaMI10(int s10, + GpuRegister rs, + VectorRegister wd, + int minor_opcode, + int df) { + CHECK_NE(rs, kNoGpuRegister); + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(s10)) << s10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + s10 << kS10Shift | + static_cast<uint32_t>(rs) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode << kS10MinorShift | + df; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2R(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2ROperationShift | + df << 
kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + +void Mips64Assembler::EmitMsa2RF(int operation, + int df, + VectorRegister ws, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(ws, kNoVectorRegister); + CHECK_NE(wd, kNoVectorRegister); + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsa2RFOperationShift | + df << kDf2RShift | + static_cast<uint32_t>(ws) << kWsShift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) { EmitR(0, rs, rt, rd, 0, 0x21); } @@ -1080,6 +1180,378 @@ void Mips64Assembler::Not(GpuRegister rd, GpuRegister rs) { Nor(rd, rs, ZERO); } +// TODO: Check for MSA presence in Mips64InstructionSetFeatures for each MSA instruction. + +void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e); +} + +void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe); +} + +void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe); +} + +void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x4, 0x3, wt, ws, 
wd, 0x12); +} + +void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12); +} + +void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); +} + +void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b); +} + +void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e); +} + +void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) { + EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e); +} + +void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x2, wt, 
ws, wd, 0xd); +} + +void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd); +} + +void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) { + CHECK(IsUint<3>(shamt3)) << shamt3; + EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) { + CHECK(IsUint<4>(shamt4)) << shamt4; + EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) { + CHECK(IsUint<5>(shamt5)) << shamt5; + EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9); +} + +void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) { + CHECK(IsUint<6>(shamt6)) << shamt6; + EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9); +} + +void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) { + EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19); +} + +void 
Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) { + CHECK(IsUint<4>(n4)) << n4; + EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) { + CHECK(IsUint<3>(n3)) << n3; + EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) { + CHECK(IsUint<2>(n2)) << n2; + EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19); +} + +void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) { + CHECK(IsUint<1>(n1)) << n1; + EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19); +} + +void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) { + EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e); +} + +void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0); +} + +void Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1); +} + +void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2); +} + +void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3); +} + +void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<10>(offset)) << offset; + EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0); +} + +void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<11>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64HalfwordSize); + EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1); +} + +void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<12>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64WordSize); + EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2); +} + +void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) { + CHECK(IsInt<13>(offset)) << offset; + CHECK_ALIGNED(offset, kMips64DoublewordSize); + EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3); +} + void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) { TemplateLoadConst32(this, rd, value); } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 2c5072efe9..f42c1626df 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -266,6 +266,7 @@ void TemplateLoadConst64(Asm* a, Rtype rd, Vtype value) { } } +static constexpr size_t kMips64HalfwordSize = 2; static constexpr size_t kMips64WordSize = 4; static constexpr size_t kMips64DoublewordSize = 8; @@ -644,6 +645,101 
@@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Clear(GpuRegister rd); void Not(GpuRegister rd, GpuRegister rs); + // MSA instructions. + void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + void Ffint_sW(VectorRegister wd, VectorRegister ws); + void Ffint_sD(VectorRegister wd, VectorRegister ws); + void Ftint_sW(VectorRegister wd, VectorRegister ws); + void Ftint_sD(VectorRegister wd, VectorRegister ws); + + void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void 
SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + + // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1). + void SlliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SlliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SlliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SlliD(VectorRegister wd, VectorRegister ws, int shamt6); + void SraiB(VectorRegister wd, VectorRegister ws, int shamt3); + void SraiH(VectorRegister wd, VectorRegister ws, int shamt4); + void SraiW(VectorRegister wd, VectorRegister ws, int shamt5); + void SraiD(VectorRegister wd, VectorRegister ws, int shamt6); + void SrliB(VectorRegister wd, VectorRegister ws, int shamt3); + void SrliH(VectorRegister wd, VectorRegister ws, int shamt4); + void SrliW(VectorRegister wd, VectorRegister ws, int shamt5); + void SrliD(VectorRegister wd, VectorRegister ws, int shamt6); + + void MoveV(VectorRegister wd, VectorRegister ws); + void SplatiB(VectorRegister wd, VectorRegister ws, int n4); + void SplatiH(VectorRegister wd, VectorRegister ws, int n3); + void SplatiW(VectorRegister wd, VectorRegister ws, int n2); + void SplatiD(VectorRegister wd, VectorRegister ws, int n1); + void FillB(VectorRegister wd, GpuRegister rs); + void FillH(VectorRegister wd, GpuRegister rs); + void FillW(VectorRegister wd, GpuRegister rs); + void FillD(VectorRegister wd, GpuRegister rs); + + void LdB(VectorRegister wd, GpuRegister rs, int offset); + void LdH(VectorRegister wd, GpuRegister rs, int offset); + void LdW(VectorRegister wd, GpuRegister rs, int offset); + void LdD(VectorRegister wd, GpuRegister rs, int offset); + void StB(VectorRegister wd, GpuRegister rs, int offset); + void StH(VectorRegister wd, GpuRegister rs, int offset); + void StW(VectorRegister wd, GpuRegister rs, int offset); + void StD(VectorRegister wd, GpuRegister rs, int offset); + // Higher level composite instructions. 
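Before the composite-instruction declarations continue, a worked example of the 3R encoding that EmitMsa3R produces for the vector arithmetic declared above. The field shifts are the ones added to constants_mips64.h; kOpcodeShift = 26 is assumed here from the existing MIPS encoding:

#include <cstdint>

// 3R layout: [31:26] major opcode 0x1e, [25:23] operation, [22:21] df,
//            [20:16] wt, [15:11] ws, [10:6] wd, [5:0] minor opcode.
uint32_t EncodeMsa3R(uint32_t operation, uint32_t df,
                     uint32_t wt, uint32_t ws, uint32_t wd, uint32_t minor_opcode) {
  return (0x1eu << 26) | (operation << 23) | (df << 21) |
         (wt << 16) | (ws << 11) | (wd << 6) | minor_opcode;
}

// Example: addv.w w1, w2, w3 corresponds to EmitMsa3R(0x0, 0x2, W3, W2, W1, 0xe),
// i.e. EncodeMsa3R(0x0, 0x2, 3, 2, 1, 0xe) == 0x7843104e.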
int InstrCountForLoadReplicatedConst32(int64_t); void LoadConst32(GpuRegister rd, int32_t value); @@ -1349,6 +1445,17 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct); void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm); void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21); + void EmitMsa3R(int operation, + int df, + VectorRegister wt, + VectorRegister ws, + VectorRegister wd, + int minor_opcode); + void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df); + void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); + void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); void Buncond(Mips64Label* label); void Bcond(Mips64Label* label, diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index e5d3605b01..12660ce85d 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -37,12 +37,14 @@ struct MIPS64CpuRegisterCompare { class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, mips64::GpuRegister, mips64::FpuRegister, - uint32_t> { + uint32_t, + mips64::VectorRegister> { public: typedef AssemblerTest<mips64::Mips64Assembler, mips64::GpuRegister, mips64::FpuRegister, - uint32_t> Base; + uint32_t, + mips64::VectorRegister> Base; protected: // Get the typically used name for this architecture, e.g., aarch64, x86-64, ... @@ -60,7 +62,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative // branches in the .text section and so they require a relocation pass (there's a relocation // section, .rela.text, that has the needed info to fix up the branches). 
- return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; + return " -march=mips64r6 -mmsa -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib"; } void Pad(std::vector<uint8_t>& data) OVERRIDE { @@ -176,6 +178,39 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, fp_registers_.push_back(new mips64::FpuRegister(mips64::F29)); fp_registers_.push_back(new mips64::FpuRegister(mips64::F30)); fp_registers_.push_back(new mips64::FpuRegister(mips64::F31)); + + vec_registers_.push_back(new mips64::VectorRegister(mips64::W0)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W1)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W2)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W3)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W4)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W5)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W6)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W7)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W8)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W9)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W10)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W11)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W12)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W13)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W14)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W15)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W16)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W17)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W18)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W19)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W20)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W21)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W22)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W23)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W24)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W25)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W26)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W27)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W28)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W29)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W30)); + vec_registers_.push_back(new mips64::VectorRegister(mips64::W31)); } } @@ -183,6 +218,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, AssemblerTest::TearDown(); STLDeleteElements(®isters_); STLDeleteElements(&fp_registers_); + STLDeleteElements(&vec_registers_); } std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE { @@ -193,6 +229,10 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, return fp_registers_; } + std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE { + return vec_registers_; + } + uint32_t CreateImmediate(int64_t imm_value) OVERRIDE { return imm_value; } @@ -272,6 +312,7 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler, std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_; std::vector<mips64::FpuRegister*> fp_registers_; + std::vector<mips64::VectorRegister*> 
vec_registers_; }; @@ -2461,6 +2502,370 @@ TEST_F(AssemblerMIPS64Test, LoadConst64) { EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths); } +// MSA instructions. + +TEST_F(AssemblerMIPS64Test, AndV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v"); +} + +TEST_F(AssemblerMIPS64Test, OrV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v"); +} + +TEST_F(AssemblerMIPS64Test, NorV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v"); +} + +TEST_F(AssemblerMIPS64Test, XorV) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v"); +} + +TEST_F(AssemblerMIPS64Test, AddvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"), + "addv.b"); +} + +TEST_F(AssemblerMIPS64Test, AddvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"), + "addv.h"); +} + +TEST_F(AssemblerMIPS64Test, AddvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"), + "addv.w"); +} + +TEST_F(AssemblerMIPS64Test, AddvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"), + "addv.d"); +} + +TEST_F(AssemblerMIPS64Test, SubvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"), + "subv.b"); +} + +TEST_F(AssemblerMIPS64Test, SubvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"), + "subv.h"); +} + +TEST_F(AssemblerMIPS64Test, SubvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"), + "subv.w"); +} + +TEST_F(AssemblerMIPS64Test, SubvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"), + "subv.d"); +} + +TEST_F(AssemblerMIPS64Test, MulvB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"), + "mulv.b"); +} + +TEST_F(AssemblerMIPS64Test, MulvH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"), + "mulv.h"); +} + +TEST_F(AssemblerMIPS64Test, MulvW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"), + "mulv.w"); +} + +TEST_F(AssemblerMIPS64Test, MulvD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"), + "mulv.d"); +} + +TEST_F(AssemblerMIPS64Test, Div_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"), + "div_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Div_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"), + "div_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Div_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"), + "div_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Div_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"), + "div_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Div_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"), + "div_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Div_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"), + "div_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Div_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uW, "div_u.w ${reg1}, 
${reg2}, ${reg3}"), + "div_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Div_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"), + "div_u.d"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"), + "mod_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"), + "mod_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"), + "mod_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Mod_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"), + "mod_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"), + "mod_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"), + "mod_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"), + "mod_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Mod_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"), + "mod_u.d"); +} + +TEST_F(AssemblerMIPS64Test, FaddW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), + "fadd.w"); +} + +TEST_F(AssemblerMIPS64Test, FaddD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"), + "fadd.d"); +} + +TEST_F(AssemblerMIPS64Test, FsubW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"), + "fsub.w"); +} + +TEST_F(AssemblerMIPS64Test, FsubD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"), + "fsub.d"); +} + +TEST_F(AssemblerMIPS64Test, FmulW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"), + "fmul.w"); +} + +TEST_F(AssemblerMIPS64Test, FmulD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"), + "fmul.d"); +} + +TEST_F(AssemblerMIPS64Test, FdivW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"), + "fdiv.w"); +} + +TEST_F(AssemblerMIPS64Test, FdivD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"), + "fdiv.d"); +} + +TEST_F(AssemblerMIPS64Test, Ffint_sW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), + "ffint_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ffint_sD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"), + "ffint_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Ftint_sW) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"), + "ftint_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ftint_sD) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"), + "ftint_s.d"); +} + +TEST_F(AssemblerMIPS64Test, SllB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b"); +} + +TEST_F(AssemblerMIPS64Test, SllH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h"); +} + +TEST_F(AssemblerMIPS64Test, SllW) { + 
DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w"); +} + +TEST_F(AssemblerMIPS64Test, SllD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllD, "sll.d ${reg1}, ${reg2}, ${reg3}"), "sll.d"); +} + +TEST_F(AssemblerMIPS64Test, SraB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b"); +} + +TEST_F(AssemblerMIPS64Test, SraH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h"); +} + +TEST_F(AssemblerMIPS64Test, SraW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w"); +} + +TEST_F(AssemblerMIPS64Test, SraD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d"); +} + +TEST_F(AssemblerMIPS64Test, SrlB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b"); +} + +TEST_F(AssemblerMIPS64Test, SrlH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h"); +} + +TEST_F(AssemblerMIPS64Test, SrlW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w"); +} + +TEST_F(AssemblerMIPS64Test, SrlD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d"); +} + +TEST_F(AssemblerMIPS64Test, SlliB) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"), + "slli.b"); +} + +TEST_F(AssemblerMIPS64Test, SlliH) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"), + "slli.h"); +} + +TEST_F(AssemblerMIPS64Test, SlliW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"), + "slli.w"); +} + +TEST_F(AssemblerMIPS64Test, SlliD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"), + "slli.d"); +} + +TEST_F(AssemblerMIPS64Test, MoveV) { + DriverStr(RepeatVV(&mips64::Mips64Assembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v"); +} + +TEST_F(AssemblerMIPS64Test, SplatiB) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"), + "splati.b"); +} + +TEST_F(AssemblerMIPS64Test, SplatiH) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"), + "splati.h"); +} + +TEST_F(AssemblerMIPS64Test, SplatiW) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"), + "splati.w"); +} + +TEST_F(AssemblerMIPS64Test, SplatiD) { + DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"), + "splati.d"); +} + +TEST_F(AssemblerMIPS64Test, FillB) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b"); +} + +TEST_F(AssemblerMIPS64Test, FillH) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h"); +} + +TEST_F(AssemblerMIPS64Test, FillW) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w"); +} + +TEST_F(AssemblerMIPS64Test, FillD) { + DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); +} + +TEST_F(AssemblerMIPS64Test, LdB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b"); +} + +TEST_F(AssemblerMIPS64Test, LdH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdH, -10, "ld.h ${reg1}, 
{imm}(${reg2})", 0, 2), + "ld.h"); +} + +TEST_F(AssemblerMIPS64Test, LdW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4), + "ld.w"); +} + +TEST_F(AssemblerMIPS64Test, LdD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8), + "ld.d"); +} + +TEST_F(AssemblerMIPS64Test, StB) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b"); +} + +TEST_F(AssemblerMIPS64Test, StH) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2), + "st.h"); +} + +TEST_F(AssemblerMIPS64Test, StW) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4), + "st.w"); +} + +TEST_F(AssemblerMIPS64Test, StD) { + DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8), + "st.d"); +} + #undef __ } // namespace art diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index f57498d34f..5ae9c73589 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -51,8 +51,35 @@ enum InstructionFields { kFdShift = 6, kFdBits = 5, + kMsaOperationShift = 23, + kMsaELMOperationShift = 22, + kMsa2ROperationShift = 18, + kMsa2RFOperationShift = 17, + kDfShift = 21, + kDfMShift = 16, + kDf2RShift = 16, + kDfNShift = 16, + kWtShift = 16, + kWtBits = 5, + kWsShift = 11, + kWsBits = 5, + kWdShift = 6, + kWdBits = 5, + kS10Shift = 16, + kS10MinorShift = 2, + kBranchOffsetMask = 0x0000ffff, kJumpOffsetMask = 0x03ffffff, + kMsaMajorOpcode = 0x1e, + kMsaDfMByteMask = 0x70, + kMsaDfMHalfwordMask = 0x60, + kMsaDfMWordMask = 0x40, + kMsaDfMDoublewordMask = 0x00, + kMsaDfNByteMask = 0x00, + kMsaDfNHalfwordMask = 0x20, + kMsaDfNWordMask = 0x30, + kMsaDfNDoublewordMask = 0x38, + kMsaS10Mask = 0x3ff, }; enum ScaleFactor { diff --git a/dex2oat/Android.bp b/dex2oat/Android.bp index 0924aec7f1..048f36d76c 100644 --- a/dex2oat/Android.bp +++ b/dex2oat/Android.bp @@ -14,6 +14,12 @@ // limitations under the License. // +cc_library_headers { + name: "dex2oat_headers", + host_supported: true, + export_include_dirs: ["include"], +} + cc_defaults { name: "dex2oat-defaults", host_supported: true, @@ -40,6 +46,7 @@ cc_defaults { include_dirs: [ "art/cmdline", ], + header_libs: ["dex2oat_headers"], } art_cc_binary { @@ -132,4 +139,5 @@ art_cc_test { "art_gtest_defaults", ], srcs: ["dex2oat_test.cc"], + header_libs: ["dex2oat_headers"], } diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 92a12c8d07..e80be8172a 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -54,6 +54,7 @@ #include "debug/method_debug_info.h" #include "dex/quick_compiler_callbacks.h" #include "dex/verification_results.h" +#include "dex2oat_return_codes.h" #include "dex_file-inl.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" @@ -1442,11 +1443,11 @@ class Dex2Oat FINAL { // Set up the environment for compilation. Includes starting the runtime and loading/opening the // boot class path. 
- bool Setup() { + dex2oat::ReturnCode Setup() { TimingLogger::ScopedTiming t("dex2oat Setup", timings_); if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) { - return false; + return dex2oat::ReturnCode::kOther; } verification_results_.reset(new VerificationResults(compiler_options_.get())); @@ -1458,12 +1459,12 @@ class Dex2Oat FINAL { RuntimeArgumentMap runtime_options; if (!PrepareRuntimeOptions(&runtime_options)) { - return false; + return dex2oat::ReturnCode::kOther; } CreateOatWriters(); if (!AddDexFileSources()) { - return false; + return dex2oat::ReturnCode::kOther; } if (IsBootImage() && image_filenames_.size() > 1) { @@ -1479,7 +1480,7 @@ class Dex2Oat FINAL { // When compiling an app, create the runtime early to retrieve // the image location key needed for the oat header. if (!CreateRuntime(std::move(runtime_options))) { - return false; + return dex2oat::ReturnCode::kCreateRuntime; } if (CompilerFilter::DependsOnImageChecksum(compiler_options_->GetCompilerFilter())) { @@ -1550,7 +1551,7 @@ class Dex2Oat FINAL { update_input_vdex_, &opened_dex_files_map, &opened_dex_files)) { - return false; + return dex2oat::ReturnCode::kOther; } dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files)); if (opened_dex_files_map != nullptr) { @@ -1602,7 +1603,7 @@ class Dex2Oat FINAL { // Note: Runtime acquires ownership of these dex files. runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_); if (!CreateRuntime(std::move(runtime_options))) { - return false; + return dex2oat::ReturnCode::kOther; } } @@ -1636,7 +1637,7 @@ class Dex2Oat FINAL { for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) { if (!map->Protect(PROT_READ | PROT_WRITE)) { PLOG(ERROR) << "Failed to make .dex files writeable."; - return false; + return dex2oat::ReturnCode::kOther; } } @@ -1651,14 +1652,14 @@ class Dex2Oat FINAL { soa.Self()->AssertPendingException(); soa.Self()->ClearException(); PLOG(ERROR) << "Failed to register dex file."; - return false; + return dex2oat::ReturnCode::kOther; } // Pre-register dex files so that we can access verification results without locks during // compilation and verification. verification_results_->AddDexFile(dex_file); } - return true; + return dex2oat::ReturnCode::kNoFailure; } // If we need to keep the oat file open for the image writer. @@ -2789,13 +2790,13 @@ static void b13564922() { #endif } -static int CompileImage(Dex2Oat& dex2oat) { +static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) { dex2oat.LoadClassProfileDescriptors(); dex2oat.Compile(); if (!dex2oat.WriteOutputFiles()) { dex2oat.EraseOutputFiles(); - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // Flush boot.oat. We always expect the output file by name, and it will be re-opened from the @@ -2804,46 +2805,46 @@ static int CompileImage(Dex2Oat& dex2oat) { if (dex2oat.ShouldKeepOatFileOpen()) { if (!dex2oat.FlushOutputFiles()) { dex2oat.EraseOutputFiles(); - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } } else if (!dex2oat.FlushCloseOutputFiles()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // Creates the boot.art and patches the oat files. if (!dex2oat.HandleImage()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // When given --host, finish early without stripping. 
if (dex2oat.IsHost()) { if (!dex2oat.FlushCloseOutputFiles()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } dex2oat.DumpTiming(); - return EXIT_SUCCESS; + return dex2oat::ReturnCode::kNoFailure; } // Copy stripped to unstripped location, if necessary. if (!dex2oat.CopyStrippedToUnstripped()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // FlushClose again, as stripping might have re-opened the oat files. if (!dex2oat.FlushCloseOutputFiles()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } dex2oat.DumpTiming(); - return EXIT_SUCCESS; + return dex2oat::ReturnCode::kNoFailure; } -static int CompileApp(Dex2Oat& dex2oat) { +static dex2oat::ReturnCode CompileApp(Dex2Oat& dex2oat) { dex2oat.Compile(); if (!dex2oat.WriteOutputFiles()) { dex2oat.EraseOutputFiles(); - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // Do not close the oat files here. We might have gotten the output file by file descriptor, @@ -2852,29 +2853,29 @@ static int CompileApp(Dex2Oat& dex2oat) { // When given --host, finish early without stripping. if (dex2oat.IsHost()) { if (!dex2oat.FlushCloseOutputFiles()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } dex2oat.DumpTiming(); - return EXIT_SUCCESS; + return dex2oat::ReturnCode::kNoFailure; } // Copy stripped to unstripped location, if necessary. This will implicitly flush & close the // stripped versions. If this is given, we expect to be able to open writable files by name. if (!dex2oat.CopyStrippedToUnstripped()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // Flush and close the files. if (!dex2oat.FlushCloseOutputFiles()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } dex2oat.DumpTiming(); - return EXIT_SUCCESS; + return dex2oat::ReturnCode::kNoFailure; } -static int dex2oat(int argc, char** argv) { +static dex2oat::ReturnCode Dex2oat(int argc, char** argv) { b13564922(); TimingLogger timings("compiler", false, false); @@ -2893,14 +2894,14 @@ static int dex2oat(int argc, char** argv) { if (dex2oat->UseProfile()) { if (!dex2oat->LoadProfile()) { LOG(ERROR) << "Failed to process profile file"; - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } } if (dex2oat->DoDexLayoutOptimizations()) { if (dex2oat->HasInputVdexFile()) { LOG(ERROR) << "Dexlayout is incompatible with an input VDEX"; - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } } @@ -2908,7 +2909,7 @@ static int dex2oat(int argc, char** argv) { // Check early that the result of compilation can be written if (!dex2oat->OpenFile()) { - return EXIT_FAILURE; + return dex2oat::ReturnCode::kOther; } // Print the complete line when any of the following is true: @@ -2923,16 +2924,17 @@ static int dex2oat(int argc, char** argv) { LOG(INFO) << StrippedCommandLine(); } - if (!dex2oat->Setup()) { + dex2oat::ReturnCode setup_code = dex2oat->Setup(); + if (setup_code != dex2oat::ReturnCode::kNoFailure) { dex2oat->EraseOutputFiles(); - return EXIT_FAILURE; + return setup_code; } // Helps debugging on device. Can be used to determine which dalvikvm instance invoked a dex2oat // instance. Used by tools/bisection_search/bisection_search.py. 
VLOG(compiler) << "Running dex2oat (parent PID = " << getppid() << ")"; - bool result; + dex2oat::ReturnCode result; if (dex2oat->IsImage()) { result = CompileImage(*dex2oat); } else { @@ -2945,7 +2947,7 @@ static int dex2oat(int argc, char** argv) { } // namespace art int main(int argc, char** argv) { - int result = art::dex2oat(argc, argv); + int result = static_cast<int>(art::Dex2oat(argc, argv)); // Everything was done, do an explicit exit here to avoid running Runtime destructors that take // time (bug 10645725) unless we're a debug build or running on valgrind. Note: The Dex2Oat class // should not destruct the runtime in this case. diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index 289b8ab50a..8c14b50094 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -30,6 +30,7 @@ #include "base/macros.h" #include "dex_file-inl.h" #include "dex2oat_environment_test.h" +#include "dex2oat_return_codes.h" #include "jit/profile_compilation_info.h" #include "oat.h" #include "oat_file.h" @@ -50,12 +51,12 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { } protected: - void GenerateOdexForTest(const std::string& dex_location, - const std::string& odex_location, - CompilerFilter::Filter filter, - const std::vector<std::string>& extra_args = {}, - bool expect_success = true, - bool use_fd = false) { + int GenerateOdexForTestWithStatus(const std::string& dex_location, + const std::string& odex_location, + CompilerFilter::Filter filter, + std::string* error_msg, + const std::vector<std::string>& extra_args = {}, + bool use_fd = false) { std::unique_ptr<File> oat_file; std::vector<std::string> args; args.push_back("--dex-file=" + dex_location); @@ -73,12 +74,27 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { args.insert(args.end(), extra_args.begin(), extra_args.end()); - std::string error_msg; - bool success = Dex2Oat(args, &error_msg); + int status = Dex2Oat(args, error_msg); if (oat_file != nullptr) { - ASSERT_EQ(oat_file->FlushClose(), 0) << "Could not flush and close oat file"; + CHECK_EQ(oat_file->FlushClose(), 0) << "Could not flush and close oat file"; } + return status; + } + void GenerateOdexForTest(const std::string& dex_location, + const std::string& odex_location, + CompilerFilter::Filter filter, + const std::vector<std::string>& extra_args = {}, + bool expect_success = true, + bool use_fd = false) { + std::string error_msg; + int status = GenerateOdexForTestWithStatus(dex_location, + odex_location, + filter, + &error_msg, + extra_args, + use_fd); + bool success = (status == 0); if (expect_success) { ASSERT_TRUE(success) << error_msg << std::endl << output_; @@ -118,7 +134,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { EXPECT_EQ(expected, actual); } - bool Dex2Oat(const std::vector<std::string>& dex2oat_args, std::string* error_msg) { + int Dex2Oat(const std::vector<std::string>& dex2oat_args, std::string* error_msg) { Runtime* runtime = Runtime::Current(); const std::vector<gc::space::ImageSpace*>& image_spaces = @@ -196,6 +212,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { c_args.push_back(nullptr); execv(c_args[0], const_cast<char* const*>(c_args.data())); exit(1); + UNREACHABLE(); } else { close(link[1]); char buffer[128]; @@ -206,12 +223,12 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { output_ += std::string(buffer, bytes_read); } close(link[0]); - int status = 0; + int status = -1; if (waitpid(pid, &status, 0) != -1) { success_ = (status == 0); } + return status; } - return success_; } std::string output_ = ""; 
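// Illustrative aside, not part of this change: the test helper above now
// returns the raw waitpid() status, and dex2oat's process exit code carries
// the dex2oat::ReturnCode values introduced in dex2oat_return_codes.h below
// (kNoFailure = 0, kOther = 1, kCreateRuntime = 2). A hypothetical caller
// could recover the failure reason roughly as follows; the function name and
// the treatment of signals are assumptions, not code from this patch.

#include <sys/wait.h>

#include "dex2oat_return_codes.h"

// Maps a raw waitpid() status to dex2oat's return code. A crash or signal
// carries no dex2oat-defined exit value, so it is folded into kOther.
static art::dex2oat::ReturnCode InterpretDex2oatStatus(int wait_status) {
  if (!WIFEXITED(wait_status)) {
    return art::dex2oat::ReturnCode::kOther;
  }
  return static_cast<art::dex2oat::ReturnCode>(WEXITSTATUS(wait_status));
}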
@@ -845,4 +862,30 @@ TEST_F(Dex2oatWatchdogTest, TestWatchdogTrigger) { RunTest(false, { "--watchdog-timeout=10" }); } +class Dex2oatReturnCodeTest : public Dex2oatTest { + protected: + int RunTest(const std::vector<std::string>& extra_args = {}) { + std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar"; + std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex"; + + Copy(GetTestDexFileName(), dex_location); + + std::string error_msg; + return GenerateOdexForTestWithStatus(dex_location, + odex_location, + CompilerFilter::kSpeed, + &error_msg, + extra_args); + } + + std::string GetTestDexFileName() { + return GetDexSrc1(); + } +}; + +TEST_F(Dex2oatReturnCodeTest, TestCreateRuntime) { + int status = RunTest({ "--boot-image=/this/does/not/exist/yolo.oat" }); + EXPECT_EQ(static_cast<int>(dex2oat::ReturnCode::kCreateRuntime), WEXITSTATUS(status)) << output_; +} + } // namespace art diff --git a/dex2oat/include/dex2oat_return_codes.h b/dex2oat/include/dex2oat_return_codes.h new file mode 100644 index 0000000000..cc5400fc27 --- /dev/null +++ b/dex2oat/include/dex2oat_return_codes.h @@ -0,0 +1,32 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_DEX2OAT_INCLUDE_DEX2OAT_RETURN_CODES_H_ +#define ART_DEX2OAT_INCLUDE_DEX2OAT_RETURN_CODES_H_ + +namespace art { +namespace dex2oat { + +enum class ReturnCode : int { + kNoFailure = 0, + kOther = 1, + kCreateRuntime = 2, +}; + +} // namespace dex2oat +} // namespace art + +#endif // ART_DEX2OAT_INCLUDE_DEX2OAT_RETURN_CODES_H_ diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc index 8997146c74..452f51b28b 100644 --- a/dexlayout/dex_visualize.cc +++ b/dexlayout/dex_visualize.cc @@ -35,9 +35,9 @@ namespace art { -std::string MultidexName(const std::string& prefix, - size_t dex_file_index, - const std::string& suffix) { +static std::string MultidexName(const std::string& prefix, + size_t dex_file_index, + const std::string& suffix) { return prefix + ((dex_file_index > 0) ? std::to_string(dex_file_index + 1) : "") + suffix; } @@ -432,20 +432,41 @@ void VisualizeDexLayout(dex_ir::Header* header, } // for } +static uint32_t FindNextByteAfterSection(dex_ir::Header* header, + const dex_ir::Collections& collections, + std::vector<const FileSection*>& sorted_sections, + size_t section_index) { + for (size_t i = section_index + 1; i < sorted_sections.size(); ++i) { + const FileSection* section = sorted_sections[i]; + if (section->size_fn_(collections) != 0) { + return section->offset_fn_(collections); + } + } + return header->FileSize(); +} + /* * Dumps the offset and size of sections within the file. */ void ShowDexSectionStatistics(dex_ir::Header* header, size_t dex_file_index) { // Compute the (multidex) class file name). 
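// Worked example for the per-section statistics computed in the loop below
// (numbers are illustrative, and a 4 KiB kPageSize is assumed): a section
// starting at offset 0x70 whose next non-empty section starts at offset
// 0x1190 spans 0x1190 - 0x70 = 4384 bytes according to
// FindNextByteAfterSection(); that rounds up to (4384 + 4095) / 4096 = 2
// pages, and in a 43840-byte dex file it accounts for
// 100 * 4384 / 43840 = 10 percent.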
- fprintf(stdout, "%s\n", MultidexName("classes", dex_file_index, ".dex").c_str()); - fprintf(stdout, "section offset items\n"); + fprintf(stdout, "%s (%d bytes)\n", + MultidexName("classes", dex_file_index, ".dex").c_str(), + header->FileSize()); + fprintf(stdout, "section offset items bytes pages pct\n"); const dex_ir::Collections& collections = header->GetCollections(); std::vector<const FileSection*> sorted_sections(GetSortedSections(collections, kSortAscending)); - for (const FileSection* file_section : sorted_sections) { - fprintf(stdout, "%-10s 0x%08x 0x%08x\n", - file_section->name_.c_str(), - file_section->offset_fn_(collections), - file_section->size_fn_(collections)); + for (size_t i = 0; i < sorted_sections.size(); ++i) { + const FileSection* file_section = sorted_sections[i]; + const char* name = file_section->name_.c_str(); + uint32_t offset = file_section->offset_fn_(collections); + uint32_t items = file_section->size_fn_(collections); + uint32_t bytes = 0; + if (items > 0) { + bytes = FindNextByteAfterSection(header, collections, sorted_sections, i) - offset; + } + fprintf(stdout, "%-10s %8d %8d %8d %8d %%%02d\n", name, offset, items, bytes, + (bytes + kPageSize - 1) / kPageSize, 100 * bytes / header->FileSize()); } fprintf(stdout, "\n"); } diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index 1f6b87447f..fc6c18b1df 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -43,6 +43,7 @@ struct MipsInstruction { static const uint32_t kOpcodeShift = 26; static const uint32_t kCop1 = (17 << kOpcodeShift); +static const uint32_t kMsa = (30 << kOpcodeShift); // MSA major opcode. static const uint32_t kITypeMask = (0x3f << kOpcodeShift); static const uint32_t kJTypeMask = (0x3f << kOpcodeShift); @@ -51,6 +52,8 @@ static const uint32_t kSpecial0Mask = (0x3f << kOpcodeShift); static const uint32_t kSpecial2Mask = (0x3f << kOpcodeShift); static const uint32_t kSpecial3Mask = (0x3f << kOpcodeShift); static const uint32_t kFpMask = kRTypeMask; +static const uint32_t kMsaMask = kRTypeMask; +static const uint32_t kMsaSpecialMask = (0x3f << kOpcodeShift); static const MipsInstruction gMipsInstructions[] = { // "sll r0, r0, 0" is the canonical "nop", used in delay slots. @@ -417,6 +420,36 @@ static const MipsInstruction gMipsInstructions[] = { { kFpMask, kCop1 | 0x10, "sel", "fadt" }, { kFpMask, kCop1 | 0x1e, "max", "fadt" }, { kFpMask, kCop1 | 0x1c, "min", "fadt" }, + + // MSA instructions. 
+ { kMsaMask | (0x1f << 21), kMsa | (0x0 << 21) | 0x1e, "and.v", "kmn" }, + { kMsaMask | (0x1f << 21), kMsa | (0x1 << 21) | 0x1e, "or.v", "kmn" }, + { kMsaMask | (0x1f << 21), kMsa | (0x2 << 21) | 0x1e, "nor.v", "kmn" }, + { kMsaMask | (0x1f << 21), kMsa | (0x3 << 21) | 0x1e, "xor.v", "kmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0xe, "addv", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x1 << 23) | 0xe, "subv", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x12, "mulv", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x4 << 23) | 0x12, "div_s", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x12, "div_u", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x12, "mod_s", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x12, "mod_u", "Vkmn" }, + { kMsaMask | (0xf << 22), kMsa | (0x0 << 22) | 0x1b, "fadd", "Ukmn" }, + { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x1b, "fsub", "Ukmn" }, + { kMsaMask | (0xf << 22), kMsa | (0x2 << 22) | 0x1b, "fmul", "Ukmn" }, + { kMsaMask | (0xf << 22), kMsa | (0x3 << 22) | 0x1b, "fdiv", "Ukmn" }, + { kMsaMask | (0x1ff << 17), kMsa | (0x19e << 17) | 0x1e, "ffint_s", "ukm" }, + { kMsaMask | (0x1ff << 17), kMsa | (0x19c << 17) | 0x1e, "ftint_s", "ukm" }, + { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0xd, "sll", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x1 << 23) | 0xd, "sra", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x2 << 23) | 0xd, "srl", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x9, "slli", "kmW" }, + { kMsaMask | (0x7 << 23), kMsa | (0x1 << 23) | 0x9, "srai", "kmW" }, + { kMsaMask | (0x7 << 23), kMsa | (0x2 << 23) | 0x9, "srli", "kmW" }, + { kMsaMask | (0x3ff << 16), kMsa | (0xbe << 16) | 0x19, "move.v", "km" }, + { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x19, "splati", "kX" }, + { kMsaMask | (0xff << 18), kMsa | (0xc0 << 18) | 0x1e, "fill", "vkD" }, + { kMsaSpecialMask | (0xf << 2), kMsa | (0x8 << 2), "ld", "kw" }, + { kMsaSpecialMask | (0xf << 2), kMsa | (0x9 << 2), "st", "kw" }, }; static uint32_t ReadU32(const uint8_t* ptr) { @@ -559,6 +592,111 @@ size_t DisassemblerMips::Dump(std::ostream& os, const uint8_t* instr_ptr) { case 't': args << 'f' << rt; break; case 'Z': args << (rd + 1); break; // sz ([d]ext size). case 'z': args << (rd - sa + 1); break; // sz ([d]ins, dinsu size). + case 'k': args << 'w' << sa; break; + case 'm': args << 'w' << rd; break; + case 'n': args << 'w' << rt; break; + case 'U': // MSA 1-bit df (word/doubleword), position 21. + { + int32_t df = (instruction >> 21) & 0x1; + switch (df) { + case 0: opcode += ".w"; break; + case 1: opcode += ".d"; break; + } + continue; // No ", ". + } + case 'u': // MSA 1-bit df (word/doubleword), position 16. + { + int32_t df = (instruction >> 16) & 0x1; + switch (df) { + case 0: opcode += ".w"; break; + case 1: opcode += ".d"; break; + } + continue; // No ", ". + } + case 'V': // MSA 2-bit df, position 21. + { + int32_t df = (instruction >> 21) & 0x3; + switch (df) { + case 0: opcode += ".b"; break; + case 1: opcode += ".h"; break; + case 2: opcode += ".w"; break; + case 3: opcode += ".d"; break; + } + continue; // No ", ". + } + case 'v': // MSA 2-bit df, position 16. + { + int32_t df = (instruction >> 16) & 0x3; + switch (df) { + case 0: opcode += ".b"; break; + case 1: opcode += ".h"; break; + case 2: opcode += ".w"; break; + case 3: opcode += ".d"; break; + } + continue; // No ", ". + } + case 'W': // MSA df/m. 
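// Worked example for the df/m decoding handled by this case (consistent with
// the kMsaDfM* masks in constants_mips64.h): the 7-bit df/m field occupies
// instruction bits 22..16. A leading 0 bit selects the .d data format with a
// 6-bit shift amount, a leading 10 selects .w (5-bit m), 110 selects .h
// (4-bit m), and 1110 selects .b (3-bit m). For instance, df/m == 0x43
// (0b1000011) decodes as a .w operation with m == 3, i.e. an slli.w by 3.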
+ { + int32_t df_m = (instruction >> 16) & 0x7f; + if ((df_m & (0x1 << 6)) == 0) { + opcode += ".d"; + args << (df_m & 0x3f); + break; + } + if ((df_m & (0x1 << 5)) == 0) { + opcode += ".w"; + args << (df_m & 0x1f); + break; + } + if ((df_m & (0x1 << 4)) == 0) { + opcode += ".h"; + args << (df_m & 0xf); + break; + } + if ((df_m & (0x1 << 3)) == 0) { + opcode += ".b"; + args << (df_m & 0x7); + } + break; + } + case 'w': // MSA +x(rs). + { + int32_t df = instruction & 0x3; + int32_t s10 = (instruction >> 16) & 0x3ff; + s10 -= (s10 & 0x200) << 1; // Sign-extend s10. + switch (df) { + case 0: opcode += ".b"; break; + case 1: opcode += ".h"; break; + case 2: opcode += ".w"; break; + case 3: opcode += ".d"; break; + } + args << StringPrintf("%+d(r%d)", s10 << df, rd); + break; + } + case 'X': // MSA df/n - ws[x]. + { + int32_t df_n = (instruction >> 16) & 0x3f; + if ((df_n & (0x3 << 4)) == 0) { + opcode += ".b"; + args << 'w' << rd << '[' << (df_n & 0xf) << ']'; + break; + } + if ((df_n & (0x3 << 3)) == 0) { + opcode += ".h"; + args << 'w' << rd << '[' << (df_n & 0x7) << ']'; + break; + } + if ((df_n & (0x3 << 2)) == 0) { + opcode += ".w"; + args << 'w' << rd << '[' << (df_n & 0x3) << ']'; + break; + } + if ((df_n & (0x3 << 1)) == 0) { + opcode += ".d"; + args << 'w' << rd << '[' << (df_n & 0x1) << ']'; + } + break; + } } if (*(args_fmt + 1)) { args << ", "; diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc index a289433af5..77ed3c6a22 100644 --- a/disassembler/disassembler_x86.cc +++ b/disassembler/disassembler_x86.cc @@ -832,6 +832,24 @@ DISASSEMBLER_ENTRY(cmp, store = true; immediate_bytes = 1; break; + case 0x74: + case 0x75: + case 0x76: + if (prefix[2] == 0x66) { + src_reg_file = dst_reg_file = SSE; + prefix[2] = 0; // clear prefix now it's served its purpose as part of the opcode + } else { + src_reg_file = dst_reg_file = MMX; + } + switch (*instr) { + case 0x74: opcode1 = "pcmpeqb"; break; + case 0x75: opcode1 = "pcmpeqw"; break; + case 0x76: opcode1 = "pcmpeqd"; break; + } + prefix[2] = 0; + has_modrm = true; + load = true; + break; case 0x7C: if (prefix[0] == 0xF2) { opcode1 = "haddps"; diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc index 52f3b52ee2..1a8a614a4a 100644 --- a/profman/profile_assistant_test.cc +++ b/profman/profile_assistant_test.cc @@ -22,6 +22,7 @@ #include "exec_utils.h" #include "jit/profile_compilation_info.h" #include "mirror/class-inl.h" +#include "obj_ptr-inl.h" #include "profile_assistant.h" #include "scoped_thread_state_change-inl.h" #include "utils.h" @@ -140,7 +141,8 @@ class ProfileAssistantTest : public CommonRuntimeTest { return true; } - bool CreateAndDump(const std::string& input_file_contents, std::string* output_file_contents) { + bool CreateAndDump(const std::string& input_file_contents, + std::string* output_file_contents) { ScratchFile profile_file; EXPECT_TRUE(CreateProfile(input_file_contents, profile_file.GetFilename(), @@ -156,7 +158,7 @@ class ProfileAssistantTest : public CommonRuntimeTest { ScopedObjectAccess soa(self); StackHandleScope<1> hs(self); Handle<mirror::ClassLoader> h_loader( - hs.NewHandle(self->DecodeJObject(class_loader)->AsClassLoader())); + hs.NewHandle(ObjPtr<mirror::ClassLoader>::DownCast(self->DecodeJObject(class_loader)))); return class_linker->FindClass(self, clazz.c_str(), h_loader); } @@ -442,6 +444,44 @@ TEST_F(ProfileAssistantTest, TestProfileCreationAllMatch) { ASSERT_EQ(output_file_contents, expected_contents); } +TEST_F(ProfileAssistantTest, 
TestProfileCreationGenerateMethods) { + // Class names put here need to be in sorted order. + std::vector<std::string> class_names = { + "Ljava/lang/Math;->*", + }; + std::string input_file_contents; + std::string expected_contents; + for (std::string& class_name : class_names) { + input_file_contents += class_name + std::string("\n"); + expected_contents += DescriptorToDot(class_name.c_str()) + + std::string("\n"); + } + std::string output_file_contents; + ScratchFile profile_file; + EXPECT_TRUE(CreateProfile(input_file_contents, + profile_file.GetFilename(), + GetLibCoreDexFileNames()[0])); + ProfileCompilationInfo info; + profile_file.GetFile()->ResetOffset(); + ASSERT_TRUE(info.Load(GetFd(profile_file))); + // Verify that the profile has matching methods. + ScopedObjectAccess soa(Thread::Current()); + ObjPtr<mirror::Class> klass = GetClass(nullptr, "Ljava/lang/Math;"); + ASSERT_TRUE(klass != nullptr); + size_t method_count = 0; + for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) { + if (!method.IsCopied() && method.GetCodeItem() != nullptr) { + ++method_count; + ProfileCompilationInfo::OfflineProfileMethodInfo pmi; + ASSERT_TRUE(info.GetMethod(method.GetDexFile()->GetLocation(), + method.GetDexFile()->GetLocationChecksum(), + method.GetDexMethodIndex(), + &pmi)); + } + } + EXPECT_GT(method_count, 0u); +} + TEST_F(ProfileAssistantTest, TestProfileCreationOneNotMatched) { // Class names put here need to be in sorted order. std::vector<std::string> class_names = { diff --git a/profman/profman.cc b/profman/profman.cc index f7316cc129..fdb9a75a6f 100644 --- a/profman/profman.cc +++ b/profman/profman.cc @@ -120,7 +120,6 @@ NO_RETURN static void Usage(const char *fmt, ...) { UsageError(""); UsageError(" --create-profile-from=<filename>: creates a profile from a list of classes."); UsageError(""); - UsageError(""); UsageError(" --dex-location=<string>: location string to use with corresponding"); UsageError(" apk-fd to find dex files"); UsageError(""); @@ -140,6 +139,7 @@ static constexpr uint16_t kDefaultTestProfileClassRatio = 5; // Separators used when parsing human friendly representation of profiles. static const std::string kMethodSep = "->"; static const std::string kMissingTypesMarker = "missing_types"; +static const std::string kClassAllMethods = "*"; static constexpr char kProfileParsingInlineChacheSep = '+'; static constexpr char kProfileParsingTypeSep = ','; static constexpr char kProfileParsingFirstCharInSignature = '('; @@ -630,6 +630,7 @@ class ProfMan FINAL { // "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;". // "LTestInline;->inlineMissingTypes(LSuper;)I+missing_types". // "LTestInline;->inlineNoInlineCaches(LSuper;)I". + // "LTestInline;->*". // The method and classes are searched only in the given dex files. bool ProcessLine(const std::vector<std::unique_ptr<const DexFile>>& dex_files, const std::string& line, @@ -650,8 +651,8 @@ class ProfMan FINAL { return false; } - if (method_str.empty()) { - // No method to add. Just add the class. + if (method_str.empty() || method_str == kClassAllMethods) { + // Start by adding the class. 
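// Illustrative input for the new wildcard form (the class name is the one
// used by the test above): a --create-profile-from file containing the line
//
//   Ljava/lang/Math;->*
//
// takes this branch, records the class itself, and then adds every method of
// the class that has a code item, whereas a bare "Ljava/lang/Math;" line
// still records only the class.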
std::set<DexCacheResolvedClasses> resolved_class_set; const DexFile* dex_file = class_ref.dex_file; const auto& dex_resolved_classes = resolved_class_set.emplace( @@ -659,7 +660,27 @@ class ProfMan FINAL { dex_file->GetBaseLocation(), dex_file->GetLocationChecksum()); dex_resolved_classes.first->AddClass(class_ref.type_index); - profile->AddMethodsAndClasses(std::vector<ProfileMethodInfo>(), resolved_class_set); + std::vector<ProfileMethodInfo> methods; + if (method_str == kClassAllMethods) { + // Add all of the methods. + const DexFile::ClassDef* class_def = dex_file->FindClassDef(class_ref.type_index); + const uint8_t* class_data = dex_file->GetClassData(*class_def); + if (class_data != nullptr) { + ClassDataItemIterator it(*dex_file, class_data); + while (it.HasNextStaticField() || it.HasNextInstanceField()) { + it.Next(); + } + while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) { + if (it.GetMethodCodeItemOffset() != 0) { + // Add all of the methods that have code to the profile. + const uint32_t method_idx = it.GetMemberIndex(); + methods.push_back(ProfileMethodInfo(dex_file, method_idx)); + } + it.Next(); + } + } + } + profile->AddMethodsAndClasses(methods, resolved_class_set); return true; } diff --git a/runtime/arch/mips64/instruction_set_features_mips64.cc b/runtime/arch/mips64/instruction_set_features_mips64.cc index 5757906618..08d0bac2c3 100644 --- a/runtime/arch/mips64/instruction_set_features_mips64.cc +++ b/runtime/arch/mips64/instruction_set_features_mips64.cc @@ -30,22 +30,52 @@ using android::base::StringPrintf; Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromVariant( const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) { + bool msa = true; if (variant != "default" && variant != "mips64r6") { LOG(WARNING) << "Unexpected CPU variant for Mips64 using defaults: " << variant; } - return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures()); + return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa)); } -Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromBitmap(uint32_t bitmap ATTRIBUTE_UNUSED) { - return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures()); +Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromBitmap(uint32_t bitmap) { + bool msa = (bitmap & kMsaBitfield) != 0; + return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa)); } Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromCppDefines() { - return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures()); +#if defined(_MIPS_ARCH_MIPS64R6) + const bool msa = true; +#else + const bool msa = false; +#endif + return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa)); } Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromCpuInfo() { - return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures()); + // Look in /proc/cpuinfo for features we need. Only use this when we can guarantee that + // the kernel puts the appropriate feature flags in here. Sometimes it doesn't. 
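// For reference, the exact /proc/cpuinfo wording is kernel-dependent and is
// an assumption here rather than a guarantee: on MSA-capable hardware the
// relevant entry typically looks like
//
//   ASEs implemented : msa
//
// which is what the substring checks below ("ASEs", then "msa") key on.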
+ bool msa = false; + + std::ifstream in("/proc/cpuinfo"); + if (!in.fail()) { + while (!in.eof()) { + std::string line; + std::getline(in, line); + if (!in.eof()) { + LOG(INFO) << "cpuinfo line: " << line; + if (line.find("ASEs") != std::string::npos) { + LOG(INFO) << "found Application Specific Extensions"; + if (line.find("msa") != std::string::npos) { + msa = true; + } + } + } + } + in.close(); + } else { + LOG(ERROR) << "Failed to open /proc/cpuinfo"; + } + return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa)); } Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromHwcap() { @@ -62,28 +92,40 @@ bool Mips64InstructionSetFeatures::Equals(const InstructionSetFeatures* other) c if (kMips64 != other->GetInstructionSet()) { return false; } - return true; + const Mips64InstructionSetFeatures* other_as_mips64 = other->AsMips64InstructionSetFeatures(); + return msa_ == other_as_mips64->msa_; } uint32_t Mips64InstructionSetFeatures::AsBitmap() const { - return 0; + return (msa_ ? kMsaBitfield : 0); } std::string Mips64InstructionSetFeatures::GetFeatureString() const { - return "default"; + std::string result; + if (msa_) { + result += "msa"; + } else { + result += "-msa"; + } + return result; } std::unique_ptr<const InstructionSetFeatures> Mips64InstructionSetFeatures::AddFeaturesFromSplitString( const std::vector<std::string>& features, std::string* error_msg) const { - auto i = features.begin(); - if (i != features.end()) { - // We don't have any features. + bool msa = msa_; + for (auto i = features.begin(); i != features.end(); i++) { std::string feature = android::base::Trim(*i); - *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str()); - return nullptr; + if (feature == "msa") { + msa = true; + } else if (feature == "-msa") { + msa = false; + } else { + *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str()); + return nullptr; + } } - return std::unique_ptr<const InstructionSetFeatures>(new Mips64InstructionSetFeatures()); + return std::unique_ptr<const InstructionSetFeatures>(new Mips64InstructionSetFeatures(msa)); } } // namespace art diff --git a/runtime/arch/mips64/instruction_set_features_mips64.h b/runtime/arch/mips64/instruction_set_features_mips64.h index c80c466dfc..d9f30c755e 100644 --- a/runtime/arch/mips64/instruction_set_features_mips64.h +++ b/runtime/arch/mips64/instruction_set_features_mips64.h @@ -58,6 +58,11 @@ class Mips64InstructionSetFeatures FINAL : public InstructionSetFeatures { std::string GetFeatureString() const OVERRIDE; + // Does it have MSA (MIPS SIMD Architecture) support. + bool HasMsa() const { + return msa_; + } + virtual ~Mips64InstructionSetFeatures() {} protected: @@ -67,9 +72,16 @@ class Mips64InstructionSetFeatures FINAL : public InstructionSetFeatures { std::string* error_msg) const OVERRIDE; private: - Mips64InstructionSetFeatures() : InstructionSetFeatures() { + explicit Mips64InstructionSetFeatures(bool msa) : InstructionSetFeatures(), msa_(msa) { } + // Bitmap positions for encoding features as a bitmap. 
+ enum { + kMsaBitfield = 1, + }; + + const bool msa_; + DISALLOW_COPY_AND_ASSIGN(Mips64InstructionSetFeatures); }; diff --git a/runtime/arch/mips64/instruction_set_features_mips64_test.cc b/runtime/arch/mips64/instruction_set_features_mips64_test.cc index 380c4e5433..0ba0bd4c15 100644 --- a/runtime/arch/mips64/instruction_set_features_mips64_test.cc +++ b/runtime/arch/mips64/instruction_set_features_mips64_test.cc @@ -20,15 +20,31 @@ namespace art { -TEST(Mips64InstructionSetFeaturesTest, Mips64Features) { +TEST(Mips64InstructionSetFeaturesTest, Mips64FeaturesFromDefaultVariant) { std::string error_msg; std::unique_ptr<const InstructionSetFeatures> mips64_features( InstructionSetFeatures::FromVariant(kMips64, "default", &error_msg)); ASSERT_TRUE(mips64_features.get() != nullptr) << error_msg; EXPECT_EQ(mips64_features->GetInstructionSet(), kMips64); EXPECT_TRUE(mips64_features->Equals(mips64_features.get())); - EXPECT_STREQ("default", mips64_features->GetFeatureString().c_str()); - EXPECT_EQ(mips64_features->AsBitmap(), 0U); + EXPECT_STREQ("msa", mips64_features->GetFeatureString().c_str()); + EXPECT_EQ(mips64_features->AsBitmap(), 1U); +} + +TEST(Mips64InstructionSetFeaturesTest, Mips64FeaturesFromR6Variant) { + std::string error_msg; + std::unique_ptr<const InstructionSetFeatures> mips64r6_features( + InstructionSetFeatures::FromVariant(kMips64, "mips64r6", &error_msg)); + ASSERT_TRUE(mips64r6_features.get() != nullptr) << error_msg; + EXPECT_EQ(mips64r6_features->GetInstructionSet(), kMips64); + EXPECT_TRUE(mips64r6_features->Equals(mips64r6_features.get())); + EXPECT_STREQ("msa", mips64r6_features->GetFeatureString().c_str()); + EXPECT_EQ(mips64r6_features->AsBitmap(), 1U); + + std::unique_ptr<const InstructionSetFeatures> mips64_default_features( + InstructionSetFeatures::FromVariant(kMips64, "default", &error_msg)); + ASSERT_TRUE(mips64_default_features.get() != nullptr) << error_msg; + EXPECT_TRUE(mips64r6_features->Equals(mips64_default_features.get())); } } // namespace art diff --git a/runtime/arch/mips64/registers_mips64.cc b/runtime/arch/mips64/registers_mips64.cc index 495920809f..1ee2cdd204 100644 --- a/runtime/arch/mips64/registers_mips64.cc +++ b/runtime/arch/mips64/registers_mips64.cc @@ -46,5 +46,14 @@ std::ostream& operator<<(std::ostream& os, const FpuRegister& rhs) { return os; } +std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs) { + if (rhs >= W0 && rhs < kNumberOfVectorRegisters) { + os << "w" << static_cast<int>(rhs); + } else { + os << "VectorRegister[" << static_cast<int>(rhs) << "]"; + } + return os; +} + } // namespace mips64 } // namespace art diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h index 81fae72b44..30de2cc009 100644 --- a/runtime/arch/mips64/registers_mips64.h +++ b/runtime/arch/mips64/registers_mips64.h @@ -107,6 +107,45 @@ enum FpuRegister { }; std::ostream& operator<<(std::ostream& os, const FpuRegister& rhs); +// Values for vector registers. 
+enum VectorRegister { + W0 = 0, + W1 = 1, + W2 = 2, + W3 = 3, + W4 = 4, + W5 = 5, + W6 = 6, + W7 = 7, + W8 = 8, + W9 = 9, + W10 = 10, + W11 = 11, + W12 = 12, + W13 = 13, + W14 = 14, + W15 = 15, + W16 = 16, + W17 = 17, + W18 = 18, + W19 = 19, + W20 = 20, + W21 = 21, + W22 = 22, + W23 = 23, + W24 = 24, + W25 = 25, + W26 = 26, + W27 = 27, + W28 = 28, + W29 = 29, + W30 = 30, + W31 = 31, + kNumberOfVectorRegisters = 32, + kNoVectorRegister = -1, +}; +std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs); + } // namespace mips64 } // namespace art diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc index d4bb56b62a..5394e53fa3 100644 --- a/runtime/base/scoped_flock.cc +++ b/runtime/base/scoped_flock.cc @@ -116,7 +116,10 @@ ScopedFlock::ScopedFlock() { } ScopedFlock::~ScopedFlock() { if (file_.get() != nullptr) { int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN)); - CHECK_EQ(0, flock_result); + if (flock_result != 0) { + PLOG(FATAL) << "Unable to unlock file " << file_->GetPath(); + UNREACHABLE(); + } int close_result = -1; if (file_->ReadOnlyMode()) { close_result = file_->Close(); diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index 9a64097a01..8162a820e0 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -6732,10 +6732,11 @@ static void CheckClassOwnsVTableEntries(Thread* self, auto is_same_method = [m] (const ArtMethod& meth) { return &meth == m; }; - CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) || - std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end()) - << m->PrettyMethod() << " does not seem to be owned by current class " - << klass->PrettyClass() << " or any of its superclasses!"; + if (!((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) || + std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())) { + LOG(WARNING) << m->PrettyMethod() << " does not seem to be owned by current class " + << klass->PrettyClass() << " or any of its superclasses!"; + } } } @@ -6763,14 +6764,15 @@ static void CheckVTableHasNoDuplicates(Thread* self, other_entry->GetAccessFlags())) { continue; } - CHECK(vtable_entry != other_entry && - !name_comparator.HasSameNameAndSignature( - other_entry->GetInterfaceMethodIfProxy(pointer_size))) - << "vtable entries " << i << " and " << j << " are identical for " - << klass->PrettyClass() << " in method " << vtable_entry->PrettyMethod() << " (0x" - << std::hex << reinterpret_cast<uintptr_t>(vtable_entry) << ") and " - << other_entry->PrettyMethod() << " (0x" << std::hex - << reinterpret_cast<uintptr_t>(other_entry) << ")"; + if (vtable_entry == other_entry || + name_comparator.HasSameNameAndSignature( + other_entry->GetInterfaceMethodIfProxy(pointer_size))) { + LOG(WARNING) << "vtable entries " << i << " and " << j << " are identical for " + << klass->PrettyClass() << " in method " << vtable_entry->PrettyMethod() + << " (0x" << std::hex << reinterpret_cast<uintptr_t>(vtable_entry) << ") and " + << other_entry->PrettyMethod() << " (0x" << std::hex + << reinterpret_cast<uintptr_t>(other_entry) << ")"; + } } } } diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 7136f101aa..d2ab41d409 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -2171,9 +2171,12 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { fall_back_to_non_moving = true; to_ref 
= heap_->non_moving_space_->Alloc(Thread::Current(), obj_size, &non_moving_space_bytes_allocated, nullptr, &dummy); - CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed for a " - << obj_size << " byte object in region type " - << region_space_->GetRegionType(from_ref); + if (UNLIKELY(to_ref == nullptr)) { + LOG(FATAL_WITHOUT_ABORT) << "Fall-back non-moving space allocation failed for a " + << obj_size << " byte object in region type " + << region_space_->GetRegionType(from_ref); + LOG(FATAL) << "Object address=" << from_ref << " type=" << from_ref->PrettyTypeOf(); + } bytes_allocated = non_moving_space_bytes_allocated; // Mark it in the mark bitmap. accounting::ContinuousSpaceBitmap* mark_bitmap = diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc index b13d565ec2..0aa04c10ca 100644 --- a/runtime/jdwp/jdwp_adb.cc +++ b/runtime/jdwp/jdwp_adb.cc @@ -227,7 +227,7 @@ bool JdwpAdbState::Accept() { const int sleep_max_ms = 2*1000; char buff[5]; - int sock = socket(PF_UNIX, SOCK_STREAM, 0); + int sock = socket(AF_UNIX, SOCK_SEQPACKET, 0); if (sock < 0) { PLOG(ERROR) << "Could not create ADB control socket"; return false; @@ -264,7 +264,7 @@ bool JdwpAdbState::Accept() { * up after a few minutes in case somebody ships an app with * the debuggable flag set. */ - int ret = connect(ControlSock(), &control_addr_.controlAddrPlain, control_addr_len_); + int ret = connect(ControlSock(), &control_addr_.controlAddrPlain, control_addr_len_); if (!ret) { int control_sock = ControlSock(); #ifdef ART_TARGET_ANDROID @@ -278,7 +278,7 @@ bool JdwpAdbState::Accept() { /* now try to send our pid to the ADB daemon */ ret = TEMP_FAILURE_RETRY(send(control_sock, buff, 4, 0)); - if (ret >= 0) { + if (ret == 4) { VLOG(jdwp) << StringPrintf("PID sent as '%.*s' to ADB", 4, buff); break; } diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h index a6385d7469..c8d256fec0 100644 --- a/runtime/jit/profile_saver_options.h +++ b/runtime/jit/profile_saver_options.h @@ -21,7 +21,7 @@ namespace art { struct ProfileSaverOptions { public: static constexpr uint32_t kMinSavePeriodMs = 20 * 1000; // 20 seconds - static constexpr uint32_t kSaveResolvedClassesDelayMs = 2 * 1000; // 2 seconds + static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000; // 5 seconds // Minimum number of JIT samples during launch to include a method into the profile. 
static constexpr uint32_t kStartupMethodSamples = 1; static constexpr uint32_t kMinMethodsToSave = 10; @@ -37,7 +37,8 @@ struct ProfileSaverOptions { min_methods_to_save_(kMinMethodsToSave), min_classes_to_save_(kMinClassesToSave), min_notification_before_wake_(kMinNotificationBeforeWake), - max_notification_before_wake_(kMaxNotificationBeforeWake) {} + max_notification_before_wake_(kMaxNotificationBeforeWake), + profile_path_("") {} ProfileSaverOptions( bool enabled, @@ -47,7 +48,8 @@ struct ProfileSaverOptions { uint32_t min_methods_to_save, uint32_t min_classes_to_save, uint32_t min_notification_before_wake, - uint32_t max_notification_before_wake): + uint32_t max_notification_before_wake, + const std::string& profile_path): enabled_(enabled), min_save_period_ms_(min_save_period_ms), save_resolved_classes_delay_ms_(save_resolved_classes_delay_ms), @@ -55,7 +57,8 @@ struct ProfileSaverOptions { min_methods_to_save_(min_methods_to_save), min_classes_to_save_(min_classes_to_save), min_notification_before_wake_(min_notification_before_wake), - max_notification_before_wake_(max_notification_before_wake) {} + max_notification_before_wake_(max_notification_before_wake), + profile_path_(profile_path) {} bool IsEnabled() const { return enabled_; @@ -85,6 +88,9 @@ struct ProfileSaverOptions { uint32_t GetMaxNotificationBeforeWake() const { return max_notification_before_wake_; } + std::string GetProfilePath() const { + return profile_path_; + } friend std::ostream & operator<<(std::ostream &os, const ProfileSaverOptions& pso) { os << "enabled_" << pso.enabled_ @@ -106,6 +112,7 @@ struct ProfileSaverOptions { uint32_t min_classes_to_save_; uint32_t min_notification_before_wake_; uint32_t max_notification_before_wake_; + std::string profile_path_; }; } // namespace art diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index 1735045d60..3396ce0b57 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -725,44 +725,23 @@ const std::vector<uint32_t>* OatFileAssistant::GetRequiredDexChecksums() { return required_dex_checksums_found_ ? &cached_required_dex_checksums_ : nullptr; } -// TODO: Use something better than xor for the combined image checksum. std::unique_ptr<OatFileAssistant::ImageInfo> OatFileAssistant::ImageInfo::GetRuntimeImageInfo(InstructionSet isa, std::string* error_msg) { CHECK(error_msg != nullptr); - // Use the currently loaded image to determine the image locations for all - // the image spaces, regardless of the isa requested. Otherwise we would - // need to read from the boot image's oat file to determine the rest of the - // image locations in the case of multi-image. Runtime* runtime = Runtime::Current(); - std::vector<gc::space::ImageSpace*> image_spaces = runtime->GetHeap()->GetBootImageSpaces(); - if (image_spaces.empty()) { - *error_msg = "There are no boot image spaces"; + std::unique_ptr<ImageInfo> info(new ImageInfo()); + info->location = runtime->GetImageLocation(); + + std::unique_ptr<ImageHeader> image_header( + gc::space::ImageSpace::ReadImageHeader(info->location.c_str(), isa, error_msg)); + if (image_header == nullptr) { return nullptr; } - std::unique_ptr<ImageInfo> info(new ImageInfo()); - info->location = image_spaces[0]->GetImageLocation(); - - // TODO: Special casing on isa == kRuntimeISA is presumably motivated by - // performance: 'it's faster to use an already loaded image header than read - // the image header from disk'. 
But the loaded image is not necessarily the - // same as kRuntimeISA, so this behavior is suspect (b/35659889). - if (isa == kRuntimeISA) { - const ImageHeader& image_header = image_spaces[0]->GetImageHeader(); - info->oat_checksum = image_header.GetOatChecksum(); - info->oat_data_begin = reinterpret_cast<uintptr_t>(image_header.GetOatDataBegin()); - info->patch_delta = image_header.GetPatchDelta(); - } else { - std::unique_ptr<ImageHeader> image_header( - gc::space::ImageSpace::ReadImageHeader(info->location.c_str(), isa, error_msg)); - if (image_header == nullptr) { - return nullptr; - } - info->oat_checksum = image_header->GetOatChecksum(); - info->oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin()); - info->patch_delta = image_header->GetPatchDelta(); - } + info->oat_checksum = image_header->GetOatChecksum(); + info->oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin()); + info->patch_delta = image_header->GetPatchDelta(); return info; } diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index 9113f83cd4..4d787db5ac 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -708,6 +708,7 @@ void ParsedOptions::Usage(const char* fmt, ...) { UsageMessage(stream, " -Xps-min-classes-to-save:integervalue\n"); UsageMessage(stream, " -Xps-min-notification-before-wake:integervalue\n"); UsageMessage(stream, " -Xps-max-notification-before-wake:integervalue\n"); + UsageMessage(stream, " -Xps-profile-path:file-path\n"); UsageMessage(stream, " -Xcompiler:filename\n"); UsageMessage(stream, " -Xcompiler-option dex2oat-option\n"); UsageMessage(stream, " -Ximage-compiler-option dex2oat-option\n"); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 13370a0b4a..44f8281abf 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -2155,6 +2155,19 @@ void Runtime::CreateJit() { jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg)); if (jit_.get() == nullptr) { LOG(WARNING) << "Failed to create JIT " << error_msg; + return; + } + + // In case we have a profile path passed as a command line argument, + // register the current class path for profiling now. Note that we cannot do + // this before we create the JIT and having it here is the most convenient way. + // This is used when testing profiles with dalvikvm command as there is no + // framework to register the dex files for profiling. + if (jit_options_->GetSaveProfilingInfo() && + !jit_options_->GetProfileSaverOptions().GetProfilePath().empty()) { + std::vector<std::string> dex_filenames; + Split(class_path_string_, ':', &dex_filenames); + RegisterAppInfo(dex_filenames, jit_options_->GetProfileSaverOptions().GetProfilePath()); } } diff --git a/runtime/thread.cc b/runtime/thread.cc index 30a4046d73..008c388229 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -16,6 +16,10 @@ #include "thread.h" +#if !defined(__APPLE__) +#include <sched.h> +#endif + #include <pthread.h> #include <signal.h> #include <sys/resource.h> @@ -1591,8 +1595,21 @@ void Thread::DumpState(std::ostream& os, const Thread* thread, pid_t tid) { if (thread != nullptr) { int policy; sched_param sp; +#if !defined(__APPLE__) + // b/36445592 Don't use pthread_getschedparam since pthread may have exited. 
+ policy = sched_getscheduler(tid); + if (policy == -1) { + PLOG(WARNING) << "sched_getscheduler(" << tid << ")"; + } + int sched_getparam_result = sched_getparam(tid, &sp); + if (sched_getparam_result == -1) { + PLOG(WARNING) << "sched_getparam(" << tid << ", &sp)"; + sp.sched_priority = -1; + } +#else CHECK_PTHREAD_CALL(pthread_getschedparam, (thread->tlsPtr_.pthread_self, &policy, &sp), __FUNCTION__); +#endif os << " sched=" << policy << "/" << sp.sched_priority << " handle=" << reinterpret_cast<void*>(thread->tlsPtr_.pthread_self); } diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc index 54cce98e8c..5aef062728 100644 --- a/runtime/well_known_classes.cc +++ b/runtime/well_known_classes.cc @@ -34,7 +34,6 @@ namespace art { -jclass WellKnownClasses::com_android_dex_Dex; jclass WellKnownClasses::dalvik_annotation_optimization_CriticalNative; jclass WellKnownClasses::dalvik_annotation_optimization_FastNative; jclass WellKnownClasses::dalvik_system_BaseDexClassLoader; @@ -267,7 +266,6 @@ uint32_t WellKnownClasses::StringInitToEntryPoint(ArtMethod* string_init) { #undef STRING_INIT_LIST void WellKnownClasses::Init(JNIEnv* env) { - com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex"); dalvik_annotation_optimization_CriticalNative = CacheClass(env, "dalvik/annotation/optimization/CriticalNative"); dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative"); diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h index af4dbbf076..c18473197b 100644 --- a/runtime/well_known_classes.h +++ b/runtime/well_known_classes.h @@ -44,7 +44,6 @@ struct WellKnownClasses { static ObjPtr<mirror::Class> ToClass(jclass global_jclass) REQUIRES_SHARED(Locks::mutator_lock_); - static jclass com_android_dex_Dex; static jclass dalvik_annotation_optimization_CriticalNative; static jclass dalvik_annotation_optimization_FastNative; static jclass dalvik_system_BaseDexClassLoader; diff --git a/test.py b/test.py new file mode 100755 index 0000000000..414d7790f8 --- /dev/null +++ b/test.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python +# +# Copyright 2017, The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# --run-test : To run run-test +# --gtest : To run gtest +# -j : Number of jobs +# --host: for host tests +# --target: for target tests +# All the other arguments will be passed to the run-test testrunner. 
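# Usage illustration, not part of this change (behaviour inferred from the
# argument handling below): "./test.py --host -j4" runs both the run-tests via
# testrunner.py and the host gtests, because omitting both --run-test and
# --gtest selects both suites; "./test.py -g --target" skips the run-tests and
# only builds and runs test-art-target-gtest.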
+import sys +import subprocess +import os +import argparse + +ANDROID_BUILD_TOP = os.environ.get('ANDROID_BUILD_TOP', os.getcwd()) + +parser = argparse.ArgumentParser() +parser.add_argument('-j', default='', dest='n_threads') +parser.add_argument('--run-test', '-r', action='store_true', dest='run_test') +parser.add_argument('--gtest', '-g', action='store_true', dest='gtest') +parser.add_argument('--target', action='store_true', dest='target') +parser.add_argument('--host', action='store_true', dest='host') +options, unknown = parser.parse_known_args() + +if options.run_test or not options.gtest: + testrunner = os.path.join('./', + ANDROID_BUILD_TOP, + 'art/test/testrunner/testrunner.py') + run_test_args = [] + for arg in sys.argv[1:]: + if arg == '--run-test' or arg == '--gtest' \ + or arg == '-r' or arg == '-g': + continue + run_test_args.append(arg) + + test_runner_cmd = [testrunner] + run_test_args + print test_runner_cmd + if subprocess.call(test_runner_cmd): + sys.exit(1) + +if options.gtest or not options.run_test: + build_target = '' + if options.host or not options.target: + build_target += ' test-art-host-gtest' + if options.target or not options.host: + build_target += ' test-art-target-gtest' + + build_command = 'make' + build_command += ' -j' + str(options.n_threads) + + build_command += ' -C ' + ANDROID_BUILD_TOP + build_command += ' ' + build_target + # Add 'dist' to avoid Jack issues b/36169180. + build_command += ' dist' + + print build_command + + if subprocess.call(build_command.split()): + sys.exit(1) + +sys.exit(0) diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt index c6cd4f8bea..3fc34929eb 100644 --- a/test/051-thread/expected.txt +++ b/test/051-thread/expected.txt @@ -1,6 +1,6 @@ JNI_OnLoad called thread test starting -testThreadCapacity thread count: 512 +testThreadCapacity thread count: 128 testThreadDaemons starting thread 'TestDaemonThread' testThreadDaemons @ Thread running testThreadDaemons @ Got expected setDaemon exception diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java index 2e26b22265..82fc0d471b 100644 --- a/test/051-thread/src/Main.java +++ b/test/051-thread/src/Main.java @@ -35,8 +35,8 @@ public class Main { * Simple thread capacity test. */ private static void testThreadCapacity() throws Exception { - TestCapacityThread[] threads = new TestCapacityThread[512]; - for (int i = 0; i < 512; i++) { + TestCapacityThread[] threads = new TestCapacityThread[128]; + for (int i = 0; i < threads.length; i++) { threads[i] = new TestCapacityThread(); } diff --git a/test/080-oom-throw/run b/test/080-oom-throw/run new file mode 100644 index 0000000000..eb473782a5 --- /dev/null +++ b/test/080-oom-throw/run @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Copyright (C) 2017 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +exec ${RUN} $@ --runtime-option -Xmx16m diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java index a6c18b75fc..3d5d0629f3 100644 --- a/test/080-oom-throw/src/Main.java +++ b/test/080-oom-throw/src/Main.java @@ -114,13 +114,13 @@ public class Main { static Object[] holder; public static void blowup() throws Exception { - int size = 32 * 1024 * 1024; + int size = 2 * 1024 * 1024; for (int i = 0; i < holder.length; ) { try { holder[i] = new char[size]; i++; } catch (OutOfMemoryError oome) { - size = size / 2; + size = size / 16; if (size == 0) { break; } diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java index 3de900a3a9..a5caa7bce0 100644 --- a/test/527-checker-array-access-split/src/Main.java +++ b/test/527-checker-array-access-split/src/Main.java @@ -327,17 +327,17 @@ public class Main { // check. /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before) - /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Const7:i\d+>> IntConstant 7 /// CHECK: <<Array:l\d+>> NewArray /// CHECK: <<Index:i\d+>> Phi /// CHECK: If // -------------- Loop /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + /// CHECK: <<Div:i\d+>> Div [<<ArrayGet>>,<<Const7>>] + /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Div>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after) - /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 /// CHECK: <<Array:l\d+>> NewArray /// CHECK: <<Index:i\d+>> Phi @@ -345,12 +345,12 @@ public class Main { // -------------- Loop /// CHECK: <<Address1:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Div:i\d+>> Div [<<ArrayGet>>,<<Const7>>] /// CHECK: <<Address2:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] - /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Div>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN$after_arch (after) - /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 /// CHECK: <<Array:l\d+>> NewArray /// CHECK: <<Index:i\d+>> Phi @@ -358,23 +358,23 @@ public class Main { // -------------- Loop /// CHECK: <<Address:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Div:i\d+>> Div [<<ArrayGet>>,<<Const7>>] /// CHECK-NOT: IntermediateAddress - /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] + /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Div>>] /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (before) - /// CHECK: <<Const1:i\d+>> IntConstant 1 + /// CHECK: <<Const7:i\d+>> IntConstant 7 /// CHECK: <<Array:l\d+>> NewArray /// CHECK: <<Index:i\d+>> Phi /// CHECK: If // -------------- Loop /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Array>>,<<Index>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] - /// CHECK: ArraySet [<<Array>>,<<Index>>,<<Add>>] + /// CHECK: <<Div:i\d+>> Div [<<ArrayGet>>,<<Const7>>] + /// CHECK: ArraySet 
[<<Array>>,<<Index>>,<<Div>>] /// CHECK-START-ARM: int Main.canMergeAfterBCE1() instruction_simplifier_arm (after) - /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 /// CHECK: <<Array:l\d+>> NewArray /// CHECK: <<Index:i\d+>> Phi @@ -382,12 +382,12 @@ public class Main { // -------------- Loop /// CHECK: <<Address1:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-NEXT: <<ArrayGet:i\d+>> ArrayGet [<<Address1>>,<<Index>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Div:i\d+>> Div [<<ArrayGet>>,<<Const7>>] /// CHECK: <<Address2:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] - /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Add>>] + /// CHECK-NEXT: ArraySet [<<Address2>>,<<Index>>,<<Div>>] /// CHECK-START-ARM: int Main.canMergeAfterBCE1() GVN$after_arch (after) - /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 + /// CHECK-DAG: <<Const7:i\d+>> IntConstant 7 /// CHECK-DAG: <<DataOffset:i\d+>> IntConstant 12 /// CHECK: <<Array:l\d+>> NewArray /// CHECK: <<Index:i\d+>> Phi @@ -395,14 +395,14 @@ public class Main { // -------------- Loop /// CHECK: <<Address:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK: <<ArrayGet:i\d+>> ArrayGet [<<Address>>,<<Index>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGet>>,<<Const1>>] + /// CHECK: <<Div:i\d+>> Div [<<ArrayGet>>,<<Const7>>] /// CHECK-NOT: IntermediateAddress - /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Add>>] + /// CHECK: ArraySet [<<Address>>,<<Index>>,<<Div>>] public static int canMergeAfterBCE1() { - int[] array = {0, 1, 2, 3}; + int[] array = {0, 7, 14, 21}; for (int i = 0; i < array.length; i++) { - array[i] = array[i] + 1; + array[i] = array[i] / 7; } return array[array.length - 1]; } @@ -421,8 +421,8 @@ public class Main { /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Array>>,<<Index>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Array>>,<<Index1>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] - /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Add>>] + /// CHECK: <<Shl:i\d+>> Shl [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Shl>>] // Note that we do not care that the `DataOffset` is `12`. 
But if we do not // specify it and any other `IntConstant` appears before that instruction, @@ -441,9 +441,9 @@ public class Main { /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: <<Shl:i\d+>> Shl [<<ArrayGetI>>,<<ArrayGetI1>>] /// CHECK: <<Address3:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] - /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>] + /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Shl>>] /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after) /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 @@ -456,8 +456,8 @@ public class Main { /// CHECK-DAG: <<Address:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] - /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Add>>] + /// CHECK: <<Shl:i\d+>> Shl [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Shl>>] // There should be only one intermediate address computation in the loop. @@ -475,8 +475,8 @@ public class Main { /// CHECK-DAG: <<Index1:i\d+>> Add [<<Index>>,<<Const1>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Array>>,<<Index>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Array>>,<<Index1>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] - /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Add>>] + /// CHECK: <<Shl:i\d+>> Shl [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Array>>,<<Index1>>,<<Shl>>] /// CHECK-START-ARM: int Main.canMergeAfterBCE2() instruction_simplifier_arm (after) /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 @@ -490,9 +490,9 @@ public class Main { /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address1>>,<<Index>>] /// CHECK-DAG: <<Address2:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address2>>,<<Index1>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: <<Shl:i\d+>> Shl [<<ArrayGetI>>,<<ArrayGetI1>>] /// CHECK: <<Address3:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] - /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Add>>] + /// CHECK: ArraySet [<<Address3>>,<<Index1>>,<<Shl>>] /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN$after_arch (after) /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 @@ -505,17 +505,17 @@ public class Main { /// CHECK-DAG: <<Address:i\d+>> IntermediateAddress [<<Array>>,<<DataOffset>>] /// CHECK-DAG: <<ArrayGetI:i\d+>> ArrayGet [<<Address>>,<<Index>>] /// CHECK-DAG: <<ArrayGetI1:i\d+>> ArrayGet [<<Address>>,<<Index1>>] - /// CHECK: <<Add:i\d+>> Add [<<ArrayGetI>>,<<ArrayGetI1>>] - /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Add>>] + /// CHECK: <<Shl:i\d+>> Shl [<<ArrayGetI>>,<<ArrayGetI1>>] + /// CHECK: ArraySet [<<Address>>,<<Index1>>,<<Shl>>] /// CHECK-START-ARM: int Main.canMergeAfterBCE2() GVN$after_arch (after) /// CHECK: IntermediateAddress /// CHECK-NOT: IntermediateAddress public static int canMergeAfterBCE2() { - int[] array = {0, 1, 2, 3}; + int[] array = {64, 8, 4, 2 }; for (int i = 0; i < array.length - 1; i++) { - array[i + 1] = array[i] + array[i + 1]; + array[i + 1] = array[i] << array[i + 1]; } return array[array.length - 1]; } @@ -571,8 +571,8 @@ public class Main { 
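[Editor's note: for reference, the expected values asserted a few lines below follow directly from the rewritten loop bodies. canMergeAfterBCE1 now divides {0, 7, 14, 21} element-wise by 7 and returns the last element; canMergeAfterBCE2 folds {64, 8, 4, 2} with left shifts. A quick arithmetic sketch, in plain Python and purely illustrative:]

a = [0, 7, 14, 21]
for i in range(len(a)):
    a[i] = a[i] // 7             # array becomes [0, 1, 2, 3]
print(a[-1])                     # 3

b = [64, 8, 4, 2]
for i in range(len(b) - 1):
    b[i + 1] = b[i] << b[i + 1]  # 64<<8 = 16384, 16384<<4 = 262144, 262144<<2 = 1048576
print(b[-1])                     # 1048576

[These values match the updated assertIntEquals(3, ...) and assertIntEquals(1048576, ...) calls in the hunk that follows.]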
accrossGC(array, 0); assertIntEquals(125, array[0]); - assertIntEquals(4, canMergeAfterBCE1()); - assertIntEquals(6, canMergeAfterBCE2()); + assertIntEquals(3, canMergeAfterBCE1()); + assertIntEquals(1048576, canMergeAfterBCE2()); assertIntEquals(18, checkLongFloatDouble()); } diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 100b031edc..703b911f0f 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -43,18 +43,6 @@ TEST_ART_RUN_TEST_DEPENDENCIES := \ TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES := setup-jack-server -ifeq ($(ART_TEST_DEBUG_GC),true) - ART_TEST_WITH_STRACE := true -endif - -ifeq ($(ART_TEST_BISECTION),true) - # Need to keep rebuilding the test to bisection search it. - ART_TEST_RUN_TEST_NO_PREBUILD := true - ART_TEST_RUN_TEST_PREBUILD := false - # Bisection search writes to standard output. - ART_TEST_QUIET := false -endif - # Helper to create individual build targets for tests. Must be called with $(eval). # $(1): the test number define define-build-art-run-test @@ -97,699 +85,11 @@ LOCAL_PICKUP_FILES := $(art_run_tests_install_dir) include $(BUILD_PHONY_PACKAGE) -# Clear temp vars. -art_run_tests_build_dir := -art_run_tests_install_dir := -define-build-art-run-test := -TEST_ART_RUN_TEST_BUILD_RULES := - -######################################################################## -# General rules to build and run a run-test. - -TARGET_TYPES := host target -PREBUILD_TYPES := -ifeq ($(ART_TEST_RUN_TEST_PREBUILD),true) - PREBUILD_TYPES += prebuild -endif -ifeq ($(ART_TEST_RUN_TEST_NO_PREBUILD),true) - PREBUILD_TYPES += no-prebuild -endif -ifeq ($(ART_TEST_RUN_TEST_NO_DEX2OAT),true) - PREBUILD_TYPES += no-dex2oat -endif -COMPILER_TYPES := -ifeq ($(ART_TEST_INTERPRETER_ACCESS_CHECKS),true) - COMPILER_TYPES += interp-ac -endif -ifeq ($(ART_TEST_INTERPRETER),true) - COMPILER_TYPES += interpreter -endif -ifeq ($(ART_TEST_JIT),true) - COMPILER_TYPES += jit -endif -OPTIMIZING_COMPILER_TYPES := -ifeq ($(ART_TEST_OPTIMIZING),true) - COMPILER_TYPES += optimizing - OPTIMIZING_COMPILER_TYPES += optimizing -endif -ifeq ($(ART_TEST_OPTIMIZING_GRAPH_COLOR),true) - COMPILER_TYPES += regalloc_gc - OPTIMIZING_COMPILER_TYPES += regalloc_gc -endif -RELOCATE_TYPES := no-relocate -ifeq ($(ART_TEST_RUN_TEST_RELOCATE),true) - RELOCATE_TYPES += relocate -endif -ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true) - RELOCATE_TYPES += relocate-npatchoat -endif -TRACE_TYPES := ntrace -ifeq ($(ART_TEST_TRACE),true) - TRACE_TYPES += trace -endif -ifeq ($(ART_TEST_TRACE_STREAM),true) - TRACE_TYPES += stream -endif -GC_TYPES := cms -ifeq ($(ART_TEST_GC_STRESS),true) - GC_TYPES += gcstress -endif -ifeq ($(ART_TEST_GC_VERIFY),true) - GC_TYPES += gcverify -endif -JNI_TYPES := checkjni -ifeq ($(ART_TEST_JNI_FORCECOPY),true) - JNI_TYPES += forcecopy -endif -ifeq ($(ART_TEST_RUN_TEST_IMAGE),true) -IMAGE_TYPES := picimage -endif -ifeq ($(ART_TEST_RUN_TEST_NO_IMAGE),true) - IMAGE_TYPES += no-image -endif -ifeq ($(ART_TEST_RUN_TEST_MULTI_IMAGE),true) - IMAGE_TYPES := multipicimage -endif -PICTEST_TYPES := npictest -ifeq ($(ART_TEST_PIC_TEST),true) - PICTEST_TYPES += pictest -endif -RUN_TYPES := -ifeq ($(ART_TEST_RUN_TEST_DEBUG),true) - RUN_TYPES += debug -endif -ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true) - RUN_TYPES += ndebug -endif -DEBUGGABLE_TYPES := ndebuggable -ifeq ($(ART_TEST_RUN_TEST_DEBUGGABLE),true) -DEBUGGABLE_TYPES += debuggable -endif -ADDRESS_SIZES_TARGET := $(ART_PHONY_TEST_TARGET_SUFFIX) -ADDRESS_SIZES_HOST := $(ART_PHONY_TEST_HOST_SUFFIX) -ifeq 
($(ART_TEST_RUN_TEST_2ND_ARCH),true) - ADDRESS_SIZES_TARGET += $(2ND_ART_PHONY_TEST_TARGET_SUFFIX) - ADDRESS_SIZES_HOST += $(2ND_ART_PHONY_TEST_HOST_SUFFIX) -endif -ALL_ADDRESS_SIZES := 64 32 - -# List all run test names with number arguments agreeing with the comment above. -define all-run-test-names - $(foreach target, $(1), \ - $(foreach run-type, $(2), \ - $(foreach prebuild, $(3), \ - $(foreach compiler, $(4), \ - $(foreach relocate, $(5), \ - $(foreach trace, $(6), \ - $(foreach gc, $(7), \ - $(foreach jni, $(8), \ - $(foreach image, $(9), \ - $(foreach pictest, $(10), \ - $(foreach debuggable, $(11), \ - $(foreach test, $(12), \ - $(foreach address_size, $(13), \ - test-art-$(target)-run-test-$(run-type)-$(prebuild)-$(compiler)-$(relocate)-$(trace)-$(gc)-$(jni)-$(image)-$(pictest)-$(debuggable)-$(test)$(address_size) \ - ))))))))))))) -endef # all-run-test-names - -# To generate a full list or tests: -# $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \ -# $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ -# $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - # Convert's a rule name to the form used in variables, e.g. no-relocate to NO_RELOCATE define name-to-var $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_') endef # name-to-var -# Disable 115-native-bridge, it fails when run through make b/35984597. -# Disable 153-reference-stress temporarily until a fix arrives. b/33389022. -# Disable 080-oom-fragmentation due to flakes. b/33795328 -# Disable 497-inlining-and-class-loader and 542-unresolved-access-check until -# they are rewritten. These tests use a broken class loader that tries to -# register a dex file that's already registered with a different loader. -# b/34193123 -ART_TEST_RUN_TEST_SKIP += \ - 115-native-bridge \ - 153-reference-stress \ - 080-oom-fragmentation \ - 497-inlining-and-class-loader \ - 542-unresolved-access-check - -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES)) - - -# Disable 149-suspend-all-stress, its output is flaky (b/28988206). -TEST_ART_BROKEN_ALL_TARGET_TESTS := \ - 149-suspend-all-stress \ - -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_ALL_TARGET_TESTS), \ - $(ALL_ADDRESS_SIZES)) - -TEST_ART_BROKEN_ALL_TARGET_TESTS := - -# Tests that are timing sensitive and flaky on heavily loaded systems. -TEST_ART_TIMING_SENSITIVE_RUN_TESTS := \ - 002-sleep \ - 053-wait-some \ - 055-enum-performance \ - 133-static-invoke-super - -# disable timing sensitive tests on "dist" builds. -ifdef dist_goal - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -# 147-stripped-dex-fallback isn't supported on device because --strip-dex -# requires the zip command. -# 569-checker-pattern-replacement tests behaviour present only on host. 
-TEST_ART_BROKEN_TARGET_TESTS := \ - 147-stripped-dex-fallback \ - 569-checker-pattern-replacement - -ifneq (,$(filter target,$(TARGET_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TARGET_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_TARGET_TESTS := - -# Tests that require python3. -TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS := \ - 960-default-smali \ - 961-default-iface-resolution-gen \ - 964-default-iface-init-gen \ - 968-default-partial-compile-gen \ - 969-iface-super \ - 970-iface-super-resolution-gen \ - 971-iface-super - -# Check if we have python3 to run our tests. -ifeq ($(wildcard /usr/bin/python3),) - $(warning "No python3 found. Disabling tests: $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS)") - - # Currently disable tests requiring python3 when it is not installed. - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_TIMING_SENSITIVE_RUN_TESTS := - -# Note 116-nodex2oat is not broken per-se it just doesn't (and isn't meant to) work with --prebuild. -TEST_ART_BROKEN_PREBUILD_RUN_TESTS := \ - 116-nodex2oat \ - 118-noimage-dex2oat \ - 134-nodex2oat-nofallback - -ifneq (,$(filter prebuild,$(PREBUILD_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),prebuild, \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_PREBUILD_RUN_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_PREBUILD_RUN_TESTS := - -# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save. -# 529 and 555: b/27784033 -TEST_ART_BROKEN_NO_PREBUILD_TESTS := \ - 117-nopatchoat \ - 147-stripped-dex-fallback \ - 554-jit-profile-file \ - 529-checker-unresolved \ - 555-checker-regression-x86const \ - 608-checker-unresolved-lse - -ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_PREBUILD_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_NO_PREBUILD_TESTS := - -# Note 117-nopatchoat is not broken per-se it just doesn't work (and isn't meant to) without -# --prebuild --relocate -TEST_ART_BROKEN_NO_RELOCATE_TESTS := \ - 117-nopatchoat \ - 118-noimage-dex2oat \ - 119-noimage-patchoat \ - 554-jit-profile-file - -ifneq (,$(filter no-relocate,$(RELOCATE_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES), no-relocate,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_RELOCATE_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_NO_RELOCATE_TESTS := - -# Temporarily disable some broken tests when forcing access checks in interpreter b/22414682 -# 629 requires compilation. 
-# 080 and 530: b/36377828 -TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \ - 137-cfi \ - 530-checker-lse \ - 080-oom-throw \ - 629-vdex-speed - -ifneq (,$(filter interp-ac,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - interp-ac,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := - -# Tests that are broken with GC stress. -# * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we -# hope the second process got into the expected state. The slowness of gcstress makes this bad. -# * 152-dead-large-object requires a heap larger than what gcstress uses. -# * 908-gc-start-finish expects GCs only to be run at clear points. The reduced heap size makes -# this non-deterministic. Same for 913. -# * 961-default-iface-resolution-gen and 964-default-iface-init-genare very long tests that often -# will take more than the timeout to run when gcstress is enabled. This is because gcstress -# slows down allocations significantly which these tests do a lot. -TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \ - 137-cfi \ - 152-dead-large-object \ - 154-gc-loop \ - 908-gc-start-finish \ - 913-heaps \ - 961-default-iface-resolution-gen \ - 964-default-iface-init-gen \ - -ifneq (,$(filter gcstress,$(GC_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \ - $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := - -# 115-native-bridge setup is complicated. Need to implement it correctly for the target. -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \ - $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), 115-native-bridge, \ - $(ALL_ADDRESS_SIZES)) - -# 130-hprof dumps the heap and runs hprof-conv to check whether the file is somewhat readable. This -# is only possible on the host. -# TODO: Turn off all the other combinations, this is more about testing actual ART code. A gtest is -# very hard to write here, as (for a complete test) JDWP must be set up. -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ - $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES)) - -# 131 is an old test. The functionality has been implemented at an earlier stage and is checked -# in tests 138. Blacklisted for debug builds since these builds have duplicate classes checks which -# punt to interpreter. -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),debug,$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ - $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES)) - -# 138-duplicate-classes-check. Turned on for debug builds since debug builds have duplicate classes -# checks enabled, b/2133391. 
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ - $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),138-duplicate-classes-check,$(ALL_ADDRESS_SIZES)) - -# All these tests check that we have sane behavior if we don't have a patchoat or dex2oat. -# Therefore we shouldn't run them in situations where we actually don't have these since they -# explicitly test for them. These all also assume we have an image. -# 147-stripped-dex-fallback is disabled because it requires --prebuild. -# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save. -# 629-vdex-speed requires compiled code. -TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \ - 116-nodex2oat \ - 117-nopatchoat \ - 118-noimage-dex2oat \ - 119-noimage-patchoat \ - 137-cfi \ - 138-duplicate-classes-check2 \ - 147-stripped-dex-fallback \ - 554-jit-profile-file \ - 616-cha \ - 616-cha-abstract \ - 912-classes \ - 629-vdex-speed - -# This test fails without an image. -# 018, 961, 964, 968 often time out. b/34369284 -TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \ - 137-cfi \ - 138-duplicate-classes-check \ - 018-stack-overflow \ - 476-clinit-inline-static-invoke \ - 496-checker-inlining-class-loader \ - 637-checker-throw-inline \ - 616-cha \ - 616-cha-abstract \ - 912-classes \ - 961-default-iface-resolution-gen \ - 964-default-iface-init \ - 968-default-partial-compile-gen \ - -ifneq (,$(filter no-dex2oat,$(PREBUILD_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-dex2oat, \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ - $(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - - -ifneq (,$(filter no-image,$(IMAGE_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \ - $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \ - $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES), relocate-npatchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_FALLBACK_RUN_TESTS := - -# 137: -# This test unrolls and expects managed frames, but tracing means we run the interpreter. -# 802 and 570-checker-osr: -# This test dynamically enables tracing to force a deoptimization. This makes the test meaningless -# when already tracing, and writes an error message that we do not want to check for. -# 130 occasional timeout b/32383962. -# 629 requires compilation. 
-TEST_ART_BROKEN_TRACING_RUN_TESTS := \ - 087-gc-after-link \ - 130-hprof \ - 137-cfi \ - 141-class-unload \ - 570-checker-osr \ - 629-vdex-speed \ - 802-deoptimization - -ifneq (,$(filter trace stream,$(TRACE_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),trace stream,$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ - $(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TRACING_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_TRACING_RUN_TESTS := - -# These tests expect JIT compilation, which is suppressed when tracing. -TEST_ART_BROKEN_JIT_TRACING_RUN_TESTS := \ - 604-hot-static-interface \ - 612-jit-dex-cache \ - 613-inlining-dex-cache \ - 616-cha \ - 626-set-resolved-string \ - -ifneq (,$(filter trace stream,$(TRACE_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - jit,$(RELOCATE_TYPES),trace stream,$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \ - $(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_JIT_TRACING_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_JIT_TRACING_RUN_TESTS := - -# Known broken tests for the interpreter. -# CFI unwinding expects managed frames. -# 629 requires compilation. -TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \ - 137-cfi \ - 554-jit-profile-file \ - 629-vdex-speed - -ifneq (,$(filter interpreter,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_INTERPRETER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := - -# Known broken tests for the JIT. -# CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT -# also uses Generic JNI instead of the JNI compiler. -# 154-gc-loop requires more deterministic GC behavior than what JIT does. -# Test 906 iterates the heap filtering with different options. No instances should be created -# between those runs to be able to have precise checks. -# Test 629 requires compilation. -# 912: b/34655682 -TEST_ART_BROKEN_JIT_RUN_TESTS := \ - 137-cfi \ - 154-gc-loop \ - 629-vdex-speed \ - 904-object-allocation \ - 906-iterate-heap \ - -ifneq (,$(filter jit,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_JIT_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_JIT_RUN_TESTS := - -# Known broken tests for the graph coloring register allocator. -# These tests were based on the linear scan allocator, which makes different decisions than -# the graph coloring allocator. (These attempt to test for code quality, not correctness.) -TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR := \ - 570-checker-select \ - 484-checker-register-hints - -ifneq (,$(filter regalloc_gc,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - regalloc_gc,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES)) -endif - -# Known broken tests for the mips32 optimizing compiler backend. 
-TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \ - -ifeq (mips,$(TARGET_ARCH)) - ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - endif -endif - -TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := - -# Known broken tests for the mips64 optimizing compiler backend. -TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \ - -ifeq (mips64,$(TARGET_ARCH)) - ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - endif -endif - -TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := - -# Tests that should fail when the optimizing compiler compiles them non-debuggable. -TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \ - 454-get-vreg \ - 457-regs \ - 602-deoptimizeable - -ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := - -# Tests that should fail when the optimizing compiler compiles them debuggable. -TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := \ - -ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := - -# Tests that should fail in the read barrier configuration with the interpreter. -TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS := - -# Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT). -TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := - -# Tests that should fail in the read barrier configuration with JIT (Optimizing compiler). -TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := - -# Tests failing in non-Baker read barrier configurations with the Optimizing compiler (AOT). -# 537 and 641: Expect an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet -# handled in non-Baker read barrier configurations. -TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS := \ - 537-checker-arraycopy \ - 641-checker-arraycopy - -# Tests failing in non-Baker read barrier configurations with JIT (Optimizing compiler). -# 537 and 641: Expect an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet -# handled in non-Baker read barrier configurations. 
-TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS := \ - 537-checker-arraycopy \ - 641-checker-arraycopy - -ifeq ($(ART_USE_READ_BARRIER),true) - ifneq (,$(filter interpreter,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ - $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - endif - - ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \ - $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - ifneq ($(ART_READ_BARRIER_TYPE),BAKER) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \ - $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - endif - endif - - ifneq (,$(filter jit,$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ - $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - ifneq ($(ART_READ_BARRIER_TYPE),BAKER) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \ - $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - endif - endif -endif - -TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := -TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS := - -TEST_ART_BROKEN_NPIC_RUN_TESTS := 596-app-images -ifneq (,$(filter npictest,$(PICTEST_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - ${COMPILER_TYPES},$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),npictest,$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_NPIC_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -# Tests that should fail in the heap poisoning configuration with the Optimizing compiler. -# 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices). -TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \ - 055-enum-performance - -ifeq ($(ART_HEAP_POISONING),true) - ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES))) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - endif -endif - -TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := - -# 909: Tests that check semantics for a non-debuggable app. -# 137: relies on AOT code and debuggable makes us JIT always. 
-TEST_ART_BROKEN_DEBUGGABLE_RUN_TESTS := \ - 137-cfi \ - 909-attach-agent \ - -ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ - $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES)) - -TEST_ART_BROKEN_DEBUGGABLE_RUN_TESTS := - -# Tests incompatible with bisection bug search. Sorted by incompatibility reason. -# 000 through 595 do not compile anything. 089 tests a build failure. 018 through 137 -# run dalvikvm more than once. 115 and 088 assume they are always compiled. -# 055 tests performance which is degraded during bisecting. -TEST_ART_INCOMPATIBLE_BISECTION_SEARCH_RUN_TESTS := \ - 000-nop \ - 134-nodex2oat-nofallback \ - 147-stripped-dex-fallback \ - 595-profile-saving \ - \ - 089-many-methods \ - \ - 018-stack-overflow \ - 116-nodex2oat \ - 117-nopatchoat \ - 118-noimage-dex2oat \ - 119-noimage-patchoat \ - 126-miranda-multidex \ - 137-cfi \ - \ - 115-native-bridge \ - 088-monitor-verification \ - \ - 055-enum-performance - -ifeq ($(ART_TEST_BISECTION),true) - ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \ - $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ - $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \ - $(TEST_ART_INCOMPATIBLE_BISECTION_SEARCH_RUN_TESTS),$(ALL_ADDRESS_SIZES)) -endif - -# Clear variables ahead of appending to them when defining tests. -$(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=)) -$(foreach target, $(TARGET_TYPES), \ - $(foreach prebuild, $(PREBUILD_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach compiler, $(COMPILER_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach relocate, $(RELOCATE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(relocate))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach trace, $(TRACE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(trace))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach gc, $(GC_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(gc))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach jni, $(JNI_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(jni))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach image, $(IMAGE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(image))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach test, $(TEST_ART_RUN_TESTS), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(test))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach address_size, $(ALL_ADDRESS_SIZES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(address_size))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach run_type, $(RUN_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run_type))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach debuggable_type, $(DEBUGGABLE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call 
name-to-var,$(debuggable_type))_RULES :=))) - # We need dex2oat and dalvikvm on the target as well as the core images (all images as we sync # only once). TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUTS) @@ -908,423 +208,62 @@ define core-image-dependencies endif endef -COMPILER_TYPES_2 := optimizing -COMPILER_TYPES_2 += interpreter -COMPILER_TYPES_2 += jit -COMPILER_TYPES_2 += regalloc_gc -COMPILER_TYPES_2 += interp-ac -ALL_ADDRESS_SIZES_2 := 32 64 -IMAGE_TYPES_2 := picimage -IMAGE_TYPES_2 += no-image -IMAGE_TYPES_2 += npicimage -IMAGE_TYPES_2 += multinpicimage -IMAGE_TYPES_2 += multipicimage +TARGET_TYPES := host target +COMPILER_TYPES := jit interpreter optimizing regalloc_gc jit interp-ac +IMAGE_TYPES := picimage no-image multipicimage +ALL_ADDRESS_SIZES := 64 32 # Add core image dependencies required for given target - HOST or TARGET, # IMAGE_TYPE, COMPILER_TYPE and ADDRESS_SIZE to the prereq_rules. $(foreach target, $(TARGET_TYPES), \ - $(foreach image, $(IMAGE_TYPES_2), \ - $(foreach compiler, $(COMPILER_TYPES_2), \ - $(foreach address_size, $(ALL_ADDRESS_SIZES_2), $(eval \ + $(foreach image, $(IMAGE_TYPES), \ + $(foreach compiler, $(COMPILER_TYPES), \ + $(foreach address_size, $(ALL_ADDRESS_SIZES), $(eval \ $(call core-image-dependencies,$(target),$(image),$(compiler),$(address_size))))))) test-art-host-run-test-dependencies : $(host_prereq_rules) test-art-target-run-test-dependencies : $(target_prereq_rules) test-art-run-test-dependencies : test-art-host-run-test-dependencies test-art-target-run-test-dependencies -host_prereq_rules := -target_prereq_rules := - -# Create a rule to build and run a tests following the form: -# test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}- -# {4: interpreter optimizing jit interp-ac}- -# {5: relocate nrelocate relocate-npatchoat}- -# {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}- -# {9: no-image image picimage}-{10: pictest npictest}- -# {11: ndebuggable debuggable}-{12: test name}{13: 32 or 64} -define define-test-art-run-test - run_test_options := - prereq_rule := - test_groups := - uc_host_or_target := - jack_classpath := - ifeq ($(ART_TEST_WITH_STRACE),true) - run_test_options += --strace - endif - ifeq ($(ART_TEST_RUN_TEST_ALWAYS_CLEAN),true) - run_test_options += --always-clean - endif - ifeq ($(ART_TEST_BISECTION),true) - run_test_options += --bisection-search - endif - ifeq ($(1),host) - uc_host_or_target := HOST - test_groups := ART_RUN_TEST_HOST_RULES - run_test_options += --host - prereq_rule := $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(HOST_JACK_CLASSPATH_DEPENDENCIES) - jack_classpath := $(HOST_JACK_CLASSPATH) - else - ifeq ($(1),target) - uc_host_or_target := TARGET - test_groups := ART_RUN_TEST_TARGET_RULES - prereq_rule := test-art-target-sync $(TARGET_JACK_CLASSPATH_DEPENDENCIES) - jack_classpath := $(TARGET_JACK_CLASSPATH) - else - $$(error found $(1) expected $(TARGET_TYPES)) - endif - endif - ifeq ($(2),debug) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUG_RULES - else - ifeq ($(2),ndebug) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELEASE_RULES - run_test_options += -O - else - $$(error found $(2) expected $(RUN_TYPES)) - endif - endif - ifeq ($(3),prebuild) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PREBUILD_RULES - run_test_options += --prebuild - else - ifeq ($(3),no-prebuild) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_PREBUILD_RULES - run_test_options += --no-prebuild - else - ifeq 
($(3),no-dex2oat) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_DEX2OAT_RULES - run_test_options += --no-prebuild --no-dex2oat - else - $$(error found $(3) expected $(PREBUILD_TYPES)) - endif - endif - endif - ifeq ($(4),optimizing) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_RULES - run_test_options += --optimizing - else ifeq ($(4),regalloc_gc) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_GRAPH_COLOR_RULES - run_test_options += --optimizing -Xcompiler-option --register-allocation-strategy=graph-color - else - ifeq ($(4),interpreter) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES - run_test_options += --interpreter - else ifeq ($(4),interp-ac) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_ACCESS_CHECKS_RULES - run_test_options += --interpreter --verify-soft-fail - else - ifeq ($(4),jit) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JIT_RULES - run_test_options += --jit - else - $$(error found $(4) expected $(COMPILER_TYPES)) - endif - endif - endif - - ifeq ($(5),relocate) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_RULES - run_test_options += --relocate - else - ifeq ($(5),no-relocate) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_RELOCATE_RULES - run_test_options += --no-relocate - else - ifeq ($(5),relocate-npatchoat) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_NO_PATCHOAT_RULES - run_test_options += --relocate --no-patchoat - else - $$(error found $(5) expected $(RELOCATE_TYPES)) - endif - endif - endif - ifeq ($(6),trace) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES - run_test_options += --trace - else - ifeq ($(6),ntrace) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_TRACE_RULES - else - ifeq ($(6),stream) - # Group streaming under normal tracing rules. - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES - run_test_options += --trace --stream - else - $$(error found $(6) expected $(TRACE_TYPES)) - endif - endif - endif - ifeq ($(7),gcverify) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_GCVERIFY_RULES - run_test_options += --gcverify - else - ifeq ($(7),gcstress) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_GCSTRESS_RULES - run_test_options += --gcstress - else - ifeq ($(7),cms) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_CMS_RULES - else - $$(error found $(7) expected $(GC_TYPES)) - endif - endif - endif - ifeq ($(8),forcecopy) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_FORCECOPY_RULES - run_test_options += --runtime-option -Xjniopts:forcecopy - ifneq ($$(ART_TEST_JNI_FORCECOPY),true) - skip_test := true - endif - else - ifeq ($(8),checkjni) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_CHECKJNI_RULES - run_test_options += --runtime-option -Xcheck:jni - else - ifeq ($(8),jni) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JNI_RULES - else - $$(error found $(8) expected $(JNI_TYPES)) - endif - endif - endif - image_suffix := $(4) - ifeq ($(4),regalloc_gc) - # Graph coloring tests share the image_suffix with optimizing tests. - image_suffix := optimizing - else - ifeq ($(4),jit) - # JIT tests share the image_suffix with interpreter tests. - image_suffix := interpreter - endif - endif - ifeq ($(9),no-image) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES - run_test_options += --no-image - # Add the core dependency. This is required for pre-building. - # Use the PIC image, as it is the default in run-test, to match dependencies. 
- ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_$(13)) - endif - else - ifeq ($(9),picimage) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES - ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_$(13)) - endif - else - ifeq ($(9),multipicimage) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES - run_test_options += --multi-image - ifeq ($(1),host) - prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_multi_$(13)) - else - prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_multi_$(13)) - endif - else - $$(error found $(9) expected $(IMAGE_TYPES)) - endif - endif - endif - ifeq ($(10),pictest) - run_test_options += --pic-test - else - ifeq ($(10),npictest) - # Nothing to be done. - else - $$(error found $(10) expected $(PICTEST_TYPES)) - endif - endif - ifeq ($(11),debuggable) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUGGABLE_RULES - run_test_options += --debuggable - else - ifeq ($(11),ndebuggable) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NONDEBUGGABLE_RULES - # Nothing to be done. - else - $$(error found $(11) expected $(DEBUGGABLE_TYPES)) - endif - endif - # $(12) is the test name. - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_$(call name-to-var,$(12))_RULES - ifeq ($(13),64) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_64_RULES - run_test_options += --64 - else - ifeq ($(13),32) - test_groups += ART_RUN_TEST_$$(uc_host_or_target)_32_RULES - else - $$(error found $(13) expected $(ALL_ADDRESS_SIZES)) - endif - endif - # Override of host instruction-set-features. Required to test advanced x86 intrinsics. The - # conditionals aren't really correct, they will fail to do the right thing on a 32-bit only - # host. However, this isn't common enough to worry here and make the conditions complicated. 
- ifneq ($(DEX2OAT_HOST_INSTRUCTION_SET_FEATURES),) - ifeq ($(13),64) - run_test_options += --instruction-set-features $(DEX2OAT_HOST_INSTRUCTION_SET_FEATURES) - endif - endif - ifneq ($($(HOST_2ND_ARCH_VAR_PREFIX)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES),) - ifeq ($(13),32) - run_test_options += --instruction-set-features $($(HOST_2ND_ARCH_VAR_PREFIX)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES) - endif - endif - run_test_rule_name := test-art-$(1)-run-test-$(2)-$(3)-$(4)-$(5)-$(6)-$(7)-$(8)-$(9)-$(10)-$(11)-$(12)$(13) - run_test_options := --output-path $(ART_HOST_TEST_DIR)/run-test-output/$$(run_test_rule_name) \ - $$(run_test_options) - ifneq ($(ART_TEST_ANDROID_ROOT),) - run_test_options := --android-root $(ART_TEST_ANDROID_ROOT) $$(run_test_options) - endif - ifeq ($(ART_TEST_QUIET),true) - run_test_options += --quiet - endif -$$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options) -$$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath) -.PHONY: $$(run_test_rule_name) -$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES) - $(hide) $$(call ART_TEST_SKIP,$$@) && \ - DX=$(abspath $(DX)) \ - JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \ - SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \ - DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \ - JACK_VERSION=$(JACK_DEFAULT_VERSION) \ - JACK=$(abspath $(JACK)) \ - JACK_VERSION=$(JACK_DEFAULT_VERSION) \ - JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \ - art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(12) \ - && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@) - $$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \ - echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \ - rm -r $(ART_HOST_TEST_DIR)) || true - - $$(foreach test_group,$$(test_groups), $$(eval $$(value test_group) += $$(run_test_rule_name))) - - # Clear locally defined variables. - uc_host_or_target := - test_groups := - run_test_options := - run_test_rule_name := - prereq_rule := - jack_classpath := -endef # define-test-art-run-test - +# Generate list of dependencies required for given target - HOST or TARGET, IMAGE_TYPE, +# COMPILER_TYPE and ADDRESS_SIZE. $(foreach target, $(TARGET_TYPES), \ - $(foreach test, $(TEST_ART_RUN_TESTS), \ - $(foreach run_type, $(RUN_TYPES), \ - $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), \ - $(foreach prebuild, $(PREBUILD_TYPES), \ - $(foreach compiler, $(COMPILER_TYPES), \ - $(foreach relocate, $(RELOCATE_TYPES), \ - $(foreach trace, $(TRACE_TYPES), \ - $(foreach gc, $(GC_TYPES), \ - $(foreach jni, $(JNI_TYPES), \ - $(foreach image, $(IMAGE_TYPES), \ - $(foreach pictest, $(PICTEST_TYPES), \ - $(foreach debuggable, $(DEBUGGABLE_TYPES), \ - $(eval $(call define-test-art-run-test,$(target),$(run_type),$(prebuild),$(compiler),$(relocate),$(trace),$(gc),$(jni),$(image),$(pictest),$(debuggable),$(test),$(address_size))) \ - ))))))))))))) -define-test-art-run-test := + $(foreach image, $(IMAGE_TYPES), \ + $(foreach compiler, $(COMPILER_TYPES), \ + $(foreach address_size, $(ALL_ADDRESS_SIZES), $(eval \ + $(call core-image-dependencies,$(target),$(image),$(compiler),$(address_size))))))) -# Define a phony rule whose purpose is to test its prerequisites. 
-# $(1): host or target -# $(2): list of prerequisites -define define-test-art-run-test-group -.PHONY: $(1) -$(1): $(2) - $(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@) +test-art-host-run-test-dependencies : $(host_prereq_rules) +test-art-target-run-test-dependencies : $(target_prereq_rules) +test-art-run-test-dependencies : test-art-host-run-test-dependencies test-art-target-run-test-dependencies -endef # define-test-art-run-test-group +# Create a rule to build and run a test group of the following form: +# test-art-{1: host target}-run-test +define define-test-art-host-or-target-run-test-group + build_target := test-art-$(1)-run-test + .PHONY: $$(build_target) + $$(build_target) : args := --$(1) --verbose + $$(build_target) : test-art-$(1)-run-test-dependencies + ./art/test/testrunner/testrunner.py $$(args) + build_target := + args := +endef # define-test-art-host-or-target-run-test-group $(foreach target, $(TARGET_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test,$(ART_RUN_TEST_$(call name-to-var,$(target))_RULES)))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach prebuild, $(PREBUILD_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(prebuild),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach run-type, $(RUN_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(run-type),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run-type))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach compiler, $(COMPILER_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(compiler),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach relocate, $(RELOCATE_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(relocate),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(relocate))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach trace, $(TRACE_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(trace),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(trace))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach gc, $(GC_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(gc),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(gc))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach jni, $(JNI_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(jni),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(jni))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach debuggable, $(DEBUGGABLE_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(debuggable),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(debuggable))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach image, $(IMAGE_TYPES), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(image),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(image))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach test, $(TEST_ART_RUN_TESTS), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(test),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call 
name-to-var,$(test))_RULES))))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), $(eval \ - $(call define-test-art-run-test-group,test-art-$(target)-run-test$(address_size),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(address_size)_RULES))))) + $(call define-test-art-host-or-target-run-test-group,$(target)))) -# Clear variables now we're finished with them. -$(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=)) -$(foreach target, $(TARGET_TYPES), \ - $(foreach prebuild, $(PREBUILD_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach compiler, $(COMPILER_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach relocate, $(RELOCATE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(relocate))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach trace, $(TRACE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(trace))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach gc, $(GC_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(gc))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach jni, $(JNI_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(jni))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach debuggable, $(DEBUGGABLE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(debuggable))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach image, $(IMAGE_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(image))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach test, $(TEST_ART_RUN_TESTS), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(test))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach address_size, $(ALL_ADDRESS_SIZES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(address_size))_RULES :=))) -$(foreach target, $(TARGET_TYPES), \ - $(foreach run_type, $(RUN_TYPES), \ - $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run_type))_RULES :=))) -define-test-art-run-test-group := +test-art-run-test : test-art-host-run-test test-art-target-run-test + +host_prereq_rules := +target_prereq_rules := +core-image-dependencies := +name-to-var := +ART_TEST_HOST_RUN_TEST_DEPENDENCIES := +TEST_ART_TARGET_SYNC_DEPS := +define-test-art-host-or-target-run-test-group := TARGET_TYPES := -PREBUILD_TYPES := COMPILER_TYPES := -RELOCATE_TYPES := -TRACE_TYPES := -GC_TYPES := -JNI_TYPES := IMAGE_TYPES := -ADDRESS_SIZES_TARGET := -ADDRESS_SIZES_HOST := ALL_ADDRESS_SIZES := -RUN_TYPES := -DEBUGGABLE_TYPES := - LOCAL_PATH := diff --git a/test/DefaultMethods/IterableBase.java b/test/DefaultMethods/IterableBase.java new file mode 100644 index 0000000000..4cefdefb67 --- /dev/null +++ b/test/DefaultMethods/IterableBase.java @@ -0,0 +1,27 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Iface {
+  default void defaultMethod() {
+  }
+}
+
+class Impl implements Iface {
+}
+
+abstract class IterableBase implements Iterable {
+}
+
diff --git a/test/knownfailures.json b/test/knownfailures.json
index d540a792fd..2de34ca44f 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -339,14 +339,28 @@
     {
         "tests": ["476-clinit-inline-static-invoke",
                   "496-checker-inlining-class-loader",
+                  "508-referrer-method",
                   "637-checker-throw-inline"],
         "bug": "http://b/36365552",
         "variant": "no-image & jit"
     },
     {
+        "tests": ["597-deopt-new-string"],
+        "bug": "http://b/36467228",
+        "variant": "no-image & jit"
+    },
+    {
         "tests": ["530-checker-lse",
+                  "530-checker-lse2",
+                  "030-bad-finalizer",
                   "080-oom-throw"],
         "bug": "http://b/36377828",
         "variant": "interp-ac"
+    },
+    {
+        "tests": "638-checker-inline-caches",
+        "description": ["Disable 638-checker-inline-caches temporarily until a fix",
+                        "arrives."],
+        "bug": "http://b/36371709"
     }
 ]
diff --git a/test/testrunner/env.py b/test/testrunner/env.py
index ed4b4a9f3e..e93fb3afa8 100644
--- a/test/testrunner/env.py
+++ b/test/testrunner/env.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-#
 # Copyright 2017, The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -57,7 +55,9 @@ def dump_many_vars(var_name):
           "make --no-print-directory -C \"%s\" -f build/core/config.mk "
           "dump-many-vars DUMP_MANY_VARS=\"%s\"") % (ANDROID_BUILD_TOP, all_vars)
-  config = subprocess.Popen(command, stdout=subprocess.PIPE,
+  config = subprocess.Popen(command,
+                            stdout=subprocess.PIPE,
+                            universal_newlines=True,
                             shell=True).communicate()[0] # read until EOF, select stdin
   # Prints out something like:
   # TARGET_ARCH='arm64'
diff --git a/test/testrunner/run_build_test_target.py b/test/testrunner/run_build_test_target.py
index 4c519ae7f7..835b678cd6 100755
--- a/test/testrunner/run_build_test_target.py
+++ b/test/testrunner/run_build_test_target.py
@@ -46,7 +46,6 @@ custom_env['SOONG_ALLOW_MISSING_DEPENDENCIES'] = 'true'
 print custom_env
 os.environ.update(custom_env)
-
 if target.get('target'):
   build_command = 'make'
   build_command += ' -j' + str(n_threads)
@@ -56,7 +55,7 @@ if target.get('target'):
   if subprocess.call(build_command.split()):
     sys.exit(1)
-else:
+if target.get('run-tests'):
   run_test_command = [os.path.join(env.ANDROID_BUILD_TOP,
                                    'art/test/testrunner/testrunner.py')]
   run_test_command += target.get('flags', [])
diff --git a/test/testrunner/target_config.py b/test/testrunner/target_config.py
index 1af2ae7a63..5a6ecffd44 100644
--- a/test/testrunner/target_config.py
+++ b/test/testrunner/target_config.py
@@ -1,29 +1,35 @@
 target_config = {
     'art-test' : {
+        'target' : 'test-art-host-gtest',
+        'run-tests' : True,
        'flags' : [],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false'
        }
    },
    'art-interpreter' : {
+        'run-tests' : True,
        'flags' : ['--interpreter'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false'
        }
    },
    'art-interpreter-access-checks' : {
+        'run-tests' : True,
        'flags' : ['--interp-ac'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false'
        }
    },
    'art-jit' : {
+        'run-tests' : True,
        'flags' : ['--jit'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false'
        }
    },
    'art-gcstress-gcverify': {
+        'run-tests' : True,
        'flags' : ['--gcstress',
                   '--gcverify'],
        'env' : {
@@ -32,6 +38,7 @@ target_config = {
        }
    },
    'art-interpreter-gcstress' : {
+        'run-tests' : True,
        'flags': ['--interpreter',
                  '--gcstress'],
        'env' : {
@@ -40,6 +47,7 @@ target_config = {
        }
    },
    'art-optimizing-gcstress' : {
+        'run-tests' : True,
        'flags': ['--gcstress',
                  '--optimizing'],
        'env' : {
@@ -48,6 +56,7 @@ target_config = {
        }
    },
    'art-jit-gcstress' : {
+        'run-tests' : True,
        'flags': ['--jit',
                  '--gcstress'],
        'env' : {
@@ -56,6 +65,7 @@ target_config = {
        }
    },
    'art-read-barrier' : {
+        'run-tests' : True,
        'flags': ['--interpreter',
                  '--optimizing'],
        'env' : {
@@ -64,6 +74,7 @@ target_config = {
        }
    },
    'art-read-barrier-gcstress' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing',
                   '--gcstress'],
@@ -73,6 +84,7 @@ target_config = {
        }
    },
    'art-read-barrier-table-lookup' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing'],
        'env' : {
@@ -82,6 +94,7 @@ target_config = {
        }
    },
    'art-debug-gc' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing'],
        'env' : {
@@ -90,6 +103,7 @@ target_config = {
        }
    },
    'art-ss-gc' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing',
                   '--jit'],
@@ -99,6 +113,7 @@ target_config = {
        }
    },
    'art-gss-gc' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing',
                   '--jit'],
@@ -108,6 +123,7 @@ target_config = {
        }
    },
    'art-ss-gc-tlab' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing',
                   '--jit'],
@@ -118,6 +134,7 @@ target_config = {
        }
    },
    'art-gss-gc-tlab' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing',
                   '--jit'],
@@ -128,12 +145,14 @@ target_config = {
        }
    },
    'art-tracing' : {
+        'run-tests' : True,
        'flags' : ['--trace'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false'
        }
    },
    'art-interpreter-tracing' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--trace'],
        'env' : {
@@ -141,24 +160,28 @@ target_config = {
        }
    },
    'art-forcecopy' : {
+        'run-tests' : True,
        'flags' : ['--forcecopy'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false',
        }
    },
    'art-no-prebuild' : {
+        'run-tests' : True,
        'flags' : ['--no-prebuild'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false',
        }
    },
    'art-no-image' : {
+        'run-tests' : True,
        'flags' : ['--no-image'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false',
        }
    },
    'art-interpreter-no-image' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--no-image'],
        'env' : {
@@ -166,18 +189,21 @@ target_config = {
        }
    },
    'art-relocate-no-patchoat' : {
+        'run-tests' : True,
        'flags' : ['--relocate-npatchoat'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false',
        }
    },
    'art-no-dex2oat' : {
+        'run-tests' : True,
        'flags' : ['--no-dex2oat'],
        'env' : {
            'ART_USE_READ_BARRIER' : 'false',
        }
    },
    'art-heap-poisoning' : {
+        'run-tests' : True,
        'flags' : ['--interpreter',
                   '--optimizing'],
        'env' : {
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index a5504098d8..3203f7ad84 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Copyright 2017, The Android Open Source Project
 #
@@ -48,6 +48,7 @@ import argparse
 import fnmatch
 import itertools
 import json
+import multiprocessing
 import os
 import re
 import subprocess
@@ -72,6 +73,9 @@ DEBUGGABLE_TYPES = set()
 ADDRESS_SIZES = set()
 OPTIMIZING_COMPILER_TYPES = set()
 ADDRESS_SIZES_TARGET = {'host': set(), 'target': set()}
+# timeout for individual tests.
+# TODO: make it adjustable per tests and for buildbots
+timeout = 3000 # 50 minutes
 
 # DISABLED_TEST_CONTAINER holds information about the disabled tests. It is a map
 # that has key as the test name (like 001-HelloWorld), and value as set of
@@ -112,7 +116,7 @@ failed_tests = []
 skipped_tests = []
 
 # Flags
-n_thread = 1
+n_thread = -1
 test_count = 0
 total_test_count = 0
 verbose = False
@@ -254,9 +258,26 @@ def setup_test_env():
   ADDRESS_SIZES_TARGET['host'] = ADDRESS_SIZES_TARGET['host'].union(ADDRESS_SIZES)
   ADDRESS_SIZES_TARGET['target'] = ADDRESS_SIZES_TARGET['target'].union(ADDRESS_SIZES)
 
+  global n_thread
+  if n_thread == -1:
+    if 'target' in TARGET_TYPES:
+      n_thread = get_default_threads('target')
+    else:
+      n_thread = get_default_threads('host')
+
   global semaphore
   semaphore = threading.Semaphore(n_thread)
 
+  if not sys.stdout.isatty():
+    global COLOR_ERROR
+    global COLOR_PASS
+    global COLOR_SKIP
+    global COLOR_NORMAL
+    COLOR_ERROR = ''
+    COLOR_PASS = ''
+    COLOR_SKIP = ''
+    COLOR_NORMAL = ''
+
 
 def run_tests(tests):
   """Creates thread workers to run the tests.
@@ -451,8 +472,8 @@ def run_test(command, test, test_variant, test_name):
       test_skipped = True
     else:
       test_skipped = False
-      proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
-      script_output = proc.stdout.read().strip()
+      proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True)
+      script_output = proc.communicate(timeout=timeout)[0]
       test_passed = not proc.wait()
 
     if not test_skipped:
@@ -469,9 +490,14 @@ def run_test(command, test, test_variant, test_name):
         skipped_tests.append(test_name)
       else:
         print_test_info(test_name, '')
-  except Exception, e:
+  except subprocess.TimeoutExpired as e:
+    failed_tests.append(test_name)
+    print_test_info(test_name, 'TIMEOUT', 'timed out in %d\n%s' % (
+        timeout, command))
+  except Exception as e:
     failed_tests.append(test_name)
-    print_text(('%s\n%s\n') % (command, str(e)))
+    print_test_info(test_name, 'FAIL')
+    print_text(('%s\n%s\n\n') % (command, str(e)))
   finally:
     semaphore.release()
 
@@ -508,11 +534,11 @@ def print_test_info(test_name, result, failed_test_info=""):
                                   test_count,
                                   total_test_count)
-    if result == "FAIL":
+    if result == 'FAIL' or result == 'TIMEOUT':
       info += ('%s %s %s\n%s\n') % (
           progress_info,
           test_name,
-          COLOR_ERROR + 'FAIL' + COLOR_NORMAL,
+          COLOR_ERROR + result + COLOR_NORMAL,
           failed_test_info)
     else:
       result_text = ''
@@ -533,15 +559,14 @@ def print_test_info(test_name, result, failed_test_info=""):
       allowed_test_length = console_width - total_output_length
       test_name_len = len(test_name)
       if allowed_test_length < test_name_len:
-        test_name = ('%s...%s') % (
-          test_name[:(allowed_test_length - 3)/2],
-          test_name[-(allowed_test_length - 3)/2:])
+        test_name = ('...%s') % (
+          test_name[-(allowed_test_length - 3):])
       info += ('%s %s %s') % (
         progress_info,
         test_name,
         result_text)
     print_text(info)
-  except Exception, e:
+  except Exception as e:
     print_text(('%s\n%s\n') % (test_name, str(e)))
     failed_tests.append(test_name)
   finally:
@@ -549,10 +574,10 @@ def print_test_info(test_name, result, failed_test_info=""):
 
 def verify_knownfailure_entry(entry):
   supported_field = {
-      'tests' : (list, unicode),
-      'description' : (list, unicode),
-      'bug' : (unicode,),
-      'variant' : (unicode,),
+      'tests' : (list, str),
+      'description' : (list, str),
+      'bug' : (str,),
+      'variant' : (str,),
       'env_vars' : (dict,),
   }
   for field in entry:
@@ -581,7 +606,7 @@ def get_disabled_test_info():
   for failure in known_failures_info:
     verify_knownfailure_entry(failure)
     tests = failure.get('tests', [])
-    if isinstance(tests, unicode):
+    if isinstance(tests, str):
       tests = [tests]
     variants = parse_variants(failure.get('variant'))
     env_vars = failure.get('env_vars')
@@ -767,6 +792,15 @@ def setup_env_for_build_target(build_target, parser, options):
 
   return target_options
 
+def get_default_threads(target):
+  if target == 'target':
+    adb_command = 'adb shell cat /sys/devices/system/cpu/present'
+    cpu_info_proc = subprocess.Popen(adb_command.split(), stdout=subprocess.PIPE)
+    cpu_info = cpu_info_proc.stdout.read()
+    return int(cpu_info.split('-')[1])
+  else:
+    return multiprocessing.cpu_count()
+
 def parse_option():
   global verbose
   global dry_run
@@ -774,10 +808,12 @@ def parse_option():
   global build
   global gdb
   global gdb_arg
+  global timeout
 
   parser = argparse.ArgumentParser(description="Runs all or a subset of the ART test suite.")
   parser.add_argument('-t', '--test', dest='test', help='name of the test')
   parser.add_argument('-j', type=int, dest='n_thread')
+  parser.add_argument('--timeout', default=timeout, type=int, dest='timeout')
   for variant in TOTAL_VARIANTS_SET:
     flag = '--' + variant
     flag_dest = variant.replace('-', '_')
@@ -885,6 +921,7 @@ def parse_option():
     gdb = True
     if options['gdb_arg']:
       gdb_arg = options['gdb_arg']
+  timeout = options['timeout']
 
   return test
 
@@ -899,9 +936,11 @@ def main():
     if 'target' in TARGET_TYPES:
       build_targets += 'test-art-target-run-test-dependencies'
     build_command = 'make'
-    build_command += ' -j' + str(n_thread)
+    build_command += ' -j'
     build_command += ' -C ' + env.ANDROID_BUILD_TOP
     build_command += ' ' + build_targets
+    # Add 'dist' to avoid Jack issues b/36169180.
+    build_command += ' dist'
     if subprocess.call(build_command.split()):
       sys.exit(1)
   if user_requested_test:
@@ -914,7 +953,7 @@ def main():
     while threading.active_count() > 1:
       time.sleep(0.1)
     print_analysis()
-  except Exception, e:
+  except Exception as e:
     print_analysis()
     print_text(str(e))
     sys.exit(1)
diff --git a/tools/golem/build-target.sh b/tools/golem/build-target.sh
new file mode 100755
index 0000000000..8d8e2bbe6f
--- /dev/null
+++ b/tools/golem/build-target.sh
@@ -0,0 +1,384 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ ! -d art ]]; then
+  echo "Script needs to be run at the root of the android tree"
+  exit 1
+fi
+
+ALL_CONFIGS=(linux-ia32 linux-x64 linux-armv8 linux-armv7 android-armv8 android-armv7)
+
+usage() {
+  local config
+  local golem_target
+
+  (cat << EOF
+  Usage: $(basename "${BASH_SOURCE[0]}") [--golem=<target>] --machine-type=MACHINE_TYPE
+                 [--tarball[=<target>.tar.gz]]
+
+  Build minimal art binaries required to run golem benchmarks either
+  locally or on the golem servers.
+
+  Creates the \$MACHINE_TYPE binaries in your \$OUT_DIR, and if --tarball was specified,
+  it also tars the results of the build together into your <target.tar.gz> file.
+  --------------------------------------------------------
+  Required Flags:
+    --machine-type=MT      Specify the machine type that will be built.
+
+  Optional Flags:
+    --golem=<target>       Builds with identical commands that Golem servers use.
+    --tarball[=o.tgz]      Tar/gz the results. File name defaults to <machine_type>.tar.gz
+    -j<num>                Specify how many jobs to use for parallelism.
+    --help                 Print this help listing.
+    --showcommands         Show commands as they are being executed.
+    --simulate             Print commands only, don't execute commands.
+EOF
+  ) | sed -e 's/^[[:space:]][[:space:]]//g' >&2 # Strip leading whitespace from heredoc.
+
+  echo >&2 "Available machine types:"
+  for config in "${ALL_CONFIGS[@]}"; do
+    echo >&2 "  $config"
+  done
+
+  echo >&2
+  echo >&2 "Available Golem targets:"
+  while IFS='' read -r golem_target; do
+    echo >&2 "  $golem_target"
+  done < <("$(thisdir)/env" --list-targets)
+}
+
+# Check if $1 element is in array $2
+contains_element() {
+  local e
+  for e in "${@:2}"; do [[ "$e" == "$1" ]] && return 0; done
+  return 1
+}
+
+# Display a command, but don't execute it, if --showcommands was set.
+show_command() {
+  if [[ $showcommands == "showcommands" ]]; then
+    echo "$@"
+  fi
+}
+
+# Execute a command, displaying it if --showcommands was set.
+# If --simulate is used, command is not executed.
+execute() {
+  show_command "$@"
+  execute_noshow "$@"
+}
+
+# Execute a command unless --simulate was used.
+execute_noshow() {
+  if [[ $simulate == "simulate" ]]; then
+    return 0
+  fi
+
+  local prog="$1"
+  shift
+  "$prog" "$@"
+}
+
+# Export environment variable, echoing it to screen.
+setenv() {
+  local name="$1"
+  local value="$2"
+
+  export $name="$value"
+  echo export $name="$value"
+}
+
+# Export environment variable, echoing $3 to screen ($3 is meant to be unevaluated).
+setenv_escape() {
+  local name="$1"
+  local value="$2"
+  local escaped_value="$3"
+
+  export $name="$value"
+  echo export $name="$escaped_value"
+}
+
+log_usage_error() {
+  echo >&2 "ERROR: " "$@"
+  echo >&2 "       See --help for the correct usage information."
+  exit 1
+}
+
+log_fatal() {
+  echo >&2 "FATAL: " "$@"
+  exit 2
+}
+
+# Get the directory of this script.
+thisdir() {
+  (\cd "$(dirname "${BASH_SOURCE[0]}")" && pwd )
+}
+
+# Get the path to the top of the Android source tree.
+gettop() {
+  if [[ "x$ANDROID_BUILD_TOP" != "x" ]]; then
+    echo "$ANDROID_BUILD_TOP";
+  else
+    echo "$(thisdir)/../../.."
+  fi
+}
+
+# Get a build variable from the Android build system.
+get_build_var() {
+  local varname="$1"
+
+  # include the desired target product/build-variant
+  # which won't be set in our env if neither we nor the user first executed
+  # source build/envsetup.sh (e.g. if simulating from a fresh shell).
+  local extras
+  [[ -n $target_product ]] && extras+=" TARGET_PRODUCT=$target_product"
+  [[ -n $target_build_variant ]] && extras+=" TARGET_BUILD_VARIANT=$target_build_variant"
+
+  # call dumpvar-$name from the makefile system.
+  (\cd "$(gettop)";
+  CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core \
+    command make --no-print-directory -f build/core/config.mk \
+    $extras \
+    dumpvar-$varname)
+}
+
+# Defaults from command-line.
+
+mode=""  # blank or 'golem' if --golem was specified.
+golem_target=""  # --golem=$golem_target
+config=""        # --machine-type=$config
+j_arg="-j8"
+showcommands=""
+simulate=""
+make_tarball=""
+tarball=""
+
+# Parse command line arguments
+
+while [[ "$1" != "" ]]; do
+  case "$1" in
+    --help)
+      usage
+      exit 1
+      ;;
+    --golem=*)
+      mode="golem"
+      golem_target="${1##--golem=}"
+
+      if [[ "x$golem_target" == x ]]; then
+        log_usage_error "Missing --golem target type."
+      fi
+
+      shift
+      ;;
+    --machine-type=*)
+      config="${1##--machine-type=}"
+      if ! contains_element "$config" "${ALL_CONFIGS[@]}"; then
+        log_usage_error "Invalid --machine-type value '$config'"
+      fi
+      shift
+      ;;
+    --tarball)
+      tarball=""  # reuse the machine type name.
+      make_tarball="make_tarball"
+      shift
+      ;;
+    --tarball=*)
+      tarball="${1##--tarball=}"
+      make_tarball="make_tarball"
+      shift
+      ;;
+    -j*)
+      j_arg="$1"
+      shift
+      ;;
+    --showcommands)
+      showcommands="showcommands"
+      shift
+      ;;
+    --simulate)
+      simulate="simulate"
+      shift
+      ;;
+    *)
+      log_usage_error "Unknown options $1"
+      ;;
+  esac
+done
+
+###################################
+###################################
+###################################
+
+if [[ -z $config ]]; then
+  log_usage_error "--machine-type option is required."
+fi
+
+# --tarball defaults to the --machine-type value with .tar.gz.
+tarball="${tarball:-$config.tar.gz}"
+
+target_product="$TARGET_PRODUCT"
+target_build_variant="$TARGET_BUILD_VARIANT"
+
+# If not using --golem, use whatever the user had lunch'd prior to this script.
+if [[ $mode == "golem" ]]; then
+  # This section is intended solely to be executed by a golem build server.
+
+  target_build_variant=eng
+  case "$config" in
+    *-armv7)
+      target_product="arm_krait"
+      ;;
+    *-armv8)
+      target_product="armv8"
+      ;;
+    *)
+      target_product="sdk"
+      ;;
+  esac
+
+  if [[ $target_product = arm* ]]; then
+    # If using the regular manifest, e.g. 'master'
+    # The lunch command for arm will assuredly fail because we don't have device/generic/art.
+    #
+    # Print a human-readable error message instead of trying to lunch and failing there.
+    if ! [[ -d "$(gettop)/device/generic/art" ]]; then
+      log_fatal "Missing device/generic/art directory. Perhaps try master-art repo manifest?\n" \
+                "  Cannot build ARM targets (arm_krait, armv8) for Golem." >&2
+    fi
+    # We could try to keep on simulating but it seems brittle because we won't have the proper
+    # build variables to output the right strings.
+  fi
+
+  # Get this particular target's environment variables (e.g. ART read barrier on/off).
+  source "$(thisdir)"/env "$golem_target" || exit 1
+
+  lunch_target="$target_product-$target_build_variant"
+
+  execute 'source' build/envsetup.sh
+  # Build generic targets (as opposed to something specific like aosp_angler-eng).
+  execute lunch "$lunch_target"
+  setenv JACK_SERVER false
+  setenv_escape JACK_REPOSITORY "$PWD/prebuilts/sdk/tools/jacks" '$PWD/prebuilts/sdk/tools/jacks'
+  # Golem uses master-art repository which is missing a lot of other libraries.
+  setenv SOONG_ALLOW_MISSING_DEPENDENCIES true
+  # Golem may be missing tools such as javac from its path.
+  setenv_escape PATH "/usr/lib/jvm/java-8-openjdk-amd64/bin/:$PATH" '/usr/lib/jvm/java-8-openjdk-amd64/bin/:$PATH'
+else
+  # Look up the default variables from the build system if they weren't set already.
+  [[ -z $target_product ]] && target_product="$(get_build_var TARGET_PRODUCT)"
+  [[ -z $target_build_variant ]] && target_build_variant="$(get_build_var TARGET_BUILD_VARIANT)"
+fi
+
+# Defaults for all machine types.
+make_target="build-art-target-golem"
+out_dir="out/x86_64"
+root_dir_var="PRODUCT_OUT"
+strip_symbols=false
+bit64_suffix=""
+tar_directories=(system data/art-test)
+
+# Per-machine type overrides
+if [[ $config == linux-arm* ]]; then
+  setenv ART_TARGET_LINUX true
+fi
+
+case "$config" in
+  linux-ia32|linux-x64)
+    root_dir_var="HOST_OUT"
+    # Android strips target builds automatically, but not host builds.
+    strip_symbols=true
+    make_target="build-art-host-golem"
+
+    if [[ $config == linux-ia32 ]]; then
+      out_dir="out/x86"
+      setenv HOST_PREFER_32_BIT true
+    else
+      bit64_suffix="64"
+    fi
+
+    tar_directories=(bin framework usr lib${bit64_suffix})
+    ;;
+  *-armv8)
+    bit64_suffix="64"
+    ;;
+  *-armv7)
+    ;;
+  *)
+    log_fatal "Unsupported machine-type '$config'"
+esac
+
+# Golem benchmark run commands expect a certain $OUT_DIR to be set,
+# so specify it here.
+#
+# Note: It is questionable if we want to customize this since users
+# could alternatively probably use their own build directly (and forgo this script).
+setenv OUT_DIR "$out_dir"
+root_dir="$(get_build_var "$root_dir_var")"
+
+if [[ $mode == "golem" ]]; then
+  # For golem-style running only.
+  # Sets the DT_INTERP to this path in every .so we can run the
+  # non-system version of dalvikvm with our own copies of the dependencies (e.g. our own libc++).
+  if [[ $config == android-* ]]; then
+    # TODO: the linker can be relative to the binaries
+    # (which is what we do for linux-armv8 and linux-armv7)
+    golem_run_path="/data/local/tmp/runner/"
+  else
+    golem_run_path=""
+  fi
+
+  # Only do this for target builds. Host doesn't need this.
+  if [[ $config == *-arm* ]]; then
+    setenv CUSTOM_TARGET_LINKER "${golem_run_path}${root_dir}/system/bin/linker${bit64_suffix}"
+  fi
+fi
+
+#
+# Main command execution below here.
+# (everything prior to this just sets up environment variables,
+#  and maybe calls lunch).
+#
+
+execute make "${j_arg}" "${make_target}"
+
+if $strip_symbols; then
+  # Further reduce size by stripping symbols.
+  execute_noshow strip $root_dir/bin/* || true
+  show_command strip $root_dir/bin/'*' '|| true'
+  execute_noshow strip $root_dir/lib${bit64_suffix}/'*'
+  show_command strip $root_dir/lib${bit64_suffix}/'*'
+fi
+
+if [[ "$make_tarball" == "make_tarball" ]]; then
+  # Create a tarball which is required for the golem build resource.
+  # (In particular, each golem benchmark's run commands depend on a list of resource files
+  #  in order to have all the files it needs to actually execute,
+  #  and this tarball would satisfy that particular target+machine-type's requirements).
+  dirs_rooted=()
+  for tar_dir in "${tar_directories[@]}"; do
+    dirs_rooted+=("$root_dir/$tar_dir")
+  done
+
+  execute tar -czf "${tarball}" "${dirs_rooted[@]}" --exclude .git --exclude .gitignore
+  tar_result=$?
+  if [[ $tar_result -ne 0 ]]; then
+    [[ -f $tarball ]] && rm $tarball
+  fi
+
+  show_command '[[ $? -ne 0 ]] && rm' "$tarball"
+fi
+
diff --git a/tools/golem/env b/tools/golem/env
new file mode 100755
index 0000000000..187ba3a01f
--- /dev/null
+++ b/tools/golem/env
@@ -0,0 +1,117 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Export some environment variables used by ART's Android.mk/Android.bp
+# build systems to configure ART [to use a different implementation].
+#
+# Currently only varies on ART_USE_READ_BARRIER for a concurrent/non-concurrent
+# flavor of the ART garbage collector.
+#
+# Only meant for golem use since when building ART directly, one can/should set
+# these environment flags themselves.
+#
+# These environment flags are not really meant here to be for "correctness",
+# but rather telling the ART C++ to use alternative algorithms.
+# In other words, the same exact binary build with a different "target"
+# should run in the same context (e.g. it does not change arch or the OS it's built for).
+#
+
+setenv() {
+  local name="$1"
+  local value="$2"
+
+  export $name="$value"
+  echo export $name="$value"
+}
+
+# Enforce specified target-name is one of these.
+# Perhaps we should be less strict?
+ALL_TARGETS=(art-interpreter art-opt art-jit art-jit-cc art-opt-cc art-opt-debuggable art-vdex)
+
+usage() {
+  echo >&2 "Usage: $(basename $0) (--list-targets | <target-name>)"
+  echo >&2
+  echo >&2 "Exports the necessary ART environment variables"
+  echo >&2 "to pass to the Golem build to correctly configure ART."
+  echo >&2 "--------------------------------------------------------"
+  echo >&2 "Required Arguments:"
+  echo >&2 "  <target-name>       Specify the golem target to get environment variables for."
+  echo >&2
+  echo >&2 "Optional Flags:"
+  echo >&2 "  --list-targets      Display all the targets. Do not require the main target-name."
+  echo >&2 "  --help              Print this help listing."
+  echo >&2
+  echo >&2 "Available Targets:"
+
+  list_targets 2 "  "
+}
+
+list_targets() {
+  local out_fd="${1:-1}" # defaults to 1 if no param was set
+  local prefix="$2"
+
+  for target in "${ALL_TARGETS[@]}"; do
+    echo >&$out_fd "${prefix}${target}"
+  done
+}
+
+
+# Check if $1 element is in array $2
+contains_element() {
+  local e
+  for e in "${@:2}"; do [[ "$e" == "$1" ]] && return 0; done
+  return 1
+}
+
+main() {
+  if [[ $# -lt 1 ]]; then
+    usage
+    exit 1
+  fi
+
+  if [[ "$1" == "--help" ]]; then
+    usage
+    exit 1
+  fi
+
+  if [[ "$1" == "--list-targets" ]]; then
+    list_targets
+    exit 0
+  fi
+
+  local selected_target="$1"
+  if ! contains_element "$selected_target" "${ALL_TARGETS[@]}"; then
+    echo "ERROR: Invalid target value '$selected_target'" >&2
+    exit 1
+  fi
+
+  case "$selected_target" in
+    *-cc)
+      setenv ART_USE_READ_BARRIER true
+      ;;
+    *)
+      setenv ART_USE_READ_BARRIER false
+      ;;
+  esac
+
+  # Make smaller .tar.gz files by excluding debug targets.
+  setenv ART_BUILD_TARGET_DEBUG false
+  setenv ART_BUILD_HOST_DEBUG false
+  setenv USE_DEX2OAT_DEBUG false
+}
+
+main "$@"
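
For orientation, the two new Golem helpers above are meant to be used together: tools/golem/env exports (and echoes) the per-target ART build flags, such as the read-barrier setting and the disabled debug builds, while tools/golem/build-target.sh takes one of those targets plus a machine type, runs the build, and optionally packages a tarball. A hypothetical invocation from the root of an Android tree might look like the following; the target and machine-type names are taken from the ALL_TARGETS and ALL_CONFIGS lists above, and anything else would be rejected by the scripts' own validation:

    # List the Golem targets that tools/golem/env knows about.
    ./art/tools/golem/env --list-targets

    # Build the concurrent-copying JIT configuration for a 64-bit ARM device and
    # package it as android-armv8.tar.gz (the default --tarball file name).
    ./art/tools/golem/build-target.sh --golem=art-jit-cc --machine-type=android-armv8 --tarball

Note that build-target.sh sources tools/golem/env itself when --golem is passed, so exporting the variables by hand is only needed when building without that flag.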