Diffstat (limited to 'compiler')
-rw-r--r--  compiler/image_test.cc                                 |  91
-rw-r--r--  compiler/oat_writer.cc                                 |  98
-rw-r--r--  compiler/optimizing/code_generator_arm.cc              |  78
-rw-r--r--  compiler/optimizing/code_generator_arm.h               |   2
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc         |  72
-rw-r--r--  compiler/optimizing/code_generator_x86.cc              |  22
-rw-r--r--  compiler/optimizing/code_generator_x86.h               |   5
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc           |  26
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h            |   4
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc                  |  53
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc             |  87
-rw-r--r--  compiler/optimizing/locations.h                        |   2
-rw-r--r--  compiler/optimizing/nodes.cc                           |   3
-rw-r--r--  compiler/optimizing/nodes.h                            |   9
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc    |  20
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc  |  41
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc  |  41
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc           |  12
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h            |   6
19 files changed, 482 insertions, 190 deletions
diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 89e8a678b1..7ee494a131 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -76,7 +76,7 @@ class ImageTest : public CommonCompilerTest { void Compile(ImageHeader::StorageMode storage_mode, CompilationHelper& out_helper, const std::string& extra_dex = "", - const std::string& image_class = ""); + const std::initializer_list<std::string>& image_classes = {}); void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE { CommonCompilerTest::SetUpRuntimeOptions(options); @@ -90,6 +90,18 @@ class ImageTest : public CommonCompilerTest { return new std::unordered_set<std::string>(image_classes_); } + ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass) + REQUIRES_SHARED(Locks::mutator_lock_) { + PointerSize pointer_size = class_linker_->GetImagePointerSize(); + for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) { + if (strcmp(origin->GetName(), m.GetName()) == 0 && + origin->GetSignature() == m.GetSignature()) { + return &m; + } + } + return nullptr; + } + private: std::unordered_set<std::string> image_classes_; }; @@ -345,8 +357,8 @@ void CompilationHelper::Compile(CompilerDriver* driver, void ImageTest::Compile(ImageHeader::StorageMode storage_mode, CompilationHelper& helper, const std::string& extra_dex, - const std::string& image_class) { - if (!image_class.empty()) { + const std::initializer_list<std::string>& image_classes) { + for (const std::string& image_class : image_classes) { image_classes_.insert(image_class); } CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U); @@ -358,13 +370,15 @@ void ImageTest::Compile(ImageHeader::StorageMode storage_mode, helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str()); } helper.Compile(compiler_driver_.get(), storage_mode); - if (!image_class.empty()) { + if (image_classes.begin() != image_classes.end()) { // Make sure the class got initialized. ScopedObjectAccess soa(Thread::Current()); ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); - EXPECT_TRUE(klass != nullptr); - EXPECT_TRUE(klass->IsInitialized()); + for (const std::string& image_class : image_classes) { + mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str()); + EXPECT_TRUE(klass != nullptr); + EXPECT_TRUE(klass->IsInitialized()); + } } } @@ -492,7 +506,7 @@ TEST_F(ImageTest, TestImageLayout) { // Compile multi-image with ImageLayoutA being the last image. { CompilationHelper helper; - Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", "LMyClass;"); + Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", {"LMyClass;"}); image_sizes = helper.GetImageObjectSectionSizes(); } TearDown(); @@ -501,7 +515,7 @@ TEST_F(ImageTest, TestImageLayout) { // Compile multi-image with ImageLayoutB being the last image. 
{ CompilationHelper helper; - Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", "LMyClass;"); + Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", {"LMyClass;"}); image_sizes_extra = helper.GetImageObjectSectionSizes(); } // Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the @@ -553,4 +567,63 @@ TEST_F(ImageTest, ImageHeaderIsValid) { ASSERT_FALSE(image_header.IsValid()); } +// Test that the pointer to quick code is the same in +// a default method of an interface and in a copied method +// of a class which implements the interface. This should be true +// only if the copied method and the origin method are located in the +// same oat file. +TEST_F(ImageTest, TestDefaultMethods) { + CompilationHelper helper; + Compile(ImageHeader::kStorageModeUncompressed, + helper, + "DefaultMethods", + {"LIface;", "LImpl;", "LIterableBase;"}); + + PointerSize pointer_size = class_linker_->GetImagePointerSize(); + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + + // Test that the pointer to quick code is the same in the origin method + // and in the copied method from the same oat file. + mirror::Class* iface_klass = class_linker_->LookupClass( + self, "LIface;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iface_klass); + ArtMethod* origin = iface_klass->FindDeclaredVirtualMethod( + "defaultMethod", "()V", pointer_size); + ASSERT_NE(nullptr, origin); + const void* code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The origin method should have a pointer to quick code. + ASSERT_NE(nullptr, code); + ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code)); + mirror::Class* impl_klass = class_linker_->LookupClass( + self, "LImpl;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, impl_klass); + ArtMethod* copied = FindCopiedMethod(origin, impl_klass); + ASSERT_NE(nullptr, copied); + // The copied method should have a pointer to the same quick code as the origin method. + ASSERT_EQ(code, copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size)); + + // Test that the origin method has a pointer to quick code + // but the copied method has a pointer to the interpreter, + // because these methods are in different oat files.
+ mirror::Class* iterable_klass = class_linker_->LookupClass( + self, "Ljava/lang/Iterable;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iterable_klass); + origin = iterable_klass->FindDeclaredVirtualMethod( + "forEach", "(Ljava/util/function/Consumer;)V", pointer_size); + ASSERT_NE(nullptr, origin); + code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The origin method should have a pointer to quick code. + ASSERT_NE(nullptr, code); + ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code)); + mirror::Class* iterablebase_klass = class_linker_->LookupClass( + self, "LIterableBase;", ObjPtr<mirror::ClassLoader>()); + ASSERT_NE(nullptr, iterablebase_klass); + copied = FindCopiedMethod(origin, iterablebase_klass); + ASSERT_NE(nullptr, copied); + code = copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size); + // The copied method should have a pointer to the interpreter. + ASSERT_TRUE(class_linker_->IsQuickToInterpreterBridge(code)); +} + } // namespace art diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 5406ae72d1..8e25aa3421 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1034,18 +1034,63 @@ class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor { class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { public: - InitImageMethodVisitor(OatWriter* writer, size_t offset) + InitImageMethodVisitor(OatWriter* writer, + size_t offset, + const std::vector<const DexFile*>* dex_files) : OatDexMethodVisitor(writer, offset), - pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) { + pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())), + dex_files_(dex_files), + class_linker_(Runtime::Current()->GetClassLinker()) { + } + + // Handle copied methods here. Copy the pointer to quick code from + // an origin method to a copied method only if they are + // in the same oat file. If the origin and the copied methods are + // in different oat files, don't touch the copied method. + // References to other oat files are not supported yet. + bool StartClass(const DexFile* dex_file, size_t class_def_index) + REQUIRES_SHARED(Locks::mutator_lock_) { + OatDexMethodVisitor::StartClass(dex_file, class_def_index); + // Skip classes that are not in the image. + if (!IsImageClass()) { + return true; + } + ScopedObjectAccessUnchecked soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache = hs.NewHandle( + class_linker_->FindDexCache(Thread::Current(), *dex_file)); + const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); + mirror::Class* klass = dex_cache->GetResolvedType(class_def.class_idx_); + if (klass != nullptr) { + for (ArtMethod& method : klass->GetCopiedMethods(pointer_size_)) { + // Find the origin method. The declaring class and dex_method_idx + // in the copied method should be the same as in the origin + // method.
+ mirror::Class* declaring_class = method.GetDeclaringClass(); + ArtMethod* origin = declaring_class->FindDeclaredVirtualMethod( + declaring_class->GetDexCache(), + method.GetDexMethodIndex(), + pointer_size_); + CHECK(origin != nullptr); + if (IsInOatFile(&declaring_class->GetDexFile())) { + const void* code_ptr = + origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); + if (code_ptr == nullptr) { + methods_to_process_.push_back(std::make_pair(&method, origin)); + } else { + method.SetEntryPointFromQuickCompiledCodePtrSize( + code_ptr, pointer_size_); + } + } + } + } + return true; } bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) REQUIRES_SHARED(Locks::mutator_lock_) { - const DexFile::TypeId& type_id = - dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); - const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); // Skip methods that are not in the image. - if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) { + if (!IsImageClass()) { return true; } @@ -1059,17 +1104,16 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { ++method_offsets_index_; } - ClassLinker* linker = Runtime::Current()->GetClassLinker(); // Unchecked as we hold mutator_lock_ on entry. ScopedObjectAccessUnchecked soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache( + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker_->FindDexCache( Thread::Current(), *dex_file_))); ArtMethod* method; if (writer_->HasBootImage()) { const InvokeType invoke_type = it.GetMethodInvokeType( dex_file_->GetClassDef(class_def_index_)); - method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>( + method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>( *dex_file_, it.GetMemberIndex(), dex_cache, @@ -1089,7 +1133,8 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { // Should already have been resolved by the compiler, just peek into the dex cache. // It may not be resolved if the class failed to verify; in this case, don't set the // entrypoint. This is not fatal since the dex cache will contain a resolution method. - method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize()); + method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), + class_linker_->GetImagePointerSize()); } if (method != nullptr && compiled_method != nullptr && @@ -1101,8 +1146,38 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor { return true; } + // Check whether the current class is an image class. + bool IsImageClass() { + const DexFile::TypeId& type_id = + dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_); + const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id); + return writer_->GetCompilerDriver()->IsImageClass(class_descriptor); + } + + // Check whether the specified dex file is in the compiled oat file.
+ bool IsInOatFile(const DexFile* dex_file) { + return ContainsElement(*dex_files_, dex_file); + } + + // Assign a pointer to quick code for copied methods + // not handled in the method StartClass. + void Postprocess() { + for (std::pair<ArtMethod*, ArtMethod*>& p : methods_to_process_) { + ArtMethod* method = p.first; + ArtMethod* origin = p.second; + const void* code_ptr = + origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_); + if (code_ptr != nullptr) { + method->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size_); + } + } + } + protected: const PointerSize pointer_size_; + const std::vector<const DexFile*>* dex_files_; + ClassLinker* const class_linker_; + std::vector<std::pair<ArtMethod*, ArtMethod*>> methods_to_process_; }; class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { @@ -1744,8 +1819,9 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { offset = code_visitor.GetOffset(); if (HasImage()) { - InitImageMethodVisitor image_visitor(this, offset); + InitImageMethodVisitor image_visitor(this, offset, dex_files_); success = VisitDexMethods(&image_visitor); + image_visitor.Postprocess(); DCHECK(success); offset = image_visitor.GetOffset(); } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index e34f116b75..caea250ab6 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1875,6 +1875,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src) Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) { DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); + DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); const HBasicBlock* const block = instruction->GetBlock(); const HLoopInformation* const info = block->GetLoopInformation(); @@ -2901,16 +2902,20 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) { // Convert the jumps into the result. Label done_label; + Label* final_label = codegen_->GetFinalLabel(cond, &done_label); // False case: result = 0. __ Bind(&false_label); __ LoadImmediate(out, 0); - __ b(&done_label); + __ b(final_label); // True case: result = 1. __ Bind(&true_label); __ LoadImmediate(out, 1); - __ Bind(&done_label); + + if (done_label.IsLinked()) { + __ Bind(&done_label); + } } void LocationsBuilderARM::VisitEqual(HEqual* comp) { @@ -4441,7 +4446,8 @@ void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations // rotates by swapping input regs (effectively rotating by the first 32 bits of // a larger rotation) or flipping direction (thus treating larger right/left // rotations as sub-word sized rotations in the other direction) as appropriate.
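(An aside on the OatWriter hunk above; the rotate changes resume below.) StartClass copies an origin method's entrypoint into the copied method eagerly when it is already known, and otherwise queues the pair so Postprocess can patch it once all entrypoints have been assigned. A minimal standalone sketch of that two-phase pattern, with simplified stand-in types rather than the actual OatWriter/ArtMethod API:

```cpp
#include <utility>
#include <vector>

// Stand-in: a method whose entrypoint may not be assigned yet when first seen.
struct Method {
  const void* entry_point = nullptr;
};

class CopiedMethodFixups {
 public:
  // First pass: copy the entrypoint eagerly if it is already known,
  // otherwise remember the (copy, origin) pair for later.
  void Visit(Method* copy, Method* origin) {
    if (origin->entry_point != nullptr) {
      copy->entry_point = origin->entry_point;
    } else {
      pending_.push_back(std::make_pair(copy, origin));
    }
  }

  // Second pass, after all entrypoints have been assigned: patch the
  // copies whose origin was not yet resolved during the first pass.
  void Postprocess() {
    for (std::pair<Method*, Method*>& p : pending_) {
      if (p.second->entry_point != nullptr) {
        p.first->entry_point = p.second->entry_point;
      }
    }
    pending_.clear();
  }

 private:
  std::vector<std::pair<Method*, Method*>> pending_;
};

int main() {
  Method origin, copy;
  CopiedMethodFixups fixups;
  fixups.Visit(&copy, &origin);  // Origin not compiled yet: queued.
  origin.entry_point = &origin;  // Entrypoint becomes known later.
  fixups.Postprocess();          // Copy now shares the origin's entrypoint.
  return copy.entry_point == origin.entry_point ? 0 : 1;
}
```

Deferring the unresolved pairs removes any dependency on visitation order: a copied method may be visited before its origin's code pointer exists.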
-void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { +void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) { + LocationSummary* locations = ror->GetLocations(); Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Location rhs = locations->InAt(1); @@ -4474,6 +4480,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { Register shift_left = locations->GetTemp(1).AsRegister<Register>(); Label end; Label shift_by_32_plus_shift_right; + Label* final_label = codegen_->GetFinalLabel(ror, &end); __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F)); __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6); @@ -4488,7 +4495,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { __ Lsl(out_reg_lo, in_reg_lo, shift_left); __ Lsr(shift_left, in_reg_hi, shift_right); __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left)); - __ b(&end); + __ b(final_label); __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). @@ -4500,7 +4507,9 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) { __ Lsl(shift_right, in_reg_hi, shift_left); __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right)); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } } @@ -4540,7 +4549,7 @@ void InstructionCodeGeneratorARM::VisitRor(HRor* ror) { break; } case Primitive::kPrimLong: { - HandleLongRotate(locations); + HandleLongRotate(ror); break; } default: @@ -4919,6 +4928,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); Label less, greater, done; + Label* final_label = codegen_->GetFinalLabel(compare, &done); Primitive::Type type = compare->InputAt(0)->GetType(); Condition less_cond; switch (type) { @@ -4958,17 +4968,19 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ b(&done, EQ); + __ b(final_label, EQ); __ b(&less, less_cond); __ Bind(&greater); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); __ Bind(&less); __ LoadImmediate(out, -1); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } void LocationsBuilderARM::VisitPhi(HPhi* instruction) { @@ -5746,6 +5758,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); if (maybe_compressed_char_at) { Label uncompressed_load, done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
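(Aside on the HandleLongRotate hunk above.) The register-amount path implements a 64-bit rotate with 32-bit registers: a rotation by 32 or more reduces to swapping the two halves plus a sub-word rotation by (amount & 31). A host-side model of that reduction, in plain C++ standing in for the emitted ARM sequence (which additionally folds the shift-by-zero case into its flag-setting shifts):

```cpp
#include <cassert>
#include <cstdint>

// Rotate a 64-bit value right using only 32-bit halves, following the
// same strategy as the HandleLongRotate hunk.
uint64_t RotateRight64Via32(uint32_t lo, uint32_t hi, uint32_t amount) {
  amount &= 63;
  if (amount & 32) {  // Rotating by >= 32: swap the halves first.
    uint32_t tmp = lo;
    lo = hi;
    hi = tmp;
  }
  uint32_t s = amount & 31;
  uint32_t out_lo = lo, out_hi = hi;
  if (s != 0) {  // Guard: a 32-bit shift by 32 would be undefined.
    out_lo = (lo >> s) | (hi << (32 - s));
    out_hi = (hi >> s) | (lo << (32 - s));
  }
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}

int main() {
  uint64_t v = 0x0123456789ABCDEFull;
  for (uint32_t s = 0; s < 64; ++s) {
    uint64_t expected = (s == 0) ? v : (v >> s) | (v << (64 - s));
    assert(RotateRight64Via32(static_cast<uint32_t>(v),
                              static_cast<uint32_t>(v >> 32), s) == expected);
  }
  return 0;
}
```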
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -5754,13 +5767,15 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { out_loc.AsRegister<Register>(), obj, data_offset + const_index); - __ b(&done); + __ b(final_label); __ Bind(&uncompressed_load); __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), out_loc.AsRegister<Register>(), obj, data_offset + (const_index << 1)); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -5784,17 +5799,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { Label uncompressed_load, done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); __ b(&uncompressed_load, CS); __ ldrb(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 0)); - __ b(&done); + __ b(final_label); __ Bind(&uncompressed_load); __ ldrh(out_loc.AsRegister<Register>(), Address(temp, index.AsRegister<Register>(), Shift::LSL, 1)); - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>()); } @@ -6019,6 +6037,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); Label done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { @@ -6040,7 +6059,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) { index.AsRegister<Register>()); } codegen_->MaybeRecordImplicitNullCheck(instruction); - __ b(&done); + __ b(final_label); __ Bind(&non_zero); } @@ -7021,6 +7040,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); Label done, zero; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARM* slow_path = nullptr; // Return 0 if `obj` is null. @@ -7042,7 +7062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { // Classes must be equal for the instanceof to succeed. __ b(&zero, NE); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); break; } @@ -7065,12 +7085,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. 
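(Aside on the final_label idiom that recurs throughout these hunks.) Judging from the call sites and the added DCHECKs, GetFinalLabel appears to return the enclosing block's own exit label instead of the local done label when the instruction is the last one in its block, so a jump to done, which would land on the very next emitted instruction, is redirected; the `if (done.IsLinked())` guard then skips binding the now-unused local label. A toy model under those assumptions, with hypothetical stand-in types rather than the ART codegen API:

```cpp
#include <cassert>

// Toy label: records whether any branch references it and whether it
// has been bound to a position in the instruction stream.
struct Label {
  bool linked = false;
  bool bound = false;
  bool IsLinked() const { return linked; }
};

struct Block {
  Label* exit_label;   // Label at the end of the block, if any.
  bool instr_is_last;  // Is the instruction the last one in the block?
};

// If the instruction is the last in its block, a branch to the local
// `done` label would target the very next instruction; return the
// block's exit label instead so that branch can be elided or merged.
Label* GetFinalLabel(const Block& block, Label* local_done) {
  if (block.instr_is_last && block.exit_label != nullptr) {
    return block.exit_label;
  }
  return local_done;
}

void Branch(Label* target) { target->linked = true; }
void Bind(Label* label) { label->bound = true; }

int main() {
  Label block_exit;
  Block block{&block_exit, /*instr_is_last=*/true};

  Label done;
  Label* final_label = GetFinalLabel(block, &done);
  Branch(final_label);    // Goes straight to the block exit.
  if (done.IsLinked()) {  // Never taken here: `done` was unused.
    Bind(&done);
  }
  assert(!done.bound && block_exit.linked);
  return 0;
}
```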
- __ CompareAndBranchIfZero(out, &done); + __ CompareAndBranchIfZero(out, final_label); __ cmp(out, ShifterOperand(cls)); __ b(&loop, NE); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7096,11 +7116,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { kCompilerReadBarrierOption); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. - __ b(&done); + __ b(final_label); __ Bind(&success); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7125,13 +7145,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. - __ CompareAndBranchIfZero(out, &done); + __ CompareAndBranchIfZero(out, final_label); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero); __ Bind(&exact_check); __ LoadImmediate(out, 1); - __ b(&done); + __ b(final_label); break; } @@ -7152,7 +7172,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { __ b(slow_path->GetEntryLabel(), NE); __ LoadImmediate(out, 1); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7183,7 +7203,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { codegen_->AddSlowPath(slow_path); __ b(slow_path->GetEntryLabel()); if (zero.IsLinked()) { - __ b(&done); + __ b(final_label); } break; } @@ -7269,9 +7289,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { codegen_->AddSlowPath(type_check_slow_path); Label done; + Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done); + __ CompareAndBranchIfZero(obj, final_label); } switch (type_check_kind) { @@ -7335,7 +7356,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { Label loop; __ Bind(&loop); __ cmp(temp, ShifterOperand(cls)); - __ b(&done, EQ); + __ b(final_label, EQ); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7363,7 +7384,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ cmp(temp, ShifterOperand(cls)); - __ b(&done, EQ); + __ b(final_label, EQ); // Otherwise, we need to check that the object's class is a non-primitive array. 
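(Aside on the VisitInstanceOf hunks above.) The class-hierarchy check emits a loop that repeatedly loads super_class_ until it either reaches the target class (result 1) or walks off the top of the hierarchy (result 0). The same walk as a small host-side sketch, with a toy class model in place of mirror::Class:

```cpp
// Toy single-inheritance class model, mirroring the loop emitted for
// the class-hierarchy instanceof check.
struct Class {
  const Class* super_class;
};

// Returns 1 if `klass` equals `target` or inherits from it, else 0.
// (A null object reference is handled earlier by a separate null check.)
int IsInstanceOf(const Class* klass, const Class* target) {
  while (klass != nullptr) {
    if (klass == target) {
      return 1;  // Exact match somewhere up the chain.
    }
    klass = klass->super_class;  // /* HeapReference<Class> */ super_class_
  }
  return 0;  // Walked off the top of the hierarchy: not an instance.
}

int main() {
  Class object{nullptr};
  Class base{&object};
  Class derived{&base};
  return (IsInstanceOf(&derived, &base) == 1 &&
          IsInstanceOf(&object, &base) == 0) ? 0 : 1;
}
```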
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -7433,7 +7454,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) { break; } } - __ Bind(&done); + + if (done.IsLinked()) { + __ Bind(&done); + } __ Bind(type_check_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 5b15902ccd..59a7f7c048 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -237,7 +237,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator { void HandleBitwiseOperation(HBinaryOperation* operation); void HandleCondition(HCondition* condition); void HandleIntegerRotate(LocationSummary* locations); - void HandleLongRotate(LocationSummary* locations); + void HandleLongRotate(HRor* ror); void HandleShift(HBinaryOperation* operation); void GenerateWideAtomicStore(Register addr, uint32_t offset, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index d75779cef6..2d2d8109a3 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1950,6 +1950,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src) vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label) { DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck()); + DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall()); const HBasicBlock* const block = instruction->GetBlock(); const HLoopInformation* const info = block->GetLoopInformation(); @@ -2925,16 +2926,20 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { // Convert the jumps into the result. vixl32::Label done_label; + vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label); // False case: result = 0. __ Bind(&false_label); __ Mov(out, 0); - __ B(&done_label); + __ B(final_label); // True case: result = 1. __ Bind(&true_label); __ Mov(out, 1); - __ Bind(&done_label); + + if (done_label.IsReferenced()) { + __ Bind(&done_label); + } } void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) { @@ -4447,6 +4452,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1)); vixl32::Label end; vixl32::Label shift_by_32_plus_shift_right; + vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end); __ And(shift_right, RegisterFrom(rhs), 0x1F); __ Lsrs(shift_left, RegisterFrom(rhs), 6); @@ -4461,7 +4467,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ Lsl(out_reg_lo, in_reg_lo, shift_left); __ Lsr(shift_left, in_reg_hi, shift_right); __ Add(out_reg_lo, out_reg_lo, shift_left); - __ B(&end); + __ B(final_label); __ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right. // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left). 
@@ -4473,7 +4479,9 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { __ Lsl(shift_right, in_reg_hi, shift_left); __ Add(out_reg_lo, out_reg_lo, shift_right); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } } @@ -4906,6 +4914,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); vixl32::Label less, greater, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done); Primitive::Type type = compare->InputAt(0)->GetType(); vixl32::Condition less_cond = vixl32::Condition(kNone); switch (type) { @@ -4944,17 +4953,19 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { UNREACHABLE(); } - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); __ B(less_cond, &less, /* far_target */ false); __ Bind(&greater); __ Mov(out, 1); - __ B(&done); + __ B(final_label); __ Bind(&less); __ Mov(out, -1); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { @@ -5746,6 +5757,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { int32_t const_index = Int32ConstantFrom(index); if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -5754,13 +5766,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { RegisterFrom(out_loc), obj, data_offset + const_index); - __ B(&done); + __ B(final_label); __ Bind(&uncompressed_load); GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), RegisterFrom(out_loc), obj, data_offset + (const_index << 1)); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } else { uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); @@ -5785,15 +5799,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } if (maybe_compressed_char_at) { vixl32::Label uncompressed_load, done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); __ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not. 
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); __ B(cs, &uncompressed_load, /* far_target */ false); __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0)); - __ B(&done); + __ B(final_label); __ Bind(&uncompressed_load); __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1)); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } else { codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); } @@ -6032,6 +6049,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { @@ -6054,7 +6072,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding // store instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); - __ B(&done); + __ B(final_label); __ Bind(&non_zero); } @@ -7062,6 +7080,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); vixl32::Label done, zero; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); SlowPathCodeARMVIXL* slow_path = nullptr; // Return 0 if `obj` is null. @@ -7083,7 +7102,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // Classes must be equal for the instanceof to succeed. __ B(ne, &zero, /* far_target */ false); __ Mov(out, 1); - __ B(&done); + __ B(final_label); break; } @@ -7106,12 +7125,12 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. - __ CompareAndBranchIfZero(out, &done, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); __ Cmp(out, cls); __ B(ne, &loop, /* far_target */ false); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7137,11 +7156,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) kCompilerReadBarrierOption); __ CompareAndBranchIfNonZero(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. - __ B(&done); + __ B(final_label); __ Bind(&success); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7166,13 +7185,13 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) maybe_temp_loc, kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. 
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false); + __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false); __ Bind(&exact_check); __ Mov(out, 1); - __ B(&done); + __ B(final_label); break; } @@ -7193,7 +7212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7224,7 +7243,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); if (zero.IsReferenced()) { - __ B(&done); + __ B(final_label); } break; } @@ -7310,9 +7329,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { codegen_->AddSlowPath(type_check_slow_path); vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done); // Avoid null check if we know obj is not null. if (instruction->MustDoNullCheck()) { - __ CompareAndBranchIfZero(obj, &done, /* far_target */ false); + __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false); } switch (type_check_kind) { @@ -7376,7 +7396,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { vixl32::Label loop; __ Bind(&loop); __ Cmp(temp, cls); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // /* HeapReference<Class> */ temp = temp->super_class_ GenerateReferenceLoadOneRegister(instruction, @@ -7404,7 +7424,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Do an exact check. __ Cmp(temp, cls); - __ B(eq, &done, /* far_target */ false); + __ B(eq, final_label, /* far_target */ false); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ temp = temp->component_type_ @@ -7472,7 +7492,9 @@ break; } } - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } __ Bind(type_check_slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 0b50619a66..958c1a6fdb 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id } size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); + if (GetGraph()->HasSIMD()) { + __ movupd(Address(ESP, stack_index), XmmRegister(reg_id)); + } else { + __ movsd(Address(ESP, stack_index), XmmRegister(reg_id)); + } return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); + if (GetGraph()->HasSIMD()) { + __ movupd(XmmRegister(reg_id), Address(ESP, stack_index)); + } else { + __ movsd(XmmRegister(reg_id), Address(ESP, stack_index)); + } return GetFloatingPointSpillSlotSize(); } @@ -5699,7 +5710,12 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In the suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores the lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() + ? RegisterSet::AllFpu() + : RegisterSet::Empty()); } void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 65ee383b54..ca3a9eadd2 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator { } size_t GetFloatingPointSpillSlotSize() const OVERRIDE { - // 8 bytes == 2 words for each spill. - return 2 * kX86WordSize; + return GetGraph()->HasSIMD() + ? 4 * kX86WordSize // 16 bytes == 4 words for each spill + : 2 * kX86WordSize; // 8 bytes == 2 words for each spill } HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 08f1adfcff..c106d9b06e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode { : SlowPathCode(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD if (successor_ == nullptr) { __ jmp(GetReturnLabel()); } else { @@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg } size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); - return kX86_64WordSize; + if (GetGraph()->HasSIMD()) { + __ movupd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + } else { + __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id)); + } + return GetFloatingPointSpillSlotSize(); } size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); - return kX86_64WordSize; + if (GetGraph()->HasSIMD()) { + __ movupd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + } else { + __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index)); + } + return GetFloatingPointSpillSlotSize(); } void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -5152,7 +5163,12 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // In the suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores the lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() + ? RegisterSet::AllFpu() + : RegisterSet::Empty()); } void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 376c3ce381..c8336dabd9 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { } size_t GetFloatingPointSpillSlotSize() const OVERRIDE { - return kX86_64WordSize; + return GetGraph()->HasSIMD() + ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill + : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill } HGraphVisitor* GetLocationBuilder() OVERRIDE { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 98b80f5d3c..1006a776f0 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -270,9 +270,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void GenNumberOfLeadingZeros(LocationSummary* locations, +static void GenNumberOfLeadingZeros(HInvoke* invoke, Primitive::Type type, - ArmAssembler* assembler) { + CodeGeneratorARM* codegen) { + ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); Register out = locations->Out().AsRegister<Register>(); @@ -282,11 +284,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, Register in_reg_lo = in.AsRegisterPairLow<Register>(); Register in_reg_hi = in.AsRegisterPairHigh<Register>(); Label end; + Label* final_label = codegen->GetFinalLabel(invoke, &end); __ clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, &end); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label); __ clz(out, in_reg_lo); __ AddConstant(out, 32); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } else { __ clz(out, in.AsRegister<Register>()); } @@ -297,7 +302,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* inv } void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -309,27 +314,32 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke } void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); } -static void GenNumberOfTrailingZeros(LocationSummary* locations, +static void GenNumberOfTrailingZeros(HInvoke* invoke, Primitive::Type type, - ArmAssembler* assembler) { + CodeGeneratorARM* codegen) { DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + ArmAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Register out = locations->Out().AsRegister<Register>(); if (type == Primitive::kPrimLong) { Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Label end; + Label* final_label = codegen->GetFinalLabel(invoke, &end); __ rbit(out, in_reg_lo); __ clz(out, out); - __ CompareAndBranchIfNonZero(in_reg_lo, &end); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label); __ rbit(out, in_reg_hi); __ clz(out, out); __ AddConstant(out, 32); - __ Bind(&end); + if (end.IsLinked()) { + __ Bind(&end); + } } else { Register in = locations->InAt(0).AsRegister<Register>(); __ rbit(out, in); @@ -346,7 +356,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* in } void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { -
GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -358,7 +368,7 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invok } void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); } static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) { @@ -1355,6 +1365,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { Label end; Label return_true; Label return_false; + Label* final_label = codegen_->GetFinalLabel(invoke, &end); // Get offsets of count, value, and class fields within a string object. const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1428,12 +1439,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) { // If loop does not result in returning false, we return true. __ Bind(&return_true); __ LoadImmediate(out, 1); - __ b(&end); + __ b(final_label); // Return false and exit the function. __ Bind(&return_false); __ LoadImmediate(out, 0); - __ Bind(&end); + + if (end.IsLinked()) { + __ Bind(&end); + } } static void GenerateVisitStringIndexOf(HInvoke* invoke, @@ -2491,13 +2505,14 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dst_ptr = locations->GetTemp(2).AsRegister<Register>(); Label done, compressed_string_loop; + Label* final_label = codegen_->GetFinalLabel(invoke, &done); // dst to be copied. __ add(dst_ptr, dstObj, ShifterOperand(data_offset)); __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1)); __ subs(num_chr, srcEnd, ShifterOperand(srcBegin)); // Early out for valid zero-length retrievals. - __ b(&done, EQ); + __ b(final_label, EQ); // src range to copy. __ add(src_ptr, srcObj, ShifterOperand(value_offset)); @@ -2534,7 +2549,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&loop, GE); __ adds(num_chr, num_chr, ShifterOperand(4)); - __ b(&done, EQ); + __ b(final_label, EQ); // Main loop for < 4 character case and remainder handling. Loads and stores one // 16-bit Java character at a time. 
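(Stepping back to the x86/x86-64 hunks above.) With SIMD present, each floating-point spill slot is widened from 8 to 16 bytes so a full XMM register can be saved with movupd rather than only its low 64 bits with movsd, and the suspend-check slow path is forced to spill all live FP registers in full width because the runtime's own save/restore only covers the lower part. A quick check of the word arithmetic in the two GetFloatingPointSpillSlotSize bodies, assuming kX86WordSize is 4 bytes and kX86_64WordSize is 8 bytes (as their names suggest; the real constants live in ART's runtime headers):

```cpp
#include <cstddef>

// Assumed word sizes for the two targets.
constexpr size_t kX86WordSize = 4;     // 32-bit word
constexpr size_t kX86_64WordSize = 8;  // 64-bit word

constexpr size_t FpSpillSlotSizeX86(bool has_simd) {
  // movupd stores the full 16-byte XMM register; movsd only its low 8 bytes.
  return has_simd ? 4 * kX86WordSize   // 16 bytes == 4 x86 words
                  : 2 * kX86WordSize;  // 8 bytes == 2 x86 words
}

constexpr size_t FpSpillSlotSizeX86_64(bool has_simd) {
  return has_simd ? 2 * kX86_64WordSize   // 16 bytes == 2 x86-64 words
                  : 1 * kX86_64WordSize;  // 8 bytes == 1 x86-64 word
}

int main() {
  static_assert(FpSpillSlotSizeX86(true) == 16 && FpSpillSlotSizeX86(false) == 8,
                "x86 spill slots: 16 bytes with SIMD, 8 without");
  static_assert(FpSpillSlotSizeX86_64(true) == 16 && FpSpillSlotSizeX86_64(false) == 8,
                "x86-64 spill slots: 16 bytes with SIMD, 8 without");
  return 0;
}
```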
@@ -2545,7 +2560,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&remainder, GT); if (mirror::kUseStringCompression) { - __ b(&done); + __ b(final_label); const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); DCHECK_EQ(c_char_size, 1u); @@ -2559,7 +2574,9 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ b(&compressed_string_loop, GT); } - __ Bind(&done); + if (done.IsLinked()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 19ff49c6ce..b25bad7170 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -333,9 +333,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } -static void GenNumberOfLeadingZeros(LocationSummary* locations, +static void GenNumberOfLeadingZeros(HInvoke* invoke, Primitive::Type type, - ArmVIXLAssembler* assembler) { + CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); vixl32::Register out = RegisterFrom(locations->Out()); @@ -345,11 +347,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, vixl32::Register in_reg_lo = LowRegisterFrom(in); vixl32::Register in_reg_hi = HighRegisterFrom(in); vixl32::Label end; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Clz(out, in_reg_hi); - __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false); __ Clz(out, in_reg_lo); __ Add(out, out, 32); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } else { __ Clz(out, RegisterFrom(in)); } @@ -360,7 +365,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -372,27 +377,32 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* in } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); } -static void GenNumberOfTrailingZeros(LocationSummary* locations, +static void GenNumberOfTrailingZeros(HInvoke* invoke, Primitive::Type type, - ArmVIXLAssembler* assembler) { + CodeGeneratorARMVIXL* codegen) { DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); vixl32::Register out = RegisterFrom(locations->Out()); if (type == Primitive::kPrimLong) { vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); vixl32::Label end; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end); __ Rbit(out, in_reg_lo); __ Clz(out, out); - __ 
CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false); + __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false); __ Rbit(out, in_reg_hi); __ Clz(out, out); __ Add(out, out, 32); - __ Bind(&end); + if (end.IsReferenced()) { + __ Bind(&end); + } } else { vixl32::Register in = RegisterFrom(locations->InAt(0)); __ Rbit(out, in); @@ -409,7 +419,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -421,7 +431,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* i } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); } static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { @@ -502,7 +512,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler()); } -static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { +static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); Location op1_loc = invoke->GetLocations()->InAt(0); Location op2_loc = invoke->GetLocations()->InAt(1); Location out_loc = invoke->GetLocations()->Out(); @@ -520,6 +531,7 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem const vixl32::Register temp1 = temps.Acquire(); vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0)); vixl32::Label nan, done; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); DCHECK(op1.Is(out)); @@ -536,7 +548,8 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem __ it(cond); __ vmov(cond, F32, out, op2); } - __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation. + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0). __ Vmov(temp1, op1); @@ -547,14 +560,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem __ And(temp1, temp1, temp2); } __ Vmov(out, temp1); - __ B(&done); + __ B(final_label); // handle NaN input. __ Bind(&nan); __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN. 
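(Aside on the GenMinMaxFloat/GenMinMaxDouble hunks above.) The op1 == op2 case cannot be decided by the comparison alone because +0.0 and -0.0 compare equal; the code therefore falls back on bit operations: AND clears the sign bit unless both inputs are negative, which yields the max, while OR keeps the sign bit if either input is negative, which yields the min (the double path's Vorr also doubles as the NaN/-0.0 assembly, per its comment). A small host-side check of that identity in plain C++, not the emitted ARM code:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t BitsOf(float f) {
  uint32_t u;
  std::memcpy(&u, &f, sizeof(u));  // Portable bit cast.
  return u;
}

static float FloatOf(uint32_t u) {
  float f;
  std::memcpy(&f, &u, sizeof(f));
  return f;
}

int main() {
  const float pos_zero = 0.0f;   // bit pattern 0x00000000
  const float neg_zero = -0.0f;  // bit pattern 0x80000000

  // +0.0f and -0.0f compare equal, so the FP compare cannot order them.
  assert(pos_zero == neg_zero);

  // AND clears the sign bit unless both inputs are negative:
  // max(+0.0, -0.0) == +0.0.
  float max_zero = FloatOf(BitsOf(pos_zero) & BitsOf(neg_zero));
  assert(BitsOf(max_zero) == BitsOf(pos_zero));

  // OR keeps the sign bit if either input is negative:
  // min(+0.0, -0.0) == -0.0.
  float min_zero = FloatOf(BitsOf(pos_zero) | BitsOf(neg_zero));
  assert(BitsOf(min_zero) == BitsOf(neg_zero));
  return 0;
}
```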
__ Vmov(out, temp1); - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { @@ -572,7 +587,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler()); + GenMinMaxFloat(invoke, /* is_min */ true, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -581,10 +596,11 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler()); + GenMinMaxFloat(invoke, /* is_min */ false, codegen_); } -static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { +static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); Location op1_loc = invoke->GetLocations()->InAt(0); Location op2_loc = invoke->GetLocations()->InAt(1); Location out_loc = invoke->GetLocations()->Out(); @@ -599,6 +615,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse vixl32::DRegister op2 = DRegisterFrom(op2_loc); vixl32::DRegister out = OutputDRegister(invoke); vixl32::Label handle_nan_eq, done; + vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done); DCHECK(op1.Is(out)); @@ -615,19 +632,22 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse __ it(cond); __ vmov(cond, F64, out, op2); } - __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation. + // for <>(not equal), we've done min/max calculation. + __ B(ne, final_label, /* far_target */ false); // handle op1 == op2, max(+0.0,-0.0). if (!is_min) { __ Vand(F64, out, op1, op2); - __ B(&done); + __ B(final_label); } // handle op1 == op2, min(+0.0,-0.0), NaN input. __ Bind(&handle_nan_eq); __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN. - __ Bind(&done); + if (done.IsReferenced()) { + __ Bind(&done); + } } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -635,7 +655,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler()); + GenMinMaxDouble(invoke, /* is_min */ true , codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -643,7 +663,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler()); + GenMinMaxDouble(invoke, /* is_min */ false, codegen_); } static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) { @@ -1670,6 +1690,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { vixl32::Label end; vixl32::Label return_true; vixl32::Label return_false; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end); // Get offsets of count, value, and class fields within a string object. 
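(Aside on the `Lsrs(length, length, 1u)` idiom used by the String intrinsics here and in the ArrayGet hunks earlier.) Judging from the static_assert in those hunks (0 = compressed, 1 = uncompressed), a string's count field stores the character count shifted left by one with the compression state in the lowest bit, so a single flag-setting logical shift right both recovers the length and moves the flag into the carry flag for the `b.cs` branch to the uncompressed path. A sketch of that encoding on the host, with hypothetical helpers rather than ART's mirror::String API:

```cpp
#include <cassert>
#include <cstdint>

// Assumed encoding, matching the static_assert in the hunks above:
// lowest bit = compression flag (0 = compressed, 1 = uncompressed),
// remaining bits = character count.
constexpr uint32_t EncodeCount(uint32_t length, bool compressed) {
  return (length << 1) | (compressed ? 0u : 1u);
}

constexpr uint32_t DecodeLength(uint32_t count_field) {
  // This is what `Lsrs length, length, #1` computes; on ARM the flag
  // bit lands in the carry flag, so `b.cs` takes the uncompressed path.
  return count_field >> 1;
}

constexpr bool IsCompressed(uint32_t count_field) {
  return (count_field & 1u) == 0u;
}

int main() {
  uint32_t c = EncodeCount(42, /*compressed=*/true);
  assert(DecodeLength(c) == 42 && IsCompressed(c));
  c = EncodeCount(42, /*compressed=*/false);
  assert(DecodeLength(c) == 42 && !IsCompressed(c));
  return 0;
}
```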
@@ -1670,6 +1690,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke)
   vixl32::Label end;
   vixl32::Label return_true;
   vixl32::Label return_false;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
 
   // Get offsets of count, value, and class fields within a string object.
   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1746,12 +1767,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke)
   // If loop does not result in returning false, we return true.
   __ Bind(&return_true);
   __ Mov(out, 1);
-  __ B(&end);
+  __ B(final_label);
 
   // Return false and exit the function.
   __ Bind(&return_false);
   __ Mov(out, 0);
-  __ Bind(&end);
+
+  if (end.IsReferenced()) {
+    __ Bind(&end);
+  }
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2789,13 +2813,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke)
   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
 
   vixl32::Label done, compressed_string_loop;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
   // dst to be copied.
   __ Add(dst_ptr, dstObj, data_offset);
   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
 
   __ Subs(num_chr, srcEnd, srcBegin);
   // Early out for valid zero-length retrievals.
-  __ B(eq, &done, /* far_target */ false);
+  __ B(eq, final_label, /* far_target */ false);
 
   // src range to copy.
   __ Add(src_ptr, srcObj, value_offset);
@@ -2839,7 +2864,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke)
     __ B(ge, &loop, /* far_target */ false);
 
     __ Adds(num_chr, num_chr, 4);
-    __ B(eq, &done, /* far_target */ false);
+    __ B(eq, final_label, /* far_target */ false);
 
     // Main loop for < 4 character case and remainder handling. Loads and stores one
     // 16-bit Java character at a time.
@@ -2852,7 +2877,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke)
   __ B(gt, &remainder, /* far_target */ false);
 
   if (mirror::kUseStringCompression) {
-    __ B(&done);
+    __ B(final_label);
 
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
@@ -2868,7 +2893,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke)
     __ B(gt, &compressed_string_loop, /* far_target */ false);
   }
 
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..d391f6913c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -417,6 +417,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
 class RegisterSet : public ValueObject {
  public:
   static RegisterSet Empty() { return RegisterSet(); }
+  static RegisterSet AllFpu() { return RegisterSet(0, -1); }
 
   void Add(Location loc) {
     if (loc.IsRegister()) {
@@ -462,6 +463,7 @@ class RegisterSet : public ValueObject {
 
  private:
   RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+  RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
 
   uint32_t core_registers_;
   uint32_t floating_point_registers_;
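RegisterSet::AllFpu() above constructs a set whose floating-point mask has every bit set (the -1 argument converts to 0xFFFFFFFF). How the code generators in this change consume it is not visible in this hunk; a plausible use, given the SIMD theme of the rest of the patch, is to treat every FP register as live across slow paths when the graph contains SIMD, but that is an assumption. A self-contained analog showing just the bitmask behaviour, with simplified names:

    #include <cassert>
    #include <cstdint>

    // Analog of the RegisterSet above: one bit per register in two 32-bit masks.
    class RegisterSet {
     public:
      static RegisterSet Empty() { return RegisterSet(); }
      static RegisterSet AllFpu() { return RegisterSet(0, -1); }  // -1 -> 0xFFFFFFFF
      bool ContainsFpu(int reg) const { return ((floating_point_registers_ >> reg) & 1u) != 0; }
      bool ContainsCore(int reg) const { return ((core_registers_ >> reg) & 1u) != 0; }
     private:
      RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
      RegisterSet(uint32_t core, uint32_t fp)
          : core_registers_(core), floating_point_registers_(fp) {}
      uint32_t core_registers_;
      uint32_t floating_point_registers_;
    };

    int main() {
      RegisterSet all_fpu = RegisterSet::AllFpu();
      for (int reg = 0; reg < 32; ++reg) {
        assert(all_fpu.ContainsFpu(reg));    // every FP register is in the set
        assert(!all_fpu.ContainsCore(reg));  // no core register is
      }
      return 0;
    }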
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
   if (HasTryCatch()) {
     outer_graph->SetHasTryCatch(true);
   }
+  if (HasSIMD()) {
+    outer_graph->SetHasSIMD(true);
+  }
 
   HInstruction* return_value = nullptr;
   if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_simd_(false),
         has_loops_(false),
         has_irreducible_loops_(false),
         debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasSIMD() const { return has_simd_; }
+  void SetHasSIMD(bool value) { has_simd_ = value; }
+
   bool HasLoops() const { return has_loops_; }
   void SetHasLoops(bool value) { has_loops_ = value; }
 
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // false positives.
   bool has_try_catch_;
 
+  // Flag whether SIMD instructions appear in the graph. If true, the
+  // code generators may have to be more careful spilling the wider
+  // contents of SIMD registers.
+  bool has_simd_;
+
   // Flag whether there are any loops in the graph. We can skip loop
   // optimization if it's false. It's only best effort to keep it up
   // to date in the presence of code elimination so there might be false
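The one-way flag merge in HGraph::InlineInto above matters because graph-wide properties are queried long after inlining: if a callee containing SIMD were inlined while the caller's has_simd_ stayed false, later phases consulting HasSIMD() would under-report and could pick too-narrow spill treatment. A minimal stand-alone model of that invariant, with simplified names standing in for the ART classes:

    #include <cassert>

    // Graph-wide properties must be ORed into the caller when inlining;
    // they may only ever go from false to true.
    struct Graph {
      bool has_try_catch = false;
      bool has_simd = false;
    };

    void InlineInto(const Graph& callee, Graph* caller) {
      if (callee.has_try_catch) caller->has_try_catch = true;
      if (callee.has_simd) caller->has_simd = true;  // the line this change adds
    }

    int main() {
      Graph caller, callee;
      callee.has_simd = true;
      InlineInto(callee, &caller);
      assert(caller.has_simd);  // spill logic can now stay conservative
      return 0;
    }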
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 8a9c1ccaff..0d33b49fdb 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,11 +299,13 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
       // Currently, we spill unconditionnally the current method in the code generators.
       && !interval->GetDefinedBy()->IsCurrentMethod()) {
     // We spill eagerly, so move must be at definition.
-    InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->ToLocation(),
-                    interval->NeedsTwoSpillSlots()
-                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+    Location loc;
+    switch (interval->NumberOfSpillSlotsNeeded()) {
+      case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+      case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+      default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+    }
+    InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
   }
   UsePosition* use = current->GetFirstUse();
   EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
@@ -459,9 +461,11 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
       location_source = defined_by->GetLocations()->Out();
     } else {
       DCHECK(defined_by->IsCurrentMethod());
-      location_source = parent->NeedsTwoSpillSlots()
-          ? Location::DoubleStackSlot(parent->GetSpillSlot())
-          : Location::StackSlot(parent->GetSpillSlot());
+      switch (parent->NumberOfSpillSlotsNeeded()) {
+        case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+        case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+      }
     }
   } else {
     DCHECK(source != nullptr);
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
     } else {
       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
-      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
     }
   }
 }
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
     bool is_interval_beginning;
     size_t position;
     std::tie(position, is_interval_beginning, parent_interval) = *it;
-
-    bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+    size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
 
     if (is_interval_beginning) {
      DCHECK(!parent_interval->HasSpillSlot());
       DCHECK_EQ(position, parent_interval->GetStart());
 
-      // Find a free stack slot.
+      // Find first available free stack slot(s).
       size_t slot = 0;
-      for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
-        // Skip taken slots.
+      for (; ; ++slot) {
+        bool found = true;
+        for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+          if (taken.IsBitSet(s)) {
+            found = false;
+            break;  // failure
+          }
+        }
+        if (found) {
+          break;  // success
+        }
       }
+
       parent_interval->SetSpillSlot(slot);
 
-      *num_stack_slots_used = std::max(*num_stack_slots_used,
-                                       needs_two_slots ? slot + 1 : slot + 2);
-      if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+      *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+      if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
         // The parallel move resolver requires that there be an even number of spill slots
         // allocated for pair value types.
         ++(*num_stack_slots_used);
       }
 
-      taken.SetBit(slot);
-      if (needs_two_slots) {
-        taken.SetBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        taken.SetBit(s);
       }
     } else {
       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
       DCHECK(parent_interval->HasSpillSlot());
 
-      // Free up the stack slot used by this interval.
+      // Free up the stack slot(s) used by this interval.
       size_t slot = parent_interval->GetSpillSlot();
-      DCHECK(taken.IsBitSet(slot));
-      DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
-      taken.ClearBit(slot);
-      if (needs_two_slots) {
-        taken.ClearBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        DCHECK(taken.IsBitSet(s));
+        taken.ClearBit(s);
       }
     }
   }
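Both allocators now share the same shape of search: scan for the first index at which N consecutive slots are free, where N comes from NumberOfSpillSlotsNeeded(). A compact stand-alone version of that first-fit run search, with std::vector<bool> standing in for the allocator's bit vector and, as in the graph-color loop above, slots past the current end counting as free:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // First-fit search for `n` consecutive free slots. Indices beyond the
    // current size are implicitly free, so the outer loop always terminates.
    size_t FindFirstFreeRun(const std::vector<bool>& taken, size_t n) {
      for (size_t slot = 0; ; ++slot) {
        bool found = true;
        for (size_t s = slot; s < slot + n; ++s) {
          if (s < taken.size() && taken[s]) {
            found = false;  // failure, try the next start index
            break;
          }
        }
        if (found) {
          return slot;  // success
        }
      }
    }

    int main() {
      // Slots: taken, free, taken, free, free.
      std::vector<bool> taken = {true, false, true, false, false};
      assert(FindFirstFreeRun(taken, 1) == 1);
      assert(FindFirstFreeRun(taken, 2) == 3);
      assert(FindFirstFreeRun(taken, 3) == 3);  // run extends past the end
      return 0;
    }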
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 6354e76ec8..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
       LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
   }
 
-  // Find an available spill slot.
+  // Find first available spill slots.
+  size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
   size_t slot = 0;
   for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()) {
-      if (!parent->NeedsTwoSpillSlots()) {
-        // One spill slot is sufficient.
-        break;
-      }
-      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
-        // Two spill slots are available.
+    bool found = true;
+    for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+      if ((*spill_slots)[s] > parent->GetStart()) {
+        found = false;  // failure
         break;
       }
     }
+    if (found) {
+      break;  // success
+    }
   }
 
+  // Need new spill slots?
+  size_t upper = slot + number_of_spill_slots_needed;
+  if (upper > spill_slots->size()) {
+    spill_slots->resize(upper);
+  }
+  // Set slots to end.
   size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
+  for (size_t s = slot; s < upper; s++) {
+    (*spill_slots)[s] = end;
   }
 
   // Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
     // TODO: Reuse spill slots when intervals of phis from different catch
     // blocks do not overlap.
     interval->SetSpillSlot(catch_phi_spill_slots_);
-    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+    catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
   }
 }
 
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..c0a045c33e 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,8 @@ bool LiveInterval::SameRegisterKind(Location other) const {
   }
 }
 
-bool LiveInterval::NeedsTwoSpillSlots() const {
-  return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+  return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
 }
 
 Location LiveInterval::ToLocation() const {
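NumberOfSpillSlotsNeeded() keeps the old answers (two kVRegSize slots for long/double, one otherwise), but its size_t return type is what the rest of this change is preparing for: once SIMD values exist, a 128-bit vector needs four 4-byte slots, which the old bool could not express. A sketch of how the function might later be extended; kPrimVecQuad is a hypothetical placeholder for such a vector type, not something this change introduces:

    #include <cassert>
    #include <cstddef>

    // Spill sizes measured in kVRegSize (4-byte) units.
    enum class Type { kPrimInt, kPrimLong, kPrimDouble, kPrimVecQuad };

    size_t NumberOfSpillSlotsNeeded(Type type) {
      switch (type) {
        case Type::kPrimLong:
        case Type::kPrimDouble:  return 2;  // 8 bytes
        case Type::kPrimVecQuad: return 4;  // 16 bytes, hypothetical SIMD case
        default:                 return 1;  // 4 bytes or smaller
      }
    }

    int main() {
      assert(NumberOfSpillSlotsNeeded(Type::kPrimInt) == 1);
      assert(NumberOfSpillSlotsNeeded(Type::kPrimDouble) == 2);
      assert(NumberOfSpillSlotsNeeded(Type::kPrimVecQuad) == 4);
      return 0;
    }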
@@ -494,10 +494,10 @@ Location LiveInterval::ToLocation() const {
     if (defined_by->IsConstant()) {
       return defined_by->GetLocations()->Out();
     } else if (GetParent()->HasSpillSlot()) {
-      if (NeedsTwoSpillSlots()) {
-        return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
-      } else {
-        return Location::StackSlot(GetParent()->GetSpillSlot());
+      switch (NumberOfSpillSlotsNeeded()) {
+        case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+        case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
       }
     } else {
       return Location();
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 340d0ccefe..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
   // Returns kNoRegister otherwise.
   int FindHintAtDefinition() const;
 
-  // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
-  // slots for spilling.
-  bool NeedsTwoSpillSlots() const;
+  // Returns the number of required spilling slots (measured as a multiple of the
+  // Dex virtual register size `kVRegSize`).
+  size_t NumberOfSpillSlotsNeeded() const;
 
   bool IsFloatingPoint() const {
     return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;