Diffstat (limited to 'compiler')
| Mode | File | Lines changed |
|------|------|---------------|
| -rw-r--r-- | compiler/image_writer.cc | 201 |
| -rw-r--r-- | compiler/image_writer.h | 12 |
| -rw-r--r-- | compiler/oat_writer.cc | 4 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 30 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 5 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 36 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 4 |
| -rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 6 |
| -rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 36 |
| -rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 36 |
| -rw-r--r-- | compiler/optimizing/locations.h | 22 |
| -rw-r--r-- | compiler/optimizing/nodes.cc | 3 |
| -rw-r--r-- | compiler/optimizing/nodes.h | 9 |
| -rw-r--r-- | compiler/optimizing/register_allocation_resolver.cc | 25 |
| -rw-r--r-- | compiler/optimizing/register_allocator_graph_color.cc | 41 |
| -rw-r--r-- | compiler/optimizing/register_allocator_linear_scan.cc | 41 |
| -rw-r--r-- | compiler/optimizing/sharpening.cc | 4 |
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.cc | 15 |
| -rw-r--r-- | compiler/optimizing/ssa_liveness_analysis.h | 6 |

19 files changed, 350 insertions(+), 186 deletions(-)
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index aefdb548ff..d156644484 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -714,7 +714,8 @@ void ImageWriter::ComputeLazyFieldsForImageClasses() {
   class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
-static bool IsBootClassLoaderClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+static bool IsBootClassLoaderClass(ObjPtr<mirror::Class> klass)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   return klass->GetClassLoader() == nullptr;
 }
 
@@ -722,33 +723,33 @@ bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) {
   return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
 }
 
-bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(ObjPtr<mirror::Class> klass) {
   bool early_exit = false;
   std::unordered_set<mirror::Class*> visited;
   return PruneAppImageClassInternal(klass, &early_exit, &visited);
 }
 
 bool ImageWriter::PruneAppImageClassInternal(
-    mirror::Class* klass,
+    ObjPtr<mirror::Class> klass,
     bool* early_exit,
     std::unordered_set<mirror::Class*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
   DCHECK(compile_app_image_);
-  if (klass == nullptr || IsInBootImage(klass)) {
+  if (klass == nullptr || IsInBootImage(klass.Ptr())) {
     return false;
   }
-  auto found = prune_class_memo_.find(klass);
+  auto found = prune_class_memo_.find(klass.Ptr());
   if (found != prune_class_memo_.end()) {
     // Already computed, return the found value.
     return found->second;
   }
   // Circular dependencies, return false but do not store the result in the memoization table.
-  if (visited->find(klass) != visited->end()) {
+  if (visited->find(klass.Ptr()) != visited->end()) {
     *early_exit = true;
     return false;
   }
-  visited->emplace(klass);
+  visited->emplace(klass.Ptr());
   bool result = IsBootClassLoaderClass(klass);
   std::string temp;
   // Prune if not an image class, this handles any broken sets of image classes such as having a
@@ -812,20 +813,20 @@ bool ImageWriter::PruneAppImageClassInternal(
         dex_file_oat_index_map_.find(dex_cache->GetDexFile()) == dex_file_oat_index_map_.end();
   }
   // Erase the element we stored earlier since we are exiting the function.
-  auto it = visited->find(klass);
+  auto it = visited->find(klass.Ptr());
   DCHECK(it != visited->end());
   visited->erase(it);
   // Only store result if it is true or none of the calls early exited due to circular
   // dependencies. If visited is empty then we are the root caller, in this case the cycle was in
   // a child call and we can remember the result.
   if (result == true || !my_early_exit || visited->empty()) {
-    prune_class_memo_[klass] = result;
+    prune_class_memo_[klass.Ptr()] = result;
   }
   *early_exit |= my_early_exit;
   return result;
 }
 
-bool ImageWriter::KeepClass(Class* klass) {
+bool ImageWriter::KeepClass(ObjPtr<mirror::Class> klass) {
   if (klass == nullptr) {
     return false;
   }
@@ -896,15 +897,27 @@ class ImageWriter::PruneClassLoaderClassesVisitor : public ClassLoaderVisitor {
         Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(class_loader);
     class_table->Visit(classes_visitor);
     removed_class_count_ += classes_visitor.Prune();
+
+    // Record app image class loader. The fake boot class loader should not get registered
+    // and we should end up with only one class loader for an app and none for boot image.
+    if (class_loader != nullptr && class_table != nullptr) {
+      DCHECK(class_loader_ == nullptr);
+      class_loader_ = class_loader;
+    }
   }
 
   size_t GetRemovedClassCount() const {
     return removed_class_count_;
   }
 
+  ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return class_loader_;
+  }
+
  private:
   ImageWriter* const image_writer_;
   size_t removed_class_count_;
+  ObjPtr<mirror::ClassLoader> class_loader_;
 };
 
 void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
@@ -913,70 +926,150 @@ void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
   Runtime::Current()->GetClassLinker()->VisitClassLoaders(visitor);
 }
 
+void ImageWriter::PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+                                          ObjPtr<mirror::ClassLoader> class_loader) {
+  // To ensure deterministic contents of the hash-based arrays, each slot shall contain
+  // the candidate with the lowest index. As we're processing entries in increasing index
+  // order, this means trying to look up the entry for the current index if the slot is
+  // empty or if it contains a higher index.
+
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  ArtMethod* resolution_method = runtime->GetResolutionMethod();
+  const DexFile& dex_file = *dex_cache->GetDexFile();
+  // Prune methods.
+  ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
+  for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
+    ArtMethod* method =
+        mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
+    DCHECK(method != nullptr) << "Expected resolution method instead of null method";
+    mirror::Class* declaring_class = method->GetDeclaringClass();
+    // Copied methods may be held live by a class which was not an image class but have a
+    // declaring class which is an image class. Set it to the resolution method to be safe and
+    // prevent dangling pointers.
+    if (method->IsCopied() || !KeepClass(declaring_class)) {
+      mirror::DexCache::SetElementPtrSize(resolved_methods,
+                                          i,
+                                          resolution_method,
+                                          target_ptr_size_);
+    } else if (kIsDebugBuild) {
+      // Check that the class is still in the classes table.
+      ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+      CHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
+          << Class::PrettyClass(declaring_class) << " not in class linker table";
+    }
+  }
+  // Prune fields and make the contents of the field array deterministic.
+  mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
+  dex::TypeIndex last_class_idx;  // Initialized to invalid index.
+  ObjPtr<mirror::Class> last_class = nullptr;
+  for (size_t i = 0, end = dex_file.NumFieldIds(); i < end; ++i) {
+    uint32_t slot_idx = dex_cache->FieldSlotIndex(i);
+    auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, slot_idx, target_ptr_size_);
+    uint32_t stored_index = pair.index;
+    ArtField* field = pair.object;
+    if (field != nullptr && i > stored_index) {
+      continue;  // Already checked.
+    }
+    // Check if the referenced class is in the image. Note that we want to check the referenced
+    // class rather than the declaring class to preserve the semantics, i.e. using a FieldId
+    // results in resolving the referenced class and that can for example throw OOME.
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(i);
+    if (field_id.class_idx_ != last_class_idx) {
+      last_class_idx = field_id.class_idx_;
+      last_class = class_linker->LookupResolvedType(
+          dex_file, last_class_idx, dex_cache, class_loader);
+      if (last_class != nullptr && !KeepClass(last_class)) {
+        last_class = nullptr;
+      }
+    }
+    if (field == nullptr || i < stored_index) {
+      if (last_class != nullptr) {
+        const char* name = dex_file.StringDataByIdx(field_id.name_idx_);
+        const char* type = dex_file.StringByTypeIdx(field_id.type_idx_);
+        field = mirror::Class::FindField(Thread::Current(), last_class, name, type);
+        if (field != nullptr) {
+          // If the referenced class is in the image, the defining class must also be there.
+          DCHECK(KeepClass(field->GetDeclaringClass()));
+          dex_cache->SetResolvedField(i, field, target_ptr_size_);
+        }
+      }
+    } else {
+      DCHECK_EQ(i, stored_index);
+      if (last_class == nullptr) {
+        dex_cache->ClearResolvedField(stored_index, target_ptr_size_);
+      }
+    }
+  }
+  // Prune types and make the contents of the type array deterministic.
+  // This is done after fields and methods as their lookup can touch the types array.
+  for (size_t i = 0, end = dex_cache->GetDexFile()->NumTypeIds(); i < end; ++i) {
+    dex::TypeIndex type_idx(i);
+    uint32_t slot_idx = dex_cache->TypeSlotIndex(type_idx);
+    mirror::TypeDexCachePair pair =
+        dex_cache->GetResolvedTypes()[slot_idx].load(std::memory_order_relaxed);
+    uint32_t stored_index = pair.index;
+    ObjPtr<mirror::Class> klass = pair.object.Read();
+    if (klass == nullptr || i < stored_index) {
+      klass = class_linker->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader);
+      if (klass != nullptr) {
+        DCHECK_EQ(dex_cache->GetResolvedType(type_idx), klass);
+        stored_index = i;  // For correct clearing below if not keeping the `klass`.
+      }
+    } else if (i == stored_index && !KeepClass(klass)) {
+      dex_cache->ClearResolvedType(dex::TypeIndex(stored_index));
+    }
+  }
+  // Strings do not need pruning, but the contents of the string array must be deterministic.
+  for (size_t i = 0, end = dex_cache->GetDexFile()->NumStringIds(); i < end; ++i) {
+    dex::StringIndex string_idx(i);
+    uint32_t slot_idx = dex_cache->StringSlotIndex(string_idx);
+    mirror::StringDexCachePair pair =
+        dex_cache->GetStrings()[slot_idx].load(std::memory_order_relaxed);
+    uint32_t stored_index = pair.index;
+    ObjPtr<mirror::String> string = pair.object.Read();
+    if (string == nullptr || i < stored_index) {
+      string = class_linker->LookupString(dex_file, string_idx, dex_cache);
+      DCHECK(string == nullptr || dex_cache->GetResolvedString(string_idx) == string);
+    }
+  }
+}
+
 void ImageWriter::PruneNonImageClasses() {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
+  ScopedAssertNoThreadSuspension sa(__FUNCTION__);
 
   // Clear class table strong roots so that dex caches can get pruned. We require pruning the class
   // path dex caches.
   class_linker->ClearClassTableStrongRoots();
 
   // Remove the undesired classes from the class roots.
+  ObjPtr<mirror::ClassLoader> class_loader;
   {
     PruneClassLoaderClassesVisitor class_loader_visitor(this);
     VisitClassLoaders(&class_loader_visitor);
     VLOG(compiler) << "Pruned " << class_loader_visitor.GetRemovedClassCount() << " classes";
+    class_loader = class_loader_visitor.GetClassLoader();
+    DCHECK_EQ(class_loader != nullptr, compile_app_image_);
   }
 
   // Clear references to removed classes from the DexCaches.
-  ArtMethod* resolution_method = runtime->GetResolutionMethod();
-
-  ScopedAssertNoThreadSuspension sa(__FUNCTION__);
-  ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
-  ReaderMutexLock mu2(self, *Locks::dex_lock_);
-  for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-    if (self->IsJWeakCleared(data.weak_root)) {
-      continue;
-    }
-    ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
-    for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-      mirror::TypeDexCachePair pair =
-          dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed);
-      mirror::Class* klass = pair.object.Read();
-      if (klass != nullptr && !KeepClass(klass)) {
-        dex_cache->ClearResolvedType(dex::TypeIndex(pair.index));
-      }
-    }
-    ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
-    for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
-      ArtMethod* method =
-          mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
-      DCHECK(method != nullptr) << "Expected resolution method instead of null method";
-      mirror::Class* declaring_class = method->GetDeclaringClass();
-      // Copied methods may be held live by a class which was not an image class but have a
-      // declaring class which is an image class. Set it to the resolution method to be safe and
-      // prevent dangling pointers.
-      if (method->IsCopied() || !KeepClass(declaring_class)) {
-        mirror::DexCache::SetElementPtrSize(resolved_methods,
-                                            i,
-                                            resolution_method,
-                                            target_ptr_size_);
-      } else {
-        // Check that the class is still in the classes table.
-        DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
-            << Class::PrettyClass(declaring_class) << " not in class linker table";
-      }
-    }
-    mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
-    for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-      auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, i, target_ptr_size_);
-      ArtField* field = pair.object;
-      if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) {
-        dex_cache->ClearResolvedField(pair.index, target_ptr_size_);
+  std::vector<ObjPtr<mirror::DexCache>> dex_caches;
+  {
+    ReaderMutexLock mu2(self, *Locks::dex_lock_);
+    dex_caches.reserve(class_linker->GetDexCachesData().size());
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      if (self->IsJWeakCleared(data.weak_root)) {
+        continue;
       }
+      dex_caches.push_back(self->DecodeJObject(data.weak_root)->AsDexCache());
     }
   }
+  for (ObjPtr<mirror::DexCache> dex_cache : dex_caches) {
+    PruneAndPreloadDexCache(dex_cache, class_loader);
+  }
 
   // Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
   class_linker->DropFindArrayClassCache();
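The determinism rule in `PruneAndPreloadDexCache` (each hash slot keeps the candidate with the lowest index) can be illustrated in isolation. Below is a minimal sketch of that slot-filling rule, assuming a simplified cache of `(index, value)` pairs and a trivial modulo hash; all names here are hypothetical stand-ins, not the ART API:

```cpp
#include <cstdint>
#include <vector>

// Hypothetical miniature of a hash-based dex cache array: each slot ends up
// holding the pair for the *lowest* index that maps to it, which makes the
// preloaded contents independent of runtime lookup order.
struct Pair { uint32_t index; int value; };

void PreloadDeterministic(std::vector<Pair>& slots,
                          uint32_t num_indexes,
                          int (*lookup)(uint32_t)) {
  for (uint32_t i = 0; i != num_indexes; ++i) {
    uint32_t slot = i % slots.size();  // stand-in for FieldSlotIndex()/TypeSlotIndex()
    Pair& entry = slots[slot];
    if (entry.value != 0 && i > entry.index) {
      continue;  // slot already holds a lower index; keep it
    }
    int value = lookup(i);  // stand-in for LookupResolvedType() and friends
    if (value != 0) {
      entry = {i, value};   // lowest index wins deterministically
    }
  }
}
```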
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index bdc7146632..16aff61dab 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -376,7 +376,7 @@ class ImageWriter FINAL {
   }
 
   // Returns true if the class was in the original requested image classes list.
-  bool KeepClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool KeepClass(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Debug aid that list of requested image classes.
   void DumpImageClasses();
@@ -391,6 +391,12 @@ class ImageWriter FINAL {
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Remove unwanted classes from the DexCache roots and preload deterministic DexCache contents.
+  void PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+                               ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_);
+
   // Verify unwanted classes removed.
   void CheckNonImageClassesRemoved() REQUIRES_SHARED(Locks::mutator_lock_);
   static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
@@ -473,11 +479,11 @@ class ImageWriter FINAL {
   // we also cannot have any classes which refer to these boot class loader non image classes.
   // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
   // driver.
-  bool PruneAppImageClass(mirror::Class* klass)
+  bool PruneAppImageClass(ObjPtr<mirror::Class> klass)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // early_exit is true if we had a cyclic dependency anywhere down the chain.
-  bool PruneAppImageClassInternal(mirror::Class* klass,
+  bool PruneAppImageClassInternal(ObjPtr<mirror::Class> klass,
                                   bool* early_exit,
                                   std::unordered_set<mirror::Class*>* visited)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 8e25aa3421..105db1d2d0 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1440,12 +1440,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
   mirror::String* GetTargetString(const LinkerPatch& patch)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile())));
     mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
                                                   patch.TargetStringIndex(),
-                                                  dex_cache);
+                                                  GetDexCache(patch.TargetStringDexFile()));
     DCHECK(string != nullptr);
     DCHECK(writer_->HasBootImage() ||
            Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b50619a66..4db4796985 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
       : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
    if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
 }
 
 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+  if (GetGraph()->HasSIMD()) {
+    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
+  } else {
+    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+  }
   return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+  if (GetGraph()->HasSIMD()) {
+    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
+  } else {
+    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+  }
   return GetFloatingPointSpillSlotSize();
 }
 
@@ -5699,7 +5710,11 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In suspend check slow path, usually there are no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores lower part).
+  locations->SetCustomSlowPathCallerSaves(
+      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5802,9 +5817,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
     } else if (destination.IsStackSlot()) {
       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
-    } else {
-      DCHECK(destination.IsDoubleStackSlot());
+    } else if (destination.IsDoubleStackSlot()) {
       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
@@ -5826,6 +5843,9 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
     }
+  } else if (source.IsSIMDStackSlot()) {
+    DCHECK(destination.IsFpuRegister());
+    __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 65ee383b54..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
   }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
-    // 8 bytes == 2 words for each spill.
-    return 2 * kX86WordSize;
+    return GetGraph()->HasSIMD()
+        ? 4 * kX86WordSize   // 16 bytes == 4 words for each spill
+        : 2 * kX86WordSize;  // 8 bytes == 2 words for each spill
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
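The pattern repeated in both save/restore helpers is a spill-width switch keyed off `HGraph::HasSIMD()`: a full 128-bit `movups` when any SIMD value may be live, a 64-bit `movsd` otherwise. A standalone sketch of that idea, with a hypothetical helper rather than the ART code generator API:

```cpp
#include <cstddef>
#include <cstring>

// Sketch: when SIMD appears anywhere in the method, FP spills must copy the
// full 128-bit XMM register; otherwise the 64-bit double lane suffices.
size_t SpillFpRegister(char* stack, size_t stack_index,
                       const char* xmm_reg_contents, bool graph_has_simd) {
  size_t width = graph_has_simd ? 16u : 8u;  // movups vs. movsd, in bytes
  std::memcpy(stack + stack_index, xmm_reg_contents, width);
  return width;  // mirrors what GetFloatingPointSpillSlotSize() reports
}
```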
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 08f1adfcff..2ffc398287 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
       : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
 }
 
 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
-  return kX86_64WordSize;
+  if (GetGraph()->HasSIMD()) {
+    __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  } else {
+    __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  }
+  return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
-  return kX86_64WordSize;
+  if (GetGraph()->HasSIMD()) {
+    __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  } else {
+    __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  }
+  return GetFloatingPointSpillSlotSize();
 }
 
 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5152,7 +5163,11 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In suspend check slow path, usually there are no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores lower part).
+  locations->SetCustomSlowPathCallerSaves(
+      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5241,6 +5256,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
+  } else if (source.IsSIMDStackSlot()) {
+    DCHECK(destination.IsFpuRegister());
+    __ movups(destination.AsFpuRegister<XmmRegister>(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5291,10 +5310,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
     } else if (destination.IsStackSlot()) {
       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
-    } else {
-      DCHECK(destination.IsDoubleStackSlot()) << destination;
+    } else if (destination.IsDoubleStackSlot()) {
       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
+                source.AsFpuRegister<XmmRegister>());
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 376c3ce381..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
-    return kX86_64WordSize;
+    return GetGraph()->HasSIMD()
+        ? 2 * kX86_64WordSize   // 16 bytes == 2 x86_64 words for each spill
+        : 1 * kX86_64WordSize;  // 8 bytes == 1 x86_64 words for each spill
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2bf5c53e17..0dfae11465 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -322,9 +322,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
       codegen_.DumpCoreRegister(stream, location.high());
     } else if (location.IsUnallocated()) {
       stream << "unallocated";
-    } else {
-      DCHECK(location.IsDoubleStackSlot());
+    } else if (location.IsDoubleStackSlot()) {
       stream << "2x" << location.GetStackIndex() << "(sp)";
+    } else {
+      DCHECK(location.IsSIMDStackSlot());
+      stream << "4x" << location.GetStackIndex() << "(sp)";
     }
   }
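Both code generators now treat every FP register as caller-save at a suspend check whenever SIMD is present, because (per the comments above) the runtime's suspend entrypoint only preserves the lower part of each XMM register. A hedged sketch of the set logic, using a simplified mask type rather than ART's `RegisterSet`:

```cpp
#include <cstdint>

// Simplified caller-save set: one bit per floating-point register.
struct FpuSet {
  uint32_t mask;
  static FpuSet Empty() { return {0u}; }
  static FpuSet AllFpu() { return {~0u}; }  // force-spill every FP register
};

// At a suspend check, SIMD graphs must spill all live FP registers in full
// width, since the runtime only saves/restores their lower halves.
FpuSet SuspendCheckCallerSaves(bool graph_has_simd) {
  return graph_has_simd ? FpuSet::AllFpu() : FpuSet::Empty();
}
```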
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index ba006edfa2..bf85b1989e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2559,7 +2559,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
 // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
 void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kNoCall,
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -2567,17 +2567,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke)
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // We will call memcpy() to do the actual work. Allocate the temporary
-  // registers to use the correct input registers, and output register.
-  // memcpy() uses the normal MIPS calling convention.
-  InvokeRuntimeCallingConvention calling_convention;
-
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
-  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
-  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -2596,16 +2588,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   Register dstBegin = locations->InAt(4).AsRegister<Register>();
 
   Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
-  DCHECK_EQ(dstPtr, A0);
   Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
-  DCHECK_EQ(srcPtr, A1);
   Register numChrs = locations->GetTemp(2).AsRegister<Register>();
-  DCHECK_EQ(numChrs, A2);
-
-  Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
-  DCHECK_EQ(dstReturn, V0);
 
   MipsLabel done;
+  MipsLabel loop;
 
   // Location of data in char array buffer.
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -2634,7 +2621,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
     __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
     __ Sll(TMP, TMP, 31);
 
-    // If string is uncompressed, use memcpy() path.
+    // If string is uncompressed, use uncompressed path.
     __ Bnez(TMP, &uncompressed_copy);
 
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2660,10 +2647,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
     __ Addu(srcPtr, srcPtr, AT);
   }
 
-  // Calculate number of bytes to copy from number of characters.
-  __ Sll(numChrs, numChrs, char_shift);
-
-  codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+  __ Bind(&loop);
+  __ Lh(AT, srcPtr, 0);
+  __ Addiu(numChrs, numChrs, -1);
+  __ Addiu(srcPtr, srcPtr, char_size);
+  __ Sh(AT, dstPtr, 0);
+  __ Addiu(dstPtr, dstPtr, char_size);
+  __ Bnez(numChrs, &loop);
 
   __ Bind(&done);
 }
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 21c5074a1c..1ee89cf127 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1895,7 +1895,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
 // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
 void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kNoCall,
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1903,17 +1903,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // We will call memcpy() to do the actual work. Allocate the temporary
-  // registers to use the correct input registers, and output register.
-  // memcpy() uses the normal MIPS calling conventions.
-  InvokeRuntimeCallingConvention calling_convention;
-
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
-  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong);
-  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1932,16 +1924,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>();
 
   GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>();
-  DCHECK_EQ(dstPtr, A0);
   GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>();
-  DCHECK_EQ(srcPtr, A1);
   GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>();
-  DCHECK_EQ(numChrs, A2);
-
-  GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>();
-  DCHECK_EQ(dstReturn, V0);
 
   Mips64Label done;
+  Mips64Label loop;
 
   // Location of data in char array buffer.
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -1965,7 +1952,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
     __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
     __ Dext(TMP, TMP, 0, 1);
 
-    // If string is uncompressed, use memcpy() path.
+    // If string is uncompressed, use uncompressed path.
     __ Bnezc(TMP, &uncompressed_copy);
 
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -1986,10 +1973,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   __ Daddiu(srcPtr, srcObj, value_offset);
   __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
 
-  // Calculate number of bytes to copy from number of characters.
-  __ Dsll(numChrs, numChrs, char_shift);
-
-  codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+  __ Bind(&loop);
+  __ Lh(AT, srcPtr, 0);
+  __ Daddiu(numChrs, numChrs, -1);
+  __ Daddiu(srcPtr, srcPtr, char_size);
+  __ Sh(AT, dstPtr, 0);
+  __ Daddiu(dstPtr, dstPtr, char_size);
+  __ Bnezc(numChrs, &loop);
 
   __ Bind(&done);
 }
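The new inline loops on both MIPS targets replace the `memcpy()` runtime call with a halfword-at-a-time copy. In C++ terms, the six emitted instructions compute roughly the following (a sketch of the loop's semantics, assuming `numChrs` is non-zero on entry, not generated code):

```cpp
#include <cstdint>

// Equivalent of the emitted MIPS loop: copy numChrs UTF-16 code units from
// srcPtr to dstPtr, one 16-bit load/store per iteration.
void CopyChars(const uint16_t* srcPtr, uint16_t* dstPtr, uint32_t numChrs) {
  do {
    uint16_t c = *srcPtr++;   // __ Lh(AT, srcPtr, 0); __ Addiu(srcPtr, srcPtr, char_size);
    *dstPtr++ = c;            // __ Sh(AT, dstPtr, 0); __ Addiu(dstPtr, dstPtr, char_size);
  } while (--numChrs != 0);   // __ Addiu(numChrs, numChrs, -1); __ Bnez(numChrs, &loop);
}
```

Dropping the call also lets the locations builder switch from `kCallOnMainOnly` to `kNoCall` and use arbitrary temporaries instead of pinning the memcpy argument registers.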
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..6f0dbce2df 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -69,11 +69,13 @@ class Location : public ValueObject {
     // We do not use the value 9 because it conflicts with kLocationConstantMask.
     kDoNotUse9 = 9,
 
+    kSIMDStackSlot = 10,  // 128bit stack slot. TODO: generalize with encoded #bytes?
+
     // Unallocated location represents a location that is not fixed and can be
     // allocated by a register allocator. Each unallocated location has
     // a policy that specifies what kind of location is suitable. Payload
     // contains register allocation policy.
-    kUnallocated = 10,
+    kUnallocated = 11,
   };
 
   Location() : ValueObject(), value_(kInvalid) {
@@ -82,6 +84,7 @@ class Location : public ValueObject {
     static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
+    static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError");
    static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -266,8 +269,20 @@ class Location : public ValueObject {
     return GetKind() == kDoubleStackSlot;
   }
 
+  static Location SIMDStackSlot(intptr_t stack_index) {
+    uintptr_t payload = EncodeStackIndex(stack_index);
+    Location loc(kSIMDStackSlot, payload);
+    // Ensure that sign is preserved.
+    DCHECK_EQ(loc.GetStackIndex(), stack_index);
+    return loc;
+  }
+
+  bool IsSIMDStackSlot() const {
+    return GetKind() == kSIMDStackSlot;
+  }
+
   intptr_t GetStackIndex() const {
-    DCHECK(IsStackSlot() || IsDoubleStackSlot());
+    DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot());
     // Decode stack index manually to preserve sign.
     return GetPayload() - kStackIndexBias;
   }
 
@@ -315,6 +330,7 @@ class Location : public ValueObject {
       case kRegister: return "R";
       case kStackSlot: return "S";
       case kDoubleStackSlot: return "DS";
+      case kSIMDStackSlot: return "SIMD";
       case kUnallocated: return "U";
       case kConstant: return "C";
       case kFpuRegister: return "F";
@@ -417,6 +433,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
 class RegisterSet : public ValueObject {
  public:
   static RegisterSet Empty() { return RegisterSet(); }
+  static RegisterSet AllFpu() { return RegisterSet(0, -1); }
 
   void Add(Location loc) {
     if (loc.IsRegister()) {
@@ -462,6 +479,7 @@ class RegisterSet : public ValueObject {
 
  private:
   RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+  RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
 
   uint32_t core_registers_;
   uint32_t floating_point_registers_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
     if (HasTryCatch()) {
       outer_graph->SetHasTryCatch(true);
     }
+    if (HasSIMD()) {
+      outer_graph->SetHasSIMD(true);
+    }
 
     HInstruction* return_value = nullptr;
     if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_simd_(false),
         has_loops_(false),
         has_irreducible_loops_(false),
         debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasSIMD() const { return has_simd_; }
+  void SetHasSIMD(bool value) { has_simd_ = value; }
+
   bool HasLoops() const { return has_loops_; }
   void SetHasLoops(bool value) { has_loops_ = value; }
 
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // false positives.
   bool has_try_catch_;
 
+  // Flag whether SIMD instructions appear in the graph. If true, the
+  // code generators may have to be more careful spilling the wider
+  // contents of SIMD registers.
+  bool has_simd_;
+
   // Flag whether there are any loops in the graph. We can skip loop
   // optimization if it's false. It's only best effort to keep it up
   // to date in the presence of code elimination so there might be false
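`kSIMDStackSlot` follows the existing `Location` scheme: a small kind tag plus a payload that stores a biased stack index, so that negative indices survive the unsigned payload. A rough standalone sketch of that encoding (the constants here are illustrative, not ART's exact values):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative Location-style encoding: low bits hold the kind tag, the
// remaining bits hold a biased stack index so negative indices round-trip.
enum Kind : uintptr_t { kStackSlot = 1, kDoubleStackSlot = 2, kSIMDStackSlot = 3 };
constexpr uintptr_t kKindBits = 4;                        // illustrative tag width
constexpr intptr_t kStackIndexBias = intptr_t{1} << 10;   // illustrative bias

uintptr_t EncodeSIMDStackSlot(intptr_t stack_index) {
  uintptr_t payload = static_cast<uintptr_t>(stack_index + kStackIndexBias);
  uintptr_t value = (payload << kKindBits) | kSIMDStackSlot;
  // Ensure that sign is preserved, as the DCHECK in Location::SIMDStackSlot does.
  assert(static_cast<intptr_t>(value >> kKindBits) - kStackIndexBias == stack_index);
  return value;
}
```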
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 8a9c1ccaff..c6a0b6a0d2 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,11 +299,14 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
           // Currently, we spill unconditionnally the current method in the code generators.
           && !interval->GetDefinedBy()->IsCurrentMethod()) {
     // We spill eagerly, so move must be at definition.
-    InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->ToLocation(),
-                    interval->NeedsTwoSpillSlots()
-                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+    Location loc;
+    switch (interval->NumberOfSpillSlotsNeeded()) {
+      case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+      case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+      case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break;
+      default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+    }
+    InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
   }
   UsePosition* use = current->GetFirstUse();
   EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
@@ -459,9 +462,12 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
       location_source = defined_by->GetLocations()->Out();
     } else {
       DCHECK(defined_by->IsCurrentMethod());
-      location_source = parent->NeedsTwoSpillSlots()
-          ? Location::DoubleStackSlot(parent->GetSpillSlot())
-          : Location::StackSlot(parent->GetSpillSlot());
+      switch (parent->NumberOfSpillSlotsNeeded()) {
+        case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+        case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+        case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break;
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+      }
     }
   } else {
     DCHECK(source != nullptr);
@@ -492,7 +498,8 @@ static bool IsValidDestination(Location destination) {
       || destination.IsFpuRegister()
       || destination.IsFpuRegisterPair()
       || destination.IsStackSlot()
-      || destination.IsDoubleStackSlot();
+      || destination.IsDoubleStackSlot()
+      || destination.IsSIMDStackSlot();
 }
 
 void RegisterAllocationResolver::AddMove(HParallelMove* move,
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
     } else {
       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
-      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
     }
   }
 }
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
     bool is_interval_beginning;
     size_t position;
     std::tie(position, is_interval_beginning, parent_interval) = *it;
-
-    bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+    size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
 
     if (is_interval_beginning) {
       DCHECK(!parent_interval->HasSpillSlot());
       DCHECK_EQ(position, parent_interval->GetStart());
 
-      // Find a free stack slot.
+      // Find first available free stack slot(s).
       size_t slot = 0;
-      for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
-        // Skip taken slots.
+      for (; ; ++slot) {
+        bool found = true;
+        for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+          if (taken.IsBitSet(s)) {
+            found = false;
+            break;  // failure
+          }
+        }
+        if (found) {
+          break;  // success
+        }
       }
+
       parent_interval->SetSpillSlot(slot);
 
-      *num_stack_slots_used = std::max(*num_stack_slots_used,
-                                       needs_two_slots ? slot + 1 : slot + 2);
-      if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+      *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+      if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
         // The parallel move resolver requires that there be an even number of spill slots
         // allocated for pair value types.
         ++(*num_stack_slots_used);
       }
 
-      taken.SetBit(slot);
-      if (needs_two_slots) {
-        taken.SetBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        taken.SetBit(s);
       }
     } else {
       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
       DCHECK(parent_interval->HasSpillSlot());
 
-      // Free up the stack slot used by this interval.
+      // Free up the stack slot(s) used by this interval.
       size_t slot = parent_interval->GetSpillSlot();
-      DCHECK(taken.IsBitSet(slot));
-      DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
-      taken.ClearBit(slot);
-      if (needs_two_slots) {
-        taken.ClearBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        DCHECK(taken.IsBitSet(s));
+        taken.ClearBit(s);
       }
     }
   }
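Both allocators now perform the same first-fit scan for N contiguous free slots instead of special-casing one- and two-slot values. A compact sketch of that search over a taken-bit vector, with `std::vector<bool>` standing in for ART's arena bit vector:

```cpp
#include <cstddef>
#include <vector>

// First-fit search for `needed` contiguous free slots. Slots past the end of
// the vector count as free, so the conceptual stack area grows implicitly.
size_t FindContiguousFreeSlots(const std::vector<bool>& taken, size_t needed) {
  for (size_t slot = 0; ; ++slot) {
    bool found = true;
    for (size_t s = slot; s < slot + needed; ++s) {
      if (s < taken.size() && taken[s]) {
        found = false;  // collision, try the next start position
        break;
      }
    }
    if (found) {
      return slot;  // first gap wide enough for all `needed` slots
    }
  }
}
```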
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 6354e76ec8..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
       LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
   }
 
-  // Find an available spill slot.
+  // Find first available spill slots.
+  size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
   size_t slot = 0;
   for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()) {
-      if (!parent->NeedsTwoSpillSlots()) {
-        // One spill slot is sufficient.
-        break;
-      }
-      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
-        // Two spill slots are available.
+    bool found = true;
+    for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+      if ((*spill_slots)[s] > parent->GetStart()) {
+        found = false;  // failure
         break;
       }
     }
+    if (found) {
+      break;  // success
+    }
   }
 
+  // Need new spill slots?
+  size_t upper = slot + number_of_spill_slots_needed;
+  if (upper > spill_slots->size()) {
+    spill_slots->resize(upper);
+  }
+
+  // Set slots to end.
   size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
+  for (size_t s = slot; s < upper; s++) {
+    (*spill_slots)[s] = end;
   }
 
   // Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
     // TODO: Reuse spill slots when intervals of phis from different catch
     // blocks do not overlap.
     interval->SetSpillSlot(catch_phi_spill_slots_);
-    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+    catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
   }
 }
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 7bd38c7a8c..eedaf6e67e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -259,7 +259,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr) {
         if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
           desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -271,7 +271,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
-      string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr &&
           runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
           !codegen_->GetCompilerOptions().GetCompilePic()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..36ee5a903a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,10 @@ bool LiveInterval::SameRegisterKind(Location other) const {
   }
 }
 
-bool LiveInterval::NeedsTwoSpillSlots() const {
-  return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+  // TODO: detect vector operation.
+  // Return number of needed spill slots based on type.
+  return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
 }
 
 Location LiveInterval::ToLocation() const {
@@ -494,10 +496,11 @@ Location LiveInterval::ToLocation() const {
     if (defined_by->IsConstant()) {
       return defined_by->GetLocations()->Out();
     } else if (GetParent()->HasSpillSlot()) {
-      if (NeedsTwoSpillSlots()) {
-        return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
-      } else {
-        return Location::StackSlot(GetParent()->GetSpillSlot());
+      switch (NumberOfSpillSlotsNeeded()) {
+        case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+        case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+        case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot());
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
       }
     } else {
       return Location();
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 340d0ccefe..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
   // Returns kNoRegister otherwise.
   int FindHintAtDefinition() const;
 
-  // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
-  // slots for spilling.
-  bool NeedsTwoSpillSlots() const;
+  // Returns the number of required spilling slots (measured as a multiple of the
+  // Dex virtual register size `kVRegSize`).
+  size_t NumberOfSpillSlotsNeeded() const;
 
   bool IsFloatingPoint() const {
     return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
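Taken together, the interval's slot count now drives both slot allocation and the choice of `Location` kind. A condensed sketch of the type-to-slot-count mapping; the `kSIMD` case is hypothetical here, anticipating the TODO in the diff about detecting vector operations:

```cpp
#include <cstddef>

enum class VRegType { kInt, kFloat, kLong, kDouble, kSIMD /* hypothetical */ };

// kVRegSize-multiples needed to spill a value of the given type; a 128-bit
// SIMD value would take 4 slots, matching the Location::SIMDStackSlot users.
size_t NumberOfSpillSlotsNeeded(VRegType type) {
  switch (type) {
    case VRegType::kLong:
    case VRegType::kDouble: return 2;
    case VRegType::kSIMD:   return 4;  // assumption: see the TODO in the diff
    default:                return 1;
  }
}
```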