summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/image_writer.cc201
-rw-r--r--compiler/image_writer.h12
-rw-r--r--compiler/oat_writer.cc4
-rw-r--r--compiler/optimizing/code_generator_x86.cc30
-rw-r--r--compiler/optimizing/code_generator_x86.h5
-rw-r--r--compiler/optimizing/code_generator_x86_64.cc36
-rw-r--r--compiler/optimizing/code_generator_x86_64.h4
-rw-r--r--compiler/optimizing/graph_visualizer.cc6
-rw-r--r--compiler/optimizing/intrinsics_mips.cc36
-rw-r--r--compiler/optimizing/intrinsics_mips64.cc36
-rw-r--r--compiler/optimizing/locations.h22
-rw-r--r--compiler/optimizing/nodes.cc3
-rw-r--r--compiler/optimizing/nodes.h9
-rw-r--r--compiler/optimizing/register_allocation_resolver.cc25
-rw-r--r--compiler/optimizing/register_allocator_graph_color.cc41
-rw-r--r--compiler/optimizing/register_allocator_linear_scan.cc41
-rw-r--r--compiler/optimizing/sharpening.cc4
-rw-r--r--compiler/optimizing/ssa_liveness_analysis.cc15
-rw-r--r--compiler/optimizing/ssa_liveness_analysis.h6
19 files changed, 350 insertions, 186 deletions
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index aefdb548ff..d156644484 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -714,7 +714,8 @@ void ImageWriter::ComputeLazyFieldsForImageClasses() {
class_linker->VisitClassesWithoutClassesLock(&visitor);
}
-static bool IsBootClassLoaderClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+static bool IsBootClassLoaderClass(ObjPtr<mirror::Class> klass)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
return klass->GetClassLoader() == nullptr;
}
@@ -722,33 +723,33 @@ bool ImageWriter::IsBootClassLoaderNonImageClass(mirror::Class* klass) {
return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
}
-bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(ObjPtr<mirror::Class> klass) {
bool early_exit = false;
std::unordered_set<mirror::Class*> visited;
return PruneAppImageClassInternal(klass, &early_exit, &visited);
}
bool ImageWriter::PruneAppImageClassInternal(
- mirror::Class* klass,
+ ObjPtr<mirror::Class> klass,
bool* early_exit,
std::unordered_set<mirror::Class*>* visited) {
DCHECK(early_exit != nullptr);
DCHECK(visited != nullptr);
DCHECK(compile_app_image_);
- if (klass == nullptr || IsInBootImage(klass)) {
+ if (klass == nullptr || IsInBootImage(klass.Ptr())) {
return false;
}
- auto found = prune_class_memo_.find(klass);
+ auto found = prune_class_memo_.find(klass.Ptr());
if (found != prune_class_memo_.end()) {
// Already computed, return the found value.
return found->second;
}
// Circular dependencies, return false but do not store the result in the memoization table.
- if (visited->find(klass) != visited->end()) {
+ if (visited->find(klass.Ptr()) != visited->end()) {
*early_exit = true;
return false;
}
- visited->emplace(klass);
+ visited->emplace(klass.Ptr());
bool result = IsBootClassLoaderClass(klass);
std::string temp;
// Prune if not an image class, this handles any broken sets of image classes such as having a
@@ -812,20 +813,20 @@ bool ImageWriter::PruneAppImageClassInternal(
dex_file_oat_index_map_.find(dex_cache->GetDexFile()) == dex_file_oat_index_map_.end();
}
// Erase the element we stored earlier since we are exiting the function.
- auto it = visited->find(klass);
+ auto it = visited->find(klass.Ptr());
DCHECK(it != visited->end());
visited->erase(it);
// Only store result if it is true or none of the calls early exited due to circular
// dependencies. If visited is empty then we are the root caller, in this case the cycle was in
// a child call and we can remember the result.
if (result == true || !my_early_exit || visited->empty()) {
- prune_class_memo_[klass] = result;
+ prune_class_memo_[klass.Ptr()] = result;
}
*early_exit |= my_early_exit;
return result;
}
-bool ImageWriter::KeepClass(Class* klass) {
+bool ImageWriter::KeepClass(ObjPtr<mirror::Class> klass) {
if (klass == nullptr) {
return false;
}
@@ -896,15 +897,27 @@ class ImageWriter::PruneClassLoaderClassesVisitor : public ClassLoaderVisitor {
Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(class_loader);
class_table->Visit(classes_visitor);
removed_class_count_ += classes_visitor.Prune();
+
+ // Record app image class loader. The fake boot class loader should not get registered
+ // and we should end up with only one class loader for an app and none for boot image.
+ if (class_loader != nullptr && class_table != nullptr) {
+ DCHECK(class_loader_ == nullptr);
+ class_loader_ = class_loader;
+ }
}
size_t GetRemovedClassCount() const {
return removed_class_count_;
}
+ ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) {
+ return class_loader_;
+ }
+
private:
ImageWriter* const image_writer_;
size_t removed_class_count_;
+ ObjPtr<mirror::ClassLoader> class_loader_;
};
void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
@@ -913,70 +926,150 @@ void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
Runtime::Current()->GetClassLinker()->VisitClassLoaders(visitor);
}
+void ImageWriter::PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+ ObjPtr<mirror::ClassLoader> class_loader) {
+ // To ensure deterministic contents of the hash-based arrays, each slot shall contain
+ // the candidate with the lowest index. As we're processing entries in increasing index
+ // order, this means trying to look up the entry for the current index if the slot is
+ // empty or if it contains a higher index.
+
+ Runtime* runtime = Runtime::Current();
+ ClassLinker* class_linker = runtime->GetClassLinker();
+ ArtMethod* resolution_method = runtime->GetResolutionMethod();
+ const DexFile& dex_file = *dex_cache->GetDexFile();
+ // Prune methods.
+ ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
+ for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
+ ArtMethod* method =
+ mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
+ DCHECK(method != nullptr) << "Expected resolution method instead of null method";
+ mirror::Class* declaring_class = method->GetDeclaringClass();
+ // Copied methods may be held live by a class which was not an image class but have a
+ // declaring class which is an image class. Set it to the resolution method to be safe and
+ // prevent dangling pointers.
+ if (method->IsCopied() || !KeepClass(declaring_class)) {
+ mirror::DexCache::SetElementPtrSize(resolved_methods,
+ i,
+ resolution_method,
+ target_ptr_size_);
+ } else if (kIsDebugBuild) {
+ // Check that the class is still in the classes table.
+ ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+ CHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
+ << Class::PrettyClass(declaring_class) << " not in class linker table";
+ }
+ }
+ // Prune fields and make the contents of the field array deterministic.
+ mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
+ dex::TypeIndex last_class_idx; // Initialized to invalid index.
+ ObjPtr<mirror::Class> last_class = nullptr;
+ for (size_t i = 0, end = dex_file.NumFieldIds(); i < end; ++i) {
+ uint32_t slot_idx = dex_cache->FieldSlotIndex(i);
+ auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, slot_idx, target_ptr_size_);
+ uint32_t stored_index = pair.index;
+ ArtField* field = pair.object;
+ if (field != nullptr && i > stored_index) {
+ continue; // Already checked.
+ }
+ // Check if the referenced class is in the image. Note that we want to check the referenced
+ // class rather than the declaring class to preserve the semantics, i.e. using a FieldId
+ // results in resolving the referenced class and that can for example throw OOME.
+ const DexFile::FieldId& field_id = dex_file.GetFieldId(i);
+ if (field_id.class_idx_ != last_class_idx) {
+ last_class_idx = field_id.class_idx_;
+ last_class = class_linker->LookupResolvedType(
+ dex_file, last_class_idx, dex_cache, class_loader);
+ if (last_class != nullptr && !KeepClass(last_class)) {
+ last_class = nullptr;
+ }
+ }
+ if (field == nullptr || i < stored_index) {
+ if (last_class != nullptr) {
+ const char* name = dex_file.StringDataByIdx(field_id.name_idx_);
+ const char* type = dex_file.StringByTypeIdx(field_id.type_idx_);
+ field = mirror::Class::FindField(Thread::Current(), last_class, name, type);
+ if (field != nullptr) {
+ // If the referenced class is in the image, the defining class must also be there.
+ DCHECK(KeepClass(field->GetDeclaringClass()));
+ dex_cache->SetResolvedField(i, field, target_ptr_size_);
+ }
+ }
+ } else {
+ DCHECK_EQ(i, stored_index);
+ if (last_class == nullptr) {
+ dex_cache->ClearResolvedField(stored_index, target_ptr_size_);
+ }
+ }
+ }
+ // Prune types and make the contents of the type array deterministic.
+ // This is done after fields and methods as their lookup can touch the types array.
+ for (size_t i = 0, end = dex_cache->GetDexFile()->NumTypeIds(); i < end; ++i) {
+ dex::TypeIndex type_idx(i);
+ uint32_t slot_idx = dex_cache->TypeSlotIndex(type_idx);
+ mirror::TypeDexCachePair pair =
+ dex_cache->GetResolvedTypes()[slot_idx].load(std::memory_order_relaxed);
+ uint32_t stored_index = pair.index;
+ ObjPtr<mirror::Class> klass = pair.object.Read();
+ if (klass == nullptr || i < stored_index) {
+ klass = class_linker->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader);
+ if (klass != nullptr) {
+ DCHECK_EQ(dex_cache->GetResolvedType(type_idx), klass);
+ stored_index = i; // For correct clearing below if not keeping the `klass`.
+ }
+ } else if (i == stored_index && !KeepClass(klass)) {
+ dex_cache->ClearResolvedType(dex::TypeIndex(stored_index));
+ }
+ }
+ // Strings do not need pruning, but the contents of the string array must be deterministic.
+ for (size_t i = 0, end = dex_cache->GetDexFile()->NumStringIds(); i < end; ++i) {
+ dex::StringIndex string_idx(i);
+ uint32_t slot_idx = dex_cache->StringSlotIndex(string_idx);
+ mirror::StringDexCachePair pair =
+ dex_cache->GetStrings()[slot_idx].load(std::memory_order_relaxed);
+ uint32_t stored_index = pair.index;
+ ObjPtr<mirror::String> string = pair.object.Read();
+ if (string == nullptr || i < stored_index) {
+ string = class_linker->LookupString(dex_file, string_idx, dex_cache);
+ DCHECK(string == nullptr || dex_cache->GetResolvedString(string_idx) == string);
+ }
+ }
+}
+
void ImageWriter::PruneNonImageClasses() {
Runtime* runtime = Runtime::Current();
ClassLinker* class_linker = runtime->GetClassLinker();
Thread* self = Thread::Current();
+ ScopedAssertNoThreadSuspension sa(__FUNCTION__);
// Clear class table strong roots so that dex caches can get pruned. We require pruning the class
// path dex caches.
class_linker->ClearClassTableStrongRoots();
// Remove the undesired classes from the class roots.
+ ObjPtr<mirror::ClassLoader> class_loader;
{
PruneClassLoaderClassesVisitor class_loader_visitor(this);
VisitClassLoaders(&class_loader_visitor);
VLOG(compiler) << "Pruned " << class_loader_visitor.GetRemovedClassCount() << " classes";
+ class_loader = class_loader_visitor.GetClassLoader();
+ DCHECK_EQ(class_loader != nullptr, compile_app_image_);
}
// Clear references to removed classes from the DexCaches.
- ArtMethod* resolution_method = runtime->GetResolutionMethod();
-
- ScopedAssertNoThreadSuspension sa(__FUNCTION__);
- ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); // For ClassInClassTable
- ReaderMutexLock mu2(self, *Locks::dex_lock_);
- for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
- if (self->IsJWeakCleared(data.weak_root)) {
- continue;
- }
- ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
- for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
- mirror::TypeDexCachePair pair =
- dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed);
- mirror::Class* klass = pair.object.Read();
- if (klass != nullptr && !KeepClass(klass)) {
- dex_cache->ClearResolvedType(dex::TypeIndex(pair.index));
- }
- }
- ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
- for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
- ArtMethod* method =
- mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
- DCHECK(method != nullptr) << "Expected resolution method instead of null method";
- mirror::Class* declaring_class = method->GetDeclaringClass();
- // Copied methods may be held live by a class which was not an image class but have a
- // declaring class which is an image class. Set it to the resolution method to be safe and
- // prevent dangling pointers.
- if (method->IsCopied() || !KeepClass(declaring_class)) {
- mirror::DexCache::SetElementPtrSize(resolved_methods,
- i,
- resolution_method,
- target_ptr_size_);
- } else {
- // Check that the class is still in the classes table.
- DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
- << Class::PrettyClass(declaring_class) << " not in class linker table";
- }
- }
- mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
- for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
- auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, i, target_ptr_size_);
- ArtField* field = pair.object;
- if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) {
- dex_cache->ClearResolvedField(pair.index, target_ptr_size_);
+ std::vector<ObjPtr<mirror::DexCache>> dex_caches;
+ {
+ ReaderMutexLock mu2(self, *Locks::dex_lock_);
+ dex_caches.reserve(class_linker->GetDexCachesData().size());
+ for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+ if (self->IsJWeakCleared(data.weak_root)) {
+ continue;
}
+ dex_caches.push_back(self->DecodeJObject(data.weak_root)->AsDexCache());
}
}
+ for (ObjPtr<mirror::DexCache> dex_cache : dex_caches) {
+ PruneAndPreloadDexCache(dex_cache, class_loader);
+ }
// Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
class_linker->DropFindArrayClassCache();
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index bdc7146632..16aff61dab 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -376,7 +376,7 @@ class ImageWriter FINAL {
}
// Returns true if the class was in the original requested image classes list.
- bool KeepClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+ bool KeepClass(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
// Debug aid that list of requested image classes.
void DumpImageClasses();
@@ -391,6 +391,12 @@ class ImageWriter FINAL {
// Remove unwanted classes from various roots.
void PruneNonImageClasses() REQUIRES_SHARED(Locks::mutator_lock_);
+ // Remove unwanted classes from the DexCache roots and preload deterministic DexCache contents.
+ void PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+ ObjPtr<mirror::ClassLoader> class_loader)
+ REQUIRES_SHARED(Locks::mutator_lock_)
+ REQUIRES(!Locks::classlinker_classes_lock_);
+
// Verify unwanted classes removed.
void CheckNonImageClassesRemoved() REQUIRES_SHARED(Locks::mutator_lock_);
static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
@@ -473,11 +479,11 @@ class ImageWriter FINAL {
// we also cannot have any classes which refer to these boot class loader non image classes.
// PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
// driver.
- bool PruneAppImageClass(mirror::Class* klass)
+ bool PruneAppImageClass(ObjPtr<mirror::Class> klass)
REQUIRES_SHARED(Locks::mutator_lock_);
// early_exit is true if we had a cyclic dependency anywhere down the chain.
- bool PruneAppImageClassInternal(mirror::Class* klass,
+ bool PruneAppImageClassInternal(ObjPtr<mirror::Class> klass,
bool* early_exit,
std::unordered_set<mirror::Class*>* visited)
REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 8e25aa3421..105db1d2d0 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1440,12 +1440,10 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
ScopedObjectAccessUnchecked soa(Thread::Current());
- StackHandleScope<1> hs(soa.Self());
ClassLinker* linker = Runtime::Current()->GetClassLinker();
- Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile())));
mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
patch.TargetStringIndex(),
- dex_cache);
+ GetDexCache(patch.TargetStringDexFile()));
DCHECK(string != nullptr);
DCHECK(writer_->HasBootImage() ||
Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b50619a66..4db4796985 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
}
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ if (GetGraph()->HasSIMD()) {
+ __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ }
return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ if (GetGraph()->HasSIMD()) {
+ __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ }
return GetFloatingPointSpillSlotSize();
}
@@ -5699,7 +5710,11 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5802,9 +5817,11 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
} else if (destination.IsStackSlot()) {
__ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot());
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
}
} else if (source.IsStackSlot()) {
if (destination.IsRegister()) {
@@ -5826,6 +5843,9 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
DCHECK(destination.IsDoubleStackSlot()) << destination;
MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 65ee383b54..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- // 8 bytes == 2 words for each spill.
- return 2 * kX86WordSize;
+ return GetGraph()->HasSIMD()
+ ? 4 * kX86WordSize // 16 bytes == 4 words for each spill
+ : 2 * kX86WordSize; // 8 bytes == 2 words for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 08f1adfcff..2ffc398287 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ }
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ }
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5152,7 +5163,11 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In suspend check slow path, usually there are no caller-save registers at all.
+ // If SIMD instructions are present, however, we force spilling all live SIMD
+ // registers in full width (since the runtime only saves/restores lower part).
+ locations->SetCustomSlowPathCallerSaves(
+ GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5241,6 +5256,10 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
__ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
}
+ } else if (source.IsSIMDStackSlot()) {
+ DCHECK(destination.IsFpuRegister());
+ __ movups(destination.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), source.GetStackIndex()));
} else if (source.IsConstant()) {
HConstant* constant = source.GetConstant();
if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5291,10 +5310,13 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) {
} else if (destination.IsStackSlot()) {
__ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
- } else {
- DCHECK(destination.IsDoubleStackSlot()) << destination;
+ } else if (destination.IsDoubleStackSlot()) {
__ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
source.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(destination.IsSIMDStackSlot());
+ __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
+ source.AsFpuRegister<XmmRegister>());
}
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 376c3ce381..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- return kX86_64WordSize;
+ return GetGraph()->HasSIMD()
+ ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill
+ : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 words for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2bf5c53e17..0dfae11465 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -322,9 +322,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
codegen_.DumpCoreRegister(stream, location.high());
} else if (location.IsUnallocated()) {
stream << "unallocated";
- } else {
- DCHECK(location.IsDoubleStackSlot());
+ } else if (location.IsDoubleStackSlot()) {
stream << "2x" << location.GetStackIndex() << "(sp)";
+ } else {
+ DCHECK(location.IsSIMDStackSlot());
+ stream << "4x" << location.GetStackIndex() << "(sp)";
}
}
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index ba006edfa2..bf85b1989e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2559,7 +2559,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) {
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -2567,17 +2567,9 @@ void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke)
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // We will call memcpy() to do the actual work. Allocate the temporary
- // registers to use the correct input registers, and output register.
- // memcpy() uses the normal MIPS calling convention.
- InvokeRuntimeCallingConvention calling_convention;
-
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
- Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
- locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -2596,16 +2588,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register dstBegin = locations->InAt(4).AsRegister<Register>();
Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
- DCHECK_EQ(dstPtr, A0);
Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
- DCHECK_EQ(srcPtr, A1);
Register numChrs = locations->GetTemp(2).AsRegister<Register>();
- DCHECK_EQ(numChrs, A2);
-
- Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
- DCHECK_EQ(dstReturn, V0);
MipsLabel done;
+ MipsLabel loop;
// Location of data in char array buffer.
const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -2634,7 +2621,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
__ Sll(TMP, TMP, 31);
- // If string is uncompressed, use memcpy() path.
+ // If string is uncompressed, use uncompressed path.
__ Bnez(TMP, &uncompressed_copy);
// Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2660,10 +2647,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Addu(srcPtr, srcPtr, AT);
}
- // Calculate number of bytes to copy from number of characters.
- __ Sll(numChrs, numChrs, char_shift);
-
- codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+ __ Bind(&loop);
+ __ Lh(AT, srcPtr, 0);
+ __ Addiu(numChrs, numChrs, -1);
+ __ Addiu(srcPtr, srcPtr, char_size);
+ __ Sh(AT, dstPtr, 0);
+ __ Addiu(dstPtr, dstPtr, char_size);
+ __ Bnez(numChrs, &loop);
__ Bind(&done);
}
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 21c5074a1c..1ee89cf127 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1895,7 +1895,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kCallOnMainOnly,
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
@@ -1903,17 +1903,9 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // We will call memcpy() to do the actual work. Allocate the temporary
- // registers to use the correct input registers, and output register.
- // memcpy() uses the normal MIPS calling conventions.
- InvokeRuntimeCallingConvention calling_convention;
-
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
- Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong);
- locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1932,16 +1924,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>();
GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>();
- DCHECK_EQ(dstPtr, A0);
GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>();
- DCHECK_EQ(srcPtr, A1);
GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>();
- DCHECK_EQ(numChrs, A2);
-
- GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>();
- DCHECK_EQ(dstReturn, V0);
Mips64Label done;
+ Mips64Label loop;
// Location of data in char array buffer.
const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -1965,7 +1952,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
__ Dext(TMP, TMP, 0, 1);
- // If string is uncompressed, use memcpy() path.
+ // If string is uncompressed, use uncompressed path.
__ Bnezc(TMP, &uncompressed_copy);
// Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -1986,10 +1973,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ Daddiu(srcPtr, srcObj, value_offset);
__ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
- // Calculate number of bytes to copy from number of characters.
- __ Dsll(numChrs, numChrs, char_shift);
-
- codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+ __ Bind(&loop);
+ __ Lh(AT, srcPtr, 0);
+ __ Daddiu(numChrs, numChrs, -1);
+ __ Daddiu(srcPtr, srcPtr, char_size);
+ __ Sh(AT, dstPtr, 0);
+ __ Daddiu(dstPtr, dstPtr, char_size);
+ __ Bnezc(numChrs, &loop);
__ Bind(&done);
}
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..6f0dbce2df 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -69,11 +69,13 @@ class Location : public ValueObject {
// We do not use the value 9 because it conflicts with kLocationConstantMask.
kDoNotUse9 = 9,
+ kSIMDStackSlot = 10, // 128bit stack slot. TODO: generalize with encoded #bytes?
+
// Unallocated location represents a location that is not fixed and can be
// allocated by a register allocator. Each unallocated location has
// a policy that specifies what kind of location is suitable. Payload
// contains register allocation policy.
- kUnallocated = 10,
+ kUnallocated = 11,
};
Location() : ValueObject(), value_(kInvalid) {
@@ -82,6 +84,7 @@ class Location : public ValueObject {
static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
+ static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -266,8 +269,20 @@ class Location : public ValueObject {
return GetKind() == kDoubleStackSlot;
}
+ static Location SIMDStackSlot(intptr_t stack_index) {
+ uintptr_t payload = EncodeStackIndex(stack_index);
+ Location loc(kSIMDStackSlot, payload);
+ // Ensure that sign is preserved.
+ DCHECK_EQ(loc.GetStackIndex(), stack_index);
+ return loc;
+ }
+
+ bool IsSIMDStackSlot() const {
+ return GetKind() == kSIMDStackSlot;
+ }
+
intptr_t GetStackIndex() const {
- DCHECK(IsStackSlot() || IsDoubleStackSlot());
+ DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot());
// Decode stack index manually to preserve sign.
return GetPayload() - kStackIndexBias;
}
@@ -315,6 +330,7 @@ class Location : public ValueObject {
case kRegister: return "R";
case kStackSlot: return "S";
case kDoubleStackSlot: return "DS";
+ case kSIMDStackSlot: return "SIMD";
case kUnallocated: return "U";
case kConstant: return "C";
case kFpuRegister: return "F";
@@ -417,6 +433,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
class RegisterSet : public ValueObject {
public:
static RegisterSet Empty() { return RegisterSet(); }
+ static RegisterSet AllFpu() { return RegisterSet(0, -1); }
void Add(Location loc) {
if (loc.IsRegister()) {
@@ -462,6 +479,7 @@ class RegisterSet : public ValueObject {
private:
RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+ RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
uint32_t core_registers_;
uint32_t floating_point_registers_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasSIMD()) {
+ outer_graph->SetHasSIMD(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
temporaries_vreg_slots_(0),
has_bounds_checks_(false),
has_try_catch_(false),
+ has_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ bool HasSIMD() const { return has_simd_; }
+ void SetHasSIMD(bool value) { has_simd_ = value; }
+
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// false positives.
bool has_try_catch_;
+ // Flag whether SIMD instructions appear in the graph. If true, the
+ // code generators may have to be more careful spilling the wider
+ // contents of SIMD registers.
+ bool has_simd_;
+
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false. It's only best effort to keep it up
// to date in the presence of code elimination so there might be false
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 8a9c1ccaff..c6a0b6a0d2 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,11 +299,14 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
// Currently, we spill unconditionnally the current method in the code generators.
&& !interval->GetDefinedBy()->IsCurrentMethod()) {
// We spill eagerly, so move must be at definition.
- InsertMoveAfter(interval->GetDefinedBy(),
- interval->ToLocation(),
- interval->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
- : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+ Location loc;
+ switch (interval->NumberOfSpillSlotsNeeded()) {
+ case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
+ InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
}
UsePosition* use = current->GetFirstUse();
EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
@@ -459,9 +462,12 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
location_source = defined_by->GetLocations()->Out();
} else {
DCHECK(defined_by->IsCurrentMethod());
- location_source = parent->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(parent->GetSpillSlot())
- : Location::StackSlot(parent->GetSpillSlot());
+ switch (parent->NumberOfSpillSlotsNeeded()) {
+ case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+ case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+ case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
}
} else {
DCHECK(source != nullptr);
@@ -492,7 +498,8 @@ static bool IsValidDestination(Location destination) {
|| destination.IsFpuRegister()
|| destination.IsFpuRegisterPair()
|| destination.IsStackSlot()
- || destination.IsDoubleStackSlot();
+ || destination.IsDoubleStackSlot()
+ || destination.IsSIMDStackSlot();
}
void RegisterAllocationResolver::AddMove(HParallelMove* move,
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
} else {
interval->SetSpillSlot(catch_phi_spill_slot_counter_);
- catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
}
}
}
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
bool is_interval_beginning;
size_t position;
std::tie(position, is_interval_beginning, parent_interval) = *it;
-
- bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+ size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
if (is_interval_beginning) {
DCHECK(!parent_interval->HasSpillSlot());
DCHECK_EQ(position, parent_interval->GetStart());
- // Find a free stack slot.
+ // Find first available free stack slot(s).
size_t slot = 0;
- for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
- // Skip taken slots.
+ for (; ; ++slot) {
+ bool found = true;
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ if (taken.IsBitSet(s)) {
+ found = false;
+ break; // failure
+ }
+ }
+ if (found) {
+ break; // success
+ }
}
+
parent_interval->SetSpillSlot(slot);
- *num_stack_slots_used = std::max(*num_stack_slots_used,
- needs_two_slots ? slot + 1 : slot + 2);
- if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+ if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
// The parallel move resolver requires that there be an even number of spill slots
// allocated for pair value types.
++(*num_stack_slots_used);
}
- taken.SetBit(slot);
- if (needs_two_slots) {
- taken.SetBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ taken.SetBit(s);
}
} else {
DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
DCHECK(parent_interval->HasSpillSlot());
- // Free up the stack slot used by this interval.
+ // Free up the stack slot(s) used by this interval.
size_t slot = parent_interval->GetSpillSlot();
- DCHECK(taken.IsBitSet(slot));
- DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
- taken.ClearBit(slot);
- if (needs_two_slots) {
- taken.ClearBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ DCHECK(taken.IsBitSet(s));
+ taken.ClearBit(s);
}
}
}
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 6354e76ec8..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
}
- // Find an available spill slot.
+ // Find first available spill slots.
+ size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
size_t slot = 0;
for (size_t e = spill_slots->size(); slot < e; ++slot) {
- if ((*spill_slots)[slot] <= parent->GetStart()) {
- if (!parent->NeedsTwoSpillSlots()) {
- // One spill slot is sufficient.
- break;
- }
- if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
- // Two spill slots are available.
+ bool found = true;
+ for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+ if ((*spill_slots)[s] > parent->GetStart()) {
+ found = false; // failure
break;
}
}
+ if (found) {
+ break; // success
+ }
}
+ // Need new spill slots?
+ size_t upper = slot + number_of_spill_slots_needed;
+ if (upper > spill_slots->size()) {
+ spill_slots->resize(upper);
+ }
+ // Set slots to end.
size_t end = interval->GetLastSibling()->GetEnd();
- if (parent->NeedsTwoSpillSlots()) {
- if (slot + 2u > spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->resize(slot + 2u, end);
- }
- (*spill_slots)[slot] = end;
- (*spill_slots)[slot + 1] = end;
- } else {
- if (slot == spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->push_back(end);
- } else {
- (*spill_slots)[slot] = end;
- }
+ for (size_t s = slot; s < upper; s++) {
+ (*spill_slots)[s] = end;
}
// Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
// TODO: Reuse spill slots when intervals of phis from different catch
// blocks do not overlap.
interval->SetSpillSlot(catch_phi_spill_slots_);
- catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
}
}
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 7bd38c7a8c..eedaf6e67e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -259,7 +259,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
} else if (runtime->UseJitCompilation()) {
// TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
// DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
- string = class_linker->LookupString(dex_file, string_index, dex_cache);
+ string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr) {
if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -271,7 +271,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
}
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
- string = class_linker->LookupString(dex_file, string_index, dex_cache);
+ string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
if (string != nullptr &&
runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
!codegen_->GetCompilerOptions().GetCompilePic()) {
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..36ee5a903a 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,10 @@ bool LiveInterval::SameRegisterKind(Location other) const {
}
}
-bool LiveInterval::NeedsTwoSpillSlots() const {
- return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+ // TODO: detect vector operation.
+ // Return number of needed spill slots based on type.
+ return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
}
Location LiveInterval::ToLocation() const {
@@ -494,10 +496,11 @@ Location LiveInterval::ToLocation() const {
if (defined_by->IsConstant()) {
return defined_by->GetLocations()->Out();
} else if (GetParent()->HasSpillSlot()) {
- if (NeedsTwoSpillSlots()) {
- return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
- } else {
- return Location::StackSlot(GetParent()->GetSpillSlot());
+ switch (NumberOfSpillSlotsNeeded()) {
+ case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+ case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+ case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot());
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
} else {
return Location();
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 340d0ccefe..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
// Returns kNoRegister otherwise.
int FindHintAtDefinition() const;
- // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
- // slots for spilling.
- bool NeedsTwoSpillSlots() const;
+ // Returns the number of required spilling slots (measured as a multiple of the
+ // Dex virtual register size `kVRegSize`).
+ size_t NumberOfSpillSlotsNeeded() const;
bool IsFloatingPoint() const {
return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;