Diffstat (limited to 'compiler')
-rw-r--r--  compiler/image_test.cc                                  91
-rw-r--r--  compiler/oat_writer.cc                                  98
-rw-r--r--  compiler/optimizing/code_generator_arm.cc               78
-rw-r--r--  compiler/optimizing/code_generator_arm.h                 2
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc          72
-rw-r--r--  compiler/optimizing/code_generator_x86.cc               22
-rw-r--r--  compiler/optimizing/code_generator_x86.h                 5
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc            26
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h              4
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc                   53
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc              87
-rw-r--r--  compiler/optimizing/locations.h                          2
-rw-r--r--  compiler/optimizing/nodes.cc                             3
-rw-r--r--  compiler/optimizing/nodes.h                              9
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc     20
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc   41
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc   41
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc            12
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h              6
19 files changed, 482 insertions, 190 deletions
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 89e8a678b1..7ee494a131 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -76,7 +76,7 @@ class ImageTest : public CommonCompilerTest {
void Compile(ImageHeader::StorageMode storage_mode,
CompilationHelper& out_helper,
const std::string& extra_dex = "",
- const std::string& image_class = "");
+ const std::initializer_list<std::string>& image_classes = {});
void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
CommonCompilerTest::SetUpRuntimeOptions(options);
@@ -90,6 +90,18 @@ class ImageTest : public CommonCompilerTest {
return new std::unordered_set<std::string>(image_classes_);
}
+ ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ PointerSize pointer_size = class_linker_->GetImagePointerSize();
+ for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+ if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+ origin->GetSignature() == m.GetSignature()) {
+ return &m;
+ }
+ }
+ return nullptr;
+ }
+
private:
std::unordered_set<std::string> image_classes_;
};
@@ -345,8 +357,8 @@ void CompilationHelper::Compile(CompilerDriver* driver,
void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
CompilationHelper& helper,
const std::string& extra_dex,
- const std::string& image_class) {
- if (!image_class.empty()) {
+ const std::initializer_list<std::string>& image_classes) {
+ for (const std::string& image_class : image_classes) {
image_classes_.insert(image_class);
}
CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
@@ -358,13 +370,15 @@ void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
}
helper.Compile(compiler_driver_.get(), storage_mode);
- if (!image_class.empty()) {
+ if (image_classes.begin() != image_classes.end()) {
// Make sure the class got initialized.
ScopedObjectAccess soa(Thread::Current());
ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
- mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
- EXPECT_TRUE(klass != nullptr);
- EXPECT_TRUE(klass->IsInitialized());
+ for (const std::string& image_class : image_classes) {
+ mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+ EXPECT_TRUE(klass != nullptr);
+ EXPECT_TRUE(klass->IsInitialized());
+ }
}
}
@@ -492,7 +506,7 @@ TEST_F(ImageTest, TestImageLayout) {
// Compile multi-image with ImageLayoutA being the last image.
{
CompilationHelper helper;
- Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", "LMyClass;");
+ Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", {"LMyClass;"});
image_sizes = helper.GetImageObjectSectionSizes();
}
TearDown();
@@ -501,7 +515,7 @@ TEST_F(ImageTest, TestImageLayout) {
// Compile multi-image with ImageLayoutB being the last image.
{
CompilationHelper helper;
- Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", "LMyClass;");
+ Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", {"LMyClass;"});
image_sizes_extra = helper.GetImageObjectSectionSizes();
}
// Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the
@@ -553,4 +567,63 @@ TEST_F(ImageTest, ImageHeaderIsValid) {
ASSERT_FALSE(image_header.IsValid());
}
+// Test that the pointer to quick code is the same in a default method
+// of an interface and in a copied method of a class which implements
+// the interface. This should hold only if the copied method and the
+// origin method are located in the same oat file.
+TEST_F(ImageTest, TestDefaultMethods) {
+ CompilationHelper helper;
+ Compile(ImageHeader::kStorageModeUncompressed,
+ helper,
+ "DefaultMethods",
+ {"LIface;", "LImpl;", "LIterableBase;"});
+
+ PointerSize pointer_size = class_linker_->GetImagePointerSize();
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+
+ // Test that the pointer to quick code is the same in the origin
+ // method and in the copied method from the same oat file.
+ mirror::Class* iface_klass = class_linker_->LookupClass(
+ self, "LIface;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, iface_klass);
+ ArtMethod* origin = iface_klass->FindDeclaredVirtualMethod(
+ "defaultMethod", "()V", pointer_size);
+ ASSERT_NE(nullptr, origin);
+ const void* code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ // The origin method should have a pointer to quick code.
+ ASSERT_NE(nullptr, code);
+ ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code));
+ mirror::Class* impl_klass = class_linker_->LookupClass(
+ self, "LImpl;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, impl_klass);
+ ArtMethod* copied = FindCopiedMethod(origin, impl_klass);
+ ASSERT_NE(nullptr, copied);
+ // The copied method should have a pointer to the same quick code as the origin method.
+ ASSERT_EQ(code, copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size));
+
+ // Test that the origin method has a pointer to quick code
+ // but the copied method has a pointer to the interpreter,
+ // because these methods are in different oat files.
+ mirror::Class* iterable_klass = class_linker_->LookupClass(
+ self, "Ljava/lang/Iterable;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, iterable_klass);
+ origin = iterable_klass->FindDeclaredVirtualMethod(
+ "forEach", "(Ljava/util/function/Consumer;)V", pointer_size);
+ ASSERT_NE(nullptr, origin);
+ code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ // The origin method should have a pointer to quick code.
+ ASSERT_NE(nullptr, code);
+ ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code));
+ mirror::Class* iterablebase_klass = class_linker_->LookupClass(
+ self, "LIterableBase;", ObjPtr<mirror::ClassLoader>());
+ ASSERT_NE(nullptr, iterablebase_klass);
+ copied = FindCopiedMethod(origin, iterablebase_klass);
+ ASSERT_NE(nullptr, copied);
+ code = copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+ // The copied method should have a pointer to the interpreter.
+ ASSERT_TRUE(class_linker_->IsQuickToInterpreterBridge(code));
+}
+
} // namespace art
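
As an aside on what the new test encodes, here is a minimal standalone sketch of the invariant (CopiedMethodSharesCode and same_oat_file are hypothetical names introduced here, not part of the patch; the accessors are the ones the test itself uses):

// Sketch: within one oat file a copied method shares the origin's quick code;
// across oat files the copied method must fall back to the interpreter bridge.
static bool CopiedMethodSharesCode(ClassLinker* linker,
                                   ArtMethod* origin,
                                   ArtMethod* copied,
                                   PointerSize ptr_size,
                                   bool same_oat_file) {
  const void* origin_code = origin->GetEntryPointFromQuickCompiledCodePtrSize(ptr_size);
  const void* copied_code = copied->GetEntryPointFromQuickCompiledCodePtrSize(ptr_size);
  return same_oat_file
      ? (origin_code == copied_code)
      : linker->IsQuickToInterpreterBridge(copied_code);
}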
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 5406ae72d1..8e25aa3421 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -1034,18 +1034,63 @@ class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor {
class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
public:
- InitImageMethodVisitor(OatWriter* writer, size_t offset)
+ InitImageMethodVisitor(OatWriter* writer,
+ size_t offset,
+ const std::vector<const DexFile*>* dex_files)
: OatDexMethodVisitor(writer, offset),
- pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) {
+ pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())),
+ dex_files_(dex_files),
+ class_linker_(Runtime::Current()->GetClassLinker()) {
+ }
+
+ // Handle copied methods here. Copy the pointer to quick code from
+ // the origin method to a copied method only if they are in the
+ // same oat file. If the origin and the copied method are in
+ // different oat files, don't touch the copied method; references
+ // to other oat files are not supported yet.
+ bool StartClass(const DexFile* dex_file, size_t class_def_index)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ OatDexMethodVisitor::StartClass(dex_file, class_def_index);
+ // Skip classes that are not in the image.
+ if (!IsImageClass()) {
+ return true;
+ }
+ ScopedObjectAccessUnchecked soa(Thread::Current());
+ StackHandleScope<1> hs(soa.Self());
+ Handle<mirror::DexCache> dex_cache = hs.NewHandle(
+ class_linker_->FindDexCache(Thread::Current(), *dex_file));
+ const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+ mirror::Class* klass = dex_cache->GetResolvedType(class_def.class_idx_);
+ if (klass != nullptr) {
+ for (ArtMethod& method : klass->GetCopiedMethods(pointer_size_)) {
+ // Find the origin method. The declaring class and dex_method_idx
+ // in the copied method are the same as in the origin method.
+ mirror::Class* declaring_class = method.GetDeclaringClass();
+ ArtMethod* origin = declaring_class->FindDeclaredVirtualMethod(
+ declaring_class->GetDexCache(),
+ method.GetDexMethodIndex(),
+ pointer_size_);
+ CHECK(origin != nullptr);
+ if (IsInOatFile(&declaring_class->GetDexFile())) {
+ const void* code_ptr =
+ origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+ if (code_ptr == nullptr) {
+ methods_to_process_.push_back(std::make_pair(&method, origin));
+ } else {
+ method.SetEntryPointFromQuickCompiledCodePtrSize(
+ code_ptr, pointer_size_);
+ }
+ }
+ }
+ }
+ return true;
}
bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
REQUIRES_SHARED(Locks::mutator_lock_) {
- const DexFile::TypeId& type_id =
- dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
- const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
// Skip methods that are not in the image.
- if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) {
+ if (!IsImageClass()) {
return true;
}
@@ -1059,17 +1104,16 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
++method_offsets_index_;
}
- ClassLinker* linker = Runtime::Current()->GetClassLinker();
// Unchecked as we hold mutator_lock_ on entry.
ScopedObjectAccessUnchecked soa(Thread::Current());
StackHandleScope<1> hs(soa.Self());
- Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
+ Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker_->FindDexCache(
Thread::Current(), *dex_file_)));
ArtMethod* method;
if (writer_->HasBootImage()) {
const InvokeType invoke_type = it.GetMethodInvokeType(
dex_file_->GetClassDef(class_def_index_));
- method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+ method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
*dex_file_,
it.GetMemberIndex(),
dex_cache,
@@ -1089,7 +1133,8 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
// Should already have been resolved by the compiler, just peek into the dex cache.
// It may not be resolved if the class failed to verify, in this case, don't set the
// entrypoint. This is not fatal since the dex cache will contain a resolution method.
- method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize());
+ method = dex_cache->GetResolvedMethod(it.GetMemberIndex(),
+ class_linker_->GetImagePointerSize());
}
if (method != nullptr &&
compiled_method != nullptr &&
@@ -1101,8 +1146,38 @@ class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
return true;
}
+ // Check whether the current class is an image class.
+ bool IsImageClass() {
+ const DexFile::TypeId& type_id =
+ dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
+ const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
+ return writer_->GetCompilerDriver()->IsImageClass(class_descriptor);
+ }
+
+ // Check whether the specified dex file is in the compiled oat file.
+ bool IsInOatFile(const DexFile* dex_file) {
+ return ContainsElement(*dex_files_, dex_file);
+ }
+
+ // Assign pointers to quick code for the copied methods that were
+ // not handled in StartClass().
+ void Postprocess() {
+ for (std::pair<ArtMethod*, ArtMethod*>& p : methods_to_process_) {
+ ArtMethod* method = p.first;
+ ArtMethod* origin = p.second;
+ const void* code_ptr =
+ origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+ if (code_ptr != nullptr) {
+ method->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size_);
+ }
+ }
+ }
+
protected:
const PointerSize pointer_size_;
+ const std::vector<const DexFile*>* dex_files_;
+ ClassLinker* const class_linker_;
+ std::vector<std::pair<ArtMethod*, ArtMethod*>> methods_to_process_;
};
class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
@@ -1744,8 +1819,9 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) {
offset = code_visitor.GetOffset();
if (HasImage()) {
- InitImageMethodVisitor image_visitor(this, offset);
+ InitImageMethodVisitor image_visitor(this, offset, dex_files_);
success = VisitDexMethods(&image_visitor);
+ image_visitor.Postprocess();
DCHECK(success);
offset = image_visitor.GetOffset();
}
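
The visitor above resolves copied-method entry points in two passes, because the origin method may not have received its entry point yet when the copied method's class is visited. A condensed sketch of that control flow (FixUpCopiedMethod is a hypothetical name; the types and accessors are the ones from the patch):

// Pass 1 (StartClass): copy the code pointer if the origin already has one,
// otherwise defer the pair. Pass 2 (Postprocess) runs after all methods have
// been visited, so a deferred origin has its final entry point by then.
static void FixUpCopiedMethod(ArtMethod* copied,
                              ArtMethod* origin,
                              PointerSize pointer_size,
                              std::vector<std::pair<ArtMethod*, ArtMethod*>>* deferred) {
  const void* code_ptr = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
  if (code_ptr == nullptr) {
    deferred->push_back(std::make_pair(copied, origin));  // retry in Postprocess()
  } else {
    copied->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size);
  }
}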
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index e34f116b75..caea250ab6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1875,6 +1875,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src)
Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) {
DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+ DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
const HBasicBlock* const block = instruction->GetBlock();
const HLoopInformation* const info = block->GetLoopInformation();
@@ -2901,16 +2902,20 @@ void InstructionCodeGeneratorARM::HandleCondition(HCondition* cond) {
// Convert the jumps into the result.
Label done_label;
+ Label* final_label = codegen_->GetFinalLabel(cond, &done_label);
// False case: result = 0.
__ Bind(&false_label);
__ LoadImmediate(out, 0);
- __ b(&done_label);
+ __ b(final_label);
// True case: result = 1.
__ Bind(&true_label);
__ LoadImmediate(out, 1);
- __ Bind(&done_label);
+
+ if (done_label.IsLinked()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -4441,7 +4446,8 @@ void InstructionCodeGeneratorARM::HandleIntegerRotate(LocationSummary* locations
// rotates by swapping input regs (effectively rotating by the first 32-bits of
// a larger rotation) or flipping direction (thus treating larger right/left
// rotations as sub-word sized rotations in the other direction) as appropriate.
-void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) {
+ LocationSummary* locations = ror->GetLocations();
Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
Location rhs = locations->InAt(1);
@@ -4474,6 +4480,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
Register shift_left = locations->GetTemp(1).AsRegister<Register>();
Label end;
Label shift_by_32_plus_shift_right;
+ Label* final_label = codegen_->GetFinalLabel(ror, &end);
__ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
__ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
@@ -4488,7 +4495,7 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
__ Lsl(out_reg_lo, in_reg_lo, shift_left);
__ Lsr(shift_left, in_reg_hi, shift_right);
__ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
- __ b(&end);
+ __ b(final_label);
__ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
// out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4500,7 +4507,9 @@ void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
__ Lsl(shift_right, in_reg_hi, shift_left);
__ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
}
}
@@ -4540,7 +4549,7 @@ void InstructionCodeGeneratorARM::VisitRor(HRor* ror) {
break;
}
case Primitive::kPrimLong: {
- HandleLongRotate(locations);
+ HandleLongRotate(ror);
break;
}
default:
@@ -4919,6 +4928,7 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
Label less, greater, done;
+ Label* final_label = codegen_->GetFinalLabel(compare, &done);
Primitive::Type type = compare->InputAt(0)->GetType();
Condition less_cond;
switch (type) {
@@ -4958,17 +4968,19 @@ void InstructionCodeGeneratorARM::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ b(&done, EQ);
+ __ b(final_label, EQ);
__ b(&less, less_cond);
__ Bind(&greater);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
__ Bind(&less);
__ LoadImmediate(out, -1);
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
@@ -5746,6 +5758,7 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
if (maybe_compressed_char_at) {
Label uncompressed_load, done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -5754,13 +5767,15 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
out_loc.AsRegister<Register>(),
obj,
data_offset + const_index);
- __ b(&done);
+ __ b(final_label);
__ Bind(&uncompressed_load);
__ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
out_loc.AsRegister<Register>(),
obj,
data_offset + (const_index << 1));
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
} else {
uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
@@ -5784,17 +5799,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
}
if (maybe_compressed_char_at) {
Label uncompressed_load, done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
__ b(&uncompressed_load, CS);
__ ldrb(out_loc.AsRegister<Register>(),
Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
- __ b(&done);
+ __ b(final_label);
__ Bind(&uncompressed_load);
__ ldrh(out_loc.AsRegister<Register>(),
Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
} else {
codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
}
@@ -6019,6 +6037,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
Label done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
@@ -6040,7 +6059,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
index.AsRegister<Register>());
}
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ b(&done);
+ __ b(final_label);
__ Bind(&non_zero);
}
@@ -7021,6 +7040,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
Label done, zero;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARM* slow_path = nullptr;
// Return 0 if `obj` is null.
@@ -7042,7 +7062,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
// Classes must be equal for the instanceof to succeed.
__ b(&zero, NE);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
break;
}
@@ -7065,12 +7085,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done);
+ __ CompareAndBranchIfZero(out, final_label);
__ cmp(out, ShifterOperand(cls));
__ b(&loop, NE);
__ LoadImmediate(out, 1);
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7096,11 +7116,11 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
kCompilerReadBarrierOption);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
- __ b(&done);
+ __ b(final_label);
__ Bind(&success);
__ LoadImmediate(out, 1);
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7125,13 +7145,13 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done);
+ __ CompareAndBranchIfZero(out, final_label);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
__ CompareAndBranchIfNonZero(out, &zero);
__ Bind(&exact_check);
__ LoadImmediate(out, 1);
- __ b(&done);
+ __ b(final_label);
break;
}
@@ -7152,7 +7172,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ b(slow_path->GetEntryLabel(), NE);
__ LoadImmediate(out, 1);
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7183,7 +7203,7 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
- __ b(&done);
+ __ b(final_label);
}
break;
}
@@ -7269,9 +7289,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
codegen_->AddSlowPath(type_check_slow_path);
Label done;
+ Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &done);
+ __ CompareAndBranchIfZero(obj, final_label);
}
switch (type_check_kind) {
@@ -7335,7 +7356,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
Label loop;
__ Bind(&loop);
__ cmp(temp, ShifterOperand(cls));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -7363,7 +7384,7 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ cmp(temp, ShifterOperand(cls));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -7433,7 +7454,10 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
break;
}
}
- __ Bind(&done);
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
__ Bind(type_check_slow_path->GetExitLabel());
}
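
Every hunk in this file applies the same transformation, whose shape is worth seeing once in isolation (a sketch; GetFinalLabel may return either the local label or a label that already marks the instruction's fall-through target):

Label done;
Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// ... emit the fast path, branching to final_label where &done was used ...
__ b(final_label);
// ... emit the remaining cases ...
if (done.IsLinked()) {  // bind the local label only if a branch still targets it
  __ Bind(&done);
}

When the instruction ends its basic block, the branch lands directly on the successor's label and the otherwise redundant branch-to-next-instruction is elided.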
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 5b15902ccd..59a7f7c048 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -237,7 +237,7 @@ class InstructionCodeGeneratorARM : public InstructionCodeGenerator {
void HandleBitwiseOperation(HBinaryOperation* operation);
void HandleCondition(HCondition* condition);
void HandleIntegerRotate(LocationSummary* locations);
- void HandleLongRotate(LocationSummary* locations);
+ void HandleLongRotate(HRor* ror);
void HandleShift(HBinaryOperation* operation);
void GenerateWideAtomicStore(Register addr, uint32_t offset,
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index d75779cef6..2d2d8109a3 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1950,6 +1950,7 @@ static bool CanGenerateConditionalMove(const Location& out, const Location& src)
vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
vixl32::Label* final_label) {
DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+ DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
const HBasicBlock* const block = instruction->GetBlock();
const HLoopInformation* const info = block->GetLoopInformation();
@@ -2925,16 +2926,20 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) {
// Convert the jumps into the result.
vixl32::Label done_label;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(cond, &done_label);
// False case: result = 0.
__ Bind(&false_label);
__ Mov(out, 0);
- __ B(&done_label);
+ __ B(final_label);
// True case: result = 1.
__ Bind(&true_label);
__ Mov(out, 1);
- __ Bind(&done_label);
+
+ if (done_label.IsReferenced()) {
+ __ Bind(&done_label);
+ }
}
void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -4447,6 +4452,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
vixl32::Label end;
vixl32::Label shift_by_32_plus_shift_right;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
__ And(shift_right, RegisterFrom(rhs), 0x1F);
__ Lsrs(shift_left, RegisterFrom(rhs), 6);
@@ -4461,7 +4467,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ Lsl(out_reg_lo, in_reg_lo, shift_left);
__ Lsr(shift_left, in_reg_hi, shift_right);
__ Add(out_reg_lo, out_reg_lo, shift_left);
- __ B(&end);
+ __ B(final_label);
__ Bind(&shift_by_32_plus_shift_right); // Shift by 32+shift_right.
// out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4473,7 +4479,9 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ Lsl(shift_right, in_reg_hi, shift_left);
__ Add(out_reg_lo, out_reg_lo, shift_right);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
}
}
@@ -4906,6 +4914,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
Location right = locations->InAt(1);
vixl32::Label less, greater, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
Primitive::Type type = compare->InputAt(0)->GetType();
vixl32::Condition less_cond = vixl32::Condition(kNone);
switch (type) {
@@ -4944,17 +4953,19 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
__ B(less_cond, &less, /* far_target */ false);
__ Bind(&greater);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
__ Bind(&less);
__ Mov(out, -1);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
@@ -5746,6 +5757,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
int32_t const_index = Int32ConstantFrom(index);
if (maybe_compressed_char_at) {
vixl32::Label uncompressed_load, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -5754,13 +5766,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
RegisterFrom(out_loc),
obj,
data_offset + const_index);
- __ B(&done);
+ __ B(final_label);
__ Bind(&uncompressed_load);
GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
RegisterFrom(out_loc),
obj,
data_offset + (const_index << 1));
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
} else {
uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
@@ -5785,15 +5799,18 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
}
if (maybe_compressed_char_at) {
vixl32::Label uncompressed_load, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
__ B(cs, &uncompressed_load, /* far_target */ false);
__ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
- __ B(&done);
+ __ B(final_label);
__ Bind(&uncompressed_load);
__ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
} else {
codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
}
@@ -6032,6 +6049,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
if (may_need_runtime_call_for_type_check) {
@@ -6054,7 +6072,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
// TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
// store instruction.
codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ B(&done);
+ __ B(final_label);
__ Bind(&non_zero);
}
@@ -7062,6 +7080,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
vixl32::Label done, zero;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
SlowPathCodeARMVIXL* slow_path = nullptr;
// Return 0 if `obj` is null.
@@ -7083,7 +7102,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
// Classes must be equal for the instanceof to succeed.
__ B(ne, &zero, /* far_target */ false);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
break;
}
@@ -7106,12 +7125,12 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
__ Cmp(out, cls);
__ B(ne, &loop, /* far_target */ false);
__ Mov(out, 1);
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7137,11 +7156,11 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
kCompilerReadBarrierOption);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
- __ B(&done);
+ __ B(final_label);
__ Bind(&success);
__ Mov(out, 1);
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7166,13 +7185,13 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
maybe_temp_loc,
kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
- __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
__ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);
__ Bind(&exact_check);
__ Mov(out, 1);
- __ B(&done);
+ __ B(final_label);
break;
}
@@ -7193,7 +7212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7224,7 +7243,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
if (zero.IsReferenced()) {
- __ B(&done);
+ __ B(final_label);
}
break;
}
@@ -7310,9 +7329,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
codegen_->AddSlowPath(type_check_slow_path);
vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
+ __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
}
switch (type_check_kind) {
@@ -7376,7 +7396,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
vixl32::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -7404,7 +7424,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ Cmp(temp, cls);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -7472,7 +7492,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
break;
}
}
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
__ Bind(type_check_slow_path->GetExitLabel());
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b50619a66..958c1a6fdb 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -963,12 +966,20 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
}
size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(Address(ESP, stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+ }
return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(XmmRegister(reg_id), Address(ESP, stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+ }
return GetFloatingPointSpillSlotSize();
}
@@ -5699,7 +5710,12 @@ void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction)
void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, there are usually no caller-save
+ // registers at all. If SIMD instructions are present, however, we force
+ // spilling all live SIMD registers in full width (since the runtime only
+ // saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
+ ? RegisterSet::AllFpu()
+ : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 65ee383b54..ca3a9eadd2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- // 8 bytes == 2 words for each spill.
- return 2 * kX86WordSize;
+ return GetGraph()->HasSIMD()
+ ? 4 * kX86WordSize // 16 bytes == 4 words for each spill
+ : 2 * kX86WordSize; // 8 bytes == 2 words for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
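
The arithmetic behind the two slot sizes, as a sketch (kX86WordSize is 4 bytes and kX86_64WordSize is 8 bytes, so both back ends arrive at the same byte counts):

// With SIMD live, movupd saves the full 128-bit XMM register, so a spill
// slot needs 16 bytes; otherwise movsd saves only the low 64 bits.
constexpr size_t kXmmFullSpillBytes = 16;  // 4 * kX86WordSize == 2 * kX86_64WordSize
constexpr size_t kXmmLowSpillBytes  = 8;   // 2 * kX86WordSize == 1 * kX86_64WordSize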
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 08f1adfcff..c106d9b06e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
: SlowPathCode(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations); // only saves full width XMM for SIMD
x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+ RestoreLiveRegisters(codegen, locations); // only restores full width XMM for SIMD
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
@@ -1158,13 +1161,21 @@ size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg
}
size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ } else {
+ __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+ }
+ return GetFloatingPointSpillSlotSize();
}
size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
- __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
- return kX86_64WordSize;
+ if (GetGraph()->HasSIMD()) {
+ __ movupd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ } else {
+ __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+ }
+ return GetFloatingPointSpillSlotSize();
}
void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -5152,7 +5163,12 @@ void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instructio
void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
- locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // In the suspend check slow path, there are usually no caller-save
+ // registers at all. If SIMD instructions are present, however, we force
+ // spilling all live SIMD registers in full width (since the runtime only
+ // saves/restores the lower part).
+ locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD()
+ ? RegisterSet::AllFpu()
+ : RegisterSet::Empty());
}
void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 376c3ce381..c8336dabd9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
- return kX86_64WordSize;
+ return GetGraph()->HasSIMD()
+ ? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill
+ : 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill
}
HGraphVisitor* GetLocationBuilder() OVERRIDE {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 98b80f5d3c..1006a776f0 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -270,9 +270,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmAssembler* assembler) {
+ CodeGeneratorARM* codegen) {
+ ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Location in = locations->InAt(0);
Register out = locations->Out().AsRegister<Register>();
@@ -282,11 +284,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations,
Register in_reg_lo = in.AsRegisterPairLow<Register>();
Register in_reg_hi = in.AsRegisterPairHigh<Register>();
Label end;
+ Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, &end);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
__ clz(out, in_reg_lo);
__ AddConstant(out, 32);
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
} else {
__ clz(out, in.AsRegister<Register>());
}
@@ -297,7 +302,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfLeadingZeros(HInvoke* inv
}
void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -309,27 +314,32 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke
}
void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmAssembler* assembler) {
+ CodeGeneratorARM* codegen) {
DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+ ArmAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Register out = locations->Out().AsRegister<Register>();
if (type == Primitive::kPrimLong) {
Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
Label end;
+ Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ rbit(out, in_reg_lo);
__ clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
__ rbit(out, in_reg_hi);
__ clz(out, out);
__ AddConstant(out, 32);
- __ Bind(&end);
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
} else {
Register in = locations->InAt(0).AsRegister<Register>();
__ rbit(out, in);
@@ -346,7 +356,7 @@ void IntrinsicLocationsBuilderARM::VisitIntegerNumberOfTrailingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -358,7 +368,7 @@ void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invok
}
void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}
static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
@@ -1355,6 +1365,7 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
Label end;
Label return_true;
Label return_false;
+ Label* final_label = codegen_->GetFinalLabel(invoke, &end);
// Get offsets of count, value, and class fields within a string object.
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1428,12 +1439,15 @@ void IntrinsicCodeGeneratorARM::VisitStringEquals(HInvoke* invoke) {
// If loop does not result in returning false, we return true.
__ Bind(&return_true);
__ LoadImmediate(out, 1);
- __ b(&end);
+ __ b(final_label);
// Return false and exit the function.
__ Bind(&return_false);
__ LoadImmediate(out, 0);
- __ Bind(&end);
+
+ if (end.IsLinked()) {
+ __ Bind(&end);
+ }
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2491,13 +2505,14 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
Label done, compressed_string_loop;
+ Label* final_label = codegen_->GetFinalLabel(invoke, &done);
// dst to be copied.
__ add(dst_ptr, dstObj, ShifterOperand(data_offset));
__ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
__ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
// Early out for valid zero-length retrievals.
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// src range to copy.
__ add(src_ptr, srcObj, ShifterOperand(value_offset));
@@ -2534,7 +2549,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&loop, GE);
__ adds(num_chr, num_chr, ShifterOperand(4));
- __ b(&done, EQ);
+ __ b(final_label, EQ);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2545,7 +2560,7 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&remainder, GT);
if (mirror::kUseStringCompression) {
- __ b(&done);
+ __ b(final_label);
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
@@ -2559,7 +2574,9 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
__ b(&compressed_string_loop, GT);
}
- __ Bind(&done);
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 19ff49c6ce..b25bad7170 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -333,9 +333,11 @@ static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmVIXLAssembler* assembler) {
+ CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
Location in = locations->InAt(0);
vixl32::Register out = RegisterFrom(locations->Out());
@@ -345,11 +347,14 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations,
vixl32::Register in_reg_lo = LowRegisterFrom(in);
vixl32::Register in_reg_hi = HighRegisterFrom(in);
vixl32::Label end;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
__ Clz(out, in_reg_lo);
__ Add(out, out, 32);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
} else {
__ Clz(out, RegisterFrom(in));
}
@@ -360,7 +365,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke*
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -372,27 +377,32 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
}
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
Primitive::Type type,
- ArmVIXLAssembler* assembler) {
+ CodeGeneratorARMVIXL* codegen) {
DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
vixl32::Register out = RegisterFrom(locations->Out());
if (type == Primitive::kPrimLong) {
vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
vixl32::Label end;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Rbit(out, in_reg_lo);
__ Clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
__ Rbit(out, in_reg_hi);
__ Clz(out, out);
__ Add(out, out, 32);
- __ Bind(&end);
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
} else {
vixl32::Register in = RegisterFrom(locations->InAt(0));
__ Rbit(out, in);
@@ -409,7 +419,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -421,7 +431,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* i
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+ GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
}
static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
@@ -502,7 +512,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location op1_loc = invoke->GetLocations()->InAt(0);
Location op2_loc = invoke->GetLocations()->InAt(1);
Location out_loc = invoke->GetLocations()->Out();
@@ -520,6 +531,7 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
const vixl32::Register temp1 = temps.Acquire();
vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
DCHECK(op1.Is(out));
@@ -536,7 +548,8 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
__ it(cond);
__ vmov(cond, F32, out, op2);
}
- __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+ // For <> (not equal), we have already done the min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
// handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
__ Vmov(temp1, op1);
@@ -547,14 +560,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assem
__ And(temp1, temp1, temp2);
}
__ Vmov(out, temp1);
- __ B(&done);
+ __ B(final_label);
// handle NaN input.
__ Bind(&nan);
__ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
__ Vmov(out, temp1);
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -572,7 +587,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler());
+ GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -581,10 +596,11 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler());
+ GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
Location op1_loc = invoke->GetLocations()->InAt(0);
Location op2_loc = invoke->GetLocations()->InAt(1);
Location out_loc = invoke->GetLocations()->Out();
@@ -599,6 +615,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse
vixl32::DRegister op2 = DRegisterFrom(op2_loc);
vixl32::DRegister out = OutputDRegister(invoke);
vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
DCHECK(op1.Is(out));
@@ -615,19 +632,22 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* asse
__ it(cond);
__ vmov(cond, F64, out, op2);
}
- __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+ // For <> (not equal), we have already done the min/max calculation.
+ __ B(ne, final_label, /* far_target */ false);
// handle op1 == op2, max(+0.0,-0.0).
if (!is_min) {
__ Vand(F64, out, op1, op2);
- __ B(&done);
+ __ B(final_label);
}
// handle op1 == op2, min(+0.0,-0.0), NaN input.
__ Bind(&handle_nan_eq);
__ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -635,7 +655,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler());
+ GenMinMaxDouble(invoke, /* is_min */ true , codegen_);
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -643,7 +663,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler());
+ GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}
static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
@@ -1670,6 +1690,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
vixl32::Label end;
vixl32::Label return_true;
vixl32::Label return_false;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
// Get offsets of count, value, and class fields within a string object.
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1746,12 +1767,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
// If loop does not result in returning false, we return true.
__ Bind(&return_true);
__ Mov(out, 1);
- __ B(&end);
+ __ B(final_label);
// Return false and exit the function.
__ Bind(&return_false);
__ Mov(out, 0);
- __ Bind(&end);
+
+ if (end.IsReferenced()) {
+ __ Bind(&end);
+ }
}
static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2789,13 +2813,14 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
vixl32::Label done, compressed_string_loop;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
// dst to be copied.
__ Add(dst_ptr, dstObj, data_offset);
__ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
__ Subs(num_chr, srcEnd, srcBegin);
// Early out for valid zero-length retrievals.
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// src range to copy.
__ Add(src_ptr, srcObj, value_offset);
@@ -2839,7 +2864,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(ge, &loop, /* far_target */ false);
__ Adds(num_chr, num_chr, 4);
- __ B(eq, &done, /* far_target */ false);
+ __ B(eq, final_label, /* far_target */ false);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -2852,7 +2877,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(gt, &remainder, /* far_target */ false);
if (mirror::kUseStringCompression) {
- __ B(&done);
+ __ B(final_label);
const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
DCHECK_EQ(c_char_size, 1u);
@@ -2868,7 +2893,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ B(gt, &compressed_string_loop, /* far_target */ false);
}
- __ Bind(&done);
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a63d..d391f6913c 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -417,6 +417,7 @@ std::ostream& operator<<(std::ostream& os, const Location::Policy& rhs);
class RegisterSet : public ValueObject {
public:
static RegisterSet Empty() { return RegisterSet(); }
+ static RegisterSet AllFpu() { return RegisterSet(0, -1); }
void Add(Location loc) {
if (loc.IsRegister()) {
@@ -462,6 +463,7 @@ class RegisterSet : public ValueObject {
private:
RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+ RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
uint32_t core_registers_;
uint32_t floating_point_registers_;
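A note on the constant: passing -1 for the FP mask relies on the implicit conversion to an all-ones uint32_t, so AllFpu() describes a set containing every floating-point register and no core register. A self-contained sketch of those semantics follows; this is a stripped-down stand-in, not the ART class.

#include <cstdint>
#include <cstdio>

class RegisterSet {
 public:
  static RegisterSet Empty() { return RegisterSet(0u, 0u); }
  // ~0u is the explicit spelling of the -1 all-ones mask used above.
  static RegisterSet AllFpu() { return RegisterSet(0u, ~0u); }

  bool ContainsFpuRegister(int reg) const {
    return (floating_point_registers_ & (1u << reg)) != 0u;
  }

 private:
  RegisterSet(uint32_t core, uint32_t fp)
      : core_registers_(core), floating_point_registers_(fp) {}
  uint32_t core_registers_;
  uint32_t floating_point_registers_;
};

int main() {
  RegisterSet all = RegisterSet::AllFpu();
  for (int reg = 0; reg < 32; ++reg) {
    if (!all.ContainsFpuRegister(reg)) return 1;  // never taken: every bit is set
  }
  std::printf("all 32 FP registers are in the set\n");
  return 0;
}

Presumably the point of an all-FPU set is to let a code generator treat the entire FP register file as potentially live, for example around code that may clobber the wide contents of SIMD registers.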
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 020e4463d4..ec706e6694 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2046,6 +2046,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
if (HasTryCatch()) {
outer_graph->SetHasTryCatch(true);
}
+ if (HasSIMD()) {
+ outer_graph->SetHasSIMD(true);
+ }
HInstruction* return_value = nullptr;
if (GetBlocks().size() == 3) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 542b218cf8..6881d8f6ae 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
temporaries_vreg_slots_(0),
has_bounds_checks_(false),
has_try_catch_(false),
+ has_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
debuggable_(debuggable),
@@ -560,6 +561,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ bool HasSIMD() const { return has_simd_; }
+ void SetHasSIMD(bool value) { has_simd_ = value; }
+
bool HasLoops() const { return has_loops_; }
void SetHasLoops(bool value) { has_loops_ = value; }
@@ -652,6 +656,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// false positives.
bool has_try_catch_;
+ // Flag whether SIMD instructions appear in the graph. If true, the
+  // code generators may have to be more careful when spilling the wider
+ // contents of SIMD registers.
+ bool has_simd_;
+
// Flag whether there are any loops in the graph. We can skip loop
// optimization if it's false. It's only best effort to keep it up
// to date in the presence of code elimination so there might be false
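`HasSIMD()` follows the same merge rule during inlining as `HasTryCatch()`: a graph-wide property of the inlined graph must be raised on the outer graph, because the inner blocks now live there, and inlining can only set the flag, never clear it. A stripped-down sketch of the propagation pattern (a stand-in class, not ART's HGraph):

#include <cassert>

class HGraph {
 public:
  bool HasTryCatch() const { return has_try_catch_; }
  void SetHasTryCatch(bool value) { has_try_catch_ = value; }
  bool HasSIMD() const { return has_simd_; }
  void SetHasSIMD(bool value) { has_simd_ = value; }

  // Mirrors the flag-merging step of HGraph::InlineInto() in the hunk above.
  void MergeFlagsInto(HGraph* outer_graph) const {
    if (HasTryCatch()) outer_graph->SetHasTryCatch(true);
    if (HasSIMD()) outer_graph->SetHasSIMD(true);
  }

 private:
  bool has_try_catch_ = false;
  bool has_simd_ = false;
};

int main() {
  HGraph outer, inner;
  inner.SetHasSIMD(true);
  inner.MergeFlagsInto(&outer);
  assert(outer.HasSIMD());  // spill logic downstream now sees the flag
  return 0;
}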
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 8a9c1ccaff..0d33b49fdb 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,11 +299,13 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) {
// Currently, we unconditionally spill the current method in the code generators.
&& !interval->GetDefinedBy()->IsCurrentMethod()) {
// We spill eagerly, so move must be at definition.
- InsertMoveAfter(interval->GetDefinedBy(),
- interval->ToLocation(),
- interval->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
- : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+ Location loc;
+ switch (interval->NumberOfSpillSlotsNeeded()) {
+ case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+ case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
+ InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
}
UsePosition* use = current->GetFirstUse();
EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
@@ -459,9 +461,11 @@ void RegisterAllocationResolver::ConnectSplitSiblings(LiveInterval* interval,
location_source = defined_by->GetLocations()->Out();
} else {
DCHECK(defined_by->IsCurrentMethod());
- location_source = parent->NeedsTwoSpillSlots()
- ? Location::DoubleStackSlot(parent->GetSpillSlot())
- : Location::StackSlot(parent->GetSpillSlot());
+ switch (parent->NumberOfSpillSlotsNeeded()) {
+ case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+ case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+ }
}
} else {
DCHECK(source != nullptr);
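The same two-case switch now appears at both rewritten sites in this file, and again in ToLocation() below. A sketch of what a shared helper could look like; the factoring is hypothetical (the diff keeps the switch inline), and Location/Kind here are plain stand-ins for the ART types.

#include <cstddef>
#include <cstdio>
#include <cstdlib>

enum class Kind { kStackSlot, kDoubleStackSlot };

struct Location {
  Kind kind;
  size_t slot;
};

// Maps a slot count to the matching stack location, failing loudly on counts
// the backends cannot represent yet (e.g. future SIMD widths).
Location SpillLocation(size_t number_of_spill_slots_needed, size_t spill_slot) {
  switch (number_of_spill_slots_needed) {
    case 1: return {Kind::kStackSlot, spill_slot};
    case 2: return {Kind::kDoubleStackSlot, spill_slot};
    default:
      std::fprintf(stderr, "Unexpected number of spill slots\n");
      std::abort();
  }
}

int main() {
  Location loc = SpillLocation(/* slots */ 2, /* spill_slot */ 8);
  std::printf("double stack slot at %zu? %d\n", loc.slot, loc.kind == Kind::kDoubleStackSlot);
  return 0;
}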
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f865c3..87f709f63d 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@ void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* ins
interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
} else {
interval->SetSpillSlot(catch_phi_spill_slot_counter_);
- catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
}
}
}
@@ -1996,43 +1996,48 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in
bool is_interval_beginning;
size_t position;
std::tie(position, is_interval_beginning, parent_interval) = *it;
-
- bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+ size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
if (is_interval_beginning) {
DCHECK(!parent_interval->HasSpillSlot());
DCHECK_EQ(position, parent_interval->GetStart());
- // Find a free stack slot.
+      // Find the first contiguous run of free stack slots.
size_t slot = 0;
- for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
- // Skip taken slots.
+ for (; ; ++slot) {
+ bool found = true;
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ if (taken.IsBitSet(s)) {
+ found = false;
+ break; // failure
+ }
+ }
+ if (found) {
+ break; // success
+ }
}
+
parent_interval->SetSpillSlot(slot);
- *num_stack_slots_used = std::max(*num_stack_slots_used,
- needs_two_slots ? slot + 1 : slot + 2);
- if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+ *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+ if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
// The parallel move resolver requires that there be an even number of spill slots
// allocated for pair value types.
++(*num_stack_slots_used);
}
- taken.SetBit(slot);
- if (needs_two_slots) {
- taken.SetBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ taken.SetBit(s);
}
} else {
DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
DCHECK(parent_interval->HasSpillSlot());
- // Free up the stack slot used by this interval.
+ // Free up the stack slot(s) used by this interval.
size_t slot = parent_interval->GetSpillSlot();
- DCHECK(taken.IsBitSet(slot));
- DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
- taken.ClearBit(slot);
- if (needs_two_slots) {
- taken.ClearBit(slot + 1);
+ for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+ DCHECK(taken.IsBitSet(s));
+ taken.ClearBit(s);
}
}
}
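The rewritten search generalizes first-fit from "one free slot, or two adjacent free slots" to a run of any length, which is presumably what wider SIMD spills will need; the even-slot padding that follows keeps the parallel move resolver's pair invariant intact. A standalone sketch of the run search, with std::vector<bool> standing in for ART's BitVector:

#include <cassert>
#include <cstddef>
#include <vector>

// Returns the first index at which `n` consecutive slots are free. Indices at
// or past the end of `taken` count as free: the run may allocate new slots.
size_t FindFirstFreeRun(const std::vector<bool>& taken, size_t n) {
  for (size_t slot = 0; ; ++slot) {
    bool found = true;
    for (size_t s = slot; s < slot + n; ++s) {
      if (s < taken.size() && taken[s]) {
        found = false;
        break;  // failure, try the next start
      }
    }
    if (found) return slot;  // success
  }
}

int main() {
  //                      slot:   0      1      2     3      4
  std::vector<bool> taken     = {true,  false, true, false, false};
  assert(FindFirstFreeRun(taken, 1) == 1);  // a single slot fits at 1
  assert(FindFirstFreeRun(taken, 2) == 3);  // a pair must skip to 3..4
  assert(FindFirstFreeRun(taken, 4) == 3);  // the run may extend past the end
  return 0;
}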
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 6354e76ec8..ab8d540359 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -1125,36 +1125,31 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) {
LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
}
- // Find an available spill slot.
+  // Find the first contiguous run of available spill slots.
+ size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
size_t slot = 0;
for (size_t e = spill_slots->size(); slot < e; ++slot) {
- if ((*spill_slots)[slot] <= parent->GetStart()) {
- if (!parent->NeedsTwoSpillSlots()) {
- // One spill slot is sufficient.
- break;
- }
- if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
- // Two spill slots are available.
+ bool found = true;
+ for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+ if ((*spill_slots)[s] > parent->GetStart()) {
+ found = false; // failure
break;
}
}
+ if (found) {
+ break; // success
+ }
}
+  // Need new spill slots? Grow the vector to cover the whole run.
+ size_t upper = slot + number_of_spill_slots_needed;
+ if (upper > spill_slots->size()) {
+ spill_slots->resize(upper);
+ }
+  // Mark each allocated slot as claimed until the interval's end.
size_t end = interval->GetLastSibling()->GetEnd();
- if (parent->NeedsTwoSpillSlots()) {
- if (slot + 2u > spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->resize(slot + 2u, end);
- }
- (*spill_slots)[slot] = end;
- (*spill_slots)[slot + 1] = end;
- } else {
- if (slot == spill_slots->size()) {
- // We need a new spill slot.
- spill_slots->push_back(end);
- } else {
- (*spill_slots)[slot] = end;
- }
+ for (size_t s = slot; s < upper; s++) {
+ (*spill_slots)[s] = end;
}
// Note that the exact spill slot location will be computed when we resolve,
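In the linear-scan variant, `(*spill_slots)[i]` records the position until which slot i is claimed, so a slot is reusable once that position is at or before the new interval's start. The rewritten loop accepts any run length and lets the run extend past the vector, which is then resized. A self-contained sketch mirroring that logic (plain types, not the ART classes):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

// Finds and claims `needed` consecutive slots that are free at `start`, marks
// them claimed until `end`, and returns the first slot index.
size_t AllocateSpillSlots(std::vector<size_t>* spill_slots,
                          size_t needed, size_t start, size_t end) {
  size_t slot = 0;
  for (size_t e = spill_slots->size(); slot < e; ++slot) {
    bool found = true;
    for (size_t s = slot, u = std::min(slot + needed, e); s < u; ++s) {
      if ((*spill_slots)[s] > start) {
        found = false;  // this slot is still claimed past `start`
        break;
      }
    }
    if (found) break;
  }
  size_t upper = slot + needed;
  if (upper > spill_slots->size()) {
    spill_slots->resize(upper);  // grow for the part of the run past the end
  }
  for (size_t s = slot; s < upper; ++s) {
    (*spill_slots)[s] = end;  // claimed until the interval's last sibling ends
  }
  return slot;
}

int main() {
  std::vector<size_t> spill_slots = {10, 2, 20, 3};  // claimed-until positions
  // An interval [5, 30) needing two slots skips slots 0..2 (0 and 2 are still
  // claimed past position 5) and lands on 3..4, growing the vector by one slot.
  size_t slot = AllocateSpillSlots(&spill_slots, 2, /* start */ 5, /* end */ 30);
  assert(slot == 3);
  assert(spill_slots.size() == 5);
  assert(spill_slots[3] == 30 && spill_slots[4] == 30);
  return 0;
}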
@@ -1180,7 +1175,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) {
// TODO: Reuse spill slots when intervals of phis from different catch
// blocks do not overlap.
interval->SetSpillSlot(catch_phi_spill_slots_);
- catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+ catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
}
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1a55..c0a045c33e 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,8 @@ bool LiveInterval::SameRegisterKind(Location other) const {
}
}
-bool LiveInterval::NeedsTwoSpillSlots() const {
- return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+ return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
}
Location LiveInterval::ToLocation() const {
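The new contract is simple to state: one slot per Dex virtual register (`kVRegSize`), so 64-bit types report two slots and everything else one, and returning a count rather than a bool means a later change can make SIMD types report four or more without touching the call sites rewritten above. A tiny sketch of the contract:

#include <cassert>
#include <cstddef>

enum class Primitive { kPrimInt, kPrimFloat, kPrimLong, kPrimDouble };

// Stand-in for LiveInterval::NumberOfSpillSlotsNeeded(), keyed on the type.
size_t NumberOfSpillSlotsNeeded(Primitive type) {
  return (type == Primitive::kPrimLong || type == Primitive::kPrimDouble) ? 2 : 1;
}

int main() {
  assert(NumberOfSpillSlotsNeeded(Primitive::kPrimInt) == 1);
  assert(NumberOfSpillSlotsNeeded(Primitive::kPrimDouble) == 2);  // 8 bytes = 2 * kVRegSize
  return 0;
}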
@@ -494,10 +494,10 @@ Location LiveInterval::ToLocation() const {
if (defined_by->IsConstant()) {
return defined_by->GetLocations()->Out();
} else if (GetParent()->HasSpillSlot()) {
- if (NeedsTwoSpillSlots()) {
- return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
- } else {
- return Location::StackSlot(GetParent()->GetSpillSlot());
+ switch (NumberOfSpillSlotsNeeded()) {
+ case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+ case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+ default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
}
} else {
return Location();
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index 340d0ccefe..e9dffc1fac 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -762,9 +762,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
// Returns kNoRegister otherwise.
int FindHintAtDefinition() const;
- // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
- // slots for spilling.
- bool NeedsTwoSpillSlots() const;
+  // Returns the number of required spill slots (measured as a multiple of the
+ // Dex virtual register size `kVRegSize`).
+ size_t NumberOfSpillSlotsNeeded() const;
bool IsFloatingPoint() const {
return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;