Diffstat (limited to 'compiler')
38 files changed, 821 insertions, 421 deletions
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index a3e7efa559..e52dda35bb 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2521,28 +2521,11 @@ class InitializeArrayClassesAndCreateConflictTablesVisitor : public ClassVisitor true); } // Create the conflict tables. - FillIMTAndConflictTables(klass); - return true; - } - - private: - void FillIMTAndConflictTables(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { - if (!klass->ShouldHaveImt()) { - return; - } - if (visited_classes_.find(klass) != visited_classes_.end()) { - return; - } - if (klass->HasSuperClass()) { - FillIMTAndConflictTables(klass->GetSuperClass()); - } - if (!klass->IsTemp()) { + if (!klass->IsTemp() && klass->ShouldHaveEmbeddedImtAndVTable()) { Runtime::Current()->GetClassLinker()->FillIMTAndConflictTables(klass); } - visited_classes_.insert(klass); + return true; } - - std::set<mirror::Class*> visited_classes_; }; void CompilerDriver::InitializeClasses(jobject class_loader, diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 26ab281741..7f2e1931d0 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -780,9 +780,9 @@ class ElfBuilder FINAL { EF_MIPS_PIC | EF_MIPS_CPIC | EF_MIPS_ABI_O32 | - features->AsMipsInstructionSetFeatures()->IsR6() - ? EF_MIPS_ARCH_32R6 - : EF_MIPS_ARCH_32R2); + (features->AsMipsInstructionSetFeatures()->IsR6() + ? EF_MIPS_ARCH_32R6 + : EF_MIPS_ARCH_32R2)); break; } case kMips64: { diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index 063eb11718..da10568475 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -1232,10 +1232,9 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } // Assign offsets for all runtime methods in the IMT since these may hold conflict tables // live. - if (as_klass->ShouldHaveImt()) { - ImTable* imt = as_klass->GetImt(target_ptr_size_); - for (size_t i = 0; i < ImTable::kSize; ++i) { - ArtMethod* imt_method = imt->Get(i, target_ptr_size_); + if (as_klass->ShouldHaveEmbeddedImtAndVTable()) { + for (size_t i = 0; i < mirror::Class::kImtSize; ++i) { + ArtMethod* imt_method = as_klass->GetEmbeddedImTableEntry(i, target_ptr_size_); DCHECK(imt_method != nullptr); if (imt_method->IsRuntimeMethod() && !IsInBootImage(imt_method) && @@ -1244,11 +1243,6 @@ void ImageWriter::WalkFieldsInOrder(mirror::Object* obj) { } } } - - if (as_klass->ShouldHaveImt()) { - ImTable* imt = as_klass->GetImt(target_ptr_size_); - TryAssignImTableOffset(imt, oat_index); - } } else if (h_obj->IsObjectArray()) { // Walk elements of an object array. int32_t length = h_obj->AsObjectArray<mirror::Object>()->GetLength(); @@ -1275,23 +1269,6 @@ bool ImageWriter::NativeRelocationAssigned(void* ptr) const { return native_object_relocations_.find(ptr) != native_object_relocations_.end(); } -void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) { - // No offset, or already assigned. - if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) { - return; - } - // If the method is a conflict method we also want to assign the conflict table offset. 
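The elf_builder.h hunk above adds parentheses around the ternary because `?:` binds more loosely than `|`; without them the whole OR-chain of MIPS flags becomes the condition and the intended architecture bit is chosen incorrectly while the other flags are dropped. A minimal stand-alone illustration of the precedence pitfall (made-up values, not the real EF_MIPS_* constants):

#include <cstdio>

int main() {
  const unsigned kFlagA = 0x1, kFlagB = 0x2, kArchR6 = 0x10, kArchR2 = 0x20;
  bool is_r6 = false;

  // Parses as ((kFlagA | kFlagB | is_r6) ? kArchR6 : kArchR2): the accumulated
  // flags are swallowed by the condition, which is nonzero, so R6 is picked
  // even though is_r6 is false and the other flag bits are lost.
  unsigned broken = kFlagA | kFlagB | is_r6 ? kArchR6 : kArchR2;

  // With the parentheses from the fix the flags are kept and R2 is selected.
  unsigned fixed = kFlagA | kFlagB | (is_r6 ? kArchR6 : kArchR2);

  std::printf("broken=0x%x fixed=0x%x\n", broken, fixed);  // broken=0x10 fixed=0x23
  return 0;
}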
- ImageInfo& image_info = GetImageInfo(oat_index); - const size_t size = ImTable::SizeInBytes(target_ptr_size_); - native_object_relocations_.emplace( - imt, - NativeObjectRelocation { - oat_index, - image_info.bin_slot_sizes_[kBinImTable], - kNativeObjectRelocationTypeIMTable}); - image_info.bin_slot_sizes_[kBinImTable] += size; -} - void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) { // No offset, or already assigned. if (table == nullptr || NativeRelocationAssigned(table)) { @@ -1414,7 +1391,6 @@ void ImageWriter::CalculateNewObjectOffsets() { bin_offset = RoundUp(bin_offset, method_alignment); break; } - case kBinImTable: case kBinIMTConflictTable: { bin_offset = RoundUp(bin_offset, target_ptr_size_); break; @@ -1485,10 +1461,6 @@ size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) c bin_slot_offsets_[kBinArtMethodClean], bin_slot_sizes_[kBinArtMethodClean] + bin_slot_sizes_[kBinArtMethodDirty]); - // IMT section. - ImageSection* imt_section = &out_sections[ImageHeader::kSectionImTables]; - *imt_section = ImageSection(bin_slot_offsets_[kBinImTable], bin_slot_sizes_[kBinImTable]); - // Conflict tables section. ImageSection* imt_conflict_tables_section = &out_sections[ImageHeader::kSectionIMTConflictTables]; *imt_conflict_tables_section = ImageSection(bin_slot_offsets_[kBinIMTConflictTable], @@ -1613,13 +1585,6 @@ class FixupRootVisitor : public RootVisitor { ImageWriter* const image_writer_; }; -void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) { - for (size_t i = 0; i < ImTable::kSize; ++i) { - ArtMethod* method = orig->Get(i, target_ptr_size_); - copy->Set(i, NativeLocationInImage(method), target_ptr_size_); - } -} - void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) { const size_t count = orig->NumEntries(target_ptr_size_); for (size_t i = 0; i < count; ++i) { @@ -1677,12 +1642,6 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { case kNativeObjectRelocationTypeDexCacheArray: // Nothing to copy here, everything is done in FixupDexCache(). 
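The TryAssignConflictTableOffset path kept above follows the image writer's usual layout pattern: a native object that has no relocation yet is given the current size of its bin as its offset, and the bin then grows by the object's size. A rough stand-alone sketch of that bump-allocation pattern (hypothetical names, not the real ImageWriter types):

#include <cstddef>
#include <unordered_map>

struct RelocationSketch {
  size_t oat_index;
  size_t offset;  // offset of the object inside its bin
};

class BinLayoutSketch {
 public:
  // Bump-allocate an offset for `ptr` inside the bin, skipping objects that
  // were already assigned (mirrors the "No offset, or already assigned" check).
  void TryAssignOffset(void* ptr, size_t size, size_t oat_index) {
    if (ptr == nullptr || relocations_.count(ptr) != 0) {
      return;
    }
    relocations_.emplace(ptr, RelocationSketch{oat_index, bin_size_});
    bin_size_ += size;
  }

 private:
  std::unordered_map<void*, RelocationSketch> relocations_;
  size_t bin_size_ = 0;  // stands in for bin_slot_sizes_[kBin...]
};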
break; - case kNativeObjectRelocationTypeIMTable: { - ImTable* orig_imt = reinterpret_cast<ImTable*>(pair.first); - ImTable* dest_imt = reinterpret_cast<ImTable*>(dest); - CopyAndFixupImTable(orig_imt, dest_imt); - break; - } case kNativeObjectRelocationTypeIMTConflictTable: { auto* orig_table = reinterpret_cast<ImtConflictTable*>(pair.first); CopyAndFixupImtConflictTable( @@ -1891,25 +1850,13 @@ uintptr_t ImageWriter::NativeOffsetInImage(void* obj) { } template <typename T> -std::string PrettyPrint(T* ptr) SHARED_REQUIRES(Locks::mutator_lock_) { - std::ostringstream oss; - oss << ptr; - return oss.str(); -} - -template <> -std::string PrettyPrint(ArtMethod* method) SHARED_REQUIRES(Locks::mutator_lock_) { - return PrettyMethod(method); -} - -template <typename T> T* ImageWriter::NativeLocationInImage(T* obj) { if (obj == nullptr || IsInBootImage(obj)) { return obj; } else { auto it = native_object_relocations_.find(obj); - CHECK(it != native_object_relocations_.end()) << obj << " " << PrettyPrint(obj) - << " spaces " << Runtime::Current()->GetHeap()->DumpSpaces(); + CHECK(it != native_object_relocations_.end()) << obj << " spaces " + << Runtime::Current()->GetHeap()->DumpSpaces(); const NativeObjectRelocation& relocation = it->second; ImageInfo& image_info = GetImageInfo(relocation.oat_index); return reinterpret_cast<T*>(image_info.image_begin_ + relocation.offset); @@ -2263,8 +2210,6 @@ ImageWriter::Bin ImageWriter::BinTypeForNativeRelocationType(NativeObjectRelocat return kBinDexCacheArray; case kNativeObjectRelocationTypeRuntimeMethod: return kBinRuntimeMethod; - case kNativeObjectRelocationTypeIMTable: - return kBinImTable; case kNativeObjectRelocationTypeIMTConflictTable: return kBinIMTConflictTable; } diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 1efdc22c0a..51976c511f 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -169,8 +169,6 @@ class ImageWriter FINAL { // ArtMethods may be dirty if the class has native methods or a declaring class that isn't // initialized. kBinArtMethodDirty, - // IMT (clean) - kBinImTable, // Conflict tables (clean). kBinIMTConflictTable, // Runtime methods (always clean, do not have a length prefix array). @@ -193,7 +191,6 @@ class ImageWriter FINAL { kNativeObjectRelocationTypeArtMethodDirty, kNativeObjectRelocationTypeArtMethodArrayDirty, kNativeObjectRelocationTypeRuntimeMethod, - kNativeObjectRelocationTypeIMTable, kNativeObjectRelocationTypeIMTConflictTable, kNativeObjectRelocationTypeDexCacheArray, }; @@ -404,7 +401,6 @@ class ImageWriter FINAL { void CopyAndFixupObject(mirror::Object* obj) SHARED_REQUIRES(Locks::mutator_lock_); void CopyAndFixupMethod(ArtMethod* orig, ArtMethod* copy, const ImageInfo& image_info) SHARED_REQUIRES(Locks::mutator_lock_); - void CopyAndFixupImTable(ImTable* orig, ImTable* copy) SHARED_REQUIRES(Locks::mutator_lock_); void CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConflictTable* copy) SHARED_REQUIRES(Locks::mutator_lock_); void FixupClass(mirror::Class* orig, mirror::Class* copy) @@ -437,8 +433,6 @@ class ImageWriter FINAL { size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_); - void TryAssignImTableOffset(ImTable* imt, size_t oat_index) SHARED_REQUIRES(Locks::mutator_lock_); - // Assign the offset for an IMT conflict table. Does nothing if the table already has a native // relocation. 
void TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index b9466ba212..5316d59bff 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -430,7 +430,9 @@ class ReadBarrierMarkSlowPathARM : public SlowPathCode { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -493,8 +495,12 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -507,7 +513,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. Register index_reg = index_.AsRegister<Register>(); @@ -555,7 +561,11 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ AddConstant(index_reg, index_reg, offset_); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -1879,6 +1889,8 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) LocationSummary* locations = invoke->GetLocations(); Register temp = locations->GetTemp(0).AsRegister<Register>(); Register hidden_reg = locations->GetTemp(1).AsRegister<Register>(); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -1904,14 +1916,10 @@ void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). 
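The VisitInvokeInterface hunk above (and its ARM64, MIPS, x86 and x86-64 counterparts further down) goes back to reading interface methods out of a fixed-size table embedded in the class: the IMT index is reduced modulo mirror::Class::kImtSize and the resulting slot holds either the target method or a conflict-resolution stub. A simplified model of that dispatch, with stand-in types and an assumed table size rather than ART's real Class layout:

#include <array>
#include <cstddef>
#include <cstdint>

struct MethodSketch {
  bool is_conflict_stub;  // slot shared by several colliding interface methods
};

constexpr size_t kImtSizeSketch = 64;  // assumed value for the sketch

struct ClassSketch {
  std::array<MethodSketch*, kImtSizeSketch> embedded_imt{};

  // Rough equivalent of GetEmbeddedImTableEntry(imt_index % kImtSize, ...):
  // every interface method hashes onto one of the fixed slots.
  MethodSketch* GetImtEntry(uint32_t imt_index) const {
    return embedded_imt[imt_index % kImtSizeSketch];
  }
};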
__ MaybeUnpoisonHeapReference(temp); - __ LoadFromOffset(kLoadWord, temp, temp, - mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value()); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kArmPointerSize)); // temp = temp->GetImtEntryAt(method_offset); - __ LoadFromOffset(kLoadWord, temp, temp, method_offset); uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value(); + __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, LR, temp, entry_point); // LR(); @@ -6203,8 +6211,9 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); + ScaleFactor no_scale_factor = TIMES_1; GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, offset, no_index, temp, needs_null_check); + instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check); } void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6217,10 +6226,14 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, data_offset, index, temp, needs_null_check); + instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6228,6 +6241,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Register obj, uint32_t offset, Location index, + ScaleFactor scale_factor, Location temp, bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); @@ -6282,17 +6296,22 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The actual reference load. if (index.IsValid()) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - // /* HeapReference<Object> */ ref = - // *(obj + offset + index * sizeof(HeapReference<Object>)) + // Load types involving an "index": ArrayGet and + // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) if (index.IsConstant()) { size_t computed_offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset; + (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); } else { - __ add(IP, obj, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4)); + // Handle the special case of the + // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use + // a register pair as index ("long offset"), of which only the low + // part contains data. + Register index_reg = index.IsRegisterPair() + ? 
index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + __ add(IP, obj, ShifterOperand(index_reg, LSL, scale_factor)); __ LoadFromOffset(kLoadWord, ref_reg, IP, offset); } } else { @@ -6940,11 +6959,8 @@ void InstructionCodeGeneratorARM::VisitClassTableGet(HClassTableGet* instruction method_offset = mirror::Class::EmbeddedVTableEntryOffset( instruction->GetIndex(), kArmPointerSize).SizeValue(); } else { - __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(), - locations->InAt(0).AsRegister<Register>(), - mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value()); - method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kArmPointerSize)); + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value(); } __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(), diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 4fce5af8e6..477c4f18c1 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -472,6 +472,16 @@ class CodeGeneratorARM : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -527,16 +537,6 @@ class CodeGeneratorARM : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction); private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. 
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - uint32_t offset, - Location index, - Location temp, - bool needs_null_check); - Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4692a4a876..fc2c2c34aa 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -598,7 +598,9 @@ class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -661,8 +663,12 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { Primitive::Type type = Primitive::kPrimNot; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -680,7 +686,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); @@ -728,7 +734,11 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ Add(index_reg, index_reg, Operand(offset_)); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -3496,6 +3506,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 
LocationSummary* locations = invoke->GetLocations(); Register temp = XRegisterFrom(locations->GetTemp(0)); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); Location receiver = locations->InAt(0); Offset class_offset = mirror::Object::ClassOffset(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize); @@ -3525,10 +3537,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); - __ Ldr(temp, - MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kArm64PointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ Ldr(temp, MemOperand(temp, method_offset)); // lr = temp->GetEntryPoint(); @@ -5102,8 +5110,16 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, offset, no_index, temp, needs_null_check, use_load_acquire); + size_t no_scale_factor = 0U; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + offset, + no_index, + no_scale_factor, + temp, + needs_null_check, + use_load_acquire); } void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, @@ -5120,10 +5136,21 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins // never use Load-Acquire instructions on ARM64. const bool use_load_acquire = false; + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - GenerateReferenceLoadWithBakerReadBarrier( - instruction, ref, obj, data_offset, index, temp, needs_null_check, use_load_acquire); + size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + data_offset, + index, + scale_factor, + temp, + needs_null_check, + use_load_acquire); } void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, @@ -5131,15 +5158,16 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* vixl::Register obj, uint32_t offset, Location index, + size_t scale_factor, Register temp, bool needs_null_check, bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); - // If `index` is a valid location, then we are emitting an array - // load, so we shouldn't be using a Load Acquire instruction. - // In other words: `index.IsValid()` => `!use_load_acquire`. - DCHECK(!index.IsValid() || !use_load_acquire); + // If we are emitting an array load, we should not be using a + // Load Acquire instruction. In other words: + // `instruction->IsArrayGet()` => `!use_load_acquire`. 
+ DCHECK(!instruction->IsArrayGet() || !use_load_acquire); MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); @@ -5196,20 +5224,33 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The actual reference load. if (index.IsValid()) { - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - // /* HeapReference<Object> */ ref = - // *(obj + offset + index * sizeof(HeapReference<Object>)) - const size_t shift_amount = Primitive::ComponentSizeShift(type); - if (index.IsConstant()) { - uint32_t computed_offset = offset + (Int64ConstantFrom(index) << shift_amount); - Load(type, ref_reg, HeapOperand(obj, computed_offset)); + // Load types involving an "index". + if (use_load_acquire) { + // UnsafeGetObjectVolatile intrinsic case. + // Register `index` is not an index in an object array, but an + // offset to an object reference field within object `obj`. + DCHECK(instruction->IsInvoke()) << instruction->DebugName(); + DCHECK(instruction->GetLocations()->Intrinsified()); + DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) + << instruction->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset, 0U); + DCHECK_EQ(scale_factor, 0U); + DCHECK_EQ(needs_null_check, 0U); + // /* HeapReference<Object> */ ref = *(obj + index) + MemOperand field = HeapOperand(obj, XRegisterFrom(index)); + LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false); } else { - temp2 = temps.AcquireW(); - __ Add(temp2, obj, offset); - Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, shift_amount)); - temps.Release(temp2); + // ArrayGet and UnsafeGetObject intrinsics cases. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) + if (index.IsConstant()) { + uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor); + Load(type, ref_reg, HeapOperand(obj, computed_offset)); + } else { + temp2 = temps.AcquireW(); + __ Add(temp2, obj, offset); + Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor)); + temps.Release(temp2); + } } } else { // /* HeapReference<Object> */ ref = *(obj + offset) @@ -5312,10 +5353,8 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi method_offset = mirror::Class::EmbeddedVTableEntryOffset( instruction->GetIndex(), kArm64PointerSize).SizeValue(); } else { - __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)), - mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); - method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kArm64PointerSize)); + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value(); } __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)), method_offset)); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index e6fd336be7..d4bf695602 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -531,6 +531,17 @@ class CodeGeneratorARM64 : public CodeGenerator { Location index, vixl::Register temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. 
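The ARM and ARM64 hunks above fold the field, array and Unsafe cases into one GenerateReferenceLoadWithBakerReadBarrier by threading a scale factor through it: an array index is shifted by the reference size, while an Unsafe "index" is already a byte offset (scale 0) and, for the volatile intrinsic on ARM64, is read with acquire ordering. A rough host-side model of the two address forms (illustrative only, not the emitted code):

#include <atomic>
#include <cstdint>

using HeapRefSketch = uint32_t;  // 32-bit compressed reference, as the static_asserts above require

// ArrayGet-style load: ref = *(obj + data_offset + (index << scale_factor)).
HeapRefSketch LoadArrayElement(const char* obj, uint32_t data_offset,
                               uint64_t index, size_t scale_factor) {
  return *reinterpret_cast<const HeapRefSketch*>(obj + data_offset + (index << scale_factor));
}

// UnsafeGetObjectVolatile-style load: the "index" is a raw byte offset into
// the object (no scaling) and the load carries acquire semantics.
HeapRefSketch UnsafeGetObjectVolatileSketch(const char* obj, uint64_t byte_offset) {
  auto* slot = reinterpret_cast<const std::atomic<HeapRefSketch>*>(obj + byte_offset);
  return slot->load(std::memory_order_acquire);
}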
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::Register obj, + uint32_t offset, + Location index, + size_t scale_factor, + vixl::Register temp, + bool needs_null_check, + bool use_load_acquire); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -586,17 +597,6 @@ class CodeGeneratorARM64 : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction); private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - vixl::Register obj, - uint32_t offset, - Location index, - vixl::Register temp, - bool needs_null_check, - bool use_load_acquire); - using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>; using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::Literal<uint32_t>*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 738180670f..4d44c18dcf 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -3698,6 +3698,8 @@ void LocationsBuilderMIPS::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>(); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value(); Location receiver = invoke->GetLocations()->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsWordSize); @@ -3714,10 +3716,6 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); - __ LoadFromOffset(kLoadWord, temp, temp, - mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value()); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kMipsPointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); @@ -4730,7 +4728,6 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Primitive::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool fpu_32bit = codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint(); DCHECK_NE(input_type, result_type); @@ -4739,7 +4736,9 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register src = locations->InAt(0).AsRegister<Register>(); - __ Move(dst_low, src); + if (dst_low != src) { + __ Move(dst_low, src); + } __ Sra(dst_high, src, 31); } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { Register dst = locations->Out().AsRegister<Register>(); @@ -4768,7 +4767,9 @@ void 
InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi } break; case Primitive::kPrimInt: - __ Move(dst, src); + if (dst != src) { + __ Move(dst, src); + } break; default: @@ -4925,11 +4926,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi uint64_t min_val = bit_cast<uint64_t, double>(std::numeric_limits<int32_t>::min()); __ LoadConst32(TMP, High32Bits(min_val)); __ Mtc1(ZERO, FTMP); - if (fpu_32bit) { - __ Mtc1(TMP, static_cast<FRegister>(FTMP + 1)); - } else { - __ Mthc1(TMP, FTMP); - } + __ MoveToFpuHigh(TMP, FTMP); } if (isR6) { @@ -5168,12 +5165,8 @@ void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instructio method_offset = mirror::Class::EmbeddedVTableEntryOffset( instruction->GetIndex(), kMipsPointerSize).SizeValue(); } else { - __ LoadFromOffset(kLoadWord, - locations->Out().AsRegister<Register>(), - locations->InAt(0).AsRegister<Register>(), - mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value()); - method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kMipsPointerSize)); + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kMipsPointerSize).Uint32Value(); } __ LoadFromOffset(kLoadWord, locations->Out().AsRegister<Register>(), diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 558735110a..2e78884daf 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -2932,6 +2932,8 @@ void LocationsBuilderMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) { void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invoke) { // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kMips64PointerSize).Uint32Value(); Location receiver = invoke->GetLocations()->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64DoublewordSize); @@ -2948,10 +2950,6 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); - __ LoadFromOffset(kLoadDoubleword, temp, temp, - mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value()); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kMips64PointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); @@ -2983,19 +2981,6 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in } HandleInvoke(invoke); - - // While SetupBlockedRegisters() blocks registers S2-S8 due to their - // clobbering somewhere else, reduce further register pressure by avoiding - // allocation of a register for the current method pointer like on x86 baseline. - // TODO: remove this once all the issues with register saving/restoring are - // sorted out. 
- if (invoke->HasCurrentMethodInput()) { - LocationSummary* locations = invoke->GetLocations(); - Location location = locations->InAt(invoke->GetSpecialInputIndex()); - if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) { - locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation()); - } - } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 52868f4b2e..1261619536 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -448,7 +448,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -511,8 +513,12 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { Register reg_out = out_.AsRegister<Register>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -525,7 +531,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. Register index_reg = index_.AsRegister<Register>(); @@ -573,7 +579,11 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ AddImmediate(index_reg, Immediate(offset_)); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. 
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -2017,6 +2027,8 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) LocationSummary* locations = invoke->GetLocations(); Register temp = locations->GetTemp(0).AsRegister<Register>(); XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); Location receiver = locations->InAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -2043,12 +2055,7 @@ void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); - // temp = temp->GetAddressOfIMT() - __ movl(temp, - Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); // temp = temp->GetImtEntryAt(method_offset); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kX86PointerSize)); __ movl(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); __ call(Address(temp, @@ -4068,12 +4075,8 @@ void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction method_offset = mirror::Class::EmbeddedVTableEntryOffset( instruction->GetIndex(), kX86PointerSize).SizeValue(); } else { - __ movl(locations->InAt(0).AsRegister<Register>(), - Address(locations->InAt(0).AsRegister<Register>(), - mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value())); - // temp = temp->GetImtEntryAt(method_offset); - method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kX86PointerSize)); + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value(); } __ movl(locations->Out().AsRegister<Register>(), Address(locations->InAt(0).AsRegister<Register>(), method_offset)); @@ -6977,6 +6980,9 @@ void CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) Address src = index.IsConstant() ? diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index fb402bef00..2a9fb80995 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -491,6 +491,14 @@ class CodeGeneratorX86 : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + const Address& src, + Location temp, + bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` // using a slow path. 
@@ -561,15 +569,6 @@ class CodeGeneratorX86 : public CodeGenerator { static constexpr int32_t kDummy32BitOffset = 256; private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - Register obj, - const Address& src, - Location temp, - bool needs_null_check); - Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); struct PcRelativeDexCacheAccessInfo { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 9a3e8d266b..5e30203b38 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -469,7 +469,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode { instruction_->IsLoadClass() || instruction_->IsLoadString() || instruction_->IsInstanceOf() || - instruction_->IsCheckCast()) + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && + instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier marking slow path: " << instruction_->DebugName(); @@ -532,8 +534,12 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { CpuRegister reg_out = out_.AsRegister<CpuRegister>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_; - DCHECK(!instruction_->IsInvoke() || - (instruction_->IsInvokeStaticOrDirect() && + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + ((instruction_->IsInvokeStaticOrDirect() || instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -546,7 +552,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { - // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject. + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute real offset and store it in index_. Register index_reg = index_.AsRegister<CpuRegister>().AsRegister(); @@ -594,7 +600,11 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); __ AddImmediate(CpuRegister(index_reg), Immediate(offset_)); } else { - DCHECK(instruction_->IsInvoke()); + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. 
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) @@ -2247,6 +2257,8 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo LocationSummary* locations = invoke->GetLocations(); CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); + uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset( + invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); Location receiver = locations->InAt(0); size_t class_offset = mirror::Object::ClassOffset().SizeValue(); @@ -2272,12 +2284,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invo // intact/accessible until the end of the marking phase (the // concurrent copying collector may not in the future). __ MaybeUnpoisonHeapReference(temp); - // temp = temp->GetAddressOfIMT() - __ movq(temp, - Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); - // temp = temp->GetImtEntryAt(method_offset); - uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - invoke->GetImtIndex(), kX86_64PointerSize)); // temp = temp->GetImtEntryAt(method_offset); __ movq(temp, Address(temp, method_offset)); // call temp->GetEntryPoint(); @@ -4001,11 +4007,8 @@ void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruct method_offset = mirror::Class::EmbeddedVTableEntryOffset( instruction->GetIndex(), kX86_64PointerSize).SizeValue(); } else { - __ movq(locations->Out().AsRegister<CpuRegister>(), - Address(locations->InAt(0).AsRegister<CpuRegister>(), - mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); - method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( - instruction->GetIndex(), kX86_64PointerSize)); + method_offset = mirror::Class::EmbeddedImTableEntryOffset( + instruction->GetIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value(); } __ movq(locations->Out().AsRegister<CpuRegister>(), Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset)); @@ -6430,6 +6433,9 @@ void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) Address src = index.IsConstant() ? diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index cf4cc4c8d2..d7cfd37c33 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -433,6 +433,14 @@ class CodeGeneratorX86_64 : public CodeGenerator { Location index, Location temp, bool needs_null_check); + // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. 
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + CpuRegister obj, + const Address& src, + Location temp, + bool needs_null_check); // Generate a read barrier for a heap reference within `instruction` // using a slow path. @@ -535,15 +543,6 @@ class CodeGeneratorX86_64 : public CodeGenerator { static constexpr int32_t kDummy32BitOffset = 256; private: - // Factored implementation of GenerateFieldLoadWithBakerReadBarrier - // and GenerateArrayLoadWithBakerReadBarrier. - void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, - Location ref, - CpuRegister obj, - const Address& src, - Location temp, - bool needs_null_check); - struct PcRelativeDexCacheAccessInfo { PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) : target_dex_file(dex_file), element_offset(element_off), label() { } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index d5e80b4759..c67b2d5fe9 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -656,8 +656,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(HInvoke* invoke_instruction, } ArtMethod* new_method = nullptr; if (invoke_instruction->IsInvokeInterface()) { - new_method = ic.GetTypeAt(i)->GetImt(pointer_size)->Get( - method_index, pointer_size); + new_method = ic.GetTypeAt(i)->GetEmbeddedImTableEntry( + method_index % mirror::Class::kImtSize, pointer_size); if (new_method->IsRuntimeMethod()) { // Bail out as soon as we see a conflict trampoline in one of the target's // interface table. @@ -756,7 +756,15 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho invoke_instruction->ReplaceWith(return_replacement); } invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); - FixUpReturnReferenceType(invoke_instruction, method, return_replacement, do_rtp); + FixUpReturnReferenceType(method, return_replacement); + if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) { + // Actual return value has a more specific type than the method's declared + // return type. Run RTP again on the outer graph to propagate it. + ReferenceTypePropagation(graph_, + outer_compilation_unit_.GetDexCache(), + handles_, + /* is_first_run */ false).Run(); + } return true; } @@ -1159,6 +1167,15 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } } + // We have replaced formal arguments with actual arguments. If actual types + // are more specific than the declared ones, run RTP again on the inner graph. 
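The decision of whether another ReferenceTypePropagation pass is worth running comes down to the refinement test added in the inliner hunk that follows (IsReferenceTypeRefinement): an actual value is "more specific" if it is known non-null where null was allowed, exact where the declaration was not, or of a strict subtype of the declared class. A compact stand-alone restatement of that predicate, using toy type info rather than the compiler's ReferenceTypeInfo:

struct RefTypeInfoSketch {
  bool is_exact;    // the value's class is known exactly
  bool can_be_null;
  int type_depth;   // toy stand-in for a position in the class hierarchy
};

// Toy subtype check for the sketch: a deeper type is a strict subtype.
static bool IsStrictSubtype(const RefTypeInfoSketch& sub, const RefTypeInfoSketch& super) {
  return sub.type_depth > super.type_depth;
}

// Mirrors the intent of IsReferenceTypeRefinement below: another RTP run pays
// off only if the actual value tells us something the declared type did not.
bool IsRefinementSketch(const RefTypeInfoSketch& declared, const RefTypeInfoSketch& actual) {
  if (declared.can_be_null && !actual.can_be_null) {
    return true;
  }
  return (actual.is_exact && !declared.is_exact) || IsStrictSubtype(actual, declared);
}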
+ if (ArgumentTypesMoreSpecific(invoke_instruction, resolved_method)) { + ReferenceTypePropagation(callee_graph, + dex_compilation_unit.GetDexCache(), + handles_, + /* is_first_run */ false).Run(); + } + size_t number_of_instructions_budget = kMaximumNumberOfHInstructions; size_t number_of_inlined_instructions = RunOptimizations(callee_graph, code_item, dex_compilation_unit); @@ -1332,13 +1349,87 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, return number_of_inlined_instructions; } -void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction, - ArtMethod* resolved_method, - HInstruction* return_replacement, - bool do_rtp) { +static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, + bool declared_can_be_null, + HInstruction* actual_obj) + SHARED_REQUIRES(Locks::mutator_lock_) { + if (declared_can_be_null && !actual_obj->CanBeNull()) { + return true; + } + + ReferenceTypeInfo actual_rti = actual_obj->GetReferenceTypeInfo(); + return (actual_rti.IsExact() && !declared_rti.IsExact()) || + declared_rti.IsStrictSupertypeOf(actual_rti); +} + +ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) { + return ReferenceTypePropagation::IsAdmissible(klass) + ? ReferenceTypeInfo::Create(handles_->NewHandle(klass)) + : graph_->GetInexactObjectRti(); +} + +bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) { + // If this is an instance call, test whether the type of the `this` argument + // is more specific than the class which declares the method. + if (!resolved_method->IsStatic()) { + if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()), + /* declared_can_be_null */ false, + invoke_instruction->InputAt(0u))) { + return true; + } + } + + size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); + + // Iterate over the list of parameter types and test whether any of the + // actual inputs has a more specific reference type than the type declared in + // the signature. + const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList(); + for (size_t param_idx = 0, + input_idx = resolved_method->IsStatic() ? 0 : 1, + e = (param_list == nullptr ? 0 : param_list->Size()); + param_idx < e; + ++param_idx, ++input_idx) { + HInstruction* input = invoke_instruction->InputAt(input_idx); + if (input->GetType() == Primitive::kPrimNot) { + mirror::Class* param_cls = resolved_method->GetDexCacheResolvedType( + param_list->GetTypeItem(param_idx).type_idx_, + pointer_size); + if (IsReferenceTypeRefinement(GetClassRTI(param_cls), + /* declared_can_be_null */ true, + input)) { + return true; + } + } + } + + return false; +} + +bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, + HInstruction* return_replacement) { // Check the integrity of reference types and run another type propagation if needed. if (return_replacement != nullptr) { if (return_replacement->GetType() == Primitive::kPrimNot) { + // Test if the return type is a refinement of the declared return type. + if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(), + /* declared_can_be_null */ true, + return_replacement)) { + return true; + } + } else if (return_replacement->IsInstanceOf()) { + // Inlining InstanceOf into an If may put a tighter bound on reference types. 
+ return true; + } + } + + return false; +} + +void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method, + HInstruction* return_replacement) { + if (return_replacement != nullptr) { + if (return_replacement->GetType() == Primitive::kPrimNot) { if (!return_replacement->GetReferenceTypeInfo().IsValid()) { // Make sure that we have a valid type for the return. We may get an invalid one when // we inline invokes with multiple branches and create a Phi for the result. @@ -1347,36 +1438,7 @@ void HInliner::FixUpReturnReferenceType(HInvoke* invoke_instruction, DCHECK(return_replacement->IsPhi()); size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize(); mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */, pointer_size); - if (cls != nullptr && !cls->IsErroneous()) { - ReferenceTypeInfo::TypeHandle return_handle = handles_->NewHandle(cls); - return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create( - return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */)); - } else { - // Return inexact object type on failures. - return_replacement->SetReferenceTypeInfo(graph_->GetInexactObjectRti()); - } - } - - if (do_rtp) { - // If the return type is a refinement of the declared type run the type propagation again. - ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo(); - ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo(); - if (invoke_rti.IsStrictSupertypeOf(return_rti) - || (return_rti.IsExact() && !invoke_rti.IsExact()) - || !return_replacement->CanBeNull()) { - ReferenceTypePropagation(graph_, - outer_compilation_unit_.GetDexCache(), - handles_, - /* is_first_run */ false).Run(); - } - } - } else if (return_replacement->IsInstanceOf()) { - if (do_rtp) { - // Inlining InstanceOf into an If may put a tighter bound on reference types. - ReferenceTypePropagation(graph_, - outer_compilation_unit_.GetDexCache(), - handles_, - /* is_first_run */ false).Run(); + return_replacement->SetReferenceTypeInfo(GetClassRTI(cls)); } } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 7cf1424b6d..02d3a5f499 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -124,10 +124,18 @@ class HInliner : public HOptimization { uint32_t dex_pc) const SHARED_REQUIRES(Locks::mutator_lock_); - void FixUpReturnReferenceType(HInvoke* invoke_instruction, - ArtMethod* resolved_method, - HInstruction* return_replacement, - bool do_rtp) + void FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement) + SHARED_REQUIRES(Locks::mutator_lock_); + + // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is + // admissible (see ReferenceTypePropagation::IsAdmissible for details). + // Otherwise returns inexact Object RTI. + ReferenceTypeInfo GetClassRTI(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_); + + bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) + SHARED_REQUIRES(Locks::mutator_lock_); + + bool ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement) SHARED_REQUIRES(Locks::mutator_lock_); // Add a type guard on the given `receiver`. 
This will add to the graph: diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index afac5f9cf1..b4125299ea 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -16,7 +16,6 @@ #include "instruction_builder.h" -#include "art_method-inl.h" #include "bytecode_utils.h" #include "class_linker.h" #include "driver/compiler_options.h" @@ -891,7 +890,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, return_type, dex_pc, method_idx, - resolved_method->GetImtIndex()); + resolved_method->GetDexMethodIndex()); } return HandleInvoke(invoke, diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 62d637081d..3041c4d2c7 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -236,22 +236,40 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); - HConstant* input_cst = instruction->GetConstantRight(); - HInstruction* input_other = instruction->GetLeastConstantLeft(); + HInstruction* shift_amount = instruction->GetRight(); + HInstruction* value = instruction->GetLeft(); - if (input_cst != nullptr) { - int64_t cst = Int64FromConstant(input_cst); - int64_t mask = (input_other->GetType() == Primitive::kPrimLong) - ? kMaxLongShiftDistance - : kMaxIntShiftDistance; - if ((cst & mask) == 0) { + int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong) + ? kMaxLongShiftDistance + : kMaxIntShiftDistance; + + if (shift_amount->IsConstant()) { + int64_t cst = Int64FromConstant(shift_amount->AsConstant()); + if ((cst & implicit_mask) == 0) { // Replace code looking like - // SHL dst, src, 0 + // SHL dst, value, 0 // with - // src - instruction->ReplaceWith(input_other); + // value + instruction->ReplaceWith(value); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); + return; + } + } + + // Shift operations implicitly mask the shift amount according to the type width. Get rid of + // unnecessary explicit masking operations on the shift amount. + // Replace code looking like + // AND masked_shift, shift, <superset of implicit mask> + // SHL dst, value, masked_shift + // with + // SHL dst, value, shift + if (shift_amount->IsAnd()) { + HAnd* and_insn = shift_amount->AsAnd(); + HConstant* mask = and_insn->GetConstantRight(); + if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) { + instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1); + RecordSimplification(); } } } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 93950d58b5..19629b171f 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -47,19 +47,6 @@ bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathARM slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect, - // ReadBarrierSlowPathARM for HInvokeVirtual). 
So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -524,8 +511,8 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false); if (is_volatile) { __ dmb(ISH); } @@ -581,10 +568,11 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); } } @@ -919,9 +907,10 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS below). + // implemented (see TODO in GenCAS). // - // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers. + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. if (kEmitCompilerReadBarrier) { return; } @@ -932,6 +921,15 @@ void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); } void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); } @@ -1335,6 +1333,12 @@ void IntrinsicCodeGeneratorARM::VisitStringNewStringFromString(HInvoke* invoke) } void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + if (kEmitCompilerReadBarrier) { + return; + } + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); LocationSummary* locations = invoke->GetLocations(); if (locations == nullptr) { @@ -1419,11 +1423,11 @@ static void CheckPosition(ArmAssembler* assembler, } } -// TODO: Implement read barriers in the SystemArrayCopy intrinsic. -// Note that this code path is not used (yet) because we do not -// intrinsify methods that can go into the IntrinsicSlowPathARM -// slow path. 
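Illustrative sketch (an editorial aside, not part of the change): the guard pattern used for UnsafeCASObject and VisitSystemArrayCopy above. The locations builder declines to intrinsify when read barriers are enabled, so no LocationSummary is created and the invoke stays on its regular (runtime) path; the matching code generator can then DCHECK that it never runs in that configuration. The intrinsic name and the free-function form below are hypothetical; LocationSummary, kEmitCompilerReadBarrier and kIntrinsified are the entities already used elsewhere in this diff.

static void BuildLocationsForHypotheticalIntrinsic(ArenaAllocator* arena, HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // No LocationSummary: the invoke keeps its default (non-intrinsic) implementation.
    return;
  }
  LocationSummary* locations =
      new (arena) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void GenerateHypotheticalIntrinsic(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);  // Guaranteed by the locations builder above.
  // ... emit the intrinsic code using invoke->GetLocations() ...
}

Relying on per-intrinsic guards like this, rather than a check in TryDispatch, keeps the read-barrier restriction local to the few intrinsics that still need it, which is why the blanket CanCall() bail-out could be removed.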
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + DCHECK(!kEmitCompilerReadBarrier); + ArmAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1972,6 +1976,50 @@ void IntrinsicCodeGeneratorARM::VisitShortReverseBytes(HInvoke* invoke) { __ revsh(out, in); } +static void GenBitCount(HInvoke* instr, bool is64bit, ArmAssembler* assembler) { + DCHECK(instr->GetType() == Primitive::kPrimInt); + DCHECK((is64bit && instr->InputAt(0)->GetType() == Primitive::kPrimLong) || + (!is64bit && instr->InputAt(0)->GetType() == Primitive::kPrimInt)); + + LocationSummary* locations = instr->GetLocations(); + Location in = locations->InAt(0); + Register src_0 = is64bit ? in.AsRegisterPairLow<Register>() : in.AsRegister<Register>(); + Register src_1 = is64bit ? in.AsRegisterPairHigh<Register>() : src_0; + SRegister tmp_s = locations->GetTemp(0).AsFpuRegisterPairLow<SRegister>(); + DRegister tmp_d = FromLowSToD(tmp_s); + Register out_r = locations->Out().AsRegister<Register>(); + + // Move data from core register(s) to temp D-reg for bit count calculation, then move back. + // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg, + // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency, + // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'. + __ vmovdrr(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0| + __ vcntd(tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c| + __ vpaddld(tmp_d, tmp_d, 8, /* is_unsigned */ true); // Temp DReg |--c|--c|--c|--c| + __ vpaddld(tmp_d, tmp_d, 16, /* is_unsigned */ true); // Temp DReg |------c|------c| + if (is64bit) { + __ vpaddld(tmp_d, tmp_d, 32, /* is_unsigned */ true); // Temp DReg |--------------c| + } + __ vmovrs(out_r, tmp_s); +} + +void IntrinsicLocationsBuilderARM::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); + invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(invoke, /* is64bit */ false, GetAssembler()); +} + +void IntrinsicLocationsBuilderARM::VisitLongBitCount(HInvoke* invoke) { + VisitIntegerBitCount(invoke); +} + +void IntrinsicCodeGeneratorARM::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(invoke, /* is64bit */ true, GetAssembler()); +} + void IntrinsicLocationsBuilderARM::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, @@ -2112,8 +2160,6 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { __ Lsr(out, out, 5); } -UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount) -UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount) UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat) UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4da0843a76..1685cf9c3c 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -149,19 +149,6 @@ bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // 
IntrinsicSlowPathARM64 slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect, - // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -791,8 +778,15 @@ static void GenUnsafeGet(HInvoke* invoke, // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, trg_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + /* offset */ 0U, + /* index */ offset_loc, + /* scale_factor */ 0U, + temp, + /* needs_null_check */ false, + is_volatile); } else { // Other cases. MemOperand mem_op(base.X(), offset); @@ -821,7 +815,8 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { @@ -1102,9 +1097,10 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented (see TODO in GenCAS below). + // implemented (see TODO in GenCAS). // - // TODO(rpl): Fix this issue and re-enable this intrinsic with read barriers. + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. if (kEmitCompilerReadBarrier) { return; } @@ -1119,6 +1115,15 @@ void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); + GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); } @@ -2012,6 +2017,12 @@ static constexpr int32_t kSystemArrayCopyThreshold = 128; // We want to use two temporary registers in order to reduce the register pressure in arm64. // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + if (kEmitCompilerReadBarrier) { + return; + } + // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. 
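An editorial note on the can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap changes above (not part of the change): when the intrinsic can take a slow path, here the Baker read-barrier marking path, the output's liveness overlaps the inputs', because the inputs may still be needed once the output register has been written; the register allocator must therefore not reuse an input register for the output. The locations.h hunk further down in this diff documents exactly this distinction. Below is a minimal sketch of a builder making that choice; the name is hypothetical and LocationSummary::kCallOnSlowPath is assumed from the surrounding code-generator API, while the other calls mirror the CreateIntIntIntToIntLocations helpers in this diff.

static void CreateHypotheticalGetLocations(ArenaAllocator* arena, HInvoke* invoke, bool can_call) {
  LocationSummary* locations = new (arena) LocationSummary(
      invoke,
      can_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());  // Object.
  locations->SetInAt(2, Location::RequiresRegister());  // Offset.
  locations->SetOut(Location::RequiresRegister(),
                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}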
HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); @@ -2064,6 +2075,10 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + DCHECK(!kEmitCompilerReadBarrier); + vixl::MacroAssembler* masm = GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 4988398c92..031cd1313c 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -60,19 +60,6 @@ bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathX86 slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect, - // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -1822,8 +1809,9 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + Address src(base, offset, ScaleFactor::TIMES_1, 0); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, output_loc, base, src, temp, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1878,16 +1866,17 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, if (is_volatile) { // Need to use XMM to read volatile. locations->AddTemp(Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } else { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } } else { - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap); } if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier. + // path in InstructionCodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); } } @@ -2109,9 +2098,9 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented. + // implemented (see TODO in GenCAS). // - // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // TODO(rpl): Implement read barrier support in GenCAS and re-enable // this intrinsic. 
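For the VisitShift change in instruction_simplifier.cc earlier in this diff, a self-contained illustration of the arithmetic it relies on (editorial, not part of the change): dex-level shifts, and therefore HShl/HShr/HUShr, use only the low 5 bits of a 32-bit shift amount (low 6 bits for 64-bit values), so an explicit AND on the shift amount is redundant whenever its mask covers that implicit mask.

#include <cassert>
#include <cstdint>

// Models the dex semantics of a 32-bit SHL: the shift amount is implicitly
// masked to its low 5 bits.
static uint32_t Shl32(uint32_t value, int32_t shift_amount) {
  return value << (shift_amount & 0x1f);
}

int main() {
  for (int32_t shift = 0; shift < 256; ++shift) {
    // 0xff is a superset of the implicit 0x1f mask, so the explicit AND
    // changes nothing and the simplifier may drop it.
    assert(Shl32(0x12345678u, shift & 0xff) == Shl32(0x12345678u, shift));
  }
  return 0;
}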
if (kEmitCompilerReadBarrier) { return; @@ -2236,6 +2225,15 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); + GenCAS(Primitive::kPrimNot, invoke, codegen_); } diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 593c8f319b..c5b44d4f5c 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -54,19 +54,6 @@ bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { if (res == nullptr) { return false; } - if (kEmitCompilerReadBarrier && res->CanCall()) { - // Generating an intrinsic for this HInvoke may produce an - // IntrinsicSlowPathX86_64 slow path. Currently this approach - // does not work when using read barriers, as the emitted - // calling sequence will make use of another slow path - // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect, - // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail - // out in this case. - // - // TODO: Find a way to have intrinsics work with read barriers. - invoke->SetLocations(nullptr); - return false; - } return res->Intrinsified(); } @@ -1079,14 +1066,20 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + if (kEmitCompilerReadBarrier) { + return; + } + CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke); } -// TODO: Implement read barriers in the SystemArrayCopy intrinsic. -// Note that this code path is not used (yet) because we do not -// intrinsify methods that can go into the IntrinsicSlowPathX86_64 -// slow path. void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { + // TODO(rpl): Implement read barriers in the SystemArrayCopy + // intrinsic and re-enable it (b/29516905). + DCHECK(!kEmitCompilerReadBarrier); + X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1910,8 +1903,9 @@ static void GenUnsafeGet(HInvoke* invoke, if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); - codegen->GenerateArrayLoadWithBakerReadBarrier( - invoke, output_loc, base, 0U, offset_loc, temp, /* needs_null_check */ false); + Address src(base, offset, ScaleFactor::TIMES_1, 0); + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, output_loc, base, src, temp, /* needs_null_check */ false); } else { __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); codegen->GenerateReadBarrierSlow( @@ -1948,10 +1942,11 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), + can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow - // path in InstructionCodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier. + // path in InstructionCodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); } } @@ -2135,9 +2130,9 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { // The UnsafeCASObject intrinsic is missing a read barrier, and // therefore sometimes does not work as expected (b/25883050). // Turn it off temporarily as a quick fix, until the read barrier is - // implemented. + // implemented (see TODO in GenCAS). // - // TODO(rpl): Implement a read barrier in GenCAS below and re-enable + // TODO(rpl): Implement read barrier support in GenCAS and re-enable // this intrinsic. if (kEmitCompilerReadBarrier) { return; @@ -2253,6 +2248,15 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { + // The UnsafeCASObject intrinsic is missing a read barrier, and + // therefore sometimes does not work as expected (b/25883050). + // Turn it off temporarily as a quick fix, until the read barrier is + // implemented (see TODO in GenCAS). + // + // TODO(rpl): Implement read barrier support in GenCAS and re-enable + // this intrinsic. + DCHECK(!kEmitCompilerReadBarrier); + GenCAS(Primitive::kPrimNot, invoke, codegen_); } @@ -2441,7 +2445,7 @@ static void GenOneBit(X86_64Assembler* assembler, : CTZ(static_cast<uint32_t>(value)); } if (is_long) { - codegen->Load64BitValue(out, 1L << value); + codegen->Load64BitValue(out, 1ULL << value); } else { codegen->Load32BitValue(out, 1 << value); } diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 8a75a90cfd..7347686830 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -65,6 +65,16 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { is_singleton_and_not_returned_ = false; return; } + if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) || + (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) { + // The field is accessed in an unresolved way. We mark the object as a singleton to + // disable load/store optimizations on it. + // Note that we could optimize this case and still perform some optimizations until + // we hit the unresolved access, but disabling is the simplest. + is_singleton_ = false; + is_singleton_and_not_returned_ = false; + return; + } if (user->IsReturn()) { is_singleton_and_not_returned_ = false; } diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 63bbc2cd0a..3f27c911be 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -38,7 +38,13 @@ std::ostream& operator<<(std::ostream& os, const Location& location); class Location : public ValueObject { public: enum OutputOverlap { + // The liveness of the output overlaps the liveness of one or + // several input(s); the register allocator cannot reuse an + // input's location for the output's location. kOutputOverlap, + // The liveness of the output does not overlap the liveness of any + // input; the register allocator is allowed to reuse an input's + // location for the output's location. 
kNoOutputOverlap }; @@ -494,6 +500,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { return inputs_.size(); } + // Set the output location. Argument `overlaps` tells whether the + // output overlaps any of the inputs (if so, it cannot share the + // same register as one of the inputs); it is set to + // `Location::kOutputOverlap` by default for safety. void SetOut(Location location, Location::OutputOverlap overlaps = Location::kOutputOverlap) { DCHECK(output_.IsInvalid()); output_overlaps_ = overlaps; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 455f4e338d..6b2c33e668 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -172,6 +172,10 @@ class ReferenceTypeInfo : ValueObject { static ReferenceTypeInfo Create(TypeHandle type_handle, bool is_exact); + static ReferenceTypeInfo Create(TypeHandle type_handle) SHARED_REQUIRES(Locks::mutator_lock_) { + return Create(type_handle, type_handle->CannotBeAssignedFromOtherTypes()); + } + static ReferenceTypeInfo CreateUnchecked(TypeHandle type_handle, bool is_exact) { return ReferenceTypeInfo(type_handle, is_exact); } @@ -5025,7 +5029,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> { } bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { - return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; + return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } size_t ComputeHashCode() const OVERRIDE { @@ -5072,7 +5076,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { } bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { - return (obj == InputAt(0)) && GetFieldOffset().Uint32Value() < kPageSize; + return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } const FieldInfo& GetFieldInfo() const { return field_info_; } diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 764160adce..05eb06333e 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -32,21 +32,21 @@ static constexpr uint8_t expected_cfi_kThumb2[] = { // 0x00000012: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kArm64[] = { - 0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0xD7, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9, - 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0xD7, 0x42, 0xA9, - 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, + 0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9, + 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9, + 0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6, }; static constexpr uint8_t expected_cfi_kArm64[] = { - 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x95, 0x04, 0x44, 0x9E, 0x02, 0x44, + 0x44, 0x0E, 0x40, 0x44, 0x94, 0x06, 0x44, 0x95, 0x04, 0x9E, 0x02, 0x44, 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49, - 0x44, 0xD4, 0xD5, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, + 0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; // 0x00000000: str x0, [sp, #-64]! 
// 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: stp x20, x21, [sp, #40] +// 0x00000004: str x20, [sp, #40] // 0x00000008: .cfi_offset: r20 at cfa-24 -// 0x00000008: .cfi_offset: r21 at cfa-16 -// 0x00000008: str lr, [sp, #56] +// 0x00000008: stp x21, lr, [sp, #48] +// 0x0000000c: .cfi_offset: r21 at cfa-16 // 0x0000000c: .cfi_offset: r30 at cfa-8 // 0x0000000c: stp d8, d9, [sp, #24] // 0x00000010: .cfi_offset_extended: r72 at cfa-40 @@ -55,10 +55,10 @@ static constexpr uint8_t expected_cfi_kArm64[] = { // 0x00000010: ldp d8, d9, [sp, #24] // 0x00000014: .cfi_restore_extended: r72 // 0x00000014: .cfi_restore_extended: r73 -// 0x00000014: ldp x20, x21, [sp, #40] +// 0x00000014: ldr x20, [sp, #40] // 0x00000018: .cfi_restore: r20 -// 0x00000018: .cfi_restore: r21 -// 0x00000018: ldr lr, [sp, #56] +// 0x00000018: ldp x21, lr, [sp, #48] +// 0x0000001c: .cfi_restore: r21 // 0x0000001c: .cfi_restore: r30 // 0x0000001c: add sp, sp, #0x40 (64) // 0x00000020: .cfi_def_cfa_offset: 0 diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 3e6adcb172..3dfd7282cd 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -46,13 +46,6 @@ static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollec return *cache; } -// Returns true if klass is admissible to the propagation: non-null and resolved. -// For an array type, we also check if the component type is admissible. -static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { - return klass != nullptr && klass->IsResolved() && - (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType())); -} - ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() { return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_); } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 2106be6b53..edd83bf5de 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -42,6 +42,14 @@ class ReferenceTypePropagation : public HOptimization { void Run() OVERRIDE; + // Returns true if klass is admissible to the propagation: non-null and resolved. + // For an array type, we also check if the component type is admissible. 
+ static bool IsAdmissible(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) { + return klass != nullptr && + klass->IsResolved() && + (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType())); + } + static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; private: diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 274d0de166..a571d14a71 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -671,6 +671,9 @@ class ArmAssembler : public Assembler { virtual void vcmpdz(DRegister dd, Condition cond = AL) = 0; virtual void vmstat(Condition cond = AL) = 0; // VMRS APSR_nzcv, FPSCR + virtual void vcntd(DRegister dd, DRegister dm) = 0; + virtual void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) = 0; + virtual void vpushs(SRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpushd(DRegister reg, int nregs, Condition cond = AL) = 0; virtual void vpops(SRegister reg, int nregs, Condition cond = AL) = 0; diff --git a/compiler/utils/arm/assembler_arm32.cc b/compiler/utils/arm/assembler_arm32.cc index 0a227b21cd..6f7119d578 100644 --- a/compiler/utils/arm/assembler_arm32.cc +++ b/compiler/utils/arm/assembler_arm32.cc @@ -1264,6 +1264,31 @@ void Arm32Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR Emit(encoding); } +void Arm32Assembler::vcntd(DRegister dd, DRegister dm) { + uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) | + ((static_cast<int32_t>(dd) >> 4) * B22) | + ((static_cast<uint32_t>(dd) & 0xf) * B12) | + (B10 | B8) | + ((static_cast<int32_t>(dm) >> 4) * B5) | + (static_cast<uint32_t>(dm) & 0xf); + + Emit(encoding); +} + +void Arm32Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) { + CHECK(size == 8 || size == 16 || size == 32) << size; + uint32_t encoding = (B31 | B30 | B29 | B28 | B25 | B24 | B23 | B21 | B20) | + ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) | + ((static_cast<int32_t>(dd) >> 4) * B22) | + ((static_cast<uint32_t>(dd) & 0xf) * B12) | + (B9) | + (is_unsigned ? 
B7 : 0) | + ((static_cast<int32_t>(dm) >> 4) * B5) | + (static_cast<uint32_t>(dm) & 0xf); + + Emit(encoding); +} + void Arm32Assembler::svc(uint32_t imm24) { CHECK(IsUint<24>(imm24)) << imm24; diff --git a/compiler/utils/arm/assembler_arm32.h b/compiler/utils/arm/assembler_arm32.h index bc6020e008..8726ac85fd 100644 --- a/compiler/utils/arm/assembler_arm32.h +++ b/compiler/utils/arm/assembler_arm32.h @@ -205,6 +205,9 @@ class Arm32Assembler FINAL : public ArmAssembler { void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE; void vmstat(Condition cond = AL) OVERRIDE; // VMRS APSR_nzcv, FPSCR + void vcntd(DRegister dd, DRegister dm) OVERRIDE; + void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE; + void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_arm32_test.cc b/compiler/utils/arm/assembler_arm32_test.cc index e570e22fca..b214062e18 100644 --- a/compiler/utils/arm/assembler_arm32_test.cc +++ b/compiler/utils/arm/assembler_arm32_test.cc @@ -899,4 +899,43 @@ TEST_F(AssemblerArm32Test, revsh) { T3Helper(&arm::Arm32Assembler::revsh, true, "revsh{cond} {reg1}, {reg2}", "revsh"); } +TEST_F(AssemblerArm32Test, vcnt) { + // Different D register numbers are used here, to test register encoding. + // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + GetAssembler()->vcntd(arm::D0, arm::D1); + GetAssembler()->vcntd(arm::D19, arm::D20); + GetAssembler()->vcntd(arm::D0, arm::D9); + GetAssembler()->vcntd(arm::D16, arm::D20); + + std::string expected = + "vcnt.8 d0, d1\n" + "vcnt.8 d19, d20\n" + "vcnt.8 d0, d9\n" + "vcnt.8 d16, d20\n"; + + DriverStr(expected, "vcnt"); +} + +TEST_F(AssemblerArm32Test, vpaddl) { + // Different D register numbers are used here, to test register encoding. + // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + // Different data types (signed and unsigned) are also tested. + GetAssembler()->vpaddld(arm::D0, arm::D0, 8, true); + GetAssembler()->vpaddld(arm::D20, arm::D20, 8, false); + GetAssembler()->vpaddld(arm::D0, arm::D20, 16, false); + GetAssembler()->vpaddld(arm::D20, arm::D0, 32, true); + + std::string expected = + "vpaddl.u8 d0, d0\n" + "vpaddl.s8 d20, d20\n" + "vpaddl.s16 d0, d20\n" + "vpaddl.u32 d20, d0\n"; + + DriverStr(expected, "vpaddl"); +} + } // namespace art diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 546dd653af..a72ea410ce 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -3117,6 +3117,30 @@ void Thumb2Assembler::vmstat(Condition cond) { // VMRS APSR_nzcv, FPSCR. 
Emit32(encoding); } +void Thumb2Assembler::vcntd(DRegister dd, DRegister dm) { + uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) | + ((static_cast<int32_t>(dd) >> 4) * B22) | + ((static_cast<uint32_t>(dd) & 0xf) * B12) | + (B10 | B8) | + ((static_cast<int32_t>(dm) >> 4) * B5) | + (static_cast<uint32_t>(dm) & 0xf); + + Emit32(encoding); +} + +void Thumb2Assembler::vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) { + CHECK(size == 8 || size == 16 || size == 32) << size; + uint32_t encoding = (B31 | B30 | B29 | B28 | B27 | B26 | B25 | B24 | B23 | B21 | B20) | + ((static_cast<uint32_t>(size >> 4) & 0x3) * B18) | + ((static_cast<int32_t>(dd) >> 4) * B22) | + ((static_cast<uint32_t>(dd) & 0xf) * B12) | + (B9) | + (is_unsigned ? B7 : 0) | + ((static_cast<int32_t>(dm) >> 4) * B5) | + (static_cast<uint32_t>(dm) & 0xf); + + Emit32(encoding); +} void Thumb2Assembler::svc(uint32_t imm8) { CHECK(IsUint<8>(imm8)) << imm8; diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index ce310a4da8..2ca74fc863 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -250,6 +250,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { void vcmpdz(DRegister dd, Condition cond = AL) OVERRIDE; void vmstat(Condition cond = AL) OVERRIDE; // VMRS APSR_nzcv, FPSCR + void vcntd(DRegister dd, DRegister dm) OVERRIDE; + void vpaddld(DRegister dd, DRegister dm, int32_t size, bool is_unsigned) OVERRIDE; + void vpushs(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpushd(DRegister reg, int nregs, Condition cond = AL) OVERRIDE; void vpops(SRegister reg, int nregs, Condition cond = AL) OVERRIDE; diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc index b5cafcbf66..7f1dc49734 100644 --- a/compiler/utils/arm/assembler_thumb2_test.cc +++ b/compiler/utils/arm/assembler_thumb2_test.cc @@ -1380,4 +1380,43 @@ TEST_F(AssemblerThumb2Test, revsh) { DriverStr(expected, "revsh"); } +TEST_F(AssemblerThumb2Test, vcnt) { + // Different D register numbers are used here, to test register encoding. + // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + __ vcntd(arm::D0, arm::D1); + __ vcntd(arm::D19, arm::D20); + __ vcntd(arm::D0, arm::D9); + __ vcntd(arm::D16, arm::D20); + + std::string expected = + "vcnt.8 d0, d1\n" + "vcnt.8 d19, d20\n" + "vcnt.8 d0, d9\n" + "vcnt.8 d16, d20\n"; + + DriverStr(expected, "vcnt"); +} + +TEST_F(AssemblerThumb2Test, vpaddl) { + // Different D register numbers are used here, to test register encoding. + // Source register number is encoded as M:Vm, destination register number is encoded as D:Vd, + // For source and destination registers which use D0..D15, the M bit and D bit should be 0. + // For source and destination registers which use D16..D32, the M bit and D bit should be 1. + // Different data types (signed and unsigned) are also tested. 
+ __ vpaddld(arm::D0, arm::D0, 8, true); + __ vpaddld(arm::D20, arm::D20, 8, false); + __ vpaddld(arm::D0, arm::D20, 16, false); + __ vpaddld(arm::D20, arm::D0, 32, true); + + std::string expected = + "vpaddl.u8 d0, d0\n" + "vpaddl.s8 d20, d20\n" + "vpaddl.s16 d0, d20\n" + "vpaddl.u32 d20, d0\n"; + + DriverStr(expected, "vpaddl"); +} + } // namespace art diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 1842f00ff6..54ed62bef3 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -648,6 +648,15 @@ static inline dwarf::Reg DWARFReg(CPURegister reg) { void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) { int size = registers.RegisterSizeInBytes(); const Register sp = vixl_masm_->StackPointer(); + // Since we are operating on register pairs, we would like to align on + // double the standard size; on the other hand, we don't want to insert + // an extra store, which will happen if the number of registers is even. + if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) { + const CPURegister& dst0 = registers.PopLowestIndex(); + ___ Str(dst0, MemOperand(sp, offset)); + cfi_.RelOffset(DWARFReg(dst0), offset); + offset += size; + } while (registers.Count() >= 2) { const CPURegister& dst0 = registers.PopLowestIndex(); const CPURegister& dst1 = registers.PopLowestIndex(); @@ -667,6 +676,13 @@ void Arm64Assembler::SpillRegisters(vixl::CPURegList registers, int offset) { void Arm64Assembler::UnspillRegisters(vixl::CPURegList registers, int offset) { int size = registers.RegisterSizeInBytes(); const Register sp = vixl_masm_->StackPointer(); + // Be consistent with the logic for spilling registers. + if (!IsAlignedParam(offset, 2 * size) && registers.Count() % 2 != 0) { + const CPURegister& dst0 = registers.PopLowestIndex(); + ___ Ldr(dst0, MemOperand(sp, offset)); + cfi_.Restore(DWARFReg(dst0)); + offset += size; + } while (registers.Count() >= 2) { const CPURegister& dst0 = registers.PopLowestIndex(); const CPURegister& dst1 = registers.PopLowestIndex(); diff --git a/compiler/utils/string_reference.h b/compiler/utils/string_reference.h index 9e1058ea4d..e4c34ca605 100644 --- a/compiler/utils/string_reference.h +++ b/compiler/utils/string_reference.h @@ -20,16 +20,19 @@ #include <stdint.h> #include "base/logging.h" +#include "dex_file-inl.h" #include "utf-inl.h" namespace art { -class DexFile; - // A string is located by its DexFile and the string_ids_ table index into that DexFile. struct StringReference { StringReference(const DexFile* file, uint32_t index) : dex_file(file), string_index(index) { } + const char* GetStringData() const { + return dex_file->GetStringData(dex_file->GetStringId(string_index)); + } + const DexFile* dex_file; uint32_t string_index; }; @@ -46,15 +49,13 @@ struct StringReferenceValueComparator { // Use the string order enforced by the dex file verifier. DCHECK_EQ( sr1.string_index < sr2.string_index, - CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues( - sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)), - sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0); + CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(), + sr2.GetStringData()) < 0); return sr1.string_index < sr2.string_index; } else { // Cannot compare indexes, so do the string comparison. 
- return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues( - sr1.dex_file->GetStringData(sr1.dex_file->GetStringId(sr1.string_index)), - sr1.dex_file->GetStringData(sr2.dex_file->GetStringId(sr2.string_index))) < 0; + return CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(sr1.GetStringData(), + sr2.GetStringData()) < 0; } } }; diff --git a/compiler/utils/string_reference_test.cc b/compiler/utils/string_reference_test.cc new file mode 100644 index 0000000000..df5080e93e --- /dev/null +++ b/compiler/utils/string_reference_test.cc @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "utils/string_reference.h" + +#include <memory> + +#include "gtest/gtest.h" +#include "utils/test_dex_file_builder.h" + +namespace art { + +TEST(StringReference, ValueComparator) { + // This is a regression test for the StringReferenceValueComparator using the wrong + // dex file to get the string data from a StringId. We construct two dex files with + // just a single string with the same length but different value. This creates dex + // files that have the same layout, so the byte offset read from the StringId in one + // dex file, when used in the other dex file still points to valid string data, except + // that it's the wrong string. Without the fix the strings would then compare equal. + TestDexFileBuilder builder1; + builder1.AddString("String1"); + std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1"); + ASSERT_EQ(1u, dex_file1->NumStringIds()); + ASSERT_STREQ("String1", dex_file1->GetStringData(dex_file1->GetStringId(0))); + StringReference sr1(dex_file1.get(), 0); + + TestDexFileBuilder builder2; + builder2.AddString("String2"); + std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 2"); + ASSERT_EQ(1u, dex_file2->NumStringIds()); + ASSERT_STREQ("String2", dex_file2->GetStringData(dex_file2->GetStringId(0))); + StringReference sr2(dex_file2.get(), 0); + + StringReferenceValueComparator cmp; + EXPECT_TRUE(cmp(sr1, sr2)); // "String1" < "String2" is true. + EXPECT_FALSE(cmp(sr2, sr1)); // "String2" < "String1" is false. 
+} + +TEST(StringReference, ValueComparator2) { + const char* const kDexFile1Strings[] = { + "", + "abc", + "abcxyz", + }; + const char* const kDexFile2Strings[] = { + "a", + "abc", + "abcdef", + "def", + }; + const bool expectedCmp12[arraysize(kDexFile1Strings)][arraysize(kDexFile2Strings)] = { + { true, true, true, true }, + { false, false, true, true }, + { false, false, false, true }, + }; + const bool expectedCmp21[arraysize(kDexFile2Strings)][arraysize(kDexFile1Strings)] = { + { false, true, true }, + { false, false, true }, + { false, false, true }, + { false, false, false }, + }; + + TestDexFileBuilder builder1; + for (const char* s : kDexFile1Strings) { + builder1.AddString(s); + } + std::unique_ptr<const DexFile> dex_file1 = builder1.Build("dummy location 1"); + ASSERT_EQ(arraysize(kDexFile1Strings), dex_file1->NumStringIds()); + for (size_t index = 0; index != arraysize(kDexFile1Strings); ++index) { + ASSERT_STREQ(kDexFile1Strings[index], dex_file1->GetStringData(dex_file1->GetStringId(index))); + } + + TestDexFileBuilder builder2; + for (const char* s : kDexFile2Strings) { + builder2.AddString(s); + } + std::unique_ptr<const DexFile> dex_file2 = builder2.Build("dummy location 1"); + ASSERT_EQ(arraysize(kDexFile2Strings), dex_file2->NumStringIds()); + for (size_t index = 0; index != arraysize(kDexFile2Strings); ++index) { + ASSERT_STREQ(kDexFile2Strings[index], dex_file2->GetStringData(dex_file2->GetStringId(index))); + } + + StringReferenceValueComparator cmp; + for (size_t index1 = 0; index1 != arraysize(kDexFile1Strings); ++index1) { + for (size_t index2 = 0; index2 != arraysize(kDexFile2Strings); ++index2) { + StringReference sr1(dex_file1.get(), index1); + StringReference sr2(dex_file2.get(), index2); + EXPECT_EQ(expectedCmp12[index1][index2], cmp(sr1, sr2)) << index1 << " " << index2; + EXPECT_EQ(expectedCmp21[index2][index1], cmp(sr2, sr1)) << index1 << " " << index2; + } + } +} + +} // namespace art |
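To close, a worked example (editorial, not ART code) of the SpillRegisters/UnspillRegisters alignment change above, which is also what the updated arm64 expectations in optimizing_cfi_test_expected.inc reflect: with an odd number of core registers and a start offset of 40 (not 16-byte aligned), a single str is emitted first so that the following stp lands on a 16-byte-aligned offset, giving str x20, [sp, #40] followed by stp x21, lr, [sp, #48] instead of stp x20, x21, [sp, #40] plus str lr, [sp, #56]. Register names, sizes and the planning helper are illustrative.

#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct Spill { std::string insn; size_t offset; };

// Mirrors the pairing logic added to Arm64Assembler::SpillRegisters: peel off
// one register with str when the offset is not aligned to 2 * size and an odd
// number of registers remain, then emit stp for the remaining pairs.
static std::vector<Spill> PlanSpills(std::vector<std::string> regs, size_t offset, size_t size) {
  std::vector<Spill> plan;
  if ((offset % (2 * size)) != 0 && (regs.size() % 2) != 0) {
    plan.push_back({"str " + regs.front(), offset});
    regs.erase(regs.begin());
    offset += size;
  }
  while (regs.size() >= 2) {
    plan.push_back({"stp " + regs[0] + ", " + regs[1], offset});
    regs.erase(regs.begin(), regs.begin() + 2);
    offset += 2 * size;
  }
  if (!regs.empty()) {
    plan.push_back({"str " + regs.front(), offset});
  }
  return plan;
}

int main() {
  // x20, x21 and lr spilled from sp + 40 with 8-byte slots, as in the cfi test.
  std::vector<Spill> plan = PlanSpills({"x20", "x21", "lr"}, 40, 8);
  assert(plan.size() == 2);
  assert(plan[0].insn == "str x20" && plan[0].offset == 40);      // str x20, [sp, #40]
  assert(plan[1].insn == "stp x21, lr" && plan[1].offset == 48);  // stp x21, lr, [sp, #48]
  return 0;
}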