Diffstat (limited to 'compiler')
34 files changed, 1281 insertions, 369 deletions
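The oat_writer.cc and oat_writer.h hunks below (and the matching oat_test.cc/image_test.cc updates) make the writer reserve a per-dex-file checksum table between the vdex header and the dex section, and record the number of dex files in the header so readers can locate that table. As a rough sketch only — the struct and field names here are placeholders, not the real VdexFile API in runtime/vdex_file.h — the resulting layout looks like this:

// Illustrative only: a simplified picture of the vdex layout implied by
// WriteChecksumsAndVdexHeader(). The real VdexFile::Header also carries a
// magic and version; the names below are placeholders.
#include <cstddef>
#include <cstdint>

struct SketchVdexHeader {
  uint32_t number_of_dex_files_;   // New field: lets readers find the checksum table.
  uint32_t dex_size_;
  uint32_t verifier_deps_size_;
  uint32_t quickening_info_size_;
};

using SketchVdexChecksum = uint32_t;  // One location checksum per dex file.

// File layout: [header][checksum 0..N-1][dex files][verifier deps][quickening info].
// The writer first seeks past the header to emit the checksums, then seeks back
// to offset 0 to emit the header itself.
constexpr size_t DexSectionStart(size_t number_of_dex_files) {
  return sizeof(SketchVdexHeader) + number_of_dex_files * sizeof(SketchVdexChecksum);
}

static_assert(DexSectionStart(2) == sizeof(SketchVdexHeader) + 2 * sizeof(uint32_t), "layout sketch");

This mirrors the space reserved in WriteAndOpenDexFiles(), where vdex_size_ now starts at sizeof(VdexFile::Header) + oat_dex_files_.size() * sizeof(VdexFile::VdexChecksum).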
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc index 669d8cd991..9d39bf2c7a 100644 --- a/compiler/dex/verification_results.cc +++ b/compiler/dex/verification_results.cc @@ -103,6 +103,17 @@ const VerifiedMethod* VerificationResults::GetVerifiedMethod(MethodReference ref return (it != verified_methods_.end()) ? it->second : nullptr; } +void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) { + // This method should only be called for classes verified at compile time, + // which have no verifier error, nor methods that we know will throw + // at runtime. + AtomicMap::InsertResult result = atomic_verified_methods_.Insert( + ref, + /*expected*/ nullptr, + new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false)); + DCHECK_EQ(result, AtomicMap::kInsertResultSuccess); +} + void VerificationResults::AddRejectedClass(ClassReference ref) { { WriterMutexLock mu(Thread::Current(), rejected_classes_lock_); diff --git a/compiler/dex/verification_results.h b/compiler/dex/verification_results.h index ea38f4d537..22749fa621 100644 --- a/compiler/dex/verification_results.h +++ b/compiler/dex/verification_results.h @@ -32,6 +32,7 @@ namespace art { namespace verifier { class MethodVerifier; +class VerifierDepsTest; } // namespace verifier class CompilerOptions; @@ -47,6 +48,9 @@ class VerificationResults { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!verified_methods_lock_); + void CreateVerifiedMethodFor(MethodReference ref) + REQUIRES(!verified_methods_lock_); + const VerifiedMethod* GetVerifiedMethod(MethodReference ref) REQUIRES(!verified_methods_lock_); @@ -77,6 +81,8 @@ class VerificationResults { // Rejected classes. ReaderWriterMutex rejected_classes_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER; std::set<ClassReference> rejected_classes_ GUARDED_BY(rejected_classes_lock_); + + friend class verifier::VerifierDepsTest; }; } // namespace art diff --git a/compiler/dex/verified_method.h b/compiler/dex/verified_method.h index 04331e5aff..ce53417185 100644 --- a/compiler/dex/verified_method.h +++ b/compiler/dex/verified_method.h @@ -32,6 +32,8 @@ class MethodVerifier; class VerifiedMethod { public: + VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw); + // Cast elision set type. // Since we're adding the dex PCs to the set in increasing order, a sorted vector // is better for performance (not just memory usage), especially for large sets. @@ -80,8 +82,6 @@ class VerifiedMethod { } private: - VerifiedMethod(uint32_t encountered_error_types, bool has_runtime_throw); - /* * Generate the GC map for a method that has just been verified (i.e. we're doing this as part of * verification). 
For type-precise determination we have all the data we need, so we just need to diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 6b62110b91..a2bab80b85 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2005,6 +2005,35 @@ void CompilerDriver::SetVerified(jobject class_loader, } } +static void PopulateVerifiedMethods(const DexFile& dex_file, + uint32_t class_def_index, + VerificationResults* verification_results) { + const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_def_index); + const uint8_t* class_data = dex_file.GetClassData(class_def); + if (class_data == nullptr) { + return; + } + ClassDataItemIterator it(dex_file, class_data); + // Skip fields + while (it.HasNextStaticField()) { + it.Next(); + } + while (it.HasNextInstanceField()) { + it.Next(); + } + + while (it.HasNextDirectMethod()) { + verification_results->CreateVerifiedMethodFor(MethodReference(&dex_file, it.GetMemberIndex())); + it.Next(); + } + + while (it.HasNextVirtualMethod()) { + verification_results->CreateVerifiedMethodFor(MethodReference(&dex_file, it.GetMemberIndex())); + it.Next(); + } + DCHECK(!it.HasNext()); +} + void CompilerDriver::Verify(jobject jclass_loader, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) { @@ -2041,6 +2070,13 @@ void CompilerDriver::Verify(jobject jclass_loader, } else if (set.find(class_def.class_idx_) == set.end()) { ObjectLock<mirror::Class> lock(soa.Self(), cls); mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self()); + // Create `VerifiedMethod`s for each method; the compiler expects one for + // quickening or compiling. + // Note that this means: + // - We're only going to compile methods that did verify. + // - Quickening will not do checkcast elision. + // TODO(ngeoffray): Reconsider this once we refactor compiler filters. 
+ PopulateVerifiedMethods(*dex_file, i, verification_results_); } } } diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 5629dffce5..9bbe595fa9 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -29,6 +29,8 @@ #include "elf_writer_quick.h" #include "gc/space/image_space.h" #include "image_writer.h" +#include "linker/buffered_output_stream.h" +#include "linker/file_output_stream.h" #include "linker/multi_oat_relative_patcher.h" #include "lock_word.h" #include "mirror/object-inl.h" @@ -256,6 +258,16 @@ void CompilationHelper::Compile(CompilerDriver* driver, bool image_space_ok = writer->PrepareImageAddressSpace(); ASSERT_TRUE(image_space_ok); + if (kIsVdexEnabled) { + for (size_t i = 0, size = vdex_files.size(); i != size; ++i) { + std::unique_ptr<BufferedOutputStream> vdex_out( + MakeUnique<BufferedOutputStream>( + MakeUnique<FileOutputStream>(vdex_files[i].GetFile()))); + oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr); + oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get()); + } + } + for (size_t i = 0, size = oat_files.size(); i != size; ++i) { linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(), driver->GetInstructionSetFeatures()); diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index fb5560b124..7bb2bb71a9 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -433,7 +433,7 @@ void ImageWriter::PrepareDexCacheArraySlots() { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); Thread* const self = Thread::Current(); - ReaderMutexLock mu(self, *class_linker->DexLock()); + ReaderMutexLock mu(self, *Locks::dex_lock_); for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { ObjPtr<mirror::DexCache> dex_cache = ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root)); @@ -884,7 +884,7 @@ void ImageWriter::PruneNonImageClasses() { ScopedAssertNoThreadSuspension sa(__FUNCTION__); ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); // For ClassInClassTable - ReaderMutexLock mu2(self, *class_linker->DexLock()); + ReaderMutexLock mu2(self, *Locks::dex_lock_); for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { if (self->IsJWeakCleared(data.weak_root)) { continue; @@ -1013,7 +1013,7 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots(size_t oat_index) const { // caches. We check that the number of dex caches does not change. size_t dex_cache_count = 0; { - ReaderMutexLock mu(self, *class_linker->DexLock()); + ReaderMutexLock mu(self, *Locks::dex_lock_); // Count number of dex caches not in the boot image. for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { ObjPtr<mirror::DexCache> dex_cache = @@ -1031,7 +1031,7 @@ ObjectArray<Object>* ImageWriter::CreateImageRoots(size_t oat_index) const { hs.NewHandle(ObjectArray<Object>::Alloc(self, object_array_class.Get(), dex_cache_count))); CHECK(dex_caches.Get() != nullptr) << "Failed to allocate a dex cache array."; { - ReaderMutexLock mu(self, *class_linker->DexLock()); + ReaderMutexLock mu(self, *Locks::dex_lock_); size_t non_image_dex_caches = 0; // Re-count number of non image dex caches. 
for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) { diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 94585769b4..0a778b0954 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -30,6 +30,8 @@ #include "elf_writer.h" #include "elf_writer_quick.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "linker/buffered_output_stream.h" +#include "linker/file_output_stream.h" #include "linker/multi_oat_relative_patcher.h" #include "linker/vector_output_stream.h" #include "mirror/class-inl.h" @@ -218,6 +220,17 @@ class OatTest : public CommonCompilerTest { oat_writer.GetBssSize(), oat_writer.GetBssRootsOffset()); + if (kIsVdexEnabled) { + std::unique_ptr<BufferedOutputStream> vdex_out( + MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file))); + if (!oat_writer.WriteVerifierDeps(vdex_out.get(), nullptr)) { + return false; + } + if (!oat_writer.WriteChecksumsAndVdexHeader(vdex_out.get())) { + return false; + } + } + if (!oat_writer.WriteRodata(oat_rodata)) { return false; } diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 153aff40dc..bebd5f5ae2 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -300,6 +300,7 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo oat_data_offset_(0u), oat_header_(nullptr), size_vdex_header_(0), + size_vdex_checksums_(0), size_dex_file_alignment_(0), size_executable_offset_alignment_(0), size_oat_header_(0), @@ -409,10 +410,11 @@ bool OatWriter::AddVdexDexFilesSource(const VdexFile& vdex_file, CreateTypeLookupTable create_type_lookup_table) { DCHECK(write_state_ == WriteState::kAddingDexFileSources); const uint8_t* current_dex_data = nullptr; - for (size_t i = 0; ; ++i) { + for (size_t i = 0; i < vdex_file.GetHeader().GetNumberOfDexFiles(); ++i) { current_dex_data = vdex_file.GetNextDexFileData(current_dex_data); if (current_dex_data == nullptr) { - break; + LOG(ERROR) << "Unexpected number of dex files in vdex " << location; + return false; } if (!DexFile::IsMagicValid(current_dex_data)) { LOG(ERROR) << "Invalid magic in vdex file created from " << location; @@ -424,7 +426,14 @@ bool OatWriter::AddVdexDexFilesSource(const VdexFile& vdex_file, oat_dex_files_.emplace_back(full_location, DexFileSource(current_dex_data), create_type_lookup_table); + oat_dex_files_.back().dex_file_location_checksum_ = vdex_file.GetLocationChecksum(i); } + + if (vdex_file.GetNextDexFileData(current_dex_data) != nullptr) { + LOG(ERROR) << "Unexpected number of dex files in vdex " << location; + return false; + } + if (oat_dex_files_.empty()) { LOG(ERROR) << "No dex files in vdex file created from " << location; return false; @@ -488,8 +497,8 @@ bool OatWriter::WriteAndOpenDexFiles( // Initialize VDEX and OAT headers. if (kIsVdexEnabled) { - size_vdex_header_ = sizeof(VdexFile::Header); - vdex_size_ = size_vdex_header_; + // Reserve space for Vdex header and checksums. + vdex_size_ = sizeof(VdexFile::Header) + oat_dex_files_.size() * sizeof(VdexFile::VdexChecksum); } size_t oat_data_offset = InitOatHeader(instruction_set, instruction_set_features, @@ -793,7 +802,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { // Update quick method header. 
DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size()); OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_]; - uint32_t vmap_table_offset = method_header->vmap_table_offset_; + uint32_t vmap_table_offset = method_header->GetVmapTableOffset(); // The code offset was 0 when the mapping/vmap table offset was set, so it's set // to 0-offset and we need to adjust it by code_offset. uint32_t code_offset = quick_code_offset - thumb_offset; @@ -935,7 +944,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { // If vdex is enabled, we only emit the stack map of compiled code. The quickening info will // be in the vdex file. if (!compiled_method->GetQuickCode().empty() || !kIsVdexEnabled) { - DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u); + DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset(), 0u); ArrayRef<const uint8_t> map = compiled_method->GetVmapTable(); uint32_t map_size = map.size() * sizeof(map[0]); @@ -949,7 +958,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { }); // Code offset is not initialized yet, so set the map offset to 0u-offset. DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u); - oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset; + oat_class->method_headers_[method_offsets_index_].SetVmapTableOffset(0u - offset); } } ++method_offsets_index_; @@ -1406,7 +1415,7 @@ class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor { size_t file_offset = file_offset_; OutputStream* out = out_; - uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].vmap_table_offset_; + uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset(); uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_; ++method_offsets_index_; @@ -1837,6 +1846,7 @@ bool OatWriter::WriteCode(OutputStream* out) { size_total += (x); DO_STAT(size_vdex_header_); + DO_STAT(size_vdex_checksums_); DO_STAT(size_dex_file_alignment_); DO_STAT(size_executable_offset_alignment_); DO_STAT(size_oat_header_); @@ -2383,6 +2393,7 @@ bool OatWriter::WriteDexFile(OutputStream* out, // Update dex file size and resize class offsets in the OatDexFile. // Note: For raw data, the checksum is passed directly to AddRawDexFileSource(). + // Note: For vdex, the checksum is copied from the existing vdex file. oat_dex_file->dex_file_size_ = header->file_size_; oat_dex_file->class_offsets_.resize(header->class_defs_size_); return true; @@ -2592,11 +2603,31 @@ bool OatWriter::WriteTypeLookupTables( return true; } -bool OatWriter::WriteVdexHeader(OutputStream* vdex_out) { +bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) { if (!kIsVdexEnabled) { return true; } - off_t actual_offset = vdex_out->Seek(0, kSeekSet); + // Write checksums + off_t actual_offset = vdex_out->Seek(sizeof(VdexFile::Header), kSeekSet); + if (actual_offset != sizeof(VdexFile::Header)) { + PLOG(ERROR) << "Failed to seek to the checksum location of vdex file. Actual: " << actual_offset + << " File: " << vdex_out->GetLocation(); + return false; + } + + for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) { + OatDexFile* oat_dex_file = &oat_dex_files_[i]; + if (!vdex_out->WriteFully( + &oat_dex_file->dex_file_location_checksum_, sizeof(VdexFile::VdexChecksum))) { + PLOG(ERROR) << "Failed to write dex file location checksum. 
File: " + << vdex_out->GetLocation(); + return false; + } + size_vdex_checksums_ += sizeof(VdexFile::VdexChecksum); + } + + // Write header. + actual_offset = vdex_out->Seek(0, kSeekSet); if (actual_offset != 0) { PLOG(ERROR) << "Failed to seek to the beginning of vdex file. Actual: " << actual_offset << " File: " << vdex_out->GetLocation(); @@ -2610,12 +2641,15 @@ bool OatWriter::WriteVdexHeader(OutputStream* vdex_out) { size_t verifier_deps_section_size = vdex_quickening_info_offset_ - vdex_verifier_deps_offset_; size_t quickening_info_section_size = vdex_size_ - vdex_quickening_info_offset_; - VdexFile::Header vdex_header( - dex_section_size, verifier_deps_section_size, quickening_info_section_size); + VdexFile::Header vdex_header(oat_dex_files_.size(), + dex_section_size, + verifier_deps_section_size, + quickening_info_section_size); if (!vdex_out->WriteFully(&vdex_header, sizeof(VdexFile::Header))) { PLOG(ERROR) << "Failed to write vdex header. File: " << vdex_out->GetLocation(); return false; } + size_vdex_header_ = sizeof(VdexFile::Header); if (!vdex_out->Flush()) { PLOG(ERROR) << "Failed to flush stream after writing to vdex file." diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 0dcf79e54e..da221d6029 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -124,7 +124,7 @@ class OatWriter { // - Initialize() // - WriteVerifierDeps() // - WriteQuickeningInfo() - // - WriteVdexHeader() + // - WriteChecksumsAndVdexHeader() // - PrepareLayout(), // - WriteRodata(), // - WriteCode(), @@ -168,7 +168,7 @@ class OatWriter { /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files); bool WriteQuickeningInfo(OutputStream* vdex_out); bool WriteVerifierDeps(OutputStream* vdex_out, verifier::VerifierDeps* verifier_deps); - bool WriteVdexHeader(OutputStream* vdex_out); + bool WriteChecksumsAndVdexHeader(OutputStream* vdex_out); // Initialize the writer with the given parameters. void Initialize(const CompilerDriver* compiler, ImageWriter* image_writer, @@ -387,6 +387,7 @@ class OatWriter { // output stats uint32_t size_vdex_header_; + uint32_t size_vdex_checksums_; uint32_t size_dex_file_alignment_; uint32_t size_executable_offset_alignment_; uint32_t size_oat_header_; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 9f6b78a82c..fa6a5225e7 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -304,6 +304,7 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, SetFrameSize(RoundUp( first_register_slot_in_slow_path_ + maximum_safepoint_spill_size + + (GetGraph()->HasShouldDeoptimizeFlag() ? kShouldDeoptimizeFlagSize : 0) + FrameEntrySpillSize(), kStackAlignment)); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a5d19abe92..4b11e7c699 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -307,6 +307,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return POPCOUNT(GetSlowPathSpills(locations, core_registers)); } + size_t GetStackOffsetOfShouldDeoptimizeFlag() const { + DCHECK(GetGraph()->HasShouldDeoptimizeFlag()); + DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize); + return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize; + } + // Record native to dex mapping for a suspend point. Required by runtime. 
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr); // Check whether we have already recorded mapping at this PC. diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 1f5981682b..ed6eef1b55 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1329,6 +1329,13 @@ void CodeGeneratorARM::GenerateFrameEntry() { __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_)); __ cfi().RelOffsetForMany(DWARFReg(S0), 0, fpu_spill_mask_, kArmWordSize); } + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ mov(IP, ShifterOperand(0)); + __ StoreToOffset(kStoreWord, IP, SP, -kShouldDeoptimizeFlagSize); + } + int adjust = GetFrameSize() - FrameEntrySpillSize(); __ AddConstant(SP, -adjust); __ cfi().AdjustCFAOffset(adjust); @@ -1944,6 +1951,19 @@ void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(flag, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + __ LoadFromOffset(kLoadWord, + flag->GetLocations()->Out().AsRegister<Register>(), + SP, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); +} + void LocationsBuilderARM::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); if (Primitive::IsFloatingPointType(select->GetType())) { diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index ab6a33fbd9..6eebd69a04 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1273,6 +1273,12 @@ void CodeGeneratorARM64::GenerateFrameEntry() { frame_size - GetCoreSpillSize()); GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize()); + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. 
+ Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize); + __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); + } } } @@ -3235,6 +3241,17 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(flag, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + __ Ldr(OutputRegister(flag), + MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); +} + static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { return condition->IsCondition() && Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()); diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 1ca439e8cf..4b24ac3459 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -63,9 +63,10 @@ static bool ExpectedPairLayout(Location location) { // We expected this for both core and fpu register pairs. return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); } - +// Use a local definition to prevent copying mistakes. +static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize); +static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; static constexpr int kCurrentMethodStackOffset = 0; -static constexpr size_t kArmInstrMaxSizeInBytes = 4u; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; #ifdef __ @@ -438,6 +439,62 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL); }; +class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { + public: + explicit LoadStringSlowPathARMVIXL(HLoadString* instruction) + : SlowPathCodeARMVIXL(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex().index_; + vixl32::Register out = OutputRegister(load); + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier); + + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConventionARMVIXL calling_convention; + // In the unlucky case that the `temp` is R0, we preserve the address in `out` across + // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call). + bool temp_is_r0 = (temp.Is(calling_convention.GetRegisterAt(0))); + vixl32::Register entry_address = temp_is_r0 ? 
out : temp; + DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0))); + if (call_saves_everything_except_r0 && temp_is_r0) { + __ Mov(entry_address, temp); + } + + __ Mov(calling_convention.GetRegisterAt(0), string_index); + arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + // Store the resolved String to the .bss entry. + if (call_saves_everything_except_r0) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ Str(r0, MemOperand(entry_address)); + } else { + // For non-Baker read barrier, we need to re-calculate the address of the string entry. + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + arm_codegen->EmitMovwMovtPlaceholder(labels, out); + __ Str(r0, MemOperand(entry_address)); + } + + arm_codegen->Move32(locations->Out(), LocationFrom(r0)); + RestoreLiveRegisters(codegen, locations); + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL); +}; + class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { public: TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal) @@ -630,9 +687,30 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { return mask; } -size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { - GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index); - return kArmWordSize; +// Saves the register in the stack. Returns the size taken on stack. +size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); + return 0; +} + +// Restores the register from the stack. Returns the size taken on stack. +size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); + return 0; +} + +size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); + return 0; +} + +size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, + uint32_t reg_id ATTRIBUTE_UNUSED) { + TODO_VIXL32(FATAL); + return 0; } #undef __ @@ -655,7 +733,11 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), - isa_features_(isa_features) { + isa_features_(isa_features), + relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); // Give d14 and d15 as scratch registers to VIXL. @@ -793,7 +875,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm))); // The load must immediately precede RecordPcInfo. 
AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, + vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); __ ldr(temp, MemOperand(temp)); RecordPcInfo(nullptr, 0); @@ -853,6 +935,116 @@ void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } +Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Type type) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + uint32_t index = gp_index_++; + uint32_t stack_index = stack_index_++; + if (index < calling_convention.GetNumberOfRegisters()) { + return LocationFrom(calling_convention.GetRegisterAt(index)); + } else { + return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)); + } + } + + case Primitive::kPrimLong: { + uint32_t index = gp_index_; + uint32_t stack_index = stack_index_; + gp_index_ += 2; + stack_index_ += 2; + if (index + 1 < calling_convention.GetNumberOfRegisters()) { + if (calling_convention.GetRegisterAt(index).Is(r1)) { + // Skip R1, and use R2_R3 instead. + gp_index_++; + index++; + } + } + if (index + 1 < calling_convention.GetNumberOfRegisters()) { + DCHECK_EQ(calling_convention.GetRegisterAt(index).GetCode() + 1, + calling_convention.GetRegisterAt(index + 1).GetCode()); + + return LocationFrom(calling_convention.GetRegisterAt(index), + calling_convention.GetRegisterAt(index + 1)); + } else { + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); + } + } + + case Primitive::kPrimFloat: { + uint32_t stack_index = stack_index_++; + if (float_index_ % 2 == 0) { + float_index_ = std::max(double_index_, float_index_); + } + if (float_index_ < calling_convention.GetNumberOfFpuRegisters()) { + return LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); + } else { + return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index)); + } + } + + case Primitive::kPrimDouble: { + double_index_ = std::max(double_index_, RoundUp(float_index_, 2)); + uint32_t stack_index = stack_index_; + stack_index_ += 2; + if (double_index_ + 1 < calling_convention.GetNumberOfFpuRegisters()) { + uint32_t index = double_index_; + double_index_ += 2; + Location result = LocationFrom( + calling_convention.GetFpuRegisterAt(index), + calling_convention.GetFpuRegisterAt(index + 1)); + DCHECK(ExpectedPairLayout(result)); + return result; + } else { + return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index)); + } + } + + case Primitive::kPrimVoid: + LOG(FATAL) << "Unexpected parameter type " << type; + break; + } + return Location::NoLocation(); +} + +Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(Primitive::Type type) const { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimNot: { + return LocationFrom(r0); + } + + case Primitive::kPrimFloat: { + return LocationFrom(s0); + } + + case Primitive::kPrimLong: { + return LocationFrom(r0, r1); + } + + case Primitive::kPrimDouble: { + return LocationFrom(s0, s1); + } + + case Primitive::kPrimVoid: + return Location::NoLocation(); + } + + UNREACHABLE(); +} + +Location InvokeDexCallingConventionVisitorARMVIXL::GetMethodLocation() const { + return LocationFrom(kMethodRegister); +} + void CodeGeneratorARMVIXL::Move32(Location 
destination, Location source) { if (source.Equals(destination)) { return; @@ -924,10 +1116,14 @@ void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); - GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value()); + __ Ldr(lr, MemOperand(tr, GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value())); + // Ensure the pc position is recorded immediately after the `blx` instruction. + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ blx(lr); if (EntrypointRequiresStackMap(entrypoint)) { - // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the - // previous instruction. RecordPcInfo(instruction, dex_pc, slow_path); } } @@ -936,11 +1132,7 @@ void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poi HInstruction* instruction, SlowPathCode* slow_path) { ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); - GenerateInvokeRuntime(entry_point_offset); -} - -void CodeGeneratorARMVIXL::GenerateInvokeRuntime(int32_t entry_point_offset) { - GetAssembler()->LoadFromOffset(kLoadWord, lr, tr, entry_point_offset); + __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blx(lr); } @@ -1270,6 +1462,19 @@ void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(flag, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + GetAssembler()->LoadFromOffset(kLoadWord, + OutputRegister(flag), + sp, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); +} + void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); if (Primitive::IsFloatingPointType(select->GetType())) { @@ -1360,7 +1565,7 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { CodeGenerator::GetInt32ValueOf(right.GetConstant())); } AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes * 3u, + 3 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); __ ite(ARMCondition(cond->GetCondition())); __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1); @@ -1575,7 +1780,10 @@ void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* i HandleInvoke(invoke); - // TODO(VIXL): invoke->HasPcRelativeDexCache() + // For PC-relative dex cache the invoke has an extra input, the PC-relative address base. 
+ if (invoke->HasPcRelativeDexCache()) { + invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister()); + } } static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) { @@ -1597,15 +1805,13 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD } LocationSummary* locations = invoke->GetLocations(); - DCHECK(locations->HasTemps()); - codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0)); - // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the - // previous instruction. + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { - InvokeDexCallingConventionVisitorARM calling_convention_visitor; + InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor; CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); } @@ -1624,10 +1830,8 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); - // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the - // previous instruction. codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); } void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -1646,10 +1850,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv DCHECK(!receiver.IsStackSlot()); - // /* HeapReference<Class> */ temp = receiver->klass_ - GetAssembler()->LoadFromOffset(kLoadWord, temp, RegisterFrom(receiver), class_offset); - - codegen_->MaybeRecordImplicitNullCheck(invoke); + // Ensure the pc position is recorded immediately after the `ldr` instruction. + { + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ ldr(temp, MemOperand(RegisterFrom(receiver), class_offset)); + codegen_->MaybeRecordImplicitNullCheck(invoke); + } // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. // However this is not required in practice, as this is an @@ -1688,15 +1897,16 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv temps.Exclude(hidden_reg); __ Mov(hidden_reg, invoke->GetDexMethodIndex()); } - { + // Ensure the pc position is recorded immediately after the `blx` instruction. + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. 
AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); // LR(); __ blx(lr); - DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); + DCHECK(!codegen_->IsLeafMethod()); } } @@ -3067,7 +3277,7 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { __ Subs(temp, o_l, Operand::From(kArmBitsPerWord)); { AssemblerAccurateScope guard(GetVIXLAssembler(), - 3 * kArmInstrMaxSizeInBytes, + 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); __ it(pl); __ lsl(pl, o_h, low, temp); @@ -3086,7 +3296,7 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { __ Subs(temp, o_h, Operand::From(kArmBitsPerWord)); { AssemblerAccurateScope guard(GetVIXLAssembler(), - 3 * kArmInstrMaxSizeInBytes, + 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); __ it(pl); __ asr(pl, o_l, high, temp); @@ -3103,7 +3313,7 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { __ Subs(temp, o_h, Operand::From(kArmBitsPerWord)); { AssemblerAccurateScope guard(GetVIXLAssembler(), - 3 * kArmInstrMaxSizeInBytes, + 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); __ it(pl); __ lsr(pl, o_l, high, temp); @@ -3220,9 +3430,10 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize); GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString)); GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value()); + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, - CodeBufferCheckScope::kMaximumSize); + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); __ blx(lr); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { @@ -3462,10 +3673,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register a addr = temp; } __ Bind(&fail); - // We need a load followed by store. (The address used in a STREX instruction must - // be the same as the address in the most recently executed LDREX instruction.) - __ Ldrexd(temp1, temp2, MemOperand(addr)); - codegen_->MaybeRecordImplicitNullCheck(instruction); + { + // Ensure the pc position is recorded immediately after the `ldrexd` instruction. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + // We need a load followed by store. (The address used in a STREX instruction must + // be the same as the address in the most recently executed LDREX instruction.) + __ ldrexd(temp1, temp2, MemOperand(addr)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } __ Strexd(temp1, value_lo, value_hi, MemOperand(addr)); __ CompareAndBranchIfNonZero(temp1, &fail); } @@ -3614,6 +3831,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, // Longs and doubles are handled in the switch. 
if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) { + // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we + // should use a scope and the assembler to emit the store instruction to guarantee that we + // record the pc at the correct position. But the `Assembler` does not automatically handle + // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time + // of writing, do generate the store instruction last. codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -3788,7 +4010,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, TODO_VIXL32(FATAL); } else { GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); - // TODO(VIXL): Scope to guarantee the position immediately after the load. codegen_->MaybeRecordImplicitNullCheck(instruction); if (is_volatile) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); @@ -3825,7 +4046,6 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, __ Vmov(out_dreg, lo, hi); } else { GetAssembler()->LoadDFromOffset(out_dreg, base, offset); - // TODO(VIXL): Scope to guarantee the position immediately after the load. codegen_->MaybeRecordImplicitNullCheck(instruction); } break; @@ -3841,6 +4061,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, // double fields, are handled in the previous switch statement. } else { // Address cases other than reference and double that may require an implicit null check. + // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we + // should use a scope and the assembler to emit the load instruction to guarantee that we + // record the pc at the correct position. But the `Assembler` does not automatically handle + // unencodable offsets. Practically, everything is fine because the helper and VIXL, at the time + // of writing, do generate the store instruction last. codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -3965,8 +4190,9 @@ void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) { } UseScratchRegisterScope temps(GetVIXLAssembler()); + // Ensure the pc position is recorded immediately after the `ldr` instruction. AssemblerAccurateScope aas(GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, + vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0))); RecordPcInfo(instruction, instruction->GetDexPc()); @@ -4233,6 +4459,11 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); + // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, + // we should use a scope and the assembler to emit the load instruction to guarantee that + // we record the pc at the correct position. But the `Assembler` does not automatically + // handle unencodable offsets. Practically, everything is fine because the helper and + // VIXL, at the time of writing, do generate the store instruction last. 
codegen_->MaybeRecordImplicitNullCheck(instruction); // If read barriers are enabled, emit read barriers other than // Baker's using a slow path (and also unpoison the loaded @@ -4255,7 +4486,9 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index)); temps.Release(temp); - + // TODO(VIXL): Use a scope to ensure that we record the pc position immediately after the + // load instruction. Practically, everything is fine because the helper and VIXL, at the + // time of writing, do generate the store instruction last. codegen_->MaybeRecordImplicitNullCheck(instruction); // If read barriers are enabled, emit read barriers other than // Baker's using a slow path (and also unpoison the loaded @@ -4317,6 +4550,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { // Potential implicit null checks, in the case of reference // arrays, are handled in the previous switch statement. } else if (!maybe_compressed_char_at) { + // TODO(VIXL): Use a scope to ensure we record the pc info immediately after + // the preceding load instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4417,6 +4652,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); temps.Release(temp); } + // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding + // store instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); DCHECK(!needs_write_barrier); DCHECK(!may_need_runtime_call_for_type_check); @@ -4451,6 +4688,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index)); temps.Release(temp); } + // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding + // store instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); __ B(&done); __ Bind(&non_zero); @@ -4464,9 +4703,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { // negative, in which case we would take the ArraySet slow // path. - // /* HeapReference<Class> */ temp1 = array->klass_ - GetAssembler()->LoadFromOffset(kLoadWord, temp1, array, class_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); + { + // Ensure we record the pc position immediately after the `ldr` instruction. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + // /* HeapReference<Class> */ temp1 = array->klass_ + __ ldr(temp1, MemOperand(array, class_offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } GetAssembler()->MaybeUnpoisonHeapReference(temp1); // /* HeapReference<Class> */ temp1 = temp1->component_type_ @@ -4523,6 +4768,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { } if (!may_need_runtime_call_for_type_check) { + // TODO(VIXL): Ensure we record the pc position immediately after the preceding store + // instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); } @@ -4591,6 +4838,8 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { // Objects are handled in the switch. if (value_type != Primitive::kPrimNot) { + // TODO(VIXL): Ensure we record the pc position immediately after the preceding store + // instruction. 
codegen_->MaybeRecordImplicitNullCheck(instruction); } } @@ -4606,8 +4855,13 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); vixl32::Register obj = InputRegisterAt(instruction, 0); vixl32::Register out = OutputRegister(instruction); - GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); + { + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ ldr(out, MemOperand(obj, offset)); + codegen_->MaybeRecordImplicitNullCheck(instruction); + } // Mask out compression flag from String's array length. if (mirror::kUseStringCompression && instruction->IsStringLength()) { __ Lsr(out, out, 1u); @@ -4985,12 +5239,37 @@ void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) { TODO_VIXL32(FATAL); } -// Check if the desired_class_load_kind is supported. If it is, return it, -// otherwise return a fall-back kind that should be used instead. HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) { - // TODO(VIXL): Implement optimized code paths. - return HLoadClass::LoadKind::kDexCacheViaMethod; + HLoadClass::LoadKind desired_class_load_kind) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + // TODO(VIXL): Enable it back when literal pools are fixed in VIXL. + return HLoadClass::LoadKind::kDexCacheViaMethod; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + // TODO(VIXL): Enable it back when literal pools are fixed in VIXL. + return HLoadClass::LoadKind::kDexCacheViaMethod; + case HLoadClass::LoadKind::kDexCacheAddress: + // TODO(VIXL): Enable it back when literal pools are fixed in VIXL. + return HLoadClass::LoadKind::kDexCacheViaMethod; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + // We disable pc-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods + // with irreducible loops. + if (GetGraph()->HasIrreducibleLoops()) { + return HLoadClass::LoadKind::kDexCacheViaMethod; + } + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + break; + } + return desired_class_load_kind; } void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { @@ -5004,11 +5283,15 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { return; } - // TODO(VIXL): read barrier code. - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + TODO_VIXL32(FATAL); + } + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kDexCacheViaMethod || @@ -5030,7 +5313,9 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(cls); - // TODO(VIXL): read barrier code. + const ReadBarrierOption read_barrier_option = cls->IsInBootImage() + ? kWithoutReadBarrier + : kCompilerReadBarrierOption; bool generate_null_check = false; switch (cls->GetLoadKind()) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5042,7 +5327,35 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { out_loc, current_method, ArtMethod::DeclaringClassOffset().Int32Value(), - kEmitCompilerReadBarrier); + read_barrier_option); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: { + TODO_VIXL32(FATAL); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->EmitMovwMovtPlaceholder(labels, out); + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + TODO_VIXL32(FATAL); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + TODO_VIXL32(FATAL); + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + vixl32::Register base_reg = InputRegisterAt(cls, 0); + HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase(); + int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset(); + // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset) + GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option); + generate_null_check = !cls->IsInDexCache(); break; } case HLoadClass::LoadKind::kDexCacheViaMethod: { @@ -5054,7 +5367,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) { GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset); // /* GcRoot<mirror::Class> */ out = out[type_index] size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex().index_); - GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier); + GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option); generate_null_check = !cls->IsInDexCache(); break; } @@ -5114,37 +5427,101 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( __ Bind(slow_path->GetExitLabel()); } -// Check if the desired_string_load_kind is supported. If it is, return it, -// otherwise return a fall-back kind that should be used instead. HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { - // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code. - return HLoadString::LoadKind::kDexCacheViaMethod; + HLoadString::LoadKind desired_string_load_kind) { + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + // TODO(VIXL): Implement missing optimization. 
+ return HLoadString::LoadKind::kDexCacheViaMethod; + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageAddress: + // TODO(VIXL): Implement missing optimization. + return HLoadString::LoadKind::kDexCacheViaMethod; + case HLoadString::LoadKind::kBssEntry: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + // TODO(VIXL): Implement missing optimization. + return HLoadString::LoadKind::kDexCacheViaMethod; + case HLoadString::LoadKind::kDexCacheViaMethod: + break; + } + return desired_string_load_kind; } void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly - : LocationSummary::kNoCall; + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - - // TODO(VIXL): Implement optimized code paths. - // See InstructionCodeGeneratorARMVIXL::VisitLoadString. HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { - locations->SetInAt(0, Location::RequiresRegister()); - // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead. locations->SetOut(LocationFrom(r0)); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and/or marking to save everything, including temps. + // Note that IP may theoretically be clobbered by saving/restoring the live register + // (only one thanks to the custom calling convention), so we request a different temp. + locations->AddTemp(Location::RequiresRegister()); + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); + // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() + // that the the kPrimNot result register is the same as the first argument register. + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) { - // TODO(VIXL): Implement optimized code paths. - // We implemented the simplest solution to get first ART tests passing, we deferred the - // optimized path until later, we should implement it using ARM64 implementation as a - // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString. + LocationSummary* locations = load->GetLocations(); + Location out_loc = locations->Out(); + vixl32::Register out = OutputRegister(load); + HLoadString::LoadKind load_kind = load->GetLoadKind(); + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: { + TODO_VIXL32(FATAL); + break; + } + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); + codegen_->EmitMovwMovtPlaceholder(labels, out); + return; // No dex cache slow path. 
+ } + case HLoadString::LoadKind::kBootImageAddress: { + TODO_VIXL32(FATAL); + break; + } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex().index_); + codegen_->EmitMovwMovtPlaceholder(labels, temp); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption); + LoadStringSlowPathARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadStringSlowPathARMVIXL(load); + codegen_->AddSlowPath(slow_path); + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + case HLoadString::LoadKind::kJitTableAddress: { + TODO_VIXL32(FATAL); + break; + } + default: + break; + } // TODO: Re-add the compiler code to do string dex cache lookup again. DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); @@ -5999,9 +6376,9 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( Location root, vixl32::Register obj, uint32_t offset, - bool requires_read_barrier) { + ReadBarrierOption read_barrier_option) { vixl32::Register root_reg = RegisterFrom(root); - if (requires_read_barrier) { + if (read_barrier_option == kWithReadBarrier) { TODO_VIXL32(FATAL); } else { // Plain GC root load with no read barrier. @@ -6062,15 +6439,51 @@ void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instructio // Check if the desired_dispatch_info is supported. If it is, return it, // otherwise return a fall-back info that should be used instead. HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED, - HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + HInvokeStaticOrDirect* invoke) { // TODO(VIXL): Implement optimized code paths. - return { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, - HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, - 0u, - 0u - }; + if (desired_dispatch_info.method_load_kind == + HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup || + desired_dispatch_info.code_ptr_location == + HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup) { + return { + HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, + HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, + 0u, + 0u + }; + } + + HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; + // We disable pc-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods + // with irreducible loops. + if (GetGraph()->HasIrreducibleLoops() && + (dispatch_info.method_load_kind == + HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) { + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + } + + if (dispatch_info.code_ptr_location == HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) { + const DexFile& outer_dex_file = GetGraph()->GetDexFile(); + if (&outer_dex_file != invoke->GetTargetMethod().dex_file) { + // Calls across dex files are more likely to exceed the available BL range, + // so use absolute patch with fixup if available and kCallArtMethod otherwise. 
+ HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = + (desired_dispatch_info.method_load_kind == + HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup) + ? HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup + : HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; + return HInvokeStaticOrDirect::DispatchInfo { + dispatch_info.method_load_kind, + code_ptr_location, + dispatch_info.method_load_data, + 0u + }; + } + } + return dispatch_info; } vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( @@ -6101,59 +6514,119 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( HInvokeStaticOrDirect* invoke, Location temp) { - Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. - vixl32::Register temp_reg = RegisterFrom(temp); + // For better instruction scheduling we load the direct code pointer before the method pointer. + switch (invoke->GetCodePtrLocation()) { + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + // LR = code address from literal pool with link-time patch. + TODO_VIXL32(FATAL); + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: + // LR = invoke->GetDirectCodePtr(); + __ Mov(lr, Operand::From(invoke->GetDirectCodePtr())); + break; + default: + break; + } + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { uint32_t offset = GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); // temp = thread->string_init_entrypoint - GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset); + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, offset); + break; + } + case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: + callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); + break; + case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: + __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress())); + break; + case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: + TODO_VIXL32(FATAL); + break; + case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + HArmDexCacheArraysBase* base = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase(); + vixl32::Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, RegisterFrom(temp)); + int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset(); + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); vixl32::Register method_reg; + vixl32::Register reg = RegisterFrom(temp); if (current_method.IsRegister()) { method_reg = RegisterFrom(current_method); } else { DCHECK(invoke->GetLocations()->Intrinsified()); DCHECK(!current_method.IsValid()); - method_reg = temp_reg; - GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset); + method_reg = reg; + GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset); } // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; GetAssembler()->LoadFromOffset( kLoadWord, - temp_reg, + reg, method_reg, 
ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); // temp = temp[index_in_cache]; // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. uint32_t index_in_cache = invoke->GetDexMethodIndex(); GetAssembler()->LoadFromOffset( - kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); + kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); break; } - default: - TODO_VIXL32(FATAL); } - // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`. - if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) { - TODO_VIXL32(FATAL); + switch (invoke->GetCodePtrLocation()) { + case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: + __ Bl(GetFrameEntryLabel()); + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); + { + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ bind(&relative_call_patches_.back().label); + // Arbitrarily branch to the BL itself, override at link time. + __ bl(&relative_call_patches_.back().label); + } + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup: + case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect: + // LR prepared above for better instruction scheduling. + // LR() + { + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ blx(lr); + } + break; + case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: + // LR = callee_method->entry_point_from_quick_compiled_code_ + GetAssembler()->LoadFromOffset( + kLoadWord, + lr, + RegisterFrom(callee_method), + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + { + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + // LR() + __ blx(lr); + } + break; } - // LR = callee_method->entry_point_from_quick_compiled_code_ - GetAssembler()->LoadFromOffset( - kLoadWord, - lr, - RegisterFrom(callee_method), - ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); - // LR() - __ Blx(lr); - DCHECK(!IsLeafMethod()); } @@ -6169,9 +6642,15 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location InvokeDexCallingConventionARMVIXL calling_convention; vixl32::Register receiver = calling_convention.GetRegisterAt(0); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); - // /* HeapReference<Class> */ temp = receiver->klass_ - GetAssembler()->LoadFromOffset(kLoadWord, temp, receiver, class_offset); - MaybeRecordImplicitNullCheck(invoke); + { + // Make sure the pc is recorded immediately after the `ldr` instruction. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + // /* HeapReference<Class> */ temp = receiver->klass_ + __ ldr(temp, MemOperand(receiver, class_offset)); + MaybeRecordImplicitNullCheck(invoke); + } // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. 
// However this is not required in practice, as this is an @@ -6188,7 +6667,81 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location // LR = temp->GetEntryPoint(); GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point); // LR(); - __ Blx(lr); + // This `blx` *must* be the *last* instruction generated by this stub, so that calls to + // `RecordPcInfo()` immediately following record the correct pc. Use a scope to help guarantee + // that. + // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. + AssemblerAccurateScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ blx(lr); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( + const DexFile& dex_file, uint32_t string_index) { + return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( + const DexFile& dex_file, dex::TypeIndex type_index) { + return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch( + const DexFile& dex_file, uint32_t element_offset) { + return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch( + const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index); + return &patches->back(); +} + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation()); + // Add MOVW patch. + DCHECK(info.movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation()); + linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + // Add MOVT patch. 
+ DCHECK(info.movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation()); + linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + } +} + +void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + relative_call_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size(); + linker_patches->reserve(size); + for (const PatchInfo<vixl32::Label>& info : relative_call_patches_) { + uint32_t literal_offset = info.label.GetLocation(); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); + } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); + } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); } void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { @@ -6315,6 +6868,17 @@ void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_in jump_table->EmitTable(codegen_); } } +void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) { + vixl32::Register base_reg = OutputRegister(base); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); + codegen_->EmitMovwMovtPlaceholder(labels, base_reg); +} // Copy the result of a call into the given target. void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) { @@ -6325,7 +6889,7 @@ void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type DCHECK_NE(type, Primitive::kPrimVoid); - Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type); + Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type); if (return_loc.Equals(trg)) { return; } @@ -6373,6 +6937,21 @@ void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(HClassTableGet* instruc } } +void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder( + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, + vixl32::Register out) { + AssemblerAccurateScope aas(GetVIXLAssembler(), + 3 * vixl32::kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + // TODO(VIXL): Think about using mov instead of movw. 
+ __ bind(&labels->movw_label); + __ movw(out, /* placeholder */ 0u); + __ bind(&labels->movt_label); + __ movt(out, /* placeholder */ 0u); + __ bind(&labels->add_pc_label); + __ add(out, out, pc); +} + #undef __ #undef QUICK_ENTRY_POINT #undef TODO_VIXL32 diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index bd91127121..b7ba8ddf0d 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -17,9 +17,15 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_ -#include "code_generator_arm.h" +#include "base/enums.h" +#include "code_generator.h" #include "common_arm.h" +#include "driver/compiler_options.h" +#include "nodes.h" +#include "string_reference.h" +#include "parallel_move_resolver.h" #include "utils/arm/assembler_arm_vixl.h" +#include "utils/type_reference.h" // TODO(VIXL): make vixl clean wrt -Wshadow. #pragma GCC diagnostic push @@ -44,7 +50,7 @@ static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = { vixl::aarch32::r2, vixl::aarch32::r3 }; -static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegisters); +static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegistersVIXL); static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = { vixl::aarch32::s0, vixl::aarch32::s1, @@ -63,7 +69,7 @@ static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = { vixl::aarch32::s14, vixl::aarch32::s15 }; -static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegisters); +static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegistersVIXL); static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0; @@ -90,7 +96,7 @@ static const vixl::aarch32::Register kRuntimeParameterCoreRegistersVIXL[] = { vixl::aarch32::r3 }; static const size_t kRuntimeParameterCoreRegistersLengthVIXL = - arraysize(kRuntimeParameterCoreRegisters); + arraysize(kRuntimeParameterCoreRegistersVIXL); static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = { vixl::aarch32::s0, vixl::aarch32::s1, @@ -98,98 +104,10 @@ static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = { vixl::aarch32::s3 }; static const size_t kRuntimeParameterFpuRegistersLengthVIXL = - arraysize(kRuntimeParameterFpuRegisters); + arraysize(kRuntimeParameterFpuRegistersVIXL); class LoadClassSlowPathARMVIXL; -#define FOR_EACH_IMPLEMENTED_INSTRUCTION(M) \ - M(Above) \ - M(AboveOrEqual) \ - M(Add) \ - M(And) \ - M(ArrayGet) \ - M(ArrayLength) \ - M(ArraySet) \ - M(Below) \ - M(BelowOrEqual) \ - M(BitwiseNegatedRight) \ - M(BooleanNot) \ - M(BoundsCheck) \ - M(BoundType) \ - M(CheckCast) \ - M(ClassTableGet) \ - M(ClearException) \ - M(ClinitCheck) \ - M(Compare) \ - M(CurrentMethod) \ - M(Deoptimize) \ - M(Div) \ - M(DivZeroCheck) \ - M(DoubleConstant) \ - M(Equal) \ - M(Exit) \ - M(FloatConstant) \ - M(Goto) \ - M(GreaterThan) \ - M(GreaterThanOrEqual) \ - M(If) \ - M(InstanceFieldGet) \ - M(InstanceFieldSet) \ - M(InstanceOf) \ - M(IntConstant) \ - M(IntermediateAddress) \ - M(InvokeInterface) \ - M(InvokeStaticOrDirect) \ - M(InvokeUnresolved) \ - M(InvokeVirtual) \ - M(LessThan) \ - M(LessThanOrEqual) \ - M(LoadClass) \ - M(LoadException) \ - M(LoadString) \ - M(LongConstant) \ - M(MemoryBarrier) \ - M(MonitorOperation) \ - M(Mul) \ - M(MultiplyAccumulate) \ - M(NativeDebugInfo) \ - M(Neg) \ - 
M(NewArray) \ - M(NewInstance) \ - M(Not) \ - M(NotEqual) \ - M(NullCheck) \ - M(NullConstant) \ - M(Or) \ - M(PackedSwitch) \ - M(ParallelMove) \ - M(ParameterValue) \ - M(Phi) \ - M(Rem) \ - M(Return) \ - M(ReturnVoid) \ - M(Ror) \ - M(Select) \ - M(Shl) \ - M(Shr) \ - M(StaticFieldGet) \ - M(StaticFieldSet) \ - M(Sub) \ - M(SuspendCheck) \ - M(Throw) \ - M(TryBoundary) \ - M(TypeConversion) \ - M(UnresolvedInstanceFieldGet) \ - M(UnresolvedInstanceFieldSet) \ - M(UnresolvedStaticFieldGet) \ - M(UnresolvedStaticFieldSet) \ - M(UShr) \ - M(Xor) \ - -// TODO: Remove once the VIXL32 backend is implemented completely. -#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ - M(ArmDexCacheArraysBase) \ - class CodeGeneratorARMVIXL; class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { @@ -248,6 +166,22 @@ class InvokeDexCallingConventionARMVIXL DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL); }; +class InvokeDexCallingConventionVisitorARMVIXL : public InvokeDexCallingConventionVisitor { + public: + InvokeDexCallingConventionVisitorARMVIXL() {} + virtual ~InvokeDexCallingConventionVisitorARMVIXL() {} + + Location GetNextLocation(Primitive::Type type) OVERRIDE; + Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetMethodLocation() const OVERRIDE; + + private: + InvokeDexCallingConventionARMVIXL calling_convention; + uint32_t double_index_ = 0; + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorARMVIXL); +}; + class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention { public: FieldAccessCallingConventionARMVIXL() {} @@ -319,27 +253,26 @@ class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap { DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverARMVIXL); }; -#define DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR(Name) \ - void Visit##Name(H##Name*) OVERRIDE; - -#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR(Name) \ - void Visit##Name(H##Name* instr) OVERRIDE { \ - VisitUnimplemementedInstruction(instr); } - class LocationsBuilderARMVIXL : public HGraphVisitor { public: LocationsBuilderARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen) : HGraphVisitor(graph), codegen_(codegen) {} - FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR) +#define DECLARE_VISIT_INSTRUCTION(name, super) \ + void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) - private: - void VisitUnimplemementedInstruction(HInstruction* instruction) { - LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName(); +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; } + private: void HandleInvoke(HInvoke* invoke); void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode); void HandleCondition(HCondition* condition); @@ -355,7 +288,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARMVIXL* const codegen_; - InvokeDexCallingConventionVisitorARM parameter_visitor_; + InvokeDexCallingConventionVisitorARMVIXL parameter_visitor_; 
DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARMVIXL); }; @@ -364,25 +297,30 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { public: InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen); - FOR_EACH_IMPLEMENTED_INSTRUCTION(DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR) +#define DECLARE_VISIT_INSTRUCTION(name, super) \ + void Visit##name(H##name* instr) OVERRIDE; - FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR) + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_SHARED(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) OVERRIDE { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() + << " (id " << instruction->GetId() << ")"; + } ArmVIXLAssembler* GetAssembler() const { return assembler_; } ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } private: - void VisitUnimplemementedInstruction(HInstruction* instruction) { - LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName(); - } - // Generate code for the given suspend check. If not null, `successor` // is the block to branch to if the suspend check is not needed, and after // the suspend call. void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor); void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg); - void HandleGoto(HInstruction* got, HBasicBlock* successor); void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value); @@ -440,17 +378,16 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option); - // Generate a GC root reference load: // // root <- *(obj + offset) // - // while honoring read barriers if `requires_read_barrier` is true. + // while honoring read barriers based on read_barrier_option. 
void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, vixl::aarch32::Register obj, uint32_t offset, - bool requires_read_barrier); + ReadBarrierOption read_barrier_option); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, vixl::aarch32::Label* true_target, @@ -470,6 +407,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator { void DivRemByPowerOfTwo(HBinaryOperation* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemConstantIntegral(HBinaryOperation* instruction); + void HandleGoto(HInstruction* got, HBasicBlock* successor); ArmVIXLAssembler* const assembler_; CodeGeneratorARMVIXL* const codegen_; @@ -483,62 +421,50 @@ class CodeGeneratorARMVIXL : public CodeGenerator { const ArmInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); - virtual ~CodeGeneratorARMVIXL() {} - void Initialize() OVERRIDE { - block_labels_.resize(GetGraph()->GetBlocks().size()); - } - void GenerateFrameEntry() OVERRIDE; void GenerateFrameExit() OVERRIDE; - void Bind(HBasicBlock* block) OVERRIDE; - - vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) { - block = FirstNonEmptyBlock(block); - return &(block_labels_[block->GetBlockId()]); - } - void MoveConstant(Location destination, int32_t value) OVERRIDE; void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + + size_t GetWordSize() const OVERRIDE { + return static_cast<size_t>(kArmPointerSize); + } + + size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; } + + HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } + + HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } + ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; } const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; } ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } - size_t GetWordSize() const OVERRIDE { return kArmWordSize; } - - size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; } - uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE { vixl::aarch32::Label* block_entry_label = GetLabelOf(block); DCHECK(block_entry_label->IsBound()); return block_entry_label->GetLocation(); } - JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) { - jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr)); - return jump_tables_.back().get(); - } - - HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; } - - HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; } - void FixJumpTables(); - void GenerateMemoryBarrier(MemBarrierKind kind); - void Finalize(CodeAllocator* allocator) OVERRIDE; void SetupBlockedRegisters() const OVERRIDE; void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; + ParallelMoveResolver* GetMoveResolver() 
OVERRIDE { return &move_resolver_; } InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; } - // Helper method to move a 32-bit value between two locations. void Move32(Location destination, Location source); @@ -553,31 +479,39 @@ class CodeGeneratorARMVIXL : public CodeGenerator { vixl::aarch32::Register reg_index, vixl::aarch32::Condition cond = vixl::aarch32::al); - const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } + // Generate code to invoke a runtime entry point. + void InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr) OVERRIDE; - vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. + void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); - // Saves the register in the stack. Returns the size taken on stack. - size_t SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(INFO) << "TODO: SaveCoreRegister"; - return 0; - } + // Emit a write barrier. + void MarkGCCard(vixl::aarch32::Register temp, + vixl::aarch32::Register card, + vixl::aarch32::Register object, + vixl::aarch32::Register value, + bool can_be_null); + + void GenerateMemoryBarrier(MemBarrierKind kind); - // Restores the register from the stack. Returns the size taken on stack. - size_t RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(INFO) << "TODO: RestoreCoreRegister"; - return 0; + vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) { + block = FirstNonEmptyBlock(block); + return &(block_labels_[block->GetBlockId()]); } - size_t SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE { - UNIMPLEMENTED(INFO) << "TODO: SaveFloatingPointRegister"; - return 0; + void Initialize() OVERRIDE { + block_labels_.resize(GetGraph()->GetBlocks().size()); } - size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; + void Finalize(CodeAllocator* allocator) OVERRIDE; + + const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; @@ -585,33 +519,54 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void ComputeSpillMask() OVERRIDE; - void GenerateImplicitNullCheck(HNullCheck* null_check) OVERRIDE; - void GenerateExplicitNullCheck(HNullCheck* null_check) OVERRIDE; + vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; } - ParallelMoveResolver* GetMoveResolver() OVERRIDE { - return &move_resolver_; - } + // Check if the desired_string_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadString::LoadKind GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) OVERRIDE; - // Generate code to invoke a runtime entry point. - void InvokeRuntime(QuickEntrypointEnum entrypoint, - HInstruction* instruction, - uint32_t dex_pc, - SlowPathCode* slow_path = nullptr) OVERRIDE; + // Check if the desired_class_load_kind is supported. 
If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; - // Generate code to invoke a runtime entry point, but do not record - // PC-related information in a stack map. - void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, - HInstruction* instruction, - SlowPathCode* slow_path); + // Check if the desired_dispatch_info is supported. If it is, return it, + // otherwise return a fall-back info that should be used instead. + HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + HInvokeStaticOrDirect* invoke) OVERRIDE; + + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; + void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; - void GenerateInvokeRuntime(int32_t entry_point_offset); + void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; - // Emit a write barrier. - void MarkGCCard(vixl::aarch32::Register temp, - vixl::aarch32::Register card, - vixl::aarch32::Register object, - vixl::aarch32::Register value, - bool can_be_null); + // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays + // and boot image strings/types. The only difference is the interpretation of the + // offset_or_index. The PC-relative address is loaded with three instructions, + // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset + // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we + // currently emit these 3 instructions together, instruction scheduling could + // split this sequence apart, so we keep separate labels for each of them. + struct PcRelativePatchInfo { + PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx) { } + PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; + + const DexFile& target_dex_file; + // Either the dex cache array element offset or the string/type index. + uint32_t offset_or_index; + vixl::aarch32::Label movw_label; + vixl::aarch32::Label movt_label; + vixl::aarch32::Label add_pc_label; + }; + + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index); + PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, + uint32_t element_offset); + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. @@ -679,33 +634,35 @@ class CodeGeneratorARMVIXL : public CodeGenerator { uint32_t offset, Location index = Location::NoLocation()); - // Check if the desired_string_load_kind is supported. If it is, return it, - // otherwise return a fall-back kind that should be used instead. - HLoadString::LoadKind GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind) OVERRIDE; - - // Check if the desired_class_load_kind is supported. If it is, return it, - // otherwise return a fall-back kind that should be used instead. - HLoadClass::LoadKind GetSupportedLoadClassKind( - HLoadClass::LoadKind desired_class_load_kind) OVERRIDE; - - // Check if the desired_dispatch_info is supported. 
If it is, return it, - // otherwise return a fall-back info that should be used instead. - HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - HInvokeStaticOrDirect* invoke) OVERRIDE; + void GenerateNop() OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; + void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; + JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) { + jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr)); + return jump_tables_.back().get(); + } + void EmitJumpTables(); - void GenerateNop() OVERRIDE; + void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels, + vixl::aarch32::Register out); private: vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, vixl::aarch32::Register temp); + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch32::Literal<uint32_t>*>; + using MethodToLiteralMap = + ArenaSafeMap<MethodReference, vixl::aarch32::Literal<uint32_t>*, MethodReferenceComparator>; + + PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + uint32_t offset_or_index, + ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory. ArenaDeque<vixl::aarch32::Label> block_labels_; // Indexed by block id. @@ -719,15 +676,19 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArmVIXLAssembler assembler_; const ArmInstructionSetFeatures& isa_features_; + // Relative call patch info. + // Using ArenaDeque<> which retains element addresses on push/emplace_back(). + ArenaDeque<PatchInfo<vixl::aarch32::Label>> relative_call_patches_; + // PC-relative patch info for each HArmDexCacheArraysBase. + ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL); }; -#undef FOR_EACH_IMPLEMENTED_INSTRUCTION -#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION -#undef DEFINE_IMPLEMENTED_INSTRUCTION_VISITOR -#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITOR - - } // namespace arm } // namespace art diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 572d900909..61dabfabaa 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -4688,6 +4688,16 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { } } +void LocationsBuilderMIPS::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) { + // TODO: to be implemented. 
+} + +void InstructionCodeGeneratorMIPS::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) { + // TODO: to be implemented. +} + void LocationsBuilderMIPS::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index b5e98714e6..b1f9b1db53 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -2636,6 +2636,16 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) { + // TODO: to be implemented. +} + +void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) { + // TODO: to be implemented. +} + void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); if (Primitive::IsFloatingPointType(select->GetType())) { diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 12aa03c4af..d6e92ccb81 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1059,6 +1059,11 @@ void CodeGeneratorX86::GenerateFrameEntry() { } } + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ movl(Address(ESP, -kShouldDeoptimizeFlagSize), Immediate(0)); + } + int adjust = GetFrameSize() - FrameEntrySpillSize(); __ subl(ESP, Immediate(adjust)); __ cfi().AdjustCFAOffset(adjust); @@ -1676,6 +1681,17 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(flag, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + __ movl(flag->GetLocations()->Out().AsRegister<Register>(), + Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); +} + static bool SelectCanUseCMOV(HSelect* select) { // There are no conditional move instructions for XMMs. if (Primitive::IsFloatingPointType(select->GetType())) { diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 22f7f6b52b..4474decf59 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1326,6 +1326,12 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } } + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize), + Immediate(0)); + } + // Save the current method if we need it. Note that we do not // do this in HCurrentMethod, as the instruction might have been removed // in the SSA graph. 
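For context, the slot zero-initialized above is what the CHA deoptimization guard reads back at run time. A minimal sketch of that guard at the HIR level, mirroring the AddCHAGuard helper added to inliner.cc later in this change (here `graph` and `dex_pc` stand in for the inliner's graph_ field and the call-site dex pc, so this is an illustration rather than code taken from the patch):

    HInstruction* deopt_flag = new (graph->GetArena()) HShouldDeoptimizeFlag(dex_pc);
    HInstruction* should_deopt = new (graph->GetArena()) HNotEqual(
        deopt_flag, graph->GetIntConstant(0, dex_pc));
    HInstruction* deopt = new (graph->GetArena()) HDeoptimize(should_deopt, dex_pc);

The VisitShouldDeoptimizeFlag implementations below lower the flag read to a single stack load, so the whole guard amounts to a load, a compare against zero and a conditional deoptimization.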
@@ -1747,6 +1753,17 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary(flag, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { + __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); +} + static bool SelectCanUseCMOV(HSelect* select) { // There are no conditional move instructions for XMMs. if (Primitive::IsFloatingPointType(select->GetType())) { diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index d3623f17d1..eabdbad13c 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,6 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ +#include "debug/dwarf/register.h" +#include "locations.h" +#include "nodes.h" +#include "utils/arm/constants_arm.h" + // TODO(VIXL): Make VIXL compile with -Wshadow. #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wshadow" diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc index 82b81238ab..10a36c6ff4 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -17,12 +17,24 @@ #include "dex_cache_array_fixups_arm.h" #include "base/arena_containers.h" +#ifdef ART_USE_VIXL_ARM_BACKEND +#include "code_generator_arm_vixl.h" +#include "intrinsics_arm_vixl.h" +#else #include "code_generator_arm.h" #include "intrinsics_arm.h" +#endif #include "utils/dex_cache_arrays_layout-inl.h" namespace art { namespace arm { +#ifdef ART_USE_VIXL_ARM_BACKEND +typedef CodeGeneratorARMVIXL CodeGeneratorARMType; +typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType; +#else +typedef CodeGeneratorARM CodeGeneratorARMType; +typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType; +#endif /** * Finds instructions that need the dex cache arrays base as an input. @@ -31,7 +43,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { public: DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) : HGraphVisitor(graph), - codegen_(down_cast<CodeGeneratorARM*>(codegen)), + codegen_(down_cast<CodeGeneratorARMType*>(codegen)), dex_cache_array_bases_(std::less<const DexFile*>(), // Attribute memory use to code generator. graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} @@ -66,7 +78,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. if (invoke->HasPcRelativeDexCache() && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARM>(invoke, codegen_)) { + !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARMType>(invoke, codegen_)) { HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(invoke->GetDexFile()); // Update the element offset in base. 
DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFile()); @@ -94,7 +106,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { return base; } - CodeGeneratorARM* codegen_; + CodeGeneratorARMType* codegen_; using DexCacheArraysBaseMap = ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index c8cba205fd..188ee3a8d1 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -23,7 +23,6 @@ #include "base/arena_containers.h" #include "base/bit_vector-inl.h" #include "base/stringprintf.h" -#include "handle_scope-inl.h" namespace art { @@ -448,7 +447,6 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { // Ensure that reference type instructions have reference type info. if (instruction->GetType() == Primitive::kPrimNot) { - ScopedObjectAccess soa(Thread::Current()); if (!instruction->GetReferenceTypeInfo().IsValid()) { AddError(StringPrintf("Reference type instruction %s:%d does not have " "valid reference type information.", @@ -1011,7 +1009,6 @@ void GraphChecker::VisitConstant(HConstant* instruction) { void GraphChecker::VisitBoundType(HBoundType* instruction) { VisitInstruction(instruction); - ScopedObjectAccess soa(Thread::Current()); if (!instruction->GetUpperBound().IsValid()) { AddError(StringPrintf( "%s %d does not have a valid upper bound RTI.", diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 01e89bb304..8d93867230 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -292,6 +292,21 @@ static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes) classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr; } +ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { + if (!resolved_method->HasSingleImplementation()) { + return nullptr; + } + if (Runtime::Current()->IsAotCompiler()) { + // No CHA-based devirtulization for AOT compiler (yet). + return nullptr; + } + if (outermost_graph_->IsCompilingOsr()) { + // We do not support HDeoptimize in OSR methods. + return nullptr; + } + return resolved_method->GetSingleImplementation(); +} + bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved()) { return false; // Don't bother to move further if we know the method is unresolved. @@ -317,10 +332,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method); } + bool cha_devirtualize = false; + if (actual_method == nullptr) { + ArtMethod* method = TryCHADevirtualization(resolved_method); + if (method != nullptr) { + cha_devirtualize = true; + actual_method = method; + } + } + if (actual_method != nullptr) { - bool result = TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true); + bool result = TryInlineAndReplace(invoke_instruction, + actual_method, + /* do_rtp */ true, + cha_devirtualize); if (result && !invoke_instruction->IsInvokeStaticOrDirect()) { - MaybeRecordStat(kInlinedInvokeVirtualOrInterface); + if (cha_devirtualize) { + // Add dependency due to devirtulization. We've assumed resolved_method + // has single implementation. 
+ outermost_graph_->AddCHASingleImplementationDependency(resolved_method); + MaybeRecordStat(kCHAInline); + } else { + MaybeRecordStat(kInlinedInvokeVirtualOrInterface); + } } return result; } @@ -438,7 +472,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); - if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) { + if (!TryInlineAndReplace(invoke_instruction, + resolved_method, + /* do_rtp */ false, + /* cha_devirtualize */ false)) { return false; } @@ -465,6 +502,25 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, return true; } +void HInliner::AddCHAGuard(HInstruction* invoke_instruction, + uint32_t dex_pc, + HInstruction* cursor, + HBasicBlock* bb_cursor) { + HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc); + HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual( + deopt_flag, graph_->GetIntConstant(0, dex_pc)); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc); + + if (cursor != nullptr) { + bb_cursor->InsertInstructionAfter(deopt_flag, cursor); + } else { + bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction()); + } + bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag); + bb_cursor->InsertInstructionAfter(deopt, should_deopt); + deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); +} + HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, HInstruction* cursor, HBasicBlock* bb_cursor, @@ -787,8 +843,14 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( return true; } -bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) { +bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, + ArtMethod* method, + bool do_rtp, + bool cha_devirtualize) { HInstruction* return_replacement = nullptr; + uint32_t dex_pc = invoke_instruction->GetDexPc(); + HInstruction* cursor = invoke_instruction->GetPrevious(); + HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always @@ -826,6 +888,9 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho return false; } } + if (cha_devirtualize) { + AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor); + } if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index a2b4fc96c4..ffebd97cb8 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -62,8 +62,12 @@ class HInliner : public HOptimization { // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether // reference type propagation can run after the inlining. If the inlining is successful, this - // method will replace and remove the `invoke_instruction`. - bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp) + // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true, + // a CHA guard needs to be added for the inlining. 
+ bool TryInlineAndReplace(HInvoke* invoke_instruction, + ArtMethod* resolved_method, + bool do_rtp, + bool cha_devirtualize) REQUIRES_SHARED(Locks::mutator_lock_); bool TryBuildAndInline(HInvoke* invoke_instruction, @@ -118,6 +122,18 @@ class HInliner : public HOptimization { Handle<mirror::ObjectArray<mirror::Class>> classes) REQUIRES_SHARED(Locks::mutator_lock_); + // Try CHA-based devirtualization to change virtual method calls into + // direct calls. + // Returns the actual method that resolved_method can be devirtualized to. + ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a + // should_deoptimize flag and if it's true, does deoptimization. + void AddCHAGuard(HInstruction* invoke_instruction, + uint32_t dex_pc, + HInstruction* cursor, + HBasicBlock* bb_cursor); HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker, HInstruction* receiver, diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 9e724474d0..433dced9d7 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -71,7 +71,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {} Location MoveArguments(CodeGenerator* codegen) { - InvokeDexCallingConventionVisitorARM calling_convention_visitor; + InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor; IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor); return calling_convention_visitor.GetMethodLocation(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 7ab04e15fc..e3f4d8f035 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -333,7 +333,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_current_method_(nullptr), inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), - osr_(osr) { + osr_(osr), + cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) { blocks_.reserve(kDefaultNumberOfBlocks); } @@ -536,6 +537,20 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool IsCompilingOsr() const { return osr_; } + ArenaSet<ArtMethod*>& GetCHASingleImplementationList() { + return cha_single_implementation_list_; + } + + void AddCHASingleImplementationDependency(ArtMethod* method) { + cha_single_implementation_list_.insert(method); + } + + bool HasShouldDeoptimizeFlag() const { + // TODO: if all CHA guards can be eliminated, there is no need for the flag + // even if cha_single_implementation_list_ is not empty. + return !cha_single_implementation_list_.empty(); + } + bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } @@ -672,6 +687,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // compiled code entries which the interpreter can directly jump to. const bool osr_; + // List of methods that are assumed to have single implementation. + ArenaSet<ArtMethod*> cha_single_implementation_list_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. friend class HInliner; // For the reverse post order. 
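The new HGraph accessors above are the glue between the inliner and the rest of this change. A rough sketch of the intended flow, assuming `graph` and `resolved_method` are in scope (the real calls live in inliner.cc and in the code generators shown elsewhere in this patch):

    // HInliner records the devirtualized target as a CHA dependency.
    graph->AddCHASingleImplementationDependency(resolved_method);
    // Any recorded dependency forces the should_deoptimize flag slot, which
    // the frame entry zero-initializes (see the x86/x86-64 hunks above).
    DCHECK(graph->HasShouldDeoptimizeFlag());

At JIT compile time the list returned by GetCHASingleImplementationList() is handed to the code cache together with HasShouldDeoptimizeFlag() (see the optimizing_compiler.cc hunk below), presumably so the runtime can later deoptimize code whose single-implementation assumption gets broken.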
@@ -1240,6 +1258,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ M(CurrentMethod, Instruction) \ + M(ShouldDeoptimizeFlag, Instruction) \ M(Deoptimize, Instruction) \ M(Div, BinaryOperation) \ M(DivZeroCheck, Instruction) \ @@ -2875,6 +2894,27 @@ class HDeoptimize FINAL : public HTemplateInstruction<1> { DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; +// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. +// The compiled code checks this flag value in a guard before devirtualized call and +// if it's true, starts to do deoptimization. +// It has a 4-byte slot on stack. +// TODO: allocate a register for this flag. +class HShouldDeoptimizeFlag FINAL : public HExpression<0> { + public: + // TODO: use SideEffects to aid eliminating some CHA guards. + explicit HShouldDeoptimizeFlag(uint32_t dex_pc) + : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + } + + // We don't eliminate CHA guards yet. + bool CanBeMoved() const OVERRIDE { return false; } + + DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); + + private: + DISALLOW_COPY_AND_ASSIGN(HShouldDeoptimizeFlag); +}; + // Represents the ArtMethod that was passed as a first argument to // the method. It is used by instructions that depend on it, like // instructions that work with the dex cache. diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 2382b728df..8ea2b06530 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -630,10 +630,8 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, #if defined(ART_ENABLE_CODEGEN_arm) case kThumb2: case kArm: { -#ifndef ART_USE_VIXL_ARM_BACKEND arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); -#endif arm::InstructionSimplifierArm* simplifier = new (arena) arm::InstructionSimplifierArm(graph, stats); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); @@ -642,9 +640,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, simplifier, side_effects, gvn, -#ifndef ART_USE_VIXL_ARM_BACKEND fixups -#endif }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); break; @@ -1208,7 +1204,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, code_allocator.GetMemory().data(), code_allocator.GetSize(), osr, - roots); + roots, + codegen->GetGraph()->HasShouldDeoptimizeFlag(), + codegen->GetGraph()->GetCHASingleImplementationList()); if (code == nullptr) { code_cache->ClearData(self, stack_map_data, roots_data); diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index c8d1ce0bd5..203b1ec7ec 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -27,6 +27,7 @@ namespace art { enum MethodCompilationStat { kAttemptCompilation = 0, + kCHAInline, kCompiled, kInlinedInvoke, kReplacedInvokeWithSimplePattern, @@ -106,6 +107,7 @@ class OptimizingCompilerStats { std::string name; switch (stat) { case kAttemptCompilation : name = "AttemptCompilation"; break; + case kCHAInline : name = "CHAInline"; break; case kCompiled : name = "Compiled"; break; case kInlinedInvoke : name = "InlinedInvoke"; break; case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break; diff --git 
a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 5991791a15..59523a93a0 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -87,6 +87,10 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint // Adjust the stack slot, now that we know the number of them for each type. // The way this implementation lays out the stack is the following: // [parameter slots ] + // [art method (caller) ] + // [entry spill (core) ] + // [entry spill (float) ] + // [should_deoptimize flag] (this is optional) // [catch phi spill slots ] // [double spill slots ] // [long spill slots ] diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index aa0d3710fa..9064f865c3 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -1749,7 +1749,7 @@ static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) { bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { return processing_core_regs ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsCoreCalleeSaveRegister(reg); + : !codegen_->IsFloatingPointCalleeSaveRegister(reg); } static bool RegisterIsAligned(size_t reg) { diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index fb6f172cb0..2d026b83f9 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -428,8 +428,6 @@ void ArmVIXLJNIMacroAssembler::Copy(FrameOffset dst ATTRIBUTE_UNUSED, UNIMPLEMENTED(FATAL); } -static constexpr uint32_t kArmInstrMaxSizeInBytes = 4; - void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, FrameOffset handle_scope_offset, ManagedRegister min_reg, @@ -458,14 +456,14 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(ManagedRegister mout_reg, if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { if (!out_reg.Equals(in_reg)) { AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - 3 * kArmInstrMaxSizeInBytes, + 3 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); ___ it(eq, 0xc); ___ mov(eq, out_reg.AsVIXLRegister(), 0); asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); } else { AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - 2 * kArmInstrMaxSizeInBytes, + 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); ___ it(ne, 0x8); asm_.AddConstantInIt(out_reg.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); @@ -496,7 +494,7 @@ void ArmVIXLJNIMacroAssembler::CreateHandleScopeEntry(FrameOffset out_off, if (asm_.ShifterOperandCanHold(ADD, handle_scope_offset.Int32Value(), kCcDontCare)) { AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - 2 * kArmInstrMaxSizeInBytes, + 2 * vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); ___ it(ne, 0x8); asm_.AddConstantInIt(scratch.AsVIXLRegister(), sp, handle_scope_offset.Int32Value(), ne); @@ -589,7 +587,7 @@ void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister m_scratch, size_t s ___ Cmp(scratch.AsVIXLRegister(), 0); { AssemblerAccurateScope guard(asm_.GetVIXLAssembler(), - kArmInstrMaxSizeInBytes, + vixl32::kMaxInstructionSizeInBytes, CodeBufferCheckScope::kMaximumSize); 
___ b(ne, Narrow, exception_blocks_.back()->Entry()); } diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h index 70ea028b17..d71c2fe997 100644 --- a/compiler/utils/atomic_method_ref_map-inl.h +++ b/compiler/utils/atomic_method_ref_map-inl.h @@ -78,6 +78,15 @@ inline void AtomicMethodRefMap<T>::Visit(const Visitor& visitor) { } } +template <typename T> +inline void AtomicMethodRefMap<T>::ClearEntries() { + for (auto& it : arrays_) { + for (auto& element : it.second) { + element.StoreRelaxed(nullptr); + } + } +} + } // namespace art #endif // ART_COMPILER_UTILS_ATOMIC_METHOD_REF_MAP_INL_H_ diff --git a/compiler/utils/atomic_method_ref_map.h b/compiler/utils/atomic_method_ref_map.h index 11ab211817..fed848f563 100644 --- a/compiler/utils/atomic_method_ref_map.h +++ b/compiler/utils/atomic_method_ref_map.h @@ -55,6 +55,8 @@ class AtomicMethodRefMap { template <typename Visitor> void Visit(const Visitor& visitor); + void ClearEntries(); + private: // Verified methods. The method array is fixed to avoid needing a lock to extend it. using ElementArray = dchecked_vector<Atomic<T>>; diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 90fe6da438..52765f9e73 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -19,8 +19,11 @@ #include "class_linker.h" #include "compiler/common_compiler_test.h" +#include "compiler/dex/verification_results.h" +#include "compiler/dex/verified_method.h" #include "compiler/driver/compiler_options.h" #include "compiler/driver/compiler_driver.h" +#include "compiler/utils/atomic_method_ref_map-inl.h" #include "compiler_callbacks.h" #include "dex_file.h" #include "dex_file_types.h" @@ -90,6 +93,14 @@ class VerifierDepsTest : public CommonCompilerTest { verifier_deps_.reset(callbacks_->GetVerifierDeps()); } callbacks_->SetVerifierDeps(nullptr); + // Clear entries in the verification results to avoid hitting a DCHECK that + // we always succeed inserting a new entry after verifying. + AtomicMethodRefMap<const VerifiedMethod*>* map = + &compiler_driver_->GetVerificationResults()->atomic_verified_methods_; + map->Visit([](const MethodReference& ref ATTRIBUTE_UNUSED, const VerifiedMethod* method) { + delete method; + }); + map->ClearEntries(); } void SetVerifierDeps(const std::vector<const DexFile*>& dex_files) { @@ -112,6 +123,9 @@ class VerifierDepsTest : public CommonCompilerTest { for (const DexFile* dex_file : dex_files_) { class_linker_->RegisterDexFile(*dex_file, loader.Get()); } + for (const DexFile* dex_file : dex_files_) { + compiler_driver_->GetVerificationResults()->AddDexFile(dex_file); + } } void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) { |