Diffstat (limited to 'compiler')
54 files changed, 2078 insertions, 564 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index f5589cd7a3..1ee2a21b18 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -52,6 +52,7 @@ art_cc_defaults { "optimizing/cha_guard_optimization.cc", "optimizing/code_generator.cc", "optimizing/code_generator_utils.cc", + "optimizing/code_sinking.cc", "optimizing/constant_folding.cc", "optimizing/dead_code_elimination.cc", "optimizing/escape.cc", diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index d89cdbabf8..9a45379a05 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -52,10 +52,10 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) { compiler_driver_->GetCompiledMethod(MethodReference(&dex_file, method->GetDexMethodIndex())); } - if (compiled_method != nullptr) { + // If the code size is 0 it means the method was skipped due to profile guided compilation. + if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) { ArrayRef<const uint8_t> code = compiled_method->GetQuickCode(); uint32_t code_size = code.size(); - CHECK_NE(0u, code_size); ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable(); uint32_t vmap_table_offset = vmap_table.empty() ? 0u : sizeof(OatQuickMethodHeader) + vmap_table.size(); diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc index bfd485d126..53601033da 100644 --- a/compiler/dex/dex_to_dex_decompiler.cc +++ b/compiler/dex/dex_to_dex_decompiler.cc @@ -20,7 +20,7 @@ #include "base/mutex.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" -#include "optimizing/bytecode_utils.h" +#include "bytecode_utils.h" namespace art { namespace optimizer { diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 7e91453741..a5e4cb0877 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -2283,7 +2283,7 @@ class InitializeClassVisitor : public CompilationVisitor { public: explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {} - virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { + void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE { ATRACE_CALL(); jobject jclass_loader = manager_->GetClassLoader(); const DexFile& dex_file = *manager_->GetDexFile(); @@ -2343,23 +2343,32 @@ class InitializeClassVisitor : public CompilationVisitor { // mode which prevents the GC from visiting objects modified during the transaction. // Ensure GC is not run so don't access freed objects when aborting transaction. 
- ScopedAssertNoThreadSuspension ants("Transaction end"); - runtime->ExitTransactionMode(); + { + ScopedAssertNoThreadSuspension ants("Transaction end"); + runtime->ExitTransactionMode(); + + if (!success) { + CHECK(soa.Self()->IsExceptionPending()); + mirror::Throwable* exception = soa.Self()->GetException(); + VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " + << exception->Dump(); + std::ostream* file_log = manager_->GetCompiler()-> + GetCompilerOptions().GetInitFailureOutput(); + if (file_log != nullptr) { + *file_log << descriptor << "\n"; + *file_log << exception->Dump() << "\n"; + } + soa.Self()->ClearException(); + transaction.Rollback(); + CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; + } + } if (!success) { - CHECK(soa.Self()->IsExceptionPending()); - mirror::Throwable* exception = soa.Self()->GetException(); - VLOG(compiler) << "Initialization of " << descriptor << " aborted because of " - << exception->Dump(); - std::ostream* file_log = manager_->GetCompiler()-> - GetCompilerOptions().GetInitFailureOutput(); - if (file_log != nullptr) { - *file_log << descriptor << "\n"; - *file_log << exception->Dump() << "\n"; - } - soa.Self()->ClearException(); - transaction.Rollback(); - CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored"; + // On failure, still intern strings of static fields and strings seen in <clinit>, as these + // will be created in the zygote. This is separated from the transaction code just + // above as we will allocate strings, so we must be allowed to suspend. + InternStrings(klass, class_loader); } } } @@ -2375,6 +2384,57 @@ class InitializeClassVisitor : public CompilationVisitor { } private: + void InternStrings(Handle<mirror::Class> klass, Handle<mirror::ClassLoader> class_loader) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(manager_->GetCompiler()->GetCompilerOptions().IsBootImage()); + DCHECK(klass->IsVerified()); + DCHECK(!klass->IsInitialized()); + + StackHandleScope<1> hs(Thread::Current()); + Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache()); + const DexFile* dex_file = manager_->GetDexFile(); + const DexFile::ClassDef* class_def = klass->GetClassDef(); + ClassLinker* class_linker = manager_->GetClassLinker(); + + // Check encoded final field values for strings and intern them. + annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file, + &h_dex_cache, + &class_loader, + manager_->GetClassLinker(), + *class_def); + for ( ; value_it.HasNext(); value_it.Next()) { + if (value_it.GetValueType() == annotations::RuntimeEncodedStaticFieldValueIterator::kString) { + // Resolve the string. This will intern the string. + art::ObjPtr<mirror::String> resolved = class_linker->ResolveString( + *dex_file, dex::StringIndex(value_it.GetJavaValue().i), h_dex_cache); + CHECK(resolved != nullptr); + } + } + + // Intern strings seen in <clinit>.
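(Context for the scan below, per the standard dex instruction formats rather than anything introduced by this change: CONST_STRING uses the 21c format, so its string index is the 16-bit VRegB_21c(), while CONST_STRING_JUMBO uses the 31c format with a 32-bit VRegB_31c(); resolving either index through the ClassLinker is what interns the string.)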
+ ArtMethod* clinit = klass->FindClassInitializer(class_linker->GetImagePointerSize()); + if (clinit != nullptr) { + const DexFile::CodeItem* code_item = clinit->GetCodeItem(); + DCHECK(code_item != nullptr); + const Instruction* inst = Instruction::At(code_item->insns_); + + const uint32_t insns_size = code_item->insns_size_in_code_units_; + for (uint32_t dex_pc = 0; dex_pc < insns_size;) { + if (inst->Opcode() == Instruction::CONST_STRING) { + ObjPtr<mirror::String> s = class_linker->ResolveString( + *dex_file, dex::StringIndex(inst->VRegB_21c()), h_dex_cache); + CHECK(s != nullptr); + } else if (inst->Opcode() == Instruction::CONST_STRING_JUMBO) { + ObjPtr<mirror::String> s = class_linker->ResolveString( + *dex_file, dex::StringIndex(inst->VRegB_31c()), h_dex_cache); + CHECK(s != nullptr); + } + dex_pc += inst->SizeInCodeUnits(); + inst = inst->Next(); + } + } + } + const ParallelCompilationManager* const manager_; }; diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 97954f3c29..562f97b3ae 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -240,9 +240,8 @@ class CompilerDriverProfileTest : public CompilerDriverTest { ProfileCompilationInfo info; for (const std::unique_ptr<const DexFile>& dex_file : dex_files) { - std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation()); - profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 1); - profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 2); + profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1); + profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2); } return &profile_info_; } diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h index 9bd25d8484..63c23cb074 100644 --- a/compiler/intrinsics_list.h +++ b/compiler/intrinsics_list.h @@ -24,6 +24,10 @@ // Note: adding a new intrinsic requires an art image version change, // as the modifiers flag for some ArtMethods will need to be changed. +// Note: j.l.Integer.valueOf says kNoThrow even though it could throw an OOME. +// The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to GVN Integer.valueOf +// (kNoSideEffects), and it is also OK to remove it if it's unused. 
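For context, INTRINSICS_LIST is an X-macro: a consumer defines V and the list stamps out one V(...) invocation per intrinsic, so adding IntegerValueOf at the end automatically reaches every consumer. A minimal sketch of a hypothetical consumer (EMIT_ENUM_VALUE and SampleIntrinsic are assumed names, not part of this change):

#define EMIT_ENUM_VALUE(Name, IsStatic, NeedsEnvOrCache, SideEffects, Exceptions, ClassName, MethodName, Signature) \
  k ## Name,
enum class SampleIntrinsic {
  kNone,
  INTRINSICS_LIST(EMIT_ENUM_VALUE)  // Expands to kDoubleDoubleToRawLongBits, ..., kIntegerValueOf.
};
#undef EMIT_ENUM_VALUE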
+ #define INTRINSICS_LIST(V) \ V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \ V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \ @@ -149,7 +153,8 @@ V(UnsafeLoadFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "loadFence", "()V") \ V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \ V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \ - V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") + V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \ + V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;") #endif // ART_COMPILER_INTRINSICS_LIST_H_ #undef ART_COMPILER_INTRINSICS_LIST_H_ // #define is only for lint. diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 66111f6e23..e2233e4bbd 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -265,6 +265,7 @@ class OatTest : public CommonCompilerTest { void TestDexFileInput(bool verify, bool low_4gb, bool use_profile); void TestZipFileInput(bool verify); + void TestZipFileInputWithEmptyDex(); std::unique_ptr<const InstructionSetFeatures> insn_features_; std::unique_ptr<QuickCompilerCallbacks> callbacks_; @@ -821,6 +822,28 @@ TEST_F(OatTest, ZipFileInputCheckVerifier) { TestZipFileInput(true); } +void OatTest::TestZipFileInputWithEmptyDex() { + ScratchFile zip_file; + ZipBuilder zip_builder(zip_file.GetFile()); + bool success = zip_builder.AddFile("classes.dex", nullptr, 0); + ASSERT_TRUE(success); + success = zip_builder.Finish(); + ASSERT_TRUE(success) << strerror(errno); + + SafeMap<std::string, std::string> key_value_store; + key_value_store.Put(OatHeader::kImageLocationKey, "test.art"); + std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() }; // NOLINT [readability/braces] [4] + ScratchFile oat_file, vdex_file(oat_file, ".vdex"); + std::unique_ptr<ProfileCompilationInfo> profile_compilation_info(new ProfileCompilationInfo()); + success = WriteElf(vdex_file.GetFile(), oat_file.GetFile(), input_filenames, + key_value_store, /*verify*/false, profile_compilation_info.get()); + ASSERT_FALSE(success); +} + +TEST_F(OatTest, ZipFileInputWithEmptyDex) { + TestZipFileInputWithEmptyDex(); +} + TEST_F(OatTest, UpdateChecksum) { InstructionSet insn_set = kX86; std::string error_msg; diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 0ea11255a8..8ab44d2c19 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -104,6 +104,13 @@ inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod& // Defines the location of the raw dex file to write. 
class OatWriter::DexFileSource { public: + enum Type { + kNone, + kZipEntry, + kRawFile, + kRawData, + }; + explicit DexFileSource(ZipEntry* zip_entry) : type_(kZipEntry), source_(zip_entry) { DCHECK(source_ != nullptr); @@ -119,6 +126,7 @@ class OatWriter::DexFileSource { DCHECK(source_ != nullptr); } + Type GetType() const { return type_; } bool IsZipEntry() const { return type_ == kZipEntry; } bool IsRawFile() const { return type_ == kRawFile; } bool IsRawData() const { return type_ == kRawData; } @@ -147,13 +155,6 @@ class OatWriter::DexFileSource { } private: - enum Type { - kNone, - kZipEntry, - kRawFile, - kRawData, - }; - Type type_; const void* source_; }; @@ -2259,16 +2260,38 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil ZipEntry* zip_entry = oat_dex_file->source_.GetZipEntry(); std::unique_ptr<MemMap> mem_map( zip_entry->ExtractToMemMap(location.c_str(), "classes.dex", &error_msg)); + if (mem_map == nullptr) { + LOG(ERROR) << "Failed to extract dex file to mem map for layout: " << error_msg; + return false; + } dex_file = DexFile::Open(location, zip_entry->GetCrc32(), std::move(mem_map), /* verify */ true, /* verify_checksum */ true, &error_msg); - } else { - DCHECK(oat_dex_file->source_.IsRawFile()); + } else if (oat_dex_file->source_.IsRawFile()) { File* raw_file = oat_dex_file->source_.GetRawFile(); dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg); + } else { + // The source data is a vdex file. + CHECK(oat_dex_file->source_.IsRawData()) + << static_cast<size_t>(oat_dex_file->source_.GetType()); + const uint8_t* raw_dex_file = oat_dex_file->source_.GetRawData(); + // Note: The raw data has already been checked to contain the header + // and all the data that the header specifies as the file size. + DCHECK(raw_dex_file != nullptr); + DCHECK(ValidateDexFileHeader(raw_dex_file, oat_dex_file->GetLocation())); + const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file); + // Since the source may have had its layout changed, don't verify the checksum. + dex_file = DexFile::Open(raw_dex_file, + header->file_size_, + location, + oat_dex_file->dex_file_location_checksum_, + nullptr, + /* verify */ true, + /* verify_checksum */ false, + &error_msg); } if (dex_file == nullptr) { LOG(ERROR) << "Failed to open dex file for layout: " << error_msg; diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h deleted file mode 100644 index 133afa47fe..0000000000 --- a/compiler/optimizing/bytecode_utils.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_ -#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_ - -#include "base/arena_object.h" -#include "dex_file.h" -#include "dex_file-inl.h" -#include "dex_instruction-inl.h" - -namespace art { - -class CodeItemIterator : public ValueObject { - public: - explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {} - CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc) - : code_ptr_(code_item.insns_ + start_dex_pc), - code_end_(code_item.insns_ + code_item.insns_size_in_code_units_), - dex_pc_(start_dex_pc) {} - - bool Done() const { return code_ptr_ >= code_end_; } - bool IsLast() const { return code_ptr_ + CurrentInstruction().SizeInCodeUnits() >= code_end_; } - - const Instruction& CurrentInstruction() const { return *Instruction::At(code_ptr_); } - uint32_t CurrentDexPc() const { return dex_pc_; } - - void Advance() { - DCHECK(!Done()); - size_t instruction_size = CurrentInstruction().SizeInCodeUnits(); - code_ptr_ += instruction_size; - dex_pc_ += instruction_size; - } - - private: - const uint16_t* code_ptr_; - const uint16_t* const code_end_; - uint32_t dex_pc_; - - DISALLOW_COPY_AND_ASSIGN(CodeItemIterator); -}; - -class DexSwitchTable : public ValueObject { - public: - DexSwitchTable(const Instruction& instruction, uint32_t dex_pc) - : instruction_(instruction), - dex_pc_(dex_pc), - sparse_(instruction.Opcode() == Instruction::SPARSE_SWITCH) { - int32_t table_offset = instruction.VRegB_31t(); - const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset; - DCHECK_EQ(table[0], sparse_ ? static_cast<uint16_t>(Instruction::kSparseSwitchSignature) - : static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); - num_entries_ = table[1]; - values_ = reinterpret_cast<const int32_t*>(&table[2]); - } - - uint16_t GetNumEntries() const { - return num_entries_; - } - - void CheckIndex(size_t index) const { - if (sparse_) { - // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. - DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_)); - } else { - // In a packed table, we have the starting key and num_entries_ values. - DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_)); - } - } - - int32_t GetEntryAt(size_t index) const { - CheckIndex(index); - return values_[index]; - } - - uint32_t GetDexPcForIndex(size_t index) const { - CheckIndex(index); - return dex_pc_ + - (reinterpret_cast<const int16_t*>(values_ + index) - - reinterpret_cast<const int16_t*>(&instruction_)); - } - - // Index of the first value in the table. - size_t GetFirstValueIndex() const { - if (sparse_) { - // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. - return num_entries_; - } else { - // In a packed table, we have the starting key and num_entries_ values. - return 1; - } - } - - bool IsSparse() const { return sparse_; } - - bool ShouldBuildDecisionTree() { - return IsSparse() || GetNumEntries() <= kSmallSwitchThreshold; - } - - private: - const Instruction& instruction_; - const uint32_t dex_pc_; - - // Whether this is a sparse-switch table (or a packed-switch one). - const bool sparse_; - - // This can't be const as it needs to be computed off of the given instruction, and complicated - // expressions in the initializer list seemed very ugly. 
- uint16_t num_entries_; - - const int32_t* values_; - - // The number of entries in a packed switch before we use a jump table or specified - // compare/jump series. - static constexpr uint16_t kSmallSwitchThreshold = 3; - - DISALLOW_COPY_AND_ASSIGN(DexSwitchTable); -}; - -class DexSwitchTableIterator { - public: - explicit DexSwitchTableIterator(const DexSwitchTable& table) - : table_(table), - num_entries_(static_cast<size_t>(table_.GetNumEntries())), - first_target_offset_(table_.GetFirstValueIndex()), - index_(0u) {} - - bool Done() const { return index_ >= num_entries_; } - bool IsLast() const { return index_ == num_entries_ - 1; } - - void Advance() { - DCHECK(!Done()); - index_++; - } - - int32_t CurrentKey() const { - return table_.IsSparse() ? table_.GetEntryAt(index_) : table_.GetEntryAt(0) + index_; - } - - int32_t CurrentTargetOffset() const { - return table_.GetEntryAt(index_ + first_target_offset_); - } - - uint32_t GetDexPcForCurrentIndex() const { return table_.GetDexPcForIndex(index_); } - - private: - const DexSwitchTable& table_; - const size_t num_entries_; - const size_t first_target_offset_; - - size_t index_; -}; - -inline const Instruction& GetDexInstructionAt(const DexFile::CodeItem& code_item, uint32_t dex_pc) { - return CodeItemIterator(code_item, dex_pc).CurrentInstruction(); -} - -inline bool IsThrowingDexInstruction(const Instruction& instruction) { - // Special-case MONITOR_EXIT which is a throwing instruction but the verifier - // guarantees that it will never throw. This is necessary to avoid rejecting - // 'synchronized' blocks/methods. - return instruction.IsThrow() && instruction.Opcode() != Instruction::MONITOR_EXIT; -} - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_ diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 8dd423fcbb..424b8507fb 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -861,8 +861,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, bool CodeGenerator::HasStackMapAtCurrentPc() { uint32_t pc = GetAssembler()->CodeSize(); size_t count = stack_map_stream_.GetNumberOfStackMaps(); + if (count == 0) { + return false; + } CodeOffset native_pc_offset = stack_map_stream_.GetStackMap(count - 1).native_pc_code_offset; - return (count > 0) && (native_pc_offset.Uint32Value(GetInstructionSet()) == pc); + return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc); } void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index edccbd4904..18c95b3c41 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -4094,7 +4094,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok } void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -4107,7 +4107,7 @@ void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* inv // art::PrepareForRegisterAllocation. 
DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena()); + IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_); if (intrinsic.TryDispatch(invoke)) { return; } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index c9dde7cc55..51dd898a81 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1899,9 +1899,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { } } -auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) { - auto null_checker = [this, instruction]() { - this->codegen_->MaybeRecordImplicitNullCheck(instruction); +static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* codegen) { + auto null_checker = [codegen, instruction]() { + codegen->MaybeRecordImplicitNullCheck(instruction); }; return null_checker; } @@ -1911,7 +1911,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - auto null_checker = GetImplicitNullChecker(instruction); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); Primitive::Type type = instruction->GetType(); const bool maybe_compressed_char_at = mirror::kUseStringCompression && @@ -2073,6 +2073,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } + + if (type == Primitive::kPrimNot) { + Register out = locations->Out().AsRegister<Register>(); + __ MaybeUnpoisonHeapReference(out); + } } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { @@ -2143,7 +2148,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { bool needs_runtime_call = locations->WillCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); - auto null_checker = GetImplicitNullChecker(instruction); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); Register base_reg = index.IsConstant() ? obj : TMP; switch (value_type) { @@ -2200,7 +2205,31 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { DCHECK(!needs_write_barrier); } else { Register value = value_location.AsRegister<Register>(); - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + DCHECK_EQ(value_type, Primitive::kPrimNot); + // Use Sw() instead of StoreToOffset() in order to be able to + // hold the poisoned reference in AT and thus avoid allocating + // yet another temporary register. 
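A worked example of the offset split performed just below, with an assumed constant for illustration: if data_offset were 0x18010, then low = Low16Bits(0x18010) = 0x8010 = -0x7FF0 as int16_t, and high = 0x18010 - (-0x7FF0) = 0x20000; after Addiu32(TMP, obj, 0x20000), the store at 16-bit offset -0x7FF0 addresses obj + 0x18010 exactly.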
+ if (index.IsConstant()) { + if (!IsInt<16>(static_cast<int32_t>(data_offset))) { + int16_t low = Low16Bits(data_offset); + uint32_t high = data_offset - low; + __ Addiu32(TMP, obj, high); + base_reg = TMP; + data_offset = low; + } + } else { + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); + } + __ PoisonHeapReference(AT, value); + __ Sw(AT, base_reg, data_offset); + null_checker(); + } else { + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + } if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); @@ -2208,6 +2237,8 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); + // Note: if heap poisoning is enabled, pAputObject takes care + // of poisoning the reference. codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } @@ -2322,6 +2353,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { __ Beqz(obj, slow_path->GetExitLabel()); // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value()); + __ MaybeUnpoisonHeapReference(obj_cls); __ Bne(obj_cls, cls, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -4891,7 +4923,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - auto null_checker = GetImplicitNullChecker(instruction); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (type) { case Primitive::kPrimBoolean: @@ -4958,6 +4990,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, dst = locations->Out().AsRegister<Register>(); } __ LoadFromOffset(load_type, dst, obj, offset, null_checker); + if (type == Primitive::kPrimNot) { + __ MaybeUnpoisonHeapReference(dst); + } } else { DCHECK(locations->Out().IsFpuRegister()); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); @@ -5016,7 +5051,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, StoreOperandType store_type = kStoreByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - auto null_checker = GetImplicitNullChecker(instruction); + bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (type) { case Primitive::kPrimBoolean: @@ -5089,7 +5125,16 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, } else { src = value_location.AsRegister<Register>(); } - __ StoreToOffset(store_type, src, obj, offset, null_checker); + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. 
+ DCHECK_EQ(type, Primitive::kPrimNot); + __ PoisonHeapReference(TMP, src); + __ StoreToOffset(store_type, TMP, obj, offset, null_checker); + } else { + __ StoreToOffset(store_type, src, obj, offset, null_checker); + } } else { FRegister src = value_location.AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { @@ -5101,7 +5146,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, } // TODO: memory barriers? - if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) { + if (needs_write_barrier) { Register src = value_location.AsRegister<Register>(); codegen_->MarkGCCard(obj, src, value_can_be_null); } @@ -5173,6 +5218,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value()); + __ MaybeUnpoisonHeapReference(out); if (instruction->IsExactCheck()) { // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); @@ -5239,6 +5285,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + __ MaybeUnpoisonHeapReference(temp); __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value()); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( @@ -5562,6 +5616,14 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem // temp = object->GetClass(); __ LoadFromOffset(kLoadWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
+ __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); @@ -5692,7 +5754,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); __ SetReorder(reordering); generate_null_check = true; break; @@ -5837,7 +5899,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); __ SetReorder(reordering); SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); @@ -6059,6 +6121,8 @@ void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) { + // Note: if heap poisoning is enabled, the entry point takes care + // of poisoning the reference. codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); } @@ -6076,6 +6140,8 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { + // Note: if heap poisoning is enabled, the entry point takes care + // of poisoning the reference. if (instruction->IsStringAlloc()) { // String is allocated through StringFactory. Call NewEmptyString entry point. 
Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>(); diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 47eba50248..0ccd80ab93 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -297,7 +297,6 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); - auto GetImplicitNullChecker(HInstruction* instruction); void GenPackedSwitchWithCompares(Register value_reg, int32_t lower_bound, uint32_t num_entries, diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 5be0da4011..138ebe6a25 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -1483,11 +1483,19 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { } } +static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS64* codegen) { + auto null_checker = [codegen, instruction]() { + codegen->MaybeRecordImplicitNullCheck(instruction); + }; + return null_checker; +} + void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); Primitive::Type type = instruction->GetType(); const bool maybe_compressed_char_at = mirror::kUseStringCompression && @@ -1498,10 +1506,10 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset); + __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker); } else { __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); - __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1511,10 +1519,10 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ LoadFromOffset(kLoadSignedByte, out, obj, offset); + __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker); } else { __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); - __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker); } break; } @@ -1524,11 +1532,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset); + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2); __ Daddu(TMP, obj, TMP); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); } break; } @@ -1537,8 +1545,7 @@ void 
InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { GpuRegister out = locations->Out().AsRegister<GpuRegister>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - __ LoadFromOffset(kLoadWord, TMP, obj, count_offset); - codegen_->MaybeRecordImplicitNullCheck(instruction); + __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); __ Dext(TMP, TMP, 0, 1); static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, "Expecting 0=compressed, 1=uncompressed"); @@ -1563,7 +1570,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { __ LoadFromOffset(kLoadUnsignedHalfword, out, obj, - data_offset + (const_index << TIMES_2)); + data_offset + (const_index << TIMES_2), + null_checker); } } else { GpuRegister index_reg = index.AsRegister<GpuRegister>(); @@ -1581,7 +1589,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } else { __ Dsll(TMP, index_reg, TIMES_2); __ Daddu(TMP, obj, TMP); - __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); + __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); } } break; @@ -1595,11 +1603,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFromOffset(load_type, out, obj, offset); + __ LoadFromOffset(load_type, out, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); __ Daddu(TMP, obj, TMP); - __ LoadFromOffset(load_type, out, TMP, data_offset); + __ LoadFromOffset(load_type, out, TMP, data_offset, null_checker); } break; } @@ -1609,11 +1617,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFromOffset(kLoadDoubleword, out, obj, offset); + __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); __ Daddu(TMP, obj, TMP); - __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset); + __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); } break; } @@ -1623,11 +1631,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ LoadFpuFromOffset(kLoadWord, out, obj, offset); + __ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); __ Daddu(TMP, obj, TMP); - __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset); + __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker); } break; } @@ -1637,11 +1645,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset); + __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); __ Daddu(TMP, obj, TMP); - __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset); + __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); } break; } @@ -1650,8 +1658,10 @@ void 
InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - if (!maybe_compressed_char_at) { - codegen_->MaybeRecordImplicitNullCheck(instruction); + + if (type == Primitive::kPrimNot) { + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + __ MaybeUnpoisonHeapReference(out); } } @@ -1703,6 +1713,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { bool needs_runtime_call = locations->WillCall(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (value_type) { case Primitive::kPrimBoolean: @@ -1712,10 +1723,10 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; - __ StoreToOffset(kStoreByte, value, obj, offset); + __ StoreToOffset(kStoreByte, value, obj, offset, null_checker); } else { __ Daddu(TMP, obj, index.AsRegister<GpuRegister>()); - __ StoreToOffset(kStoreByte, value, TMP, data_offset); + __ StoreToOffset(kStoreByte, value, TMP, data_offset, null_checker); } break; } @@ -1727,11 +1738,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ StoreToOffset(kStoreHalfword, value, obj, offset); + __ StoreToOffset(kStoreHalfword, value, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2); __ Daddu(TMP, obj, TMP); - __ StoreToOffset(kStoreHalfword, value, TMP, data_offset); + __ StoreToOffset(kStoreHalfword, value, TMP, data_offset, null_checker); } break; } @@ -1740,24 +1751,59 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { case Primitive::kPrimNot: { if (!needs_runtime_call) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + GpuRegister base_reg; GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>(); if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreToOffset(kStoreWord, value, obj, offset); + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + base_reg = obj; } else { DCHECK(index.IsRegister()) << index; __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); __ Daddu(TMP, obj, TMP); - __ StoreToOffset(kStoreWord, value, TMP, data_offset); + base_reg = TMP; + } + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + DCHECK_EQ(value_type, Primitive::kPrimNot); + // Use Sw() instead of StoreToOffset() in order to be able to + // hold the poisoned reference in AT and thus avoid allocating + // yet another temporary register. + if (index.IsConstant()) { + if (!IsInt<16>(static_cast<int32_t>(data_offset))) { + int16_t low16 = Low16Bits(data_offset); + // For consistency with StoreToOffset() and such treat data_offset as int32_t. 
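Worked examples for the split below, with assumed offsets: for data_offset = 0x18010, low16 = -0x7FF0 and high48 = 0x18010 + 0x7FF0 = 0x20000, giving upper16 = 0x0002 and higher16 = 0, so a single Daui suffices. For data_offset = 0x7FFF9000, low16 = -0x7000 and high48 = 0x80000000, so upper16 = -0x8000 is negative and the carry compensation yields higher16 = 1: Daui's sign-extended contribution of -0x80000000 plus Dahi's +0x100000000 reconstructs the intended +0x80000000.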
+ uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16; + int16_t upper16 = High16Bits(high48); + // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a + // compensatory 64KB added, which may push `high48` above 2GB and require + // the dahi instruction. + int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0); + __ Daui(TMP, obj, upper16); + if (higher16 != 0) { + __ Dahi(TMP, higher16); + } + base_reg = TMP; + data_offset = low16; + } + } else { + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); + } + __ PoisonHeapReference(AT, value); + __ Sw(AT, base_reg, data_offset); + null_checker(); + } else { + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); } - codegen_->MaybeRecordImplicitNullCheck(instruction); if (needs_write_barrier) { DCHECK_EQ(value_type, Primitive::kPrimNot); codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); } } else { DCHECK_EQ(value_type, Primitive::kPrimNot); + // Note: if heap poisoning is enabled, pAputObject takes care + // of poisoning the reference. codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); } @@ -1770,11 +1816,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreToOffset(kStoreDoubleword, value, obj, offset); + __ StoreToOffset(kStoreDoubleword, value, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); __ Daddu(TMP, obj, TMP); - __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset); + __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker); } break; } @@ -1786,11 +1832,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; - __ StoreFpuToOffset(kStoreWord, value, obj, offset); + __ StoreFpuToOffset(kStoreWord, value, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); __ Daddu(TMP, obj, TMP); - __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset); + __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset, null_checker); } break; } @@ -1802,11 +1848,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; - __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset); + __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset, null_checker); } else { __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8); __ Daddu(TMP, obj, TMP); - __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset); + __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker); } break; } @@ -1815,11 +1861,6 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - // Ints and objects are handled in the switch. 
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) { - codegen_->MaybeRecordImplicitNullCheck(instruction); - } } void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { @@ -1871,6 +1912,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { __ Beqzc(obj, slow_path->GetExitLabel()); // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value()); + __ MaybeUnpoisonHeapReference(obj_cls); __ Bnec(obj_cls, cls, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -3086,6 +3128,9 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); LoadOperandType load_type = kLoadUnsignedByte; + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); + switch (type) { case Primitive::kPrimBoolean: load_type = kLoadUnsignedByte; @@ -3117,15 +3162,18 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, if (!Primitive::IsFloatingPointType(type)) { DCHECK(locations->Out().IsRegister()); GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadFromOffset(load_type, dst, obj, offset, null_checker); } else { DCHECK(locations->Out().IsFpuRegister()); FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); - __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value()); + __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker); } - - codegen_->MaybeRecordImplicitNullCheck(instruction); // TODO: memory barrier? + + if (type == Primitive::kPrimNot) { + GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); + __ MaybeUnpoisonHeapReference(dst); + } } void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction, @@ -3147,6 +3195,10 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); StoreOperandType store_type = kStoreByte; + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)); + auto null_checker = GetImplicitNullChecker(instruction, codegen_); + switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -3172,16 +3224,24 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, if (!Primitive::IsFloatingPointType(type)) { DCHECK(locations->InAt(1).IsRegister()); GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>(); - __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); + if (kPoisonHeapReferences && needs_write_barrier) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. 
+ DCHECK_EQ(type, Primitive::kPrimNot); + __ PoisonHeapReference(TMP, src); + __ StoreToOffset(store_type, TMP, obj, offset, null_checker); + } else { + __ StoreToOffset(store_type, src, obj, offset, null_checker); + } } else { DCHECK(locations->InAt(1).IsFpuRegister()); FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>(); - __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value()); + __ StoreFpuToOffset(store_type, src, obj, offset, null_checker); } - codegen_->MaybeRecordImplicitNullCheck(instruction); // TODO: memory barriers? - if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) { + if (needs_write_barrier) { DCHECK(locations->InAt(1).IsRegister()); GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>(); codegen_->MarkGCCard(obj, src, value_can_be_null); @@ -3247,6 +3307,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // Compare the class of `obj` with `cls`. __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value()); + __ MaybeUnpoisonHeapReference(out); if (instruction->IsExactCheck()) { // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); @@ -3325,6 +3386,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset); } codegen_->MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + __ MaybeUnpoisonHeapReference(temp); __ LoadFromOffset(kLoadDoubleword, temp, temp, mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value()); uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( @@ -3567,6 +3636,14 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // temp = object->GetClass(); __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). 
+ __ MaybeUnpoisonHeapReference(temp); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); @@ -3666,8 +3743,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorMIPS64::PcRelativePatchInfo* info = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); - __ Lwu(out, AT, /* placeholder */ 0x5678); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); generate_null_check = true; break; } @@ -3773,8 +3850,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); - __ Lwu(out, AT, /* placeholder */ 0x5678); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out); + GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); codegen_->AddSlowPath(slow_path); __ Beqzc(out, slow_path->GetEntryLabel()); @@ -3944,6 +4021,8 @@ void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) { } void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) { + // Note: if heap poisoning is enabled, the entry point takes care + // of poisoning the reference. codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); } @@ -3961,6 +4040,8 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { + // Note: if heap poisoning is enabled, the entry point takes care + // of poisoning the reference. if (instruction->IsStringAlloc()) { // String is allocated through StringFactory. Call NewEmptyString entry point. GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc new file mode 100644 index 0000000000..dc3d378e75 --- /dev/null +++ b/compiler/optimizing/code_sinking.cc @@ -0,0 +1,403 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_sinking.h" + +#include "common_dominator.h" +#include "nodes.h" + +namespace art { + +void CodeSinking::Run() { + HBasicBlock* exit = graph_->GetExitBlock(); + if (exit == nullptr) { + // Infinite loop, just bail. + return; + } + // TODO(ngeoffray): we do not profile branches yet, so use throw instructions + // as an indicator of an uncommon branch. 
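To make the heuristic concrete, the shape of code this pass targets looks roughly like the following sketch (names assumed, not taken from this change); the allocation and store are only observable on the throwing path, so both can sink there:

struct Foo { int x; };

int Compute(bool bad, int v) {
  Foo* f = new Foo();  // HNewInstance: movable even though it may throw (OOME).
  f->x = v;            // HInstanceFieldSet on a local allocation: movable.
  if (bad) {
    throw f->x;        // The only use of `f` is on the throwing (uncommon) path,
  }                    // so both instructions above can sink into this block.
  return v;            // The common path no longer pays for the allocation.
}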
+ for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) { + if (exit_predecessor->GetLastInstruction()->IsThrow()) { + SinkCodeToUncommonBranch(exit_predecessor); + } + } +} + +static bool IsInterestingInstruction(HInstruction* instruction) { + // Instructions from the entry graph (for example constants) are never interesting to move. + if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) { + return false; + } + // We want to move moveable instructions that cannot throw, as well as + // heap stores and allocations. + + // Volatile stores cannot be moved. + if (instruction->IsInstanceFieldSet()) { + if (instruction->AsInstanceFieldSet()->IsVolatile()) { + return false; + } + } + + // Check allocations first, as they can throw, but it is safe to move them. + if (instruction->IsNewInstance() || instruction->IsNewArray()) { + return true; + } + + // All other instructions that can throw cannot be moved. + if (instruction->CanThrow()) { + return false; + } + + // We can only store on local allocations. Other heap references can + // be escaping. Note that allocations can escape too, but we only move + // allocations if their users can move too, or are in the list of + // post-dominated blocks. + if (instruction->IsInstanceFieldSet()) { + if (!instruction->InputAt(0)->IsNewInstance()) { + return false; + } + } + + if (instruction->IsArraySet()) { + if (!instruction->InputAt(0)->IsNewArray()) { + return false; + } + } + + // Heap accesses cannot go past instructions that have memory side effects, which + // we are not tracking here. Note that the load/store elimination optimization + // runs before this optimization, and should have removed interesting ones. + // In theory, we could handle loads of local allocations, but this is currently + // hard to test, as LSE removes them. + if (instruction->IsStaticFieldGet() || + instruction->IsInstanceFieldGet() || + instruction->IsArrayGet()) { + return false; + } + + if (instruction->IsInstanceFieldSet() || + instruction->IsArraySet() || + instruction->CanBeMoved()) { + return true; + } + return false; +} + +static void AddInstruction(HInstruction* instruction, + const ArenaBitVector& processed_instructions, + const ArenaBitVector& discard_blocks, + ArenaVector<HInstruction*>* worklist) { + // Add to the work list if the instruction is not in the list of blocks + // to discard, hasn't already been processed and is of interest.
+ if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) && + !processed_instructions.IsBitSet(instruction->GetId()) && + IsInterestingInstruction(instruction)) { + worklist->push_back(instruction); + } +} + +static void AddInputs(HInstruction* instruction, + const ArenaBitVector& processed_instructions, + const ArenaBitVector& discard_blocks, + ArenaVector<HInstruction*>* worklist) { + for (HInstruction* input : instruction->GetInputs()) { + AddInstruction(input, processed_instructions, discard_blocks, worklist); + } +} + +static void AddInputs(HBasicBlock* block, + const ArenaBitVector& processed_instructions, + const ArenaBitVector& discard_blocks, + ArenaVector<HInstruction*>* worklist) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + AddInputs(it.Current(), processed_instructions, discard_blocks, worklist); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + AddInputs(it.Current(), processed_instructions, discard_blocks, worklist); + } +} + +static bool ShouldFilterUse(HInstruction* instruction, + HInstruction* user, + const ArenaBitVector& post_dominated) { + if (instruction->IsNewInstance()) { + return user->IsInstanceFieldSet() && + (user->InputAt(0) == instruction) && + !post_dominated.IsBitSet(user->GetBlock()->GetBlockId()); + } else if (instruction->IsNewArray()) { + return user->IsArraySet() && + (user->InputAt(0) == instruction) && + !post_dominated.IsBitSet(user->GetBlock()->GetBlockId()); + } + return false; +} + + +// Find the ideal position for moving `instruction`. If `filter` is true, +// we filter out store instructions to that instruction, which are processed +// first in step (3) of the sinking algorithm. +// This method is tailored to the sinking algorithm, unlike +// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops. +static HInstruction* FindIdealPosition(HInstruction* instruction, + const ArenaBitVector& post_dominated, + bool filter = false) { + DCHECK(!instruction->IsPhi());  // Makes no sense for Phi. + + // Find the target block. + CommonDominator finder(/* start_block */ nullptr); + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + HInstruction* user = use.GetUser(); + if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) { + finder.Update(user->IsPhi() + ? user->GetBlock()->GetPredecessors()[use.GetIndex()] + : user->GetBlock()); + } + } + for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { + DCHECK(!use.GetUser()->GetHolder()->IsPhi()); + DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated)); + finder.Update(use.GetUser()->GetHolder()->GetBlock()); + } + HBasicBlock* target_block = finder.Get(); + if (target_block == nullptr) { + // No user we can go next to? Likely an LSE or DCE limitation. + return nullptr; + } + + // Move to the first dominator not in a loop, if we can. + while (target_block->IsInLoop()) { + if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) { + break; + } + target_block = target_block->GetDominator(); + DCHECK(target_block != nullptr); + } + + // Find insertion position. No need to filter anymore, as we have found a + // target block.
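+ // (At this point every remaining use is in `target_block` or in a block that + // `target_block` dominates, so the earliest user inside `target_block` in + // dominance order, computed below, is a safe insertion point; if no user is in + // `target_block`, we fall back to just before its control-flow instruction.)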
+ HInstruction* insert_pos = nullptr; + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + if (use.GetUser()->GetBlock() == target_block && + (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) { + insert_pos = use.GetUser(); + } + } + for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { + HInstruction* user = use.GetUser()->GetHolder(); + if (user->GetBlock() == target_block && + (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) { + insert_pos = user; + } + } + if (insert_pos == nullptr) { + // No user in `target_block`, insert before the control flow instruction. + insert_pos = target_block->GetLastInstruction(); + DCHECK(insert_pos->IsControlFlow()); + // Avoid splitting HCondition from HIf to prevent unnecessary materialization. + if (insert_pos->IsIf()) { + HInstruction* if_input = insert_pos->AsIf()->InputAt(0); + if (if_input == insert_pos->GetPrevious()) { + insert_pos = if_input; + } + } + } + DCHECK(!insert_pos->IsPhi()); + return insert_pos; +} + + +void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { + // Local allocator to discard data structures created below at the end of + // this optimization. + ArenaAllocator allocator(graph_->GetArena()->GetArenaPool()); + + size_t number_of_instructions = graph_->GetCurrentInstructionId(); + ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc)); + ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false); + ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false); + ArenaBitVector instructions_that_can_move( + &allocator, number_of_instructions, /* expandable */ false); + ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); + + // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. + // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done by + // computing the post dominator tree, but that could be too time consuming. Also, + // we should start the analysis from blocks dominated by an uncommon branch, but we + // don't profile branches yet. + bool found_block = false; + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (block == end_block) { + found_block = true; + post_dominated.SetBit(block->GetBlockId()); + } else if (found_block) { + bool is_post_dominated = true; + if (block->GetSuccessors().empty()) { + // We currently bail for loops. + is_post_dominated = false; + } else { + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!post_dominated.IsBitSet(successor->GetBlockId())) { + is_post_dominated = false; + break; + } + } + } + if (is_post_dominated) { + post_dominated.SetBit(block->GetBlockId()); + } + } + } + + // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs + // of instructions in these blocks that are not themselves in these blocks. + // Also find the common dominator of the found post dominated blocks, to help filter + // out un-movable uses in step (2). + CommonDominator finder(end_block); + for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) { + if (post_dominated.IsBitSet(i)) { + finder.Update(graph_->GetBlocks()[i]); + AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist); + } + } + HBasicBlock* common_dominator = finder.Get(); + + // Step (2): Iterate over the worklist to find sinking candidates.
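+ // The worklist loop below runs to a fixpoint: an instruction stays on the list + // until its users have been looked at; instructions whose users all end up in + // the post-dominated set (or can themselves sink) are recorded in + // `move_in_order`, while anything that cannot move is marked processed and + // dropped.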
+ while (!worklist.empty()) { + HInstruction* instruction = worklist.back(); + if (processed_instructions.IsBitSet(instruction->GetId())) { + // The instruction has already been processed, continue. This happens + // when the instruction is the input/user of multiple instructions. + worklist.pop_back(); + continue; + } + bool all_users_in_post_dominated_blocks = true; + bool can_move = true; + // Check users of the instruction. + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + HInstruction* user = use.GetUser(); + if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) && + !instructions_that_can_move.IsBitSet(user->GetId())) { + all_users_in_post_dominated_blocks = false; + // If we've already processed this user, or the user cannot be moved, or + // does not dominate the post dominated blocks, bail. + // TODO(ngeoffray): The domination check is an approximation. We should + // instead check if the dominated blocks post dominate the user's block, + // but we do not have post dominance information here. + if (processed_instructions.IsBitSet(user->GetId()) || + !IsInterestingInstruction(user) || + !user->GetBlock()->Dominates(common_dominator)) { + can_move = false; + break; + } + } + } + + // Check environment users of the instruction. Some of these users require + // the instruction not to move. + if (all_users_in_post_dominated_blocks) { + for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { + HEnvironment* environment = use.GetUser(); + HInstruction* user = environment->GetHolder(); + if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) { + if (graph_->IsDebuggable() || + user->IsDeoptimize() || + user->CanThrowIntoCatchBlock() || + (user->IsSuspendCheck() && graph_->IsCompilingOsr())) { + can_move = false; + break; + } + } + } + } + if (!can_move) { + // Instruction cannot be moved, mark it as processed and remove it from the work + // list. + processed_instructions.SetBit(instruction->GetId()); + worklist.pop_back(); + } else if (all_users_in_post_dominated_blocks) { + // Instruction is a candidate for being sunk. Mark it as such, remove it from the + // work list, and add its inputs to the work list. + instructions_that_can_move.SetBit(instruction->GetId()); + move_in_order.push_back(instruction); + processed_instructions.SetBit(instruction->GetId()); + worklist.pop_back(); + AddInputs(instruction, processed_instructions, post_dominated, &worklist); + // Drop the environment uses not in the list of post-dominated blocks. This is + // to help step (3) of this optimization, when we start moving instructions + // closer to their use. + for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) { + HEnvironment* environment = use.GetUser(); + HInstruction* user = environment->GetHolder(); + if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) { + environment->RemoveAsUserOfInput(use.GetIndex()); + environment->SetRawEnvAt(use.GetIndex(), nullptr); + } + } + } else { + // The information we have on the users was not enough to decide whether the + // instruction could be moved. + // Add the users to the work list, and keep the instruction in the work list + // to process it again once all users have been processed. + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist); + } + } + } + + // Make sure we process instructions in dominated order. This is required for heap + // stores.
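+ // For example (hypothetical HIR): if `o = new A(); o.f = v;` is sinkable, the + // HInstanceFieldSet is strictly dominated by the HNewInstance, so the comparator + // below orders the store first; step (3) relies on this to move stores before + // the allocation they write into.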
+ std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) { + return b->StrictlyDominates(a); + }); + + // Step (3): Try to move sinking candidates. + for (HInstruction* instruction : move_in_order) { + HInstruction* position = nullptr; + if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) { + if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) { + // A store can trivially move, but it can safely do so only if the heap + // location it stores to can also move. + // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions + // from the set and all their inputs. + continue; + } + // Find the position of the instruction we're storing into, filtering out this + // store and all other stores to that instruction. + position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true); + + // The position needs to be dominated by the store, in order for the store to move there. + if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) { + continue; + } + } else { + // Find the ideal position within the post dominated blocks. + position = FindIdealPosition(instruction, post_dominated); + if (position == nullptr) { + continue; + } + } + // Bail if we could not find a position in the post dominated blocks (for example, + // if there are multiple users whose common dominator is not in the list of + // post dominated blocks). + if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) { + continue; + } + MaybeRecordStat(MethodCompilationStat::kInstructionSunk); + instruction->MoveBefore(position, /* ensure_safety */ false); + } +} + +} // namespace art diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h new file mode 100644 index 0000000000..59cda52a8c --- /dev/null +++ b/compiler/optimizing/code_sinking.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ +#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ + +#include "nodes.h" +#include "optimization.h" + +namespace art { + +/** + * Optimization pass to move instructions into uncommon branches, + * when it is safe to do so. + */ +class CodeSinking : public HOptimization { + public: + CodeSinking(HGraph* graph, OptimizingCompilerStats* stats) + : HOptimization(graph, kCodeSinkingPassName, stats) {} + + void Run() OVERRIDE; + + static constexpr const char* kCodeSinkingPassName = "code_sinking"; + + private: + // Try to move code only used by `end_block` and all its post-dominated / dominated + // blocks, to these blocks. 
+ void SinkCodeToUncommonBranch(HBasicBlock* end_block); + + DISALLOW_COPY_AND_ASSIGN(CodeSinking); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_SINKING_H_ diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h index b459d24d7c..9f012cfbb2 100644 --- a/compiler/optimizing/common_dominator.h +++ b/compiler/optimizing/common_dominator.h @@ -36,12 +36,16 @@ class CommonDominator { // Create a finder starting with a given block. explicit CommonDominator(HBasicBlock* block) : dominator_(block), chain_length_(ChainLength(block)) { - DCHECK(block != nullptr); } // Update the common dominator with another block. void Update(HBasicBlock* block) { DCHECK(block != nullptr); + if (dominator_ == nullptr) { + dominator_ = block; + chain_length_ = ChainLength(block); + return; + } HBasicBlock* block2 = dominator_; DCHECK(block2 != nullptr); if (block == block2) { diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 5539413aad..3dfe17647c 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -57,21 +57,27 @@ static bool IsIntAndGet(HInstruction* instruction, int64_t* value) { return false; } -/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */ +/** Computes a * b for a,b > 0 (at least until first overflow happens). */ +static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) { + if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) { + *overflow = true; + } + return a * b; +} + +/** Returns b^e for b,e > 0. Sets overflow if arithmetic wrap-around occurred. */ static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) { - DCHECK_GE(b, 1); - DCHECK_GE(e, 1); + DCHECK_LT(0, b); + DCHECK_LT(0, e); int64_t pow = 1; while (e) { if (e & 1) { - int64_t oldpow = pow; - pow *= b; - if (pow < oldpow) { - *overflow = true; - } + pow = SafeMul(pow, b, overflow); } e >>= 1; - b *= b; + if (e) { + b = SafeMul(b, b, overflow); + } } return pow; } @@ -377,6 +383,53 @@ bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) co return false; } +bool InductionVarRange::IsUnitStride(HInstruction* instruction, + /*out*/ HInstruction** offset) const { + HLoopInformation* loop = nullptr; + HInductionVarAnalysis::InductionInfo* info = nullptr; + HInductionVarAnalysis::InductionInfo* trip = nullptr; + if (HasInductionInfo(instruction, instruction, &loop, &info, &trip)) { + if (info->induction_class == HInductionVarAnalysis::kLinear && + info->op_b->operation == HInductionVarAnalysis::kFetch) { + int64_t stride_value = 0; + if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) { + int64_t off_value = 0; + if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) { + *offset = nullptr; + } else { + *offset = info->op_b->fetch; + } + return true; + } + } + } + return false; +} + +HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop, + HGraph* graph, + HBasicBlock* block) { + HInductionVarAnalysis::InductionInfo *trip = + induction_analysis_->LookupInfo(loop, GetLoopControl(loop)); + if (trip != nullptr && !IsUnsafeTripCount(trip)) { + HInstruction* taken_test = nullptr; + HInstruction* trip_expr = nullptr; + if (IsBodyTripCount(trip)) { + if (!GenerateCode(trip->op_b, nullptr, graph, block, &taken_test, false, false)) { + return nullptr; + } + } + if (GenerateCode(trip->op_a, nullptr, graph, block, &trip_expr, false, false)) { + if 
(taken_test != nullptr) { + HInstruction* zero = graph->GetConstant(trip->type, 0); + trip_expr = Insert(block, new (graph->GetArena()) HSelect(taken_test, trip_expr, zero, kNoDexPc)); + } + return trip_expr; + } + } + return nullptr; +} + // // Private class methods. // @@ -1157,12 +1210,15 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInstruction* opb = nullptr; switch (info->induction_class) { case HInductionVarAnalysis::kInvariant: - // Invariants (note that even though is_min does not impact code generation for - // invariants, some effort is made to keep this parameter consistent). + // Invariants (note that since invariants only have other invariants as + // sub expressions, viz. no induction, there is no need to adjust is_min). switch (info->operation) { case HInductionVarAnalysis::kAdd: - case HInductionVarAnalysis::kRem: // no proper is_min for second arg - case HInductionVarAnalysis::kXor: // no proper is_min for second arg + case HInductionVarAnalysis::kSub: + case HInductionVarAnalysis::kMul: + case HInductionVarAnalysis::kDiv: + case HInductionVarAnalysis::kRem: + case HInductionVarAnalysis::kXor: case HInductionVarAnalysis::kLT: case HInductionVarAnalysis::kLE: case HInductionVarAnalysis::kGT: @@ -1174,6 +1230,12 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, switch (info->operation) { case HInductionVarAnalysis::kAdd: operation = new (graph->GetArena()) HAdd(type, opa, opb); break; + case HInductionVarAnalysis::kSub: + operation = new (graph->GetArena()) HSub(type, opa, opb); break; + case HInductionVarAnalysis::kMul: + operation = new (graph->GetArena()) HMul(type, opa, opb, kNoDexPc); break; + case HInductionVarAnalysis::kDiv: + operation = new (graph->GetArena()) HDiv(type, opa, opb, kNoDexPc); break; case HInductionVarAnalysis::kRem: operation = new (graph->GetArena()) HRem(type, opa, opb, kNoDexPc); break; case HInductionVarAnalysis::kXor: @@ -1194,16 +1256,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, return true; } break; - case HInductionVarAnalysis::kSub: // second reversed! - if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) && - GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) { - if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HSub(type, opa, opb)); - } - return true; - } - break; - case HInductionVarAnalysis::kNeg: // reversed! + case HInductionVarAnalysis::kNeg: if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) { if (graph != nullptr) { *result = Insert(block, new (graph->GetArena()) HNeg(type, opb)); @@ -1240,9 +1293,9 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } } break; - default: - break; - } + case HInductionVarAnalysis::kNop: + LOG(FATAL) << "unexpected invariant nop"; + } // switch invariant operation break; case HInductionVarAnalysis::kLinear: { // Linear induction a * i + b, for normalized 0 <= i < TC. 
For ranges, this should @@ -1293,7 +1346,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } break; } - } + } // switch induction class } return false; } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index 6c424b78b9..0858d73982 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -24,7 +24,8 @@ namespace art { /** * This class implements range analysis on expressions within loops. It takes the results * of induction variable analysis in the constructor and provides a public API to obtain - * a conservative lower and upper bound value on each instruction in the HIR. + * a conservative lower and upper bound value or last value on each instruction in the HIR. + * The public API also provides a few general-purpose utility methods related to induction. * * The range analysis is done with a combination of symbolic and partial integral evaluation * of expressions. The analysis avoids complications with wrap-around arithmetic on the integral @@ -154,6 +155,19 @@ class InductionVarRange { */ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; + /** + * Checks if instruction is a unit stride induction inside the closest enveloping loop. + * Returns invariant offset on success. + */ + bool IsUnitStride(HInstruction* instruction, /*out*/ HInstruction** offset) const; + + /** + * Generates the trip count expression for the given loop. Code is generated in given block + * and graph. The expression is guarded by a taken test if needed. Returns the trip count + * expression on success or null otherwise. + */ + HInstruction* GenerateTripCount(HLoopInformation* loop, HGraph* graph, HBasicBlock* block); + private: /* * Enum used in IsConstant() request. diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index d81817fb09..fcdf8eb7dc 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -48,6 +48,11 @@ class InductionVarRangeTest : public CommonCompilerTest { EXPECT_EQ(v1.is_known, v2.is_known); } + void ExpectInt(int32_t value, HInstruction* i) { + ASSERT_TRUE(i->IsIntConstant()); + EXPECT_EQ(value, i->AsIntConstant()->GetValue()); + } + // // Construction methods. // @@ -757,10 +762,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) { // Last value (unsimplified). HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); ASSERT_TRUE(last->IsAdd()); - ASSERT_TRUE(last->InputAt(0)->IsIntConstant()); - EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue()); - ASSERT_TRUE(last->InputAt(1)->IsIntConstant()); - EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue()); + ExpectInt(1000, last->InputAt(0)); + ExpectInt(0, last->InputAt(1)); + + // Loop logic. + int64_t tc = 0; + EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); + EXPECT_EQ(1000, tc); + HInstruction* offset = nullptr; + EXPECT_TRUE(range_.IsUnitStride(phi, &offset)); + EXPECT_TRUE(offset == nullptr); + HInstruction* tce = range_.GenerateTripCount( + loop_header_->GetLoopInformation(), graph_, loop_preheader_); + ASSERT_TRUE(tce != nullptr); + ExpectInt(1000, tce); } TEST_F(InductionVarRangeTest, ConstantTripCountDown) { @@ -799,15 +814,27 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) { // Last value (unsimplified). 
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_); ASSERT_TRUE(last->IsSub()); - ASSERT_TRUE(last->InputAt(0)->IsIntConstant()); - EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue()); + ExpectInt(1000, last->InputAt(0)); ASSERT_TRUE(last->InputAt(1)->IsNeg()); last = last->InputAt(1)->InputAt(0); ASSERT_TRUE(last->IsSub()); - ASSERT_TRUE(last->InputAt(0)->IsIntConstant()); - EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue()); - ASSERT_TRUE(last->InputAt(1)->IsIntConstant()); - EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue()); + ExpectInt(0, last->InputAt(0)); + ExpectInt(1000, last->InputAt(1)); + + // Loop logic. + int64_t tc = 0; + EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); + EXPECT_EQ(1000, tc); + HInstruction* offset = nullptr; + EXPECT_FALSE(range_.IsUnitStride(phi, &offset)); + HInstruction* tce = range_.GenerateTripCount( + loop_header_->GetLoopInformation(), graph_, loop_preheader_); + ASSERT_TRUE(tce != nullptr); + ASSERT_TRUE(tce->IsNeg()); + last = tce->InputAt(0); + EXPECT_TRUE(last->IsSub()); + ExpectInt(0, last->InputAt(0)); + ExpectInt(1000, last->InputAt(1)); } TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { @@ -851,27 +878,22 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { // Verify lower is 0+0. ASSERT_TRUE(lower != nullptr); ASSERT_TRUE(lower->IsAdd()); - ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); - EXPECT_EQ(0, lower->InputAt(0)->AsIntConstant()->GetValue()); - ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); - EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue()); + ExpectInt(0, lower->InputAt(0)); + ExpectInt(0, lower->InputAt(1)); // Verify upper is (V-1)+0. ASSERT_TRUE(upper != nullptr); ASSERT_TRUE(upper->IsAdd()); ASSERT_TRUE(upper->InputAt(0)->IsSub()); EXPECT_TRUE(upper->InputAt(0)->InputAt(0)->IsParameterValue()); - ASSERT_TRUE(upper->InputAt(0)->InputAt(1)->IsIntConstant()); - EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue()); - ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); - EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + ExpectInt(1, upper->InputAt(0)->InputAt(1)); + ExpectInt(0, upper->InputAt(1)); // Verify taken-test is 0<V. HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_); ASSERT_TRUE(taken != nullptr); ASSERT_TRUE(taken->IsLessThan()); - ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); - EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue()); + ExpectInt(0, taken->InputAt(0)); EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); // Replacement. @@ -880,6 +902,21 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) { EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(1), v1); ExpectEqual(Value(y_, 1, 0), v2); + + // Loop logic. + int64_t tc = 0; + EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); + EXPECT_EQ(0, tc); // unknown + HInstruction* offset = nullptr; + EXPECT_TRUE(range_.IsUnitStride(phi, &offset)); + EXPECT_TRUE(offset == nullptr); + HInstruction* tce = range_.GenerateTripCount( + loop_header_->GetLoopInformation(), graph_, loop_preheader_); + ASSERT_TRUE(tce != nullptr); + EXPECT_TRUE(tce->IsSelect()); // guarded by taken-test + ExpectInt(0, tce->InputAt(0)); + EXPECT_TRUE(tce->InputAt(1)->IsParameterValue()); + EXPECT_TRUE(tce->InputAt(2)->IsLessThan()); } TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { @@ -923,32 +960,26 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { // Verify lower is 1000-((1000-V)-1). 
ASSERT_TRUE(lower != nullptr); ASSERT_TRUE(lower->IsSub()); - ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); - EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue()); + ExpectInt(1000, lower->InputAt(0)); lower = lower->InputAt(1); ASSERT_TRUE(lower->IsSub()); - ASSERT_TRUE(lower->InputAt(1)->IsIntConstant()); - EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue()); + ExpectInt(1, lower->InputAt(1)); lower = lower->InputAt(0); ASSERT_TRUE(lower->IsSub()); - ASSERT_TRUE(lower->InputAt(0)->IsIntConstant()); - EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue()); + ExpectInt(1000, lower->InputAt(0)); EXPECT_TRUE(lower->InputAt(1)->IsParameterValue()); // Verify upper is 1000-0. ASSERT_TRUE(upper != nullptr); ASSERT_TRUE(upper->IsSub()); - ASSERT_TRUE(upper->InputAt(0)->IsIntConstant()); - EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue()); - ASSERT_TRUE(upper->InputAt(1)->IsIntConstant()); - EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue()); + ExpectInt(1000, upper->InputAt(0)); + ExpectInt(0, upper->InputAt(1)); // Verify taken-test is 1000>V. HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_); ASSERT_TRUE(taken != nullptr); ASSERT_TRUE(taken->IsGreaterThan()); - ASSERT_TRUE(taken->InputAt(0)->IsIntConstant()); - EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue()); + ExpectInt(1000, taken->InputAt(0)); EXPECT_TRUE(taken->InputAt(1)->IsParameterValue()); // Replacement. @@ -957,6 +988,23 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) { EXPECT_FALSE(needs_finite_test); ExpectEqual(Value(y_, 1, 0), v1); ExpectEqual(Value(999), v2); + + // Loop logic. + int64_t tc = 0; + EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc)); + EXPECT_EQ(0, tc);  // unknown + HInstruction* offset = nullptr; + EXPECT_FALSE(range_.IsUnitStride(phi, &offset)); + HInstruction* tce = range_.GenerateTripCount( + loop_header_->GetLoopInformation(), graph_, loop_preheader_); + ASSERT_TRUE(tce != nullptr); + EXPECT_TRUE(tce->IsSelect());  // guarded by taken-test + ExpectInt(0, tce->InputAt(0)); + EXPECT_TRUE(tce->InputAt(1)->IsSub()); + EXPECT_TRUE(tce->InputAt(2)->IsGreaterThan()); + tce = tce->InputAt(1); + ExpectInt(1000, tce->InputAt(0)); + EXPECT_TRUE(tce->InputAt(1)->IsParameterValue()); } } // namespace art diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 8c73f1d036..3e340908bf 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1272,12 +1272,19 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, caller_instruction_counter); callee_graph->SetArtMethod(resolved_method); - // When they are needed, allocate `inline_stats` on the heap instead + // When they are needed, allocate `inline_stats_` on the Arena instead // of on the stack, as Clang might produce a stack frame too large // for this function, that would not fit the requirements of the // `-Wframe-larger-than` option. - std::unique_ptr<OptimizingCompilerStats> inline_stats = - (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>(); + if (stats_ != nullptr) { + // Reuse one object for all inline attempts from this caller to keep Arena memory usage low.
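+ // (A sketch of why this is safe, based on the surrounding code: the stats + // object is a plain bundle of counters, and the arena storage is reclaimed + // wholesale when compilation of this method finishes, so no destructor call is + // needed; Reset() clears the counters between attempts.)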
+ if (inline_stats_ == nullptr) { + void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc); + inline_stats_ = new (storage) OptimizingCompilerStats; + } else { + inline_stats_->Reset(); + } + } HGraphBuilder builder(callee_graph, &dex_compilation_unit, &outer_compilation_unit_, @@ -1285,7 +1292,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, *code_item, compiler_driver_, codegen_, - inline_stats.get(), + inline_stats_, resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()), dex_cache, handles_); @@ -1468,6 +1475,11 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId()) << "No instructions can be added to the inner graph during inlining into the outer graph"; + if (stats_ != nullptr) { + DCHECK(inline_stats_ != nullptr); + inline_stats_->AddTo(stats_); + } + return true; } @@ -1476,11 +1488,11 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, const DexCompilationUnit& dex_compilation_unit) { // Note: if the outermost_graph_ is being compiled OSR, we should not run any // optimization that could lead to a HDeoptimize. The following optimizations do not. - HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner"); + HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_); - InstructionSimplifier simplify(callee_graph, stats_); - IntrinsicsRecognizer intrinsics(callee_graph, stats_); + InstructionSimplifier simplify(callee_graph, inline_stats_); + IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); HOptimization* optimizations[] = { &intrinsics, @@ -1504,7 +1516,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph, dex_compilation_unit, compiler_driver_, handles_, - stats_, + inline_stats_, total_number_of_dex_registers_ + code_item->registers_size_, depth_ + 1); inliner.Run(); diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 11aacab802..75d025ae41 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -51,7 +51,8 @@ class HInliner : public HOptimization { total_number_of_dex_registers_(total_number_of_dex_registers), depth_(depth), number_of_inlined_instructions_(0), - handles_(handles) {} + handles_(handles), + inline_stats_(nullptr) {} void Run() OVERRIDE; @@ -218,6 +219,10 @@ class HInliner : public HOptimization { size_t number_of_inlined_instructions_; VariableSizedHandleScope* const handles_; + // Used to record stats about optimizations on the inlined graph. + // If the inlining is successful, these stats are merged into the caller graph's stats.
+ OptimizingCompilerStats* inline_stats_; + DISALLOW_COPY_AND_ASSIGN(HInliner); }; diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 17d683f357..8df80adc9f 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -19,6 +19,7 @@ #include "art_method.h" #include "class_linker.h" #include "driver/compiler_driver.h" +#include "driver/compiler_options.h" #include "invoke_type.h" #include "mirror/dex_cache-inl.h" #include "nodes.h" @@ -178,4 +179,112 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) return os; } +void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, + CodeGenerator* codegen, + Location return_location, + Location first_argument_location) { + if (Runtime::Current()->IsAotCompiler()) { + if (codegen->GetCompilerOptions().IsBootImage() || + codegen->GetCompilerOptions().GetCompilePic()) { + // TODO(ngeoffray): Support boot image compilation. + return; + } + } + + IntegerValueOfInfo info = ComputeIntegerValueOfInfo(); + + // Most common case is that we have found all we needed (classes are initialized + // and in the boot image). Bail if not. + if (info.integer_cache == nullptr || + info.integer == nullptr || + info.cache == nullptr || + info.value_offset == 0 || + // low and high cannot be 0, per the spec. + info.low == 0 || + info.high == 0) { + LOG(INFO) << "Integer.valueOf will not be optimized"; + return; + } + + // The intrinsic will call the runtime if it needs to allocate a j.l.Integer. + LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary( + invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); + if (!invoke->InputAt(0)->IsConstant()) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->AddTemp(first_argument_location); + locations->SetOut(return_location); +} + +IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() { + // Note that we could cache all of the data looked up here, but there's no good + // location for it. We don't want to add it to WellKnownClasses, to avoid creating global + // JNI values. Adding it as state to the compiler singleton seems like the wrong + // separation of concerns. + // The need for this data should be pretty rare though. + + // The most common case is that the classes are in the boot image and initialized, + // which is easy to generate code for. We bail if not. + Thread* self = Thread::Current(); + ScopedObjectAccess soa(self); + Runtime* runtime = Runtime::Current(); + ClassLinker* class_linker = runtime->GetClassLinker(); + gc::Heap* heap = runtime->GetHeap(); + IntegerValueOfInfo info; + info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;"); + if (info.integer_cache == nullptr) { + self->ClearException(); + return info; + } + if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) { + // Optimization only works if the class is initialized and in the boot image. + return info; + } + info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;"); + if (info.integer == nullptr) { + self->ClearException(); + return info; + } + if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) { + // Optimization only works if the class is initialized and in the boot image.
+ return info; + } + + ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;"); + if (field == nullptr) { + return info; + } + info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>( + field->GetObject(info.integer_cache).Ptr()); + if (info.cache == nullptr) { + return info; + } + + if (!heap->ObjectIsInBootImageSpace(info.cache)) { + // Optimization only works if the object is in the boot image. + return info; + } + + field = info.integer->FindDeclaredInstanceField("value", "I"); + if (field == nullptr) { + return info; + } + info.value_offset = field->GetOffset().Int32Value(); + + field = info.integer_cache->FindDeclaredStaticField("low", "I"); + if (field == nullptr) { + return info; + } + info.low = field->GetInt(info.integer_cache); + + field = info.integer_cache->FindDeclaredStaticField("high", "I"); + if (field == nullptr) { + return info; + } + info.high = field->GetInt(info.integer_cache); + + DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1); + return info; +} + } // namespace art diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 6425e1313f..9da5a7fa3b 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -113,6 +113,39 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); } + static void ComputeIntegerValueOfLocations(HInvoke* invoke, + CodeGenerator* codegen, + Location return_location, + Location first_argument_location); + + // Temporary data structure for holding useful data for Integer.valueOf. We only + // use it if the mirror::Class* are in the boot image, so it is fine to keep raw + // mirror::Class pointers in this structure. + struct IntegerValueOfInfo { + IntegerValueOfInfo() + : integer_cache(nullptr), + integer(nullptr), + cache(nullptr), + low(0), + high(0), + value_offset(0) {} + + // The java.lang.IntegerCache class. + mirror::Class* integer_cache; + // The java.lang.Integer class. + mirror::Class* integer; + // Value of java.lang.IntegerCache#cache. + mirror::ObjectArray<mirror::Object>* cache; + // Value of java.lang.IntegerCache#low. + int32_t low; + // Value of java.lang.IntegerCache#high. + int32_t high; + // The offset of java.lang.Integer.value.
+ int32_t value_offset; + }; + + static IntegerValueOfInfo ComputeIntegerValueOfInfo(); + protected: IntrinsicVisitor() {} diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index c262cf983d..86000e9356 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -129,6 +129,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode { IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen) : arena_(codegen->GetGraph()->GetArena()), + codegen_(codegen), assembler_(codegen->GetAssembler()), features_(codegen->GetInstructionSetFeatures()) {} @@ -2644,6 +2645,75 @@ void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + Location::RegisterLocation(R0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + ArmAssembler* const assembler = GetAssembler(); + + Register out = locations->Out().AsRegister<Register>(); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ LoadImmediate(IP, value); + __ StoreToOffset(kStoreWord, IP, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + // Check bounds of our cache. + __ AddConstant(out, in, -info.low); + __ CmpConstant(out, info.high - info.low + 1); + Label allocate, done; + __ b(&allocate, HS); + // If the value is within the bounds, load the j.l.Integer directly from the array. 
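+ // The sequence below is roughly equivalent to this pseudocode (names are + // illustrative): + //   int i = in - low; + //   if ((unsigned) i < high - low + 1) return IntegerCache.cache[i]; + //   return new Integer(in);  // runtime call, then store `in` into `value` + // The unsigned comparison also rejects inputs below `low`, which wrap around.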
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out); + __ MaybeUnpoisonHeapReference(out); + __ b(&done); + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. + address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreToOffset(kStoreWord, in, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble) UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat) UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble) diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h index 7f20ea4b1f..2840863632 100644 --- a/compiler/optimizing/intrinsics_arm.h +++ b/compiler/optimizing/intrinsics_arm.h @@ -51,6 +51,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGenerator* codegen_; ArmAssembler* assembler_; const ArmInstructionSetFeatures& features_; diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 86e54294ae..6c3938c1a9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2924,6 +2924,79 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + calling_convention.GetReturnLocation(Primitive::kPrimNot), + Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); +} + +void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + MacroAssembler* masm = GetVIXLAssembler(); + + Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot); + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireW(); + InvokeRuntimeCallingConvention calling_convention; + Register argument = calling_convention.GetRegisterAt(0); + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + // Allocate and initialize a new j.l.Integer. 
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ Mov(temp.W(), value); + __ Str(temp.W(), HeapOperand(out.W(), info.value_offset)); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt); + // Check bounds of our cache. + __ Add(out.W(), in.W(), -info.low); + __ Cmp(out.W(), info.high - info.low + 1); + vixl::aarch64::Label allocate, done; + __ B(&allocate, hs); + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + MemOperand source = HeapOperand( + temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot)); + codegen_->Load(Primitive::kPrimNot, out, source); + codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out); + __ B(&done); + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. + address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ Str(in.W(), HeapOperand(out.W(), info.value_offset)); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 28e41cb086..3c53517b28 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -38,7 +38,8 @@ class CodeGeneratorARM64; class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena) : arena_(arena) {} + explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen) + : arena_(arena), codegen_(codegen) {} // Define visitor methods. 
@@ -56,6 +57,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGeneratorARM64* codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64); }; diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 70a3d38c13..aa89deae34 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -203,6 +203,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen) : arena_(codegen->GetGraph()->GetArena()), + codegen_(codegen), assembler_(codegen->GetAssembler()), features_(codegen->GetInstructionSetFeatures()) {} @@ -2988,6 +2989,77 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) { __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConventionARMVIXL calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + LocationFrom(r0), + LocationFrom(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + ArmVIXLAssembler* const assembler = GetAssembler(); + + vixl32::Register out = RegisterFrom(locations->Out()); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + vixl32::Register argument = calling_convention.GetRegisterAt(0); + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ Mov(temp, value); + assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + vixl32::Register in = RegisterFrom(locations->InAt(0)); + // Check bounds of our cache. + __ Add(out, in, -info.low); + __ Cmp(out, info.high - info.low + 1); + vixl32::Label allocate, done; + __ B(hs, &allocate); + // If the value is within the bounds, load the j.l.Integer directly from the array. 
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); + codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out); + assembler->MaybeUnpoisonHeapReference(out); + __ B(&done); + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. + address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + assembler->StoreToOffset(kStoreWord, in, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 6e79cb76a1..023cba1349 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -47,6 +47,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) private: ArenaAllocator* arena_; + CodeGenerator* codegen_; ArmVIXLAssembler* assembler_; const ArmInstructionSetFeatures& features_; diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 64a68403e9..ba006edfa2 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1572,6 +1572,10 @@ static void GenUnsafeGet(HInvoke* invoke, __ Lwr(trg, TMP, 0); __ Lwl(trg, TMP, 3); } + + if (type == Primitive::kPrimNot) { + __ MaybeUnpoisonHeapReference(trg); + } } } @@ -1663,6 +1667,11 @@ static void GenUnsafePut(LocationSummary* locations, if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) { Register value = locations->InAt(3).AsRegister<Register>(); + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + __ PoisonHeapReference(AT, value); + value = AT; + } + if (is_R6) { __ Sw(value, TMP, 0); } else { @@ -1852,13 +1861,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat codegen->MarkGCCard(base, value, value_can_be_null); } + MipsLabel loop_head, exit_loop; + __ Addu(TMP, base, offset_lo); + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + __ PoisonHeapReference(expected); + // Do not poison `value`, if it is the same register as + // `expected`, which has just been poisoned. 
+ if (value != expected) { + __ PoisonHeapReference(value); + } + } + // do { // tmp_value = [tmp_ptr] - expected; // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - MipsLabel loop_head, exit_loop; - __ Addu(TMP, base, offset_lo); __ Sync(0); __ Bind(&loop_head); if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) { @@ -1868,8 +1887,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat __ LlR2(out, TMP); } } else { - LOG(FATAL) << "Unsupported op size " << type; - UNREACHABLE(); + LOG(FATAL) << "Unsupported op size " << type; + UNREACHABLE(); } __ Subu(out, out, expected); // If we didn't get the 'expected' __ Sltiu(out, out, 1); // value, set 'out' to false, and @@ -1894,6 +1913,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // cycle atomically then retry. __ Bind(&exit_loop); __ Sync(0); + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + __ UnpoisonHeapReference(expected); + // Do not unpoison `value`, if it is the same register as + // `expected`, which has just been unpoisoned. + if (value != expected) { + __ UnpoisonHeapReference(value); + } + } } // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) @@ -1989,20 +2017,24 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { __ LoadConst32(out, 1); return; } - - // Check if input is null, return false if it is. - __ Beqz(arg, &return_false); + StringEqualsOptimizations optimizations(invoke); + if (!optimizations.GetArgumentNotNull()) { + // Check if input is null, return false if it is. + __ Beqz(arg, &return_false); + } // Reference equality check, return true if same reference. __ Beq(str, arg, &return_true); - // Instanceof check for the argument by comparing class fields. - // All string objects must have the same type since String cannot be subclassed. - // Receiver must be a string object, so its class field is equal to all strings' class fields. - // If the argument is a string object, its class field must be equal to receiver's class field. - __ Lw(temp1, str, class_offset); - __ Lw(temp2, arg, class_offset); - __ Bne(temp1, temp2, &return_false); + if (!optimizations.GetArgumentIsString()) { + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bne(temp1, temp2, &return_false); + } // Load `count` fields of this and argument strings. 
__ Lw(temp1, str, count_offset); @@ -2682,6 +2714,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject) +UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf) + UNREACHABLE_INTRINSICS(MIPS) #undef __ diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 3888828722..21c5074a1c 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1187,6 +1187,7 @@ static void GenUnsafeGet(HInvoke* invoke, case Primitive::kPrimNot: __ Lwu(trg, TMP, 0); + __ MaybeUnpoisonHeapReference(trg); break; case Primitive::kPrimLong: @@ -1285,7 +1286,12 @@ static void GenUnsafePut(LocationSummary* locations, switch (type) { case Primitive::kPrimInt: case Primitive::kPrimNot: - __ Sw(value, TMP, 0); + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + __ PoisonHeapReference(AT, value); + __ Sw(AT, TMP, 0); + } else { + __ Sw(value, TMP, 0); + } break; case Primitive::kPrimLong: @@ -1454,13 +1460,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat codegen->MarkGCCard(base, value, value_can_be_null); } + Mips64Label loop_head, exit_loop; + __ Daddu(TMP, base, offset); + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + __ PoisonHeapReference(expected); + // Do not poison `value`, if it is the same register as + // `expected`, which has just been poisoned. + if (value != expected) { + __ PoisonHeapReference(value); + } + } + // do { // tmp_value = [tmp_ptr] - expected; // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value)); // result = tmp_value != 0; - Mips64Label loop_head, exit_loop; - __ Daddu(TMP, base, offset); __ Sync(0); __ Bind(&loop_head); if (type == Primitive::kPrimLong) { @@ -1469,6 +1485,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // Note: We will need a read barrier here, when read barrier // support is added to the MIPS64 back end. __ Ll(out, TMP); + if (type == Primitive::kPrimNot) { + // The LL instruction sign-extends the 32-bit value, but + // 32-bit references must be zero-extended. Zero-extend `out`. + __ Dext(out, out, 0, 32); + } } __ Dsubu(out, out, expected); // If we didn't get the 'expected' __ Sltiu(out, out, 1); // value, set 'out' to false, and @@ -1487,6 +1508,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // cycle atomically then retry. __ Bind(&exit_loop); __ Sync(0); + + if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + __ UnpoisonHeapReference(expected); + // Do not unpoison `value`, if it is the same register as + // `expected`, which has just been unpoisoned. + if (value != expected) { + __ UnpoisonHeapReference(value); + } + } } // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) @@ -1593,19 +1623,24 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { return; } - // Check if input is null, return false if it is. - __ Beqzc(arg, &return_false); + StringEqualsOptimizations optimizations(invoke); + if (!optimizations.GetArgumentNotNull()) { + // Check if input is null, return false if it is. + __ Beqzc(arg, &return_false); + } // Reference equality check, return true if same reference. __ Beqc(str, arg, &return_true); - // Instanceof check for the argument by comparing class fields. - // All string objects must have the same type since String cannot be subclassed. 
- // Receiver must be a string object, so its class field is equal to all strings' class fields. - // If the argument is a string object, its class field must be equal to receiver's class field. - __ Lw(temp1, str, class_offset); - __ Lw(temp2, arg, class_offset); - __ Bnec(temp1, temp2, &return_false); + if (!optimizations.GetArgumentIsString()) { + // Instanceof check for the argument by comparing class fields. + // All string objects must have the same type since String cannot be subclassed. + // Receiver must be a string object, so its class field is equal to all strings' class fields. + // If the argument is a string object, its class field must be equal to receiver's class field. + __ Lw(temp1, str, class_offset); + __ Lw(temp2, arg, class_offset); + __ Bnec(temp1, temp2, &return_false); + } // Load `count` fields of this and argument strings. __ Lw(temp1, str, count_offset); @@ -2075,6 +2110,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject) +UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf) + UNREACHABLE_INTRINSICS(MIPS64) #undef __ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index e1b7ea53b4..a671788ff5 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3335,6 +3335,65 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ Bind(intrinsic_slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + Location::RegisterLocation(EAX), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + X86Assembler* assembler = GetAssembler(); + + Register out = locations->Out().AsRegister<Register>(); + InvokeRuntimeCallingConvention calling_convention; + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ movl(out, Immediate(address)); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ movl(Address(out, info.value_offset), Immediate(value)); + } + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + // Check bounds of our cache. 
+ __ leal(out, Address(in, -info.low)); + __ cmpl(out, Immediate(info.high - info.low + 1)); + NearLabel allocate, done; + __ j(kAboveEqual, &allocate); + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ movl(out, Address(out, TIMES_4, data_offset + address)); + __ MaybeUnpoisonHeapReference(out); + __ jmp(&done); + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. + address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ movl(calling_convention.GetRegisterAt(0), Immediate(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ movl(Address(out, info.value_offset), in); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 05d270a4e6..9a6dd985a4 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -39,7 +39,6 @@ IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX8 : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { } - X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { return down_cast<X86_64Assembler*>(codegen_->GetAssembler()); } @@ -2995,6 +2994,65 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + Location::RegisterLocation(RAX), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + X86_64Assembler* assembler = GetAssembler(); + + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + InvokeRuntimeCallingConvention calling_convention; + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ movl(out, Immediate(address)); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. 
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ movl(Address(out, info.value_offset), Immediate(value)); + } + } else { + CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>(); + // Check bounds of our cache. + __ leal(out, Address(in, -info.low)); + __ cmpl(out, Immediate(info.high - info.low + 1)); + NearLabel allocate, done; + __ j(kAboveEqual, &allocate); + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ movl(out, Address(out, TIMES_4, data_offset + address)); + __ MaybeUnpoisonHeapReference(out); + __ jmp(&done); + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. + address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address)); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ movl(Address(out, info.value_offset), in); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 2d3c00fb97..46ba048738 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -38,7 +38,8 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { position_(pos), is_singleton_(true), is_singleton_and_not_returned_(true), - is_singleton_and_not_deopt_visible_(true) { + is_singleton_and_not_deopt_visible_(true), + has_index_aliasing_(false) { CalculateEscape(reference_, nullptr, &is_singleton_, @@ -68,13 +69,29 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_; } + bool HasIndexAliasing() { + return has_index_aliasing_; + } + + void SetHasIndexAliasing(bool has_index_aliasing) { + // Only allow setting to true. + DCHECK(has_index_aliasing); + has_index_aliasing_ = has_index_aliasing; + } + private: HInstruction* const reference_; const size_t position_; // position in HeapLocationCollector's ref_info_array_. - bool is_singleton_; // can only be referred to by a single name in the method, - bool is_singleton_and_not_returned_; // and not returned to caller, - bool is_singleton_and_not_deopt_visible_; // and not used as an environment local of HDeoptimize. + // Can only be referred to by a single name in the method. + bool is_singleton_; + // Is singleton and not returned to caller. + bool is_singleton_and_not_returned_; + // Is singleton and not used as an environment local of HDeoptimize. + bool is_singleton_and_not_deopt_visible_; + // Some heap locations with reference_ have array index aliasing, + // e.g. arr[i] and arr[j] may be the same location. 
+ bool has_index_aliasing_; DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); }; @@ -321,6 +338,12 @@ class HeapLocationCollector : public HGraphVisitor { // Different constant indices do not alias. return false; } + ReferenceInfo* ref_info = loc1->GetReferenceInfo(); + if (ref_info->IsSingleton()) { + // This is guaranteed by the CanReferencesAlias() test above. + DCHECK_EQ(ref_info, loc2->GetReferenceInfo()); + ref_info->SetHasIndexAliasing(true); + } } return true; } @@ -497,7 +520,8 @@ class LSEVisitor : public HGraphVisitor { removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)), - singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) { + singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)), + singleton_new_arrays_(graph->GetArena()->Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -534,20 +558,24 @@ class LSEVisitor : public HGraphVisitor { } // At this point, stores in possibly_removed_stores_ can be safely removed. - for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) { - HInstruction* store = possibly_removed_stores_[i]; + for (HInstruction* store : possibly_removed_stores_) { DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet()); store->GetBlock()->RemoveInstruction(store); } // Eliminate allocations that are not used. - for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) { - HInstruction* new_instance = singleton_new_instances_[i]; + for (HInstruction* new_instance : singleton_new_instances_) { if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); new_instance->GetBlock()->RemoveInstruction(new_instance); } } + for (HInstruction* new_array : singleton_new_arrays_) { + if (!new_array->HasNonEnvironmentUses()) { + new_array->RemoveEnvironmentUsers(); + new_array->GetBlock()->RemoveInstruction(new_array); + } + } } private: @@ -558,7 +586,7 @@ class LSEVisitor : public HGraphVisitor { void KeepIfIsStore(HInstruction* heap_value) { if (heap_value == kDefaultHeapValue || heap_value == kUnknownHeapValue || - !heap_value->IsInstanceFieldSet()) { + !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) { return; } auto idx = std::find(possibly_removed_stores_.begin(), @@ -734,13 +762,16 @@ class LSEVisitor : public HGraphVisitor { heap_values[idx] = constant; return; } - if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) { - HInstruction* store = heap_value; - // This load must be from a singleton since it's from the same field - // that a "removed" store puts the value. That store must be to a singleton's field. - DCHECK(ref_info->IsSingleton()); - // Get the real heap value of the store. - heap_value = store->InputAt(1); + if (heap_value != kUnknownHeapValue) { + if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { + HInstruction* store = heap_value; + // This load must be from a singleton since it's from the same + // field/element that a "removed" store puts the value. That store + // must be to a singleton's field/element. + DCHECK(ref_info->IsSingleton()); + // Get the real heap value of the store. + heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2); + } } if (heap_value == kUnknownHeapValue) { // Load isn't eliminated. Put the load as the value into the HeapLocation. 
@@ -796,19 +827,19 @@ class LSEVisitor : public HGraphVisitor { if (Equal(heap_value, value)) { // Store into the heap location with the same value. same_value = true; - } else if (index != nullptr) { - // For array element, don't eliminate stores since it can be easily aliased - // with non-constant index. + } else if (index != nullptr && ref_info->HasIndexAliasing()) { + // For array element, don't eliminate stores if the index can be + // aliased. } else if (ref_info->IsSingletonAndRemovable()) { - // Store into a field of a singleton that's not returned. The value cannot be - // killed due to aliasing/invocation. It can be redundant since future loads can - // directly get the value set by this instruction. The value can still be killed due to - // merging or loop side effects. Stores whose values are killed due to merging/loop side - // effects later will be removed from possibly_removed_stores_ when that is detected. + // Store into a field/element of a singleton instance/array that's not returned. + // The value cannot be killed due to aliasing/invocation. It can be redundant since + // future loads can directly get the value set by this instruction. The value can + // still be killed due to merging or loop side effects. Stores whose values are + // killed due to merging/loop side effects later will be removed from + // possibly_removed_stores_ when that is detected. possibly_redundant = true; HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance(); - DCHECK(new_instance != nullptr); - if (new_instance->IsFinalizable()) { + if (new_instance != nullptr && new_instance->IsFinalizable()) { // Finalizable objects escape globally. Need to keep the store. possibly_redundant = false; } else { @@ -834,7 +865,7 @@ class LSEVisitor : public HGraphVisitor { if (!same_value) { if (possibly_redundant) { - DCHECK(instruction->IsInstanceFieldSet()); + DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet()); // Put the store as the heap value. If the value is loaded from heap // by a load later, this store isn't really redundant. heap_values[idx] = instruction; @@ -995,6 +1026,27 @@ class LSEVisitor : public HGraphVisitor { } } + void VisitNewArray(HNewArray* new_array) OVERRIDE { + ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array); + if (ref_info == nullptr) { + // new_array isn't used for array accesses. No need to process it. + return; + } + if (ref_info->IsSingletonAndRemovable()) { + singleton_new_arrays_.push_back(new_array); + } + ArenaVector<HInstruction*>& heap_values = + heap_values_for_[new_array->GetBlock()->GetBlockId()]; + for (size_t i = 0; i < heap_values.size(); i++) { + HeapLocation* location = heap_location_collector_.GetHeapLocation(i); + HInstruction* ref = location->GetReferenceInfo()->GetReference(); + if (ref == new_array && location->GetIndex() != nullptr) { + // Array elements are set to default heap values. + heap_values[i] = kDefaultHeapValue; + } + } + } + // Find an instruction's substitute if it should be removed. // Return the same instruction if it should not be removed. 
HInstruction* FindSubstitute(HInstruction* instruction) { @@ -1023,6 +1075,7 @@ class LSEVisitor : public HGraphVisitor { ArenaVector<HInstruction*> possibly_removed_stores_; ArenaVector<HInstruction*> singleton_new_instances_; + ArenaVector<HInstruction*> singleton_new_arrays_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 26c9ab83c2..318d83bf40 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -16,6 +16,7 @@ #include "loop_optimization.h" +#include "driver/compiler_driver.h" #include "linear_order.h" namespace art { @@ -57,8 +58,10 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { // HLoopOptimization::HLoopOptimization(HGraph* graph, + CompilerDriver* compiler_driver, HInductionVarAnalysis* induction_analysis) : HOptimization(graph, kLoopOptimizationPassName), + compiler_driver_(compiler_driver), induction_range_(induction_analysis), loop_allocator_(nullptr), top_loop_(nullptr), diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 9ddab4150c..0b798fc7a9 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -23,13 +23,17 @@ namespace art { +class CompilerDriver; + /** * Loop optimizations. Builds a loop hierarchy and applies optimizations to * the detected nested loops, such as removal of dead induction and empty loops. */ class HLoopOptimization : public HOptimization { public: - HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis); + HLoopOptimization(HGraph* graph, + CompilerDriver* compiler_driver, + HInductionVarAnalysis* induction_analysis); void Run() OVERRIDE; @@ -76,6 +80,9 @@ class HLoopOptimization : public HOptimization { bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); void RemoveDeadInstructions(const HInstructionList& list); + // Compiler driver (to query ISA features). + const CompilerDriver* compiler_driver_; + // Range information based on prior induction variable analysis. InductionVarRange induction_range_; diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index 9a6b4935b2..5b9350689e 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -31,7 +31,7 @@ class LoopOptimizationTest : public CommonCompilerTest { allocator_(&pool_), graph_(CreateGraph(&allocator_)), iva_(new (&allocator_) HInductionVarAnalysis(graph_)), - loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) { + loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) { BuildGraph(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 8a9e61875a..c39aed2c6a 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1914,6 +1914,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { virtual bool IsControlFlow() const { return false; } + // Can the instruction throw? + // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance), + // could throw OOME, but it is still OK to remove them if they are unused. 
virtual bool CanThrow() const { return false; } bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index f72bd6a5a3..607b9433ae 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -56,6 +56,7 @@ #include "builder.h" #include "cha_guard_optimization.h" #include "code_generator.h" +#include "code_sinking.h" #include "compiled_method.h" #include "compiler.h" #include "constant_folding.h" @@ -518,9 +519,11 @@ static HOptimization* BuildOptimization( } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { return new (arena) SideEffectsAnalysis(graph); } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { - return new (arena) HLoopOptimization(graph, most_recent_induction); + return new (arena) HLoopOptimization(graph, driver, most_recent_induction); } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) { return new (arena) CHAGuardOptimization(graph); + } else if (opt_name == CodeSinking::kCodeSinkingPassName) { + return new (arena) CodeSinking(graph, stats); #ifdef ART_ENABLE_CODEGEN_arm } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); @@ -770,13 +773,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, HConstantFolding* fold2 = new (arena) HConstantFolding( graph, "constant_folding$after_inlining"); HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce"); - SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); - GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects); - LICM* licm = new (arena) LICM(graph, *side_effects, stats); - LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); + SideEffectsAnalysis* side_effects1 = new (arena) SideEffectsAnalysis( + graph, "side_effects$before_gvn"); + SideEffectsAnalysis* side_effects2 = new (arena) SideEffectsAnalysis( + graph, "side_effects$before_lse"); + GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects1); + LICM* licm = new (arena) LICM(graph, *side_effects1, stats); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); - BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); - HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction); + BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); + HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); + LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2); HSharpening* sharpening = new (arena) HSharpening( graph, codegen, dex_compilation_unit, driver, handles); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( @@ -787,6 +793,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, graph, stats, "instruction_simplifier$before_codegen"); IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); + CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats); HOptimization* optimizations1[] = { intrinsics, @@ -806,7 +813,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, fold2, // TODO: if we don't 
inline we can also skip fold2. simplify2, dce2, - side_effects, + side_effects1, gvn, licm, induction, @@ -814,9 +821,11 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, loop, fold3, // evaluates code generated by dynamic bce simplify3, + side_effects2, lse, cha_guard, dce3, + code_sinking, // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 203b1ec7ec..ae9a8119a7 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -17,6 +17,7 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ +#include <atomic> #include <iomanip> #include <string> #include <type_traits> @@ -67,14 +68,18 @@ enum MethodCompilationStat { kImplicitNullCheckGenerated, kExplicitNullCheckGenerated, kSimplifyIf, + kInstructionSunk, kLastStat }; class OptimizingCompilerStats { public: - OptimizingCompilerStats() {} + OptimizingCompilerStats() { + // The std::atomic<> default constructor leaves values uninitialized, so initialize them now. + Reset(); + } - void RecordStat(MethodCompilationStat stat, size_t count = 1) { + void RecordStat(MethodCompilationStat stat, uint32_t count = 1) { compile_stats_[stat] += count; } @@ -93,7 +98,7 @@ class OptimizingCompilerStats { << " methods: " << std::fixed << std::setprecision(2) << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled."; - for (int i = 0; i < kLastStat; i++) { + for (size_t i = 0; i < kLastStat; i++) { if (compile_stats_[i] != 0) { LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " << compile_stats_[i]; @@ -102,6 +107,21 @@ class OptimizingCompilerStats { } } + void AddTo(OptimizingCompilerStats* other_stats) { + for (size_t i = 0; i != kLastStat; ++i) { + uint32_t count = compile_stats_[i]; + if (count != 0) { + other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count); + } + } + } + + void Reset() { + for (size_t i = 0; i != kLastStat; ++i) { + compile_stats_[i] = 0u; + } + } + private: std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { std::string name; @@ -147,6 +167,7 @@ class OptimizingCompilerStats { case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break; case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break; case kSimplifyIf: name = "SimplifyIf"; break; + case kInstructionSunk: name = "InstructionSunk"; break; case kLastStat: LOG(FATAL) << "invalid stat " @@ -156,7 +177,7 @@ class OptimizingCompilerStats { return "OptStat#" + name; } - AtomicInteger compile_stats_[kLastStat]; + std::atomic<uint32_t> compile_stats_[kLastStat]; DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats); }; diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index bac6088bf7..fea47e66d9 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -25,8 +25,8 @@ namespace art { class SideEffectsAnalysis : public HOptimization { public: - explicit SideEffectsAnalysis(HGraph* graph) - : HOptimization(graph, kSideEffectsAnalysisPassName), + SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName) + : HOptimization(graph, pass_name), 
graph_(graph), block_effects_(graph->GetBlocks().size(), graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)), @@ -41,7 +41,7 @@ class SideEffectsAnalysis : public HOptimization { bool HasRun() const { return has_run_; } - static constexpr const char* kSideEffectsAnalysisPassName = "SideEffects"; + static constexpr const char* kSideEffectsAnalysisPassName = "side_effects"; private: void UpdateLoopEffects(HLoopInformation* info, SideEffects effects); diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 322f6c4d70..e81e767575 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -135,6 +135,16 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler { // jumping within 2KB range. For B(cond, label), because the supported branch range is 256 // bytes; we use the far_target hint to try to use 16-bit T1 encoding for short range jumps. void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true); + + // Use literal for generating double constant if it doesn't fit VMOV encoding. + void Vmov(vixl32::DRegister rd, double imm) { + if (vixl::VFP::IsImmFP64(imm)) { + MacroAssembler::Vmov(rd, imm); + } else { + MacroAssembler::Vldr(rd, imm); + } + } + using MacroAssembler::Vmov; }; class ArmVIXLAssembler FINAL : public Assembler { diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index 5e83e825ed..2e2231b07d 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -3475,8 +3475,8 @@ void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberO CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister()); LoadFromOffset(kLoadWord, dest.AsCoreRegister(), base.AsMips().AsCoreRegister(), offs.Int32Value()); - if (kPoisonHeapReferences && unpoison_reference) { - Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister()); + if (unpoison_reference) { + MaybeUnpoisonHeapReference(dest.AsCoreRegister()); } } diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 2fca185ec3..1a5a23d10b 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -501,8 +501,10 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi bool is_float = false); private: + // This will be used as an argument for loads/stores + // when there is no need for implicit null checks. struct NoImplicitNullChecker { - void operator()() {} + void operator()() const {} }; public: @@ -727,6 +729,38 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Pop(Register rd); void PopAndReturn(Register rd, Register rt); + // + // Heap poisoning. + // + + // Poison a heap reference contained in `src` and store it in `dst`. + void PoisonHeapReference(Register dst, Register src) { + // dst = -src. + Subu(dst, ZERO, src); + } + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(Register reg) { + // reg = -reg. + PoisonHeapReference(reg, reg); + } + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(Register reg) { + // reg = -reg. + Subu(reg, ZERO, reg); + } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
+ void MaybeUnpoisonHeapReference(Register reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } + } + void Bind(Label* label) OVERRIDE { Bind(down_cast<MipsLabel*>(label)); } diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 998f2c709b..39eb5893d8 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -488,6 +488,11 @@ void Mips64Assembler::Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { EmitI(0xf, rs, rt, imm16); } +void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) { + CHECK_NE(rs, ZERO); + EmitI(0x1d, rs, rt, imm16); +} + void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) { EmitI(1, rs, static_cast<GpuRegister>(6), imm16); } @@ -2015,80 +2020,18 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) { Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO); } -void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, +void Mips64Assembler::LoadFromOffset(LoadOperandType type, + GpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kLoadSignedByte: - Lb(reg, base, offset); - break; - case kLoadUnsignedByte: - Lbu(reg, base, offset); - break; - case kLoadSignedHalfword: - Lh(reg, base, offset); - break; - case kLoadUnsignedHalfword: - Lhu(reg, base, offset); - break; - case kLoadWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lw(reg, base, offset); - break; - case kLoadUnsignedWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lwu(reg, base, offset); - break; - case kLoadDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Lwu(reg, base, offset); - Lwu(TMP2, base, offset + kMips64WordSize); - Dinsu(reg, TMP2, 32, 32); - } else { - Ld(reg, base, offset); - } - break; - } + LoadFromOffset<>(type, reg, base, offset); } -void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, +void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, + FpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kLoadWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Lwc1(reg, base, offset); - break; - case kLoadDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Lwc1(reg, base, offset); - Lw(TMP2, base, offset + kMips64WordSize); - Mthc1(TMP2, reg); - } else { - Ldc1(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + LoadFpuFromOffset<>(type, reg, base, offset); } void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, @@ -2118,72 +2061,18 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, } } -void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, +void 
Mips64Assembler::StoreToOffset(StoreOperandType type, + GpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kStoreByte: - Sb(reg, base, offset); - break; - case kStoreHalfword: - Sh(reg, base, offset); - break; - case kStoreWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Sw(reg, base, offset); - break; - case kStoreDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Sw(reg, base, offset); - Dsrl32(TMP2, reg, 0); - Sw(TMP2, base, offset + kMips64WordSize); - } else { - Sd(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreToOffset<>(type, reg, base, offset); } -void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base, +void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, + FpuRegister reg, + GpuRegister base, int32_t offset) { - if (!IsInt<16>(offset) || - (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && - !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { - LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); - Daddu(AT, AT, base); - base = AT; - offset &= (kMips64DoublewordSize - 1); - } - - switch (type) { - case kStoreWord: - CHECK_ALIGNED(offset, kMips64WordSize); - Swc1(reg, base, offset); - break; - case kStoreDoubleword: - if (!IsAligned<kMips64DoublewordSize>(offset)) { - CHECK_ALIGNED(offset, kMips64WordSize); - Mfhc1(TMP2, reg); - Swc1(reg, base, offset); - Sw(TMP2, base, offset + kMips64WordSize); - } else { - Sdc1(reg, base, offset); - } - break; - default: - LOG(FATAL) << "UNREACHABLE"; - } + StoreFpuToOffset<>(type, reg, base, offset); } static dwarf::Reg DWARFReg(GpuRegister reg) { @@ -2367,12 +2256,8 @@ void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, Membe CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister()); LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(), base.AsMips64().AsGpuRegister(), offs.Int32Value()); - if (kPoisonHeapReferences && unpoison_reference) { - // TODO: review - // Negate the 32-bit ref - Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister()); - // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64 - Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32); + if (unpoison_reference) { + MaybeUnpoisonHeapReference(dest.AsGpuRegister()); } } diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index a0a1db634d..8bbe862d19 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -512,6 +512,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64 void Lui(GpuRegister rt, uint16_t imm16); void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16); + void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64 void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64 void Dati(GpuRegister rs, uint16_t imm16); // MIPS64 void Sync(uint32_t stype); @@ -654,6 +655,44 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Addiu32(GpuRegister rt, GpuRegister rs, int32_t 
value); void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64 + // + // Heap poisoning. + // + + // Poison a heap reference contained in `src` and store it in `dst`. + void PoisonHeapReference(GpuRegister dst, GpuRegister src) { + // dst = -src. + // Negate the 32-bit ref. + Dsubu(dst, ZERO, src); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. + Dext(dst, dst, 0, 32); + } + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(GpuRegister reg) { + // reg = -reg. + PoisonHeapReference(reg, reg); + } + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(GpuRegister reg) { + // reg = -reg. + // Negate the 32-bit ref. + Dsubu(reg, ZERO, reg); + // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64. + Dext(reg, reg, 0, 32); + } + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(GpuRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } + } + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybeUnpoisonHeapReference(GpuRegister reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } + } + void Bind(Label* label) OVERRIDE { Bind(down_cast<Mips64Label*>(label)); } @@ -733,6 +772,191 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Bc1nez(FpuRegister ft, Mips64Label* label); void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size); + + private: + // This will be used as an argument for loads/stores + // when there is no need for implicit null checks. + struct NoImplicitNullChecker { + void operator()() const {} + }; + + public: + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFromOffset(LoadOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + if (!IsInt<16>(offset) || + (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && + !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); + Daddu(AT, AT, base); + base = AT; + offset &= (kMips64DoublewordSize - 1); + } + + switch (type) { + case kLoadSignedByte: + Lb(reg, base, offset); + break; + case kLoadUnsignedByte: + Lbu(reg, base, offset); + break; + case kLoadSignedHalfword: + Lh(reg, base, offset); + break; + case kLoadUnsignedHalfword: + Lhu(reg, base, offset); + break; + case kLoadWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lw(reg, base, offset); + break; + case kLoadUnsignedWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lwu(reg, base, offset); + break; + case kLoadDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Lwu(reg, base, offset); + null_checker(); + Lwu(TMP2, base, offset + kMips64WordSize); + Dinsu(reg, TMP2, 32, 32); + } else { + Ld(reg, base, offset); + null_checker(); + } + break; + } + if (type != kLoadDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void LoadFpuFromOffset(LoadOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + if (!IsInt<16>(offset) || + (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && + 
!IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); + Daddu(AT, AT, base); + base = AT; + offset &= (kMips64DoublewordSize - 1); + } + + switch (type) { + case kLoadWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Lwc1(reg, base, offset); + null_checker(); + break; + case kLoadDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Lwc1(reg, base, offset); + null_checker(); + Lw(TMP2, base, offset + kMips64WordSize); + Mthc1(TMP2, reg); + } else { + Ldc1(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreToOffset(StoreOperandType type, + GpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + if (!IsInt<16>(offset) || + (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && + !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); + Daddu(AT, AT, base); + base = AT; + offset &= (kMips64DoublewordSize - 1); + } + + switch (type) { + case kStoreByte: + Sb(reg, base, offset); + break; + case kStoreHalfword: + Sh(reg, base, offset); + break; + case kStoreWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + break; + case kStoreDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Sw(reg, base, offset); + null_checker(); + Dsrl32(TMP2, reg, 0); + Sw(TMP2, base, offset + kMips64WordSize); + } else { + Sd(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + if (type != kStoreDoubleword) { + null_checker(); + } + } + + template <typename ImplicitNullChecker = NoImplicitNullChecker> + void StoreFpuToOffset(StoreOperandType type, + FpuRegister reg, + GpuRegister base, + int32_t offset, + ImplicitNullChecker null_checker = NoImplicitNullChecker()) { + if (!IsInt<16>(offset) || + (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) && + !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) { + LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1)); + Daddu(AT, AT, base); + base = AT; + offset &= (kMips64DoublewordSize - 1); + } + + switch (type) { + case kStoreWord: + CHECK_ALIGNED(offset, kMips64WordSize); + Swc1(reg, base, offset); + null_checker(); + break; + case kStoreDoubleword: + if (!IsAligned<kMips64DoublewordSize>(offset)) { + CHECK_ALIGNED(offset, kMips64WordSize); + Mfhc1(TMP2, reg); + Swc1(reg, base, offset); + null_checker(); + Sw(TMP2, base, offset + kMips64WordSize); + } else { + Sdc1(reg, base, offset); + null_checker(); + } + break; + default: + LOG(FATAL) << "UNREACHABLE"; + } + } + void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset); void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index 74b8f068c1..96a02c46d7 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -1269,6 +1269,24 @@ TEST_F(AssemblerMIPS64Test, Lui) { DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui"); 
} +TEST_F(AssemblerMIPS64Test, Daui) { + std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters(); + std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters(); + reg2_registers.erase(reg2_registers.begin()); // reg2 can't be ZERO, remove it. + std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true); + WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size()); + std::ostringstream expected; + for (mips64::GpuRegister* reg1 : reg1_registers) { + for (mips64::GpuRegister* reg2 : reg2_registers) { + for (int64_t imm : imms) { + __ Daui(*reg1, *reg2, imm); + expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n"; + } + } + } + DriverStr(expected.str(), "daui"); +} + TEST_F(AssemblerMIPS64Test, Dahi) { DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi"); } diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 6eab302dab..6a57f45e42 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -958,6 +958,14 @@ void X86Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) { } +void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x5B); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 2999599fc5..e3c123ccaf 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -457,6 +457,7 @@ class X86Assembler FINAL : public Assembler { void cvttss2si(Register dst, XmmRegister src); void cvttsd2si(Register dst, XmmRegister src); + void cvtdq2ps(XmmRegister dst, XmmRegister src); void cvtdq2pd(XmmRegister dst, XmmRegister src); void comiss(XmmRegister a, XmmRegister b); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index a74bea207e..110d0dcd05 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -322,6 +322,14 @@ TEST_F(AssemblerX86Test, RollImm) { DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli"); } +TEST_F(AssemblerX86Test, Cvtdq2ps) { + DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps"); +} + +TEST_F(AssemblerX86Test, Cvtdq2pd) { + DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd"); +} + TEST_F(AssemblerX86Test, ComissAddr) { GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0)); const char* expected = "comiss 0(%EAX), %xmm0\n"; diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 458204aca9..688fdcc37d 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1153,6 +1153,15 @@ void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) { } +void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0x5B); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + + void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF3); diff --git 
a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 0dc11d840b..480e7116eb 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -486,6 +486,7 @@ class X86_64Assembler FINAL : public Assembler { void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version. void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit); + void cvtdq2ps(XmmRegister dst, XmmRegister src); void cvtdq2pd(XmmRegister dst, XmmRegister src); void comiss(XmmRegister a, XmmRegister b); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index fe9449720f..ba011c968e 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1205,6 +1205,10 @@ TEST_F(AssemblerX86_64Test, Cvtsd2ss) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss"); } +TEST_F(AssemblerX86_64Test, Cvtdq2ps) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps"); +} + TEST_F(AssemblerX86_64Test, Cvtdq2pd) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd"); }
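
For orientation, the fast path emitted by the new IntegerValueOf intrinsic in the ARM, x86 and x86-64 back ends above can be summarized by the following standalone C++ sketch. It is an illustration only: `BoxedInt`, `Allocate`, `low`, `high` and `cache` are stand-ins for the IntegerValueOfInfo fields and the kQuickAllocObjectInitialized slow path, not names taken from the patch.

```cpp
#include <cstdint>

// Stand-in for a boot-image java.lang.Integer object (assumption, not ART code).
struct BoxedInt { int32_t value; };

// Stand-in for the kQuickAllocObjectInitialized slow path plus the store of
// `value` into the freshly allocated box.
static BoxedInt* Allocate(int32_t value) { return new BoxedInt{value}; }

// Sketch of the emitted fast path: values inside the boot-image cache range
// are loaded directly from the cache array; everything else is allocated.
BoxedInt* IntegerValueOf(int32_t value, int32_t low, int32_t high, BoxedInt** cache) {
  // One unsigned comparison covers both bounds, mirroring the
  // `leal out, [in - low]` / `cmpl` / `jae` sequence in the x86 code above.
  uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low);
  if (index < static_cast<uint32_t>(high - low + 1)) {
    return cache[index];   // in range: reuse the cached boxed Integer
  }
  return Allocate(value);  // out of range: allocate and initialize a new box
}
```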
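
Similarly, the MIPS/MIPS64 heap-poisoning helpers added above negate the 32-bit reference and, on 64-bit, zero-extend the result back into the low 32 bits. A minimal sketch of that transformation, assuming 32-bit references held in 64-bit registers; the function names here are illustrative, not the assembler's API.

```cpp
#include <cstdint>

// Poisoning stores the arithmetic negation of the 32-bit reference
// (Subu/Dsubu against ZERO); on 64-bit the result is then constrained back
// to 32 bits (Dext reg, reg, 0, 32), as in assembler_mips64.h above.
inline uint64_t PoisonReference64(uint64_t reg) {
  uint32_t ref = static_cast<uint32_t>(reg);  // low 32 bits hold the reference
  uint32_t poisoned = 0u - ref;               // modular negation, matches Subu/Dsubu
  return static_cast<uint64_t>(poisoned);     // zero-extended into bits 32..63
}

// Unpoisoning is the same negation, so applying it twice round-trips a value.
inline uint64_t UnpoisonReference64(uint64_t reg) { return PoisonReference64(reg); }
```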