diff options
74 files changed, 3072 insertions, 748 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index 7fb009adc0..2556178ddf 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -63,6 +63,7 @@ art_cc_defaults { "optimizing/licm.cc", "optimizing/load_store_elimination.cc", "optimizing/locations.cc", + "optimizing/loop_optimization.cc", "optimizing/nodes.cc", "optimizing/optimization.cc", "optimizing/optimizing_compiler.cc", @@ -318,6 +319,7 @@ art_cc_test { "optimizing/induction_var_range_test.cc", "optimizing/licm_test.cc", "optimizing/live_interval_test.cc", + "optimizing/loop_optimization_test.cc", "optimizing/nodes_test.cc", "optimizing/parallel_move_test.cc", "optimizing/pretty_printer_test.cc", diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index b726649138..bc8facdb41 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -174,13 +174,12 @@ void CommonCompilerTest::SetUp() { void CommonCompilerTest::CreateCompilerDriver(Compiler::Kind kind, InstructionSet isa, size_t number_of_threads) { + compiler_options_->boot_image_ = true; compiler_driver_.reset(new CompilerDriver(compiler_options_.get(), verification_results_.get(), kind, isa, instruction_set_features_.get(), - /* boot_image */ true, - /* app_image */ false, GetImageClasses(), GetCompiledClasses(), GetCompiledMethods(), diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h index 1a87448e80..99b0ac10d1 100644 --- a/compiler/compiled_method.h +++ b/compiler/compiled_method.h @@ -177,6 +177,7 @@ class LinkerPatch { kTypeRelative, // NOTE: Actual patching is instruction_set-dependent. kString, kStringRelative, // NOTE: Actual patching is instruction_set-dependent. + kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent. kDexCacheArray, // NOTE: Actual patching is instruction_set-dependent. 
}; @@ -244,10 +245,20 @@ class LinkerPatch { return patch; } + static LinkerPatch StringBssEntryPatch(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t target_string_idx) { + LinkerPatch patch(literal_offset, Type::kStringBssEntry, target_dex_file); + patch.string_idx_ = target_string_idx; + patch.pc_insn_offset_ = pc_insn_offset; + return patch; + } + static LinkerPatch DexCacheArrayPatch(size_t literal_offset, const DexFile* target_dex_file, uint32_t pc_insn_offset, - size_t element_offset) { + uint32_t element_offset) { DCHECK(IsUint<32>(element_offset)); LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file); patch.pc_insn_offset_ = pc_insn_offset; @@ -271,6 +282,7 @@ class LinkerPatch { case Type::kCallRelative: case Type::kTypeRelative: case Type::kStringRelative: + case Type::kStringBssEntry: case Type::kDexCacheArray: return true; default: @@ -296,12 +308,16 @@ class LinkerPatch { } const DexFile* TargetStringDexFile() const { - DCHECK(patch_type_ == Type::kString || patch_type_ == Type::kStringRelative); + DCHECK(patch_type_ == Type::kString || + patch_type_ == Type::kStringRelative || + patch_type_ == Type::kStringBssEntry); return target_dex_file_; } uint32_t TargetStringIndex() const { - DCHECK(patch_type_ == Type::kString || patch_type_ == Type::kStringRelative); + DCHECK(patch_type_ == Type::kString || + patch_type_ == Type::kStringRelative || + patch_type_ == Type::kStringBssEntry); return string_idx_; } @@ -318,6 +334,7 @@ class LinkerPatch { uint32_t PcInsnOffset() const { DCHECK(patch_type_ == Type::kTypeRelative || patch_type_ == Type::kStringRelative || + patch_type_ == Type::kStringBssEntry || patch_type_ == Type::kDexCacheArray); return pc_insn_offset_; } diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc index 5063d716d5..b72d0acb8e 100644 --- a/compiler/driver/compiled_method_storage_test.cc +++ 
b/compiler/driver/compiled_method_storage_test.cc @@ -32,8 +32,6 @@ TEST(CompiledMethodStorage, Deduplicate) { Compiler::kOptimizing, /* instruction_set_ */ kNone, /* instruction_set_features */ nullptr, - /* boot_image */ false, - /* app_image */ false, /* image_classes */ nullptr, /* compiled_classes */ nullptr, /* compiled_methods */ nullptr, diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h index d807fcad96..3e9e3cbce6 100644 --- a/compiler/driver/compiler_driver-inl.h +++ b/compiler/driver/compiler_driver-inl.h @@ -31,6 +31,10 @@ namespace art { +inline mirror::DexCache* CompilerDriver::GetDexCache(const DexCompilationUnit* mUnit) { + return mUnit->GetClassLinker()->FindDexCache(Thread::Current(), *mUnit->GetDexFile(), false); +} + inline mirror::ClassLoader* CompilerDriver::GetClassLoader(const ScopedObjectAccess& soa, const DexCompilationUnit* mUnit) { return soa.Decode<mirror::ClassLoader>(mUnit->GetClassLoader()).Ptr(); @@ -83,6 +87,10 @@ inline ArtField* CompilerDriver::ResolveFieldWithDexFile( return resolved_field; } +inline mirror::DexCache* CompilerDriver::FindDexCache(const DexFile* dex_file) { + return Runtime::Current()->GetClassLinker()->FindDexCache(Thread::Current(), *dex_file, false); +} + inline ArtField* CompilerDriver::ResolveField( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit, @@ -92,6 +100,23 @@ inline ArtField* CompilerDriver::ResolveField( is_static); } +inline void CompilerDriver::GetResolvedFieldDexFileLocation( + ArtField* resolved_field, const DexFile** declaring_dex_file, + uint16_t* declaring_class_idx, uint16_t* declaring_field_idx) { + ObjPtr<mirror::Class> declaring_class = resolved_field->GetDeclaringClass(); + *declaring_dex_file = declaring_class->GetDexCache()->GetDexFile(); + *declaring_class_idx = declaring_class->GetDexTypeIndex(); + *declaring_field_idx = 
resolved_field->GetDexFieldIndex(); +} + +inline bool CompilerDriver::IsFieldVolatile(ArtField* field) { + return field->IsVolatile(); +} + +inline MemberOffset CompilerDriver::GetFieldOffset(ArtField* field) { + return field->GetOffset(); +} + inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField( mirror::DexCache* dex_cache, mirror::Class* referrer_class, ArtField* resolved_field, uint16_t field_idx) { @@ -194,6 +219,43 @@ inline bool CompilerDriver::IsClassOfStaticMethodAvailableToReferrer( return result.first; } +inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer_class, + ArtField* resolved_field) { + DCHECK(resolved_field->IsStatic()); + ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass(); + return referrer_class == fields_class; +} + +inline bool CompilerDriver::CanAssumeClassIsInitialized(mirror::Class* klass) { + // Being loaded is a pre-requisite for being initialized but let's do the cheap check first. + // + // NOTE: When AOT compiling an app, we eagerly initialize app classes (and potentially their + // super classes in the boot image) but only those that have a trivial initialization, i.e. + // without <clinit>() or static values in the dex file for that class or any of its super + // classes. So while we could see the klass as initialized during AOT compilation and have + // it only loaded at runtime, the needed initialization would have to be trivial and + // unobservable from Java, so we may as well treat it as initialized. 
+ if (!klass->IsInitialized()) { + return false; + } + return CanAssumeClassIsLoaded(klass); +} + +inline bool CompilerDriver::CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, + mirror::Class* klass) { + return (referrer_class != nullptr + && !referrer_class->IsInterface() + && referrer_class->IsSubClass(klass)) + || CanAssumeClassIsInitialized(klass); +} + +inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class, + ArtField* resolved_field) { + DCHECK(resolved_field->IsStatic()); + ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass(); + return CanReferrerAssumeClassIsInitialized(referrer_class, fields_class.Decode()); +} + inline ArtMethod* CompilerDriver::ResolveMethod( ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit, @@ -213,6 +275,35 @@ inline ArtMethod* CompilerDriver::ResolveMethod( return resolved_method; } +inline void CompilerDriver::GetResolvedMethodDexFileLocation( + ArtMethod* resolved_method, const DexFile** declaring_dex_file, + uint16_t* declaring_class_idx, uint16_t* declaring_method_idx) { + mirror::Class* declaring_class = resolved_method->GetDeclaringClass(); + *declaring_dex_file = declaring_class->GetDexCache()->GetDexFile(); + *declaring_class_idx = declaring_class->GetDexTypeIndex(); + *declaring_method_idx = resolved_method->GetDexMethodIndex(); +} + +inline uint16_t CompilerDriver::GetResolvedMethodVTableIndex( + ArtMethod* resolved_method, InvokeType type) { + if (type == kVirtual || type == kSuper) { + return resolved_method->GetMethodIndex(); + } else if (type == kInterface) { + return resolved_method->GetDexMethodIndex(); + } else { + return DexFile::kDexNoIndex16; + } +} + +inline bool CompilerDriver::IsMethodsClassInitialized(mirror::Class* referrer_class, + ArtMethod* resolved_method) { + if (!resolved_method->IsStatic()) { + return true; + } + mirror::Class* methods_class = 
resolved_method->GetDeclaringClass(); + return CanReferrerAssumeClassIsInitialized(referrer_class, methods_class); +} + } // namespace art #endif // ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_ diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 2ad30eeb95..e2f8d929c3 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -95,6 +95,8 @@ class CompilerDriver::AOTCompilationStats { public: AOTCompilationStats() : stats_lock_("AOT compilation statistics lock"), + types_in_dex_cache_(0), types_not_in_dex_cache_(0), + strings_in_dex_cache_(0), strings_not_in_dex_cache_(0), resolved_types_(0), unresolved_types_(0), resolved_instance_fields_(0), unresolved_instance_fields_(0), resolved_local_static_fields_(0), resolved_static_fields_(0), unresolved_static_fields_(0), @@ -110,6 +112,8 @@ class CompilerDriver::AOTCompilationStats { } void Dump() { + DumpStat(types_in_dex_cache_, types_not_in_dex_cache_, "types known to be in dex cache"); + DumpStat(strings_in_dex_cache_, strings_not_in_dex_cache_, "strings known to be in dex cache"); DumpStat(resolved_types_, unresolved_types_, "types resolved"); DumpStat(resolved_instance_fields_, unresolved_instance_fields_, "instance fields resolved"); DumpStat(resolved_local_static_fields_ + resolved_static_fields_, unresolved_static_fields_, @@ -160,6 +164,26 @@ class CompilerDriver::AOTCompilationStats { #define STATS_LOCK() #endif + void TypeInDexCache() REQUIRES(!stats_lock_) { + STATS_LOCK(); + types_in_dex_cache_++; + } + + void TypeNotInDexCache() REQUIRES(!stats_lock_) { + STATS_LOCK(); + types_not_in_dex_cache_++; + } + + void StringInDexCache() REQUIRES(!stats_lock_) { + STATS_LOCK(); + strings_in_dex_cache_++; + } + + void StringNotInDexCache() REQUIRES(!stats_lock_) { + STATS_LOCK(); + strings_not_in_dex_cache_++; + } + void TypeDoesntNeedAccessCheck() REQUIRES(!stats_lock_) { STATS_LOCK(); resolved_types_++; @@ -201,6 +225,67 @@ class 
CompilerDriver::AOTCompilationStats { type_based_devirtualization_++; } + // Indicate that a method of the given type was resolved at compile time. + void ResolvedMethod(InvokeType type) REQUIRES(!stats_lock_) { + DCHECK_LE(type, kMaxInvokeType); + STATS_LOCK(); + resolved_methods_[type]++; + } + + // Indicate that a method of the given type was unresolved at compile time as it was in an + // unknown dex file. + void UnresolvedMethod(InvokeType type) REQUIRES(!stats_lock_) { + DCHECK_LE(type, kMaxInvokeType); + STATS_LOCK(); + unresolved_methods_[type]++; + } + + // Indicate that a type of virtual method dispatch has been converted into a direct method + // dispatch. + void VirtualMadeDirect(InvokeType type) REQUIRES(!stats_lock_) { + DCHECK(type == kVirtual || type == kInterface || type == kSuper); + STATS_LOCK(); + virtual_made_direct_[type]++; + } + + // Indicate that a method of the given type was able to call directly into boot. + void DirectCallsToBoot(InvokeType type) REQUIRES(!stats_lock_) { + DCHECK_LE(type, kMaxInvokeType); + STATS_LOCK(); + direct_calls_to_boot_[type]++; + } + + // Indicate that a method of the given type was able to be resolved directly from boot. 
+ void DirectMethodsToBoot(InvokeType type) REQUIRES(!stats_lock_) { + DCHECK_LE(type, kMaxInvokeType); + STATS_LOCK(); + direct_methods_to_boot_[type]++; + } + + void ProcessedInvoke(InvokeType type, int flags) REQUIRES(!stats_lock_) { + STATS_LOCK(); + if (flags == 0) { + unresolved_methods_[type]++; + } else { + DCHECK_NE((flags & kFlagMethodResolved), 0); + resolved_methods_[type]++; + if ((flags & kFlagVirtualMadeDirect) != 0) { + virtual_made_direct_[type]++; + if ((flags & kFlagPreciseTypeDevirtualization) != 0) { + type_based_devirtualization_++; + } + } else { + DCHECK_EQ((flags & kFlagPreciseTypeDevirtualization), 0); + } + if ((flags & kFlagDirectCallToBoot) != 0) { + direct_calls_to_boot_[type]++; + } + if ((flags & kFlagDirectMethodToBoot) != 0) { + direct_methods_to_boot_[type]++; + } + } + } + // A check-cast could be eliminated due to verifier type analysis. void SafeCast() REQUIRES(!stats_lock_) { STATS_LOCK(); @@ -216,6 +301,12 @@ class CompilerDriver::AOTCompilationStats { private: Mutex stats_lock_; + size_t types_in_dex_cache_; + size_t types_not_in_dex_cache_; + + size_t strings_in_dex_cache_; + size_t strings_not_in_dex_cache_; + size_t resolved_types_; size_t unresolved_types_; @@ -264,8 +355,6 @@ CompilerDriver::CompilerDriver( Compiler::Kind compiler_kind, InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, - bool boot_image, - bool app_image, std::unordered_set<std::string>* image_classes, std::unordered_set<std::string>* compiled_classes, std::unordered_set<std::string>* compiled_methods, @@ -286,8 +375,6 @@ CompilerDriver::CompilerDriver( compiled_methods_lock_("compiled method lock"), compiled_methods_(MethodTable::key_compare()), non_relative_linker_patch_count_(0u), - boot_image_(boot_image), - app_image_(app_image), image_classes_(image_classes), classes_to_compile_(compiled_classes), methods_to_compile_(compiled_methods), @@ -313,7 +400,7 @@ CompilerDriver::CompilerDriver( if 
(compiler_options->VerifyOnlyProfile()) { CHECK(profile_compilation_info_ != nullptr) << "Requires profile"; } - if (boot_image_) { + if (GetCompilerOptions().IsBootImage()) { CHECK(image_classes_.get() != nullptr) << "Expected image classes for boot image"; } } @@ -405,7 +492,7 @@ void CompilerDriver::CompileAll(jobject class_loader, // 3) Attempt to verify all classes // 4) Attempt to initialize image classes, and trivially initialized classes PreCompile(class_loader, dex_files, timings); - if (IsBootImage()) { + if (GetCompilerOptions().IsBootImage()) { // We don't need to setup the intrinsics for non boot image compilation, as // those compilations will pick up a boot image that have the ArtMethod already // set with the intrinsics flag. @@ -758,10 +845,9 @@ void CompilerDriver::Resolve(jobject class_loader, // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a // stable order. -static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, +static void ResolveConstStrings(CompilerDriver* driver, const DexFile& dex_file, - const DexFile::CodeItem* code_item) - REQUIRES_SHARED(Locks::mutator_lock_) { + const DexFile::CodeItem* code_item) { if (code_item == nullptr) { // Abstract or native method. 
return; @@ -769,19 +855,18 @@ static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, const uint16_t* code_ptr = code_item->insns_; const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_; - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); while (code_ptr < code_end) { const Instruction* inst = Instruction::At(code_ptr); switch (inst->Opcode()) { case Instruction::CONST_STRING: { uint32_t string_index = inst->VRegB_21c(); - class_linker->ResolveString(dex_file, string_index, dex_cache); + driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index); break; } case Instruction::CONST_STRING_JUMBO: { uint32_t string_index = inst->VRegB_31c(); - class_linker->ResolveString(dex_file, string_index, dex_cache); + driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index); break; } @@ -796,13 +881,7 @@ static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache, static void ResolveConstStrings(CompilerDriver* driver, const std::vector<const DexFile*>& dex_files, TimingLogger* timings) { - ScopedObjectAccess soa(Thread::Current()); - StackHandleScope<1> hs(soa.Self()); - ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); - MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr)); - for (const DexFile* dex_file : dex_files) { - dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file, false)); TimingLogger::ScopedTiming t("Resolve const-string Strings", timings); size_t class_def_count = dex_file->NumClassDefs(); @@ -843,7 +922,7 @@ static void ResolveConstStrings(CompilerDriver* driver, continue; } previous_direct_method_idx = method_idx; - ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem()); + ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem()); it.Next(); } // Virtual methods. 
@@ -857,7 +936,7 @@ static void ResolveConstStrings(CompilerDriver* driver, continue; } previous_virtual_method_idx = method_idx; - ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem()); + ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem()); it.Next(); } DCHECK(!it.HasNext()); @@ -899,7 +978,7 @@ void CompilerDriver::PreCompile(jobject class_loader, return; } - if (GetCompilerOptions().IsForceDeterminism() && IsBootImage()) { + if (GetCompilerOptions().IsForceDeterminism() && GetCompilerOptions().IsBootImage()) { // Resolve strings from const-string. Do this now to have a deterministic image. ResolveConstStrings(this, dex_files, timings); VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false); @@ -927,7 +1006,7 @@ bool CompilerDriver::IsImageClass(const char* descriptor) const { } // No set of image classes, assume we include all the classes. // NOTE: Currently only reachable from InitImageMethodVisitor for the app image case. - return !IsBootImage(); + return !GetCompilerOptions().IsBootImage(); } bool CompilerDriver::IsClassToCompile(const char* descriptor) const { @@ -1051,7 +1130,7 @@ class RecordImageClassesVisitor : public ClassVisitor { // Make a list of descriptors for classes to include in the image void CompilerDriver::LoadImageClasses(TimingLogger* timings) { CHECK(timings != nullptr); - if (!IsBootImage()) { + if (!GetCompilerOptions().IsBootImage()) { return; } @@ -1279,7 +1358,7 @@ class ClinitImageUpdate { }; void CompilerDriver::UpdateImageClasses(TimingLogger* timings) { - if (IsBootImage()) { + if (GetCompilerOptions().IsBootImage()) { TimingLogger::ScopedTiming t("UpdateImageClasses", timings); Runtime* runtime = Runtime::Current(); @@ -1306,7 +1385,7 @@ bool CompilerDriver::CanAssumeClassIsLoaded(mirror::Class* klass) { // Having the klass reference here implies that the klass is already loaded. 
return true; } - if (!IsBootImage()) { + if (!GetCompilerOptions().IsBootImage()) { // Assume loaded only if klass is in the boot image. App classes cannot be assumed // loaded because we don't even know what class loader will be used to load them. bool class_in_image = runtime->GetHeap()->FindSpaceFromObject(klass, false)->IsImageSpace(); @@ -1328,6 +1407,54 @@ void CompilerDriver::MarkForDexToDexCompilation(Thread* self, const MethodRefere dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.dex_method_index); } +bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache, + uint32_t type_idx) { + bool result = false; + if ((GetCompilerOptions().IsBootImage() && + IsImageClass(dex_cache->GetDexFile()->StringDataByIdx( + dex_cache->GetDexFile()->GetTypeId(type_idx).descriptor_idx_))) || + Runtime::Current()->UseJitCompilation()) { + mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); + result = (resolved_class != nullptr); + } + + if (result) { + stats_->TypeInDexCache(); + } else { + stats_->TypeNotInDexCache(); + } + return result; +} + +bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, + uint32_t string_idx) { + // See also Compiler::ResolveDexFile + + bool result = false; + if (GetCompilerOptions().IsBootImage() || Runtime::Current()->UseJitCompilation()) { + ScopedObjectAccess soa(Thread::Current()); + StackHandleScope<1> hs(soa.Self()); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); + Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache( + soa.Self(), dex_file, false))); + if (GetCompilerOptions().IsBootImage()) { + // We resolve all const-string strings when building for the image. + class_linker->ResolveString(dex_file, string_idx, dex_cache); + result = true; + } else { + // Just check whether the dex cache already has the string. 
+ DCHECK(Runtime::Current()->UseJitCompilation()); + result = (dex_cache->GetResolvedString(string_idx) != nullptr); + } + } + if (result) { + stats_->StringInDexCache(); + } else { + stats_->StringNotInDexCache(); + } + return result; +} + bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx, Handle<mirror::DexCache> dex_cache, uint32_t type_idx) { @@ -1391,6 +1518,108 @@ bool CompilerDriver::CanAccessInstantiableTypeWithoutChecks(uint32_t referrer_id return result; } +bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx, + bool* is_type_initialized, bool* use_direct_type_ptr, + uintptr_t* direct_type_ptr, bool* out_is_finalizable) { + ScopedObjectAccess soa(Thread::Current()); + Runtime* runtime = Runtime::Current(); + mirror::DexCache* dex_cache = runtime->GetClassLinker()->FindDexCache( + soa.Self(), dex_file, false); + mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx); + if (resolved_class == nullptr) { + return false; + } + if (GetCompilerOptions().GetCompilePic()) { + // Do not allow a direct class pointer to be used when compiling position-independent code. + return false; + } + *out_is_finalizable = resolved_class->IsFinalizable(); + gc::Heap* heap = runtime->GetHeap(); + const bool compiling_boot = heap->IsCompilingBoot(); + const bool support_boot_image_fixup = GetSupportBootImageFixup(); + if (compiling_boot) { + // boot -> boot class pointers. + // True if the class is in the image at boot compiling time. + const bool is_image_class = GetCompilerOptions().IsBootImage() && IsImageClass( + dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_)); + // True if pc relative load works. 
+ if (is_image_class && support_boot_image_fixup) { + *is_type_initialized = resolved_class->IsInitialized(); + *use_direct_type_ptr = false; + *direct_type_ptr = 0; + return true; + } else { + return false; + } + } else if (runtime->UseJitCompilation() && !heap->IsMovableObject(resolved_class)) { + *is_type_initialized = resolved_class->IsInitialized(); + // If the class may move around, then don't embed it as a direct pointer. + *use_direct_type_ptr = true; + *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class); + return true; + } else { + // True if the class is in the image at app compiling time. + const bool class_in_image = heap->FindSpaceFromObject(resolved_class, false)->IsImageSpace(); + if (class_in_image && support_boot_image_fixup) { + // boot -> app class pointers. + *is_type_initialized = resolved_class->IsInitialized(); + // TODO This is somewhat hacky. We should refactor all of this invoke codepath. + *use_direct_type_ptr = !GetCompilerOptions().GetIncludePatchInformation(); + *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class); + return true; + } else { + // app -> app class pointers. + // Give up because app does not have an image and class + // isn't created at compile time. TODO: implement this + // if/when each app gets an image. + return false; + } + } +} + +bool CompilerDriver::CanEmbedReferenceTypeInCode(ClassReference* ref, + bool* use_direct_ptr, + uintptr_t* direct_type_ptr) { + CHECK(ref != nullptr); + CHECK(use_direct_ptr != nullptr); + CHECK(direct_type_ptr != nullptr); + + ScopedObjectAccess soa(Thread::Current()); + mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference(); + bool is_initialized = false; + bool unused_finalizable; + // Make sure we have a finished Reference class object before attempting to use it. 
+ if (!CanEmbedTypeInCode(*reference_class->GetDexCache()->GetDexFile(), + reference_class->GetDexTypeIndex(), &is_initialized, + use_direct_ptr, direct_type_ptr, &unused_finalizable) || + !is_initialized) { + return false; + } + ref->first = &reference_class->GetDexFile(); + ref->second = reference_class->GetDexClassDefIndex(); + return true; +} + +uint32_t CompilerDriver::GetReferenceSlowFlagOffset() const { + ScopedObjectAccess soa(Thread::Current()); + mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); + DCHECK(klass->IsInitialized()); + return klass->GetSlowPathFlagOffset().Uint32Value(); +} + +uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const { + ScopedObjectAccess soa(Thread::Current()); + mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); + DCHECK(klass->IsInitialized()); + return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); +} + +DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) { + return ContainsElement(GetDexFilesForOatFile(), dex_file) + ? 
DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file) + : DexCacheArraysLayout(); +} + void CompilerDriver::ProcessedInstanceField(bool resolved) { if (!resolved) { stats_->UnresolvedInstanceField(); @@ -1409,6 +1638,10 @@ void CompilerDriver::ProcessedStaticField(bool resolved, bool local) { } } +void CompilerDriver::ProcessedInvoke(InvokeType invoke_type, int flags) { + stats_->ProcessedInvoke(invoke_type, flags); +} + ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, const ScopedObjectAccess& soa) { @@ -1496,7 +1729,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(const mirror::Class* referrer if (!use_dex_cache && force_relocations) { bool is_in_image; - if (IsBootImage()) { + if (GetCompilerOptions().IsBootImage()) { is_in_image = IsImageClass(method->GetDeclaringClassDescriptor()); } else { is_in_image = instruction_set_ != kX86 && instruction_set_ != kX86_64 && @@ -1895,7 +2128,7 @@ void CompilerDriver::ResolveDexFile(jobject class_loader, ParallelCompilationManager context(class_linker, class_loader, this, &dex_file, dex_files, thread_pool); - if (IsBootImage()) { + if (GetCompilerOptions().IsBootImage()) { // For images we resolve all types, such as array, whereas for applications just those with // classdefs are resolved by ResolveClassFieldsAndMethods. TimingLogger::ScopedTiming t("Resolve Types", timings); @@ -2005,7 +2238,7 @@ class VerifyClassVisitor : public CompilationVisitor { // It is *very* problematic if there are verification errors in the boot classpath. For example, // we rely on things working OK without verification when the decryption dialog is brought up. // So abort in a debug build if we find this violated. 
- DCHECK(!manager_->GetCompiler()->IsBootImage() || klass->IsVerified()) + DCHECK(!manager_->GetCompiler()->GetCompilerOptions().IsBootImage() || klass->IsVerified()) << "Boot classpath class " << PrettyClass(klass.Get()) << " failed to fully verify."; } soa.Self()->AssertNoPendingException(); @@ -2136,7 +2369,8 @@ class InitializeClassVisitor : public CompilationVisitor { if (!klass->IsInitialized()) { // We need to initialize static fields, we only do this for image classes that aren't // marked with the $NoPreloadHolder (which implies this should not be initialized early). - bool can_init_static_fields = manager_->GetCompiler()->IsBootImage() && + bool can_init_static_fields = + manager_->GetCompiler()->GetCompilerOptions().IsBootImage() && manager_->GetCompiler()->IsImageClass(descriptor) && !StringPiece(descriptor).ends_with("$NoPreloadHolder;"); if (can_init_static_fields) { @@ -2208,7 +2442,7 @@ void CompilerDriver::InitializeClasses(jobject jni_class_loader, ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files, init_thread_pool); - if (IsBootImage()) { + if (GetCompilerOptions().IsBootImage()) { // TODO: remove this when transactional mode supports multithreading. init_thread_count = 1U; } @@ -2262,7 +2496,7 @@ void CompilerDriver::InitializeClasses(jobject class_loader, CHECK(dex_file != nullptr); InitializeClasses(class_loader, *dex_file, dex_files, timings); } - if (boot_image_ || app_image_) { + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) { // Make sure that we call EnsureIntiailized on all the array classes to call // SetVerificationAttempted so that the access flags are set. If we do not do this they get // changed at runtime resulting in more dirty image pages. 
@@ -2272,7 +2506,7 @@ void CompilerDriver::InitializeClasses(jobject class_loader, InitializeArrayClassesAndCreateConflictTablesVisitor visitor; Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(&visitor); } - if (IsBootImage()) { + if (GetCompilerOptions().IsBootImage()) { // Prune garbage objects created during aborted transactions. Runtime::Current()->GetHeap()->CollectGarbage(true); } diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index fc63df1925..eb1222c315 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -90,8 +90,6 @@ class CompilerDriver { Compiler::Kind compiler_kind, InstructionSet instruction_set, const InstructionSetFeatures* instruction_set_features, - bool boot_image, - bool app_image, std::unordered_set<std::string>* image_classes, std::unordered_set<std::string>* compiled_classes, std::unordered_set<std::string>* compiled_methods, @@ -147,11 +145,6 @@ class CompilerDriver { return compiler_.get(); } - // Are we compiling and creating an image file? - bool IsBootImage() const { - return boot_image_; - } - const std::unordered_set<std::string>* GetImageClasses() const { return image_classes_.get(); } @@ -189,6 +182,15 @@ class CompilerDriver { uint16_t class_def_index) REQUIRES(!requires_constructor_barrier_lock_); + // Callbacks from compiler to see what runtime checks must be generated. + + bool CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache, + uint32_t type_idx) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx) + REQUIRES(!Locks::mutator_lock_); + // Are runtime access checks necessary in the compiled code? 
bool CanAccessTypeWithoutChecks(uint32_t referrer_idx, Handle<mirror::DexCache> dex_cache, @@ -203,6 +205,24 @@ class CompilerDriver { bool* out_is_finalizable) REQUIRES_SHARED(Locks::mutator_lock_); + bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx, + bool* is_type_initialized, bool* use_direct_type_ptr, + uintptr_t* direct_type_ptr, bool* out_is_finalizable); + + // Query methods for the java.lang.ref.Reference class. + bool CanEmbedReferenceTypeInCode(ClassReference* ref, + bool* use_direct_type_ptr, uintptr_t* direct_type_ptr); + uint32_t GetReferenceSlowFlagOffset() const; + uint32_t GetReferenceDisableFlagOffset() const; + + // Get the DexCache for the given compilation unit. + mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit) + REQUIRES_SHARED(Locks::mutator_lock_); + + mirror::ClassLoader* GetClassLoader(const ScopedObjectAccess& soa, + const DexCompilationUnit* mUnit) + REQUIRES_SHARED(Locks::mutator_lock_); + // Resolve compiling method's class. Returns null on failure. mirror::Class* ResolveCompilingMethodsClass( const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, @@ -230,6 +250,19 @@ class CompilerDriver { uint32_t field_idx, bool is_static) REQUIRES_SHARED(Locks::mutator_lock_); + // Get declaration location of a resolved field. + void GetResolvedFieldDexFileLocation( + ArtField* resolved_field, const DexFile** declaring_dex_file, + uint16_t* declaring_class_idx, uint16_t* declaring_field_idx) + REQUIRES_SHARED(Locks::mutator_lock_); + + bool IsFieldVolatile(ArtField* field) REQUIRES_SHARED(Locks::mutator_lock_); + MemberOffset GetFieldOffset(ArtField* field) REQUIRES_SHARED(Locks::mutator_lock_); + + // Find a dex cache for a dex file. + inline mirror::DexCache* FindDexCache(const DexFile* dex_file) + REQUIRES_SHARED(Locks::mutator_lock_); + // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset.
std::pair<bool, bool> IsFastInstanceField( mirror::DexCache* dex_cache, mirror::Class* referrer_class, @@ -255,6 +288,15 @@ class CompilerDriver { uint32_t* storage_index) REQUIRES_SHARED(Locks::mutator_lock_); + // Is static field in referrer's class? + bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, ArtField* resolved_field) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Is static field's class initialized? + bool IsStaticFieldsClassInitialized(mirror::Class* referrer_class, + ArtField* resolved_field) + REQUIRES_SHARED(Locks::mutator_lock_); + // Resolve a method. Returns null on failure, including incompatible class change. ArtMethod* ResolveMethod( ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache, @@ -262,8 +304,37 @@ class CompilerDriver { uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change = true) REQUIRES_SHARED(Locks::mutator_lock_); + // Get declaration location of a resolved method. + void GetResolvedMethodDexFileLocation( + ArtMethod* resolved_method, const DexFile** declaring_dex_file, + uint16_t* declaring_class_idx, uint16_t* declaring_method_idx) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Get the index in the vtable of the method. + uint16_t GetResolvedMethodVTableIndex( + ArtMethod* resolved_method, InvokeType type) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Is method's class initialized for an invoke? + // For static invokes to determine whether we need to consider potential call to <clinit>(). + // For non-static invokes, assuming a non-null reference, the class is always initialized. + bool IsMethodsClassInitialized(mirror::Class* referrer_class, ArtMethod* resolved_method) + REQUIRES_SHARED(Locks::mutator_lock_); + + // Get the layout of dex cache arrays for a dex file. Returns invalid layout if the + // dex cache arrays don't have a fixed layout.
+ DexCacheArraysLayout GetDexCacheArraysLayout(const DexFile* dex_file); + void ProcessedInstanceField(bool resolved); void ProcessedStaticField(bool resolved, bool local); + void ProcessedInvoke(InvokeType invoke_type, int flags); + + void ComputeFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, + const ScopedObjectAccess& soa, bool is_static, + ArtField** resolved_field, + mirror::Class** referrer_class, + mirror::DexCache** dex_cache) + REQUIRES_SHARED(Locks::mutator_lock_); // Can we fast path instance field access? Computes field's offset and volatility. bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put, @@ -315,7 +386,6 @@ class CompilerDriver { void SetDedupeEnabled(bool dedupe_enabled) { compiled_method_storage_.SetDedupeEnabled(dedupe_enabled); } - bool DedupeEnabled() const { return compiled_method_storage_.DedupeEnabled(); } @@ -379,13 +449,6 @@ class CompilerDriver { return current_dex_to_dex_methods_; } - // Compute constant code and method pointers when possible. - void GetCodeAndMethodForDirectCall(const mirror::Class* referrer_class, - ArtMethod* method, - /* out */ uintptr_t* direct_code, - /* out */ uintptr_t* direct_method) - REQUIRES_SHARED(Locks::mutator_lock_); - private: // Return whether the declaring class of `resolved_member` is // available to `referrer_class` for read or write access using two @@ -414,9 +477,38 @@ class CompilerDriver { uint32_t field_idx) REQUIRES_SHARED(Locks::mutator_lock_); - mirror::ClassLoader* GetClassLoader(const ScopedObjectAccess& soa, - const DexCompilationUnit* mUnit) - REQUIRES_SHARED(Locks::mutator_lock_); + // Can we assume that the klass is initialized? 
+ bool CanAssumeClassIsInitialized(mirror::Class* klass) + REQUIRES_SHARED(Locks::mutator_lock_); + bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass) + REQUIRES_SHARED(Locks::mutator_lock_); + + // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics. + // The only external contract is that unresolved method has flags 0 and resolved non-0. + enum { + kBitMethodResolved = 0, + kBitVirtualMadeDirect, + kBitPreciseTypeDevirtualization, + kBitDirectCallToBoot, + kBitDirectMethodToBoot + }; + static constexpr int kFlagMethodResolved = 1 << kBitMethodResolved; + static constexpr int kFlagVirtualMadeDirect = 1 << kBitVirtualMadeDirect; + static constexpr int kFlagPreciseTypeDevirtualization = 1 << kBitPreciseTypeDevirtualization; + static constexpr int kFlagDirectCallToBoot = 1 << kBitDirectCallToBoot; + static constexpr int kFlagDirectMethodToBoot = 1 << kBitDirectMethodToBoot; + static constexpr int kFlagsMethodResolvedVirtualMadeDirect = + kFlagMethodResolved | kFlagVirtualMadeDirect; + static constexpr int kFlagsMethodResolvedPreciseTypeDevirtualization = + kFlagsMethodResolvedVirtualMadeDirect | kFlagPreciseTypeDevirtualization; + + public: // TODO make private or eliminate. + // Compute constant code and method pointers when possible. 
+ void GetCodeAndMethodForDirectCall(const mirror::Class* referrer_class, + ArtMethod* method, + /* out */ uintptr_t* direct_code, + /* out */ uintptr_t* direct_method) + REQUIRES_SHARED(Locks::mutator_lock_); private: void PreCompile(jobject class_loader, @@ -474,6 +566,8 @@ class CompilerDriver { REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_); void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_); + static void FindClinitImageClassesCallback(mirror::Object* object, void* arg) + REQUIRES_SHARED(Locks::mutator_lock_); void Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files, @@ -527,9 +621,6 @@ class CompilerDriver { // in the .oat_patches ELF section if requested in the compiler options. size_t non_relative_linker_patch_count_ GUARDED_BY(compiled_methods_lock_); - const bool boot_image_; - const bool app_image_; - // If image_ is true, specifies the classes that will be included in the image. // Note if image_classes_ is null, all classes are included in the image. 
std::unique_ptr<std::unordered_set<std::string>> image_classes_; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 30ba8c9e74..cbcc169f41 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -30,6 +30,8 @@ CompilerOptions::CompilerOptions() inline_depth_limit_(kUnsetInlineDepthLimit), inline_max_code_units_(kUnsetInlineMaxCodeUnits), no_inline_from_(nullptr), + boot_image_(false), + app_image_(false), include_patch_information_(kDefaultIncludePatchInformation), top_k_profile_threshold_(kDefaultTopKProfileThreshold), debuggable_(false), @@ -78,34 +80,35 @@ CompilerOptions::CompilerOptions(CompilerFilter::Filter compiler_filter, bool dump_cfg_append, bool force_determinism, RegisterAllocator::Strategy regalloc_strategy, - const std::vector<std::string>* passes_to_run - ) : // NOLINT(whitespace/parens) - compiler_filter_(compiler_filter), - huge_method_threshold_(huge_method_threshold), - large_method_threshold_(large_method_threshold), - small_method_threshold_(small_method_threshold), - tiny_method_threshold_(tiny_method_threshold), - num_dex_methods_threshold_(num_dex_methods_threshold), - inline_depth_limit_(inline_depth_limit), - inline_max_code_units_(inline_max_code_units), - no_inline_from_(no_inline_from), - include_patch_information_(include_patch_information), - top_k_profile_threshold_(top_k_profile_threshold), - debuggable_(debuggable), - generate_debug_info_(generate_debug_info), - generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo), - implicit_null_checks_(implicit_null_checks), - implicit_so_checks_(implicit_so_checks), - implicit_suspend_checks_(implicit_suspend_checks), - compile_pic_(compile_pic), - verbose_methods_(verbose_methods), - abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure), - init_failure_output_(init_failure_output), - dump_cfg_file_name_(dump_cfg_file_name), - dump_cfg_append_(dump_cfg_append), - force_determinism_(force_determinism), 
- register_allocation_strategy_(regalloc_strategy), - passes_to_run_(passes_to_run) { + const std::vector<std::string>* passes_to_run) + : compiler_filter_(compiler_filter), + huge_method_threshold_(huge_method_threshold), + large_method_threshold_(large_method_threshold), + small_method_threshold_(small_method_threshold), + tiny_method_threshold_(tiny_method_threshold), + num_dex_methods_threshold_(num_dex_methods_threshold), + inline_depth_limit_(inline_depth_limit), + inline_max_code_units_(inline_max_code_units), + no_inline_from_(no_inline_from), + boot_image_(false), + app_image_(false), + include_patch_information_(include_patch_information), + top_k_profile_threshold_(top_k_profile_threshold), + debuggable_(debuggable), + generate_debug_info_(generate_debug_info), + generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo), + implicit_null_checks_(implicit_null_checks), + implicit_so_checks_(implicit_so_checks), + implicit_suspend_checks_(implicit_suspend_checks), + compile_pic_(compile_pic), + verbose_methods_(verbose_methods), + abort_on_hard_verifier_failure_(abort_on_hard_verifier_failure), + init_failure_output_(init_failure_output), + dump_cfg_file_name_(dump_cfg_file_name), + dump_cfg_append_(dump_cfg_append), + force_determinism_(force_determinism), + register_allocation_strategy_(regalloc_strategy), + passes_to_run_(passes_to_run) { } void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) { diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index abc58d7dda..8e4a775558 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -203,6 +203,14 @@ class CompilerOptions FINAL { return include_patch_information_; } + bool IsBootImage() const { + return boot_image_; + } + + bool IsAppImage() const { + return app_image_; + } + // Should the code be compiled as position independent? 
bool GetCompilePic() const { return compile_pic_; @@ -281,6 +289,8 @@ class CompilerOptions FINAL { // prefer vector<> over a lookup-oriented container, such as set<>. const std::vector<const DexFile*>* no_inline_from_; + bool boot_image_; + bool app_image_; bool include_patch_information_; // When using a profile file only the top K% of the profiled samples will be compiled. double top_k_profile_threshold_; @@ -305,7 +315,7 @@ class CompilerOptions FINAL { std::string dump_cfg_file_name_; bool dump_cfg_append_; - // Whether the compiler should trade performance for determinism to guarantee exactly reproducable + // Whether the compiler should trade performance for determinism to guarantee exactly reproducible // outcomes. bool force_determinism_; @@ -320,6 +330,7 @@ class CompilerOptions FINAL { const std::vector<std::string>* passes_to_run_; friend class Dex2Oat; + friend class CommonCompilerTest; DISALLOW_COPY_AND_ASSIGN(CompilerOptions); }; diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h index 02831c9dc7..73240bed03 100644 --- a/compiler/elf_builder.h +++ b/compiler/elf_builder.h @@ -619,7 +619,8 @@ class ElfBuilder FINAL { void PrepareDynamicSection(const std::string& elf_file_path, Elf_Word rodata_size, Elf_Word text_size, - Elf_Word bss_size) { + Elf_Word bss_size, + Elf_Word bss_roots_offset) { std::string soname(elf_file_path); size_t directory_separator_pos = soname.rfind('/'); if (directory_separator_pos != std::string::npos) { @@ -659,10 +660,20 @@ class ElfBuilder FINAL { Elf_Word oatlastword_address = rodata_address + rodata_size - 4; dynsym_.Add(oatlastword, rodata_index, oatlastword_address, 4, STB_GLOBAL, STT_OBJECT); } + DCHECK_LE(bss_roots_offset, bss_size); if (bss_size != 0u) { Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 
1u : 0u); Elf_Word oatbss = dynstr_.Add("oatbss"); - dynsym_.Add(oatbss, bss_index, bss_address, bss_size, STB_GLOBAL, STT_OBJECT); + dynsym_.Add(oatbss, bss_index, bss_address, bss_roots_offset, STB_GLOBAL, STT_OBJECT); + // Add a symbol marking the start of the GC roots part of the .bss, if not empty. + if (bss_roots_offset != bss_size) { + DCHECK_LT(bss_roots_offset, bss_size); + Elf_Word bss_roots_address = bss_address + bss_roots_offset; + Elf_Word bss_roots_size = bss_size - bss_roots_offset; + Elf_Word oatbssroots = dynstr_.Add("oatbssroots"); + dynsym_.Add( + oatbssroots, bss_index, bss_roots_address, bss_roots_size, STB_GLOBAL, STT_OBJECT); + } Elf_Word oatbsslastword = dynstr_.Add("oatbsslastword"); Elf_Word bsslastword_address = bss_address + bss_size - 4; dynsym_.Add(oatbsslastword, bss_index, bsslastword_address, 4, STB_GLOBAL, STT_OBJECT); diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h index f8f91029d4..d55f7458b2 100644 --- a/compiler/elf_writer.h +++ b/compiler/elf_writer.h @@ -52,7 +52,10 @@ class ElfWriter { virtual ~ElfWriter() {} virtual void Start() = 0; - virtual void SetLoadedSectionSizes(size_t rodata_size, size_t text_size, size_t bss_size) = 0; + virtual void PrepareDynamicSection(size_t rodata_size, + size_t text_size, + size_t bss_size, + size_t bss_roots_offset) = 0; virtual void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0; virtual OutputStream* StartRoData() = 0; virtual void EndRoData(OutputStream* rodata) = 0; diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index bed864b534..36cd2327c4 100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -93,7 +93,10 @@ class ElfWriterQuick FINAL : public ElfWriter { ~ElfWriterQuick(); void Start() OVERRIDE; - void SetLoadedSectionSizes(size_t rodata_size, size_t text_size, size_t bss_size) OVERRIDE; + void PrepareDynamicSection(size_t rodata_size, + size_t text_size, + size_t bss_size, + size_t 
bss_roots_offset) OVERRIDE; void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE; OutputStream* StartRoData() OVERRIDE; void EndRoData(OutputStream* rodata) OVERRIDE; @@ -167,16 +170,21 @@ void ElfWriterQuick<ElfTypes>::Start() { } template <typename ElfTypes> -void ElfWriterQuick<ElfTypes>::SetLoadedSectionSizes(size_t rodata_size, +void ElfWriterQuick<ElfTypes>::PrepareDynamicSection(size_t rodata_size, size_t text_size, - size_t bss_size) { + size_t bss_size, + size_t bss_roots_offset) { DCHECK_EQ(rodata_size_, 0u); rodata_size_ = rodata_size; DCHECK_EQ(text_size_, 0u); text_size_ = text_size; DCHECK_EQ(bss_size_, 0u); bss_size_ = bss_size; - builder_->PrepareDynamicSection(elf_file_->GetPath(), rodata_size_, text_size_, bss_size_); + builder_->PrepareDynamicSection(elf_file_->GetPath(), + rodata_size_, + text_size_, + bss_size_, + bss_roots_offset); } template <typename ElfTypes> diff --git a/compiler/image_test.cc b/compiler/image_test.cc index 4689c9d300..9e94b9d861 100644 --- a/compiler/image_test.cc +++ b/compiler/image_test.cc @@ -263,7 +263,10 @@ void CompilationHelper::Compile(CompilerDriver* driver, oat_writer->PrepareLayout(driver, writer.get(), cur_dex_files, &patcher); size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset(); size_t text_size = oat_writer->GetOatSize() - rodata_size; - elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer->GetBssSize()); + elf_writer->PrepareDynamicSection(rodata_size, + text_size, + oat_writer->GetBssSize(), + oat_writer->GetBssRootsOffset()); writer->UpdateOatFileLayout(i, elf_writer->GetLoadedSize(), diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 4f8690530b..4ef2db8b91 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -156,8 +156,6 @@ JitCompiler::JitCompiler() { Compiler::kOptimizing, instruction_set, instruction_set_features_.get(), - /* boot_image */ false, - /* app_image */ false, /* 
image_classes */ nullptr, /* compiled_classes */ nullptr, /* compiled_methods */ nullptr, diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 4c8788e30d..3b7788068e 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -222,9 +222,10 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, } shift = 0u; // No shift for ADD. } else { - // LDR 32-bit or 64-bit with imm12 == 0 (unset). - DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray) << patch.GetType(); - DCHECK_EQ(insn & 0xbffffc00, 0xb9400000) << std::hex << insn; + // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset). + DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray || + patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType(); + DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn; } if (kIsDebugBuild) { uint32_t adrp = GetInsn(code, pc_insn_offset); diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h index 62b3a0a167..015178980c 100644 --- a/compiler/linker/relative_patcher_test.h +++ b/compiler/linker/relative_patcher_test.h @@ -47,8 +47,6 @@ class RelativePatcherTest : public testing::Test { Compiler::kQuick, instruction_set, /* instruction_set_features*/ nullptr, - /* boot_image */ false, - /* app_image */ false, /* image_classes */ nullptr, /* compiled_classes */ nullptr, /* compiled_methods */ nullptr, diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 9f352ce093..b4c60d15da 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -108,8 +108,6 @@ class OatTest : public CommonCompilerTest { compiler_kind, insn_set, insn_features_.get(), - /* boot_image */ false, - /* app_image */ false, /* image_classes */ nullptr, /* compiled_classes */ nullptr, /* compiled_methods */ nullptr, @@ -194,6 +192,7 @@ class OatTest : public CommonCompilerTest { 
&opened_dex_files)) { return false; } + Runtime* runtime = Runtime::Current(); ClassLinker* const class_linker = runtime->GetClassLinker(); std::vector<const DexFile*> dex_files; @@ -207,7 +206,10 @@ class OatTest : public CommonCompilerTest { oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files, &patcher); size_t rodata_size = oat_writer.GetOatHeader().GetExecutableOffset(); size_t text_size = oat_writer.GetOatSize() - rodata_size; - elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer.GetBssSize()); + elf_writer->PrepareDynamicSection(rodata_size, + text_size, + oat_writer.GetBssSize(), + oat_writer.GetBssRootsOffset()); if (!oat_writer.WriteRodata(oat_rodata)) { return false; @@ -228,7 +230,15 @@ class OatTest : public CommonCompilerTest { elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo()); elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations()); - return elf_writer->End(); + if (!elf_writer->End()) { + return false; + } + + opened_dex_files_maps_.emplace_back(std::move(opened_dex_files_map)); + for (std::unique_ptr<const DexFile>& dex_file : opened_dex_files) { + opened_dex_files_.emplace_back(dex_file.release()); + } + return true; } void TestDexFileInput(bool verify, bool low_4gb); @@ -236,6 +246,9 @@ class OatTest : public CommonCompilerTest { std::unique_ptr<const InstructionSetFeatures> insn_features_; std::unique_ptr<QuickCompilerCallbacks> callbacks_; + + std::vector<std::unique_ptr<MemMap>> opened_dex_files_maps_; + std::vector<std::unique_ptr<const DexFile>> opened_dex_files_; }; class ZipBuilder { diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index 54ec7c1edb..44c26edd71 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -300,7 +300,10 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings) vdex_dex_files_offset_(0u), vdex_verifier_deps_offset_(0u), oat_size_(0u), + bss_start_(0u), bss_size_(0u), + bss_roots_offset_(0u), + bss_string_entries_(), 
oat_data_offset_(0u), oat_header_(nullptr), size_vdex_header_(0), @@ -554,15 +557,8 @@ void OatWriter::PrepareLayout(const CompilerDriver* compiler, oat_size_ = offset; if (!HasBootImage()) { - // Allocate space for app dex cache arrays in the .bss section. - size_t bss_start = RoundUp(oat_size_, kPageSize); - PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set); - bss_size_ = 0u; - for (const DexFile* dex_file : *dex_files_) { - dex_cache_arrays_offsets_.Put(dex_file, bss_start + bss_size_); - DexCacheArraysLayout layout(pointer_size, dex_file); - bss_size_ += layout.Size(); - } + TimingLogger::ScopedTiming split("InitBssLayout", timings_); + InitBssLayout(instruction_set); } CHECK_EQ(dex_files_->size(), oat_dex_files_.size()); @@ -805,6 +801,10 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor { if (!patch.IsPcRelative()) { writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset()); } + if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) { + StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); + writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u); + } } } } @@ -1115,6 +1115,15 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor { target_offset); break; } + case LinkerPatch::Type::kStringBssEntry: { + StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex()); + uint32_t target_offset = writer_->bss_string_entries_.Get(ref); + writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, + patch, + offset_ + literal_offset, + target_offset); + break; + } case LinkerPatch::Type::kTypeRelative: { uint32_t target_offset = GetTargetObjectOffset(GetTargetType(patch)); writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_, @@ -1500,7 +1509,7 @@ size_t OatWriter::InitOatCode(size_t offset) { offset = RoundUp(offset, kPageSize); oat_header_->SetExecutableOffset(offset); size_executable_offset_alignment_ = 
offset - old_offset; - if (compiler_driver_->IsBootImage()) { + if (compiler_driver_->GetCompilerOptions().IsBootImage()) { InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); #define DO_TRAMPOLINE(field, fn_name) \ @@ -1548,6 +1557,29 @@ size_t OatWriter::InitOatCodeDexFiles(size_t offset) { return offset; } +void OatWriter::InitBssLayout(InstructionSet instruction_set) { + DCHECK(!HasBootImage()); + + // Allocate space for app dex cache arrays in the .bss section. + bss_start_ = RoundUp(oat_size_, kPageSize); + PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set); + bss_size_ = 0u; + for (const DexFile* dex_file : *dex_files_) { + dex_cache_arrays_offsets_.Put(dex_file, bss_start_ + bss_size_); + DexCacheArraysLayout layout(pointer_size, dex_file); + bss_size_ += layout.Size(); + } + + bss_roots_offset_ = bss_size_; + + // Prepare offsets for .bss String entries. + for (auto& entry : bss_string_entries_) { + DCHECK_EQ(entry.second, 0u); + entry.second = bss_start_ + bss_size_; + bss_size_ += sizeof(GcRoot<mirror::String>); + } +} + bool OatWriter::WriteRodata(OutputStream* out) { CHECK(write_state_ == WriteState::kWriteRoData); @@ -1736,7 +1768,7 @@ bool OatWriter::WriteHeader(OutputStream* out, oat_header_->SetImageFileLocationOatChecksum(image_file_location_oat_checksum); oat_header_->SetImageFileLocationOatDataBegin(image_file_location_oat_begin); - if (compiler_driver_->IsBootImage()) { + if (compiler_driver_->GetCompilerOptions().IsBootImage()) { CHECK_EQ(image_patch_delta, 0); CHECK_EQ(oat_header_->GetImagePatchDelta(), 0); } else { @@ -1826,7 +1858,7 @@ size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t } size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) { - if (compiler_driver_->IsBootImage()) { + if (compiler_driver_->GetCompilerOptions().IsBootImage()) { InstructionSet instruction_set = compiler_driver_->GetInstructionSet(); #define 
DO_TRAMPOLINE(field) \ diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h index 670accbbaf..1cc193b341 100644 --- a/compiler/oat_writer.h +++ b/compiler/oat_writer.h @@ -30,6 +30,7 @@ #include "oat.h" #include "os.h" #include "safe_map.h" +#include "string_reference.h" namespace art { @@ -194,6 +195,10 @@ class OatWriter { return bss_size_; } + size_t GetBssRootsOffset() const { + return bss_roots_offset_; + } + size_t GetOatDataOffset() const { return oat_data_offset_; } @@ -265,6 +270,7 @@ class OatWriter { size_t InitOatMaps(size_t offset); size_t InitOatCode(size_t offset); size_t InitOatCodeDexFiles(size_t offset); + void InitBssLayout(InstructionSet instruction_set); bool WriteClassOffsets(OutputStream* out); bool WriteClasses(OutputStream* out); @@ -322,9 +328,20 @@ class OatWriter { // Size required for Oat data structures. size_t oat_size_; - // The size of the required .bss section holding the DexCache data. + // The start of the required .bss section. + size_t bss_start_; + + // The size of the required .bss section holding the DexCache data and GC roots. size_t bss_size_; + // The offset of the GC roots in .bss section. + size_t bss_roots_offset_; + + // Map for allocating String entries in .bss. Indexed by StringReference for the source + // string in the dex file with the "string value comparator" for deduplication. The value + // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`. + SafeMap<StringReference, size_t, StringReferenceValueComparator> bss_string_entries_; + // Offsets of the dex cache arrays for each app dex file. For the // boot image, this information is provided by the ImageWriter. SafeMap<const DexFile*, size_t> dex_cache_arrays_offsets_; // DexFiles not owned. 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 85002045a3..49f4f18390 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -531,40 +531,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t GetReferenceDisableFlagOffset() const; protected: - // Method patch info used for recording locations of required linker patches and - // target methods. The target method can be used for various purposes, whether for - // patching the address of the method or the code pointer or a PC-relative call. + // Patch info used for recording locations of required linker patches and their targets, + // i.e. target method, string, type or code identified by their dex file and index. template <typename LabelType> - struct MethodPatchInfo { - explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { } - - MethodReference target_method; - LabelType label; - }; - - // String patch info used for recording locations of required linker patches and - // target strings. The actual string address can be absolute or PC-relative. - template <typename LabelType> - struct StringPatchInfo { - StringPatchInfo(const DexFile& df, uint32_t index) - : dex_file(df), string_index(index), label() { } - - const DexFile& dex_file; - uint32_t string_index; - LabelType label; - }; - - // Type patch info used for recording locations of required linker patches and - // target types. The actual type address can be absolute or PC-relative. - // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these - // classes contain the dex file, some index and the label. 
- template <typename LabelType> - struct TypePatchInfo { - TypePatchInfo(const DexFile& df, uint32_t index) - : dex_file(df), type_index(index), label() { } + struct PatchInfo { + PatchInfo(const DexFile& target_dex_file, uint32_t target_index) + : dex_file(target_dex_file), index(target_index) { } const DexFile& dex_file; - uint32_t type_index; + uint32_t index; LabelType label; }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 681988d2ac..9870876879 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -422,6 +422,50 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM); }; +class LoadStringSlowPathARM : public SlowPathCodeARM { + public: + explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCodeARM(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); + arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. 
+ CodeGeneratorARM::PcRelativePatchInfo* labels = + arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + __ BindTrackedLabel(&labels->movw_label); + __ movw(IP, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(IP, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(IP, IP, ShifterOperand(PC)); + __ str(locations->Out().AsRegister<Register>(), Address(IP)); + + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM); +}; + class TypeCheckSlowPathARM : public SlowPathCodeARM { public: TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal) @@ -5641,15 +5685,8 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); - // We disable pc-relative load when there is an irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods - // with irreducible loops. - if (GetGraph()->HasIrreducibleLoops()) { - return HLoadString::LoadKind::kDexCacheViaMethod; - } break; case HLoadString::LoadKind::kDexCacheViaMethod: break; @@ -5659,12 +5696,13 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - DCHECK(load_kind != HLoadString::LoadKind::kDexCachePcRelative) << "Not supported"; if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RegisterLocation(R0)); @@ -5686,6 +5724,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARM::PcRelativePatchInfo* labels = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); __ BindTrackedLabel(&labels->movw_label); @@ -5702,6 +5741,23 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. 
} + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorARM::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(out, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(out, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(out, out, ShifterOperand(PC)); + GenerateGcRootFieldLoad(load, out_loc, out, 0); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); + codegen_->AddSlowPath(slow_path); + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } @@ -6850,7 +6906,8 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ bl(GetFrameEntryLabel()); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ BindTrackedLabel(&relative_call_patches_.back().label); // Arbitrarily branch to the BL itself, override at link time. 
__ bl(&relative_call_patches_.back().label); @@ -6952,17 +7009,37 @@ Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) { return DeduplicateUint32Literal(address, &uint32_literals_); } +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); + // Add MOVW patch. + DCHECK(info.movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); + linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + // Add MOVT patch. + DCHECK(info.movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); + linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + } +} + void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + boot_image_address_patches_.size(); 
linker_patches->reserve(size); for (const auto& entry : method_patches_) { @@ -6983,32 +7060,13 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_method.dex_file, target_method.dex_method_index)); } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position(); - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t base_element_offset = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset, - &dex_file, - add_pc_offset, - base_element_offset)); - // Add MOVT patch. 
- DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset, - &dex_file, - add_pc_offset, - base_element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const auto& entry : boot_image_string_patches_) { const StringReference& target_string = entry.first; Literal* literal = entry.second; @@ -7018,25 +7076,12 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_string.dex_file, target_string.string_index)); } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t string_index = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(movw_offset, - &dex_file, - add_pc_offset, - string_index)); - // Add MOVT patch. 
- DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(movt_offset, - &dex_file, - add_pc_offset, - string_index)); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } for (const auto& entry : boot_image_type_patches_) { const TypeReference& target_type = entry.first; @@ -7047,26 +7092,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_type.dex_file, target_type.type_index)); } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t type_index = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset, - &dex_file, - add_pc_offset, - type_index)); - // Add MOVT patch. 
- DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset, - &dex_file, - add_pc_offset, - type_index)); - } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); Literal* literal = entry.second; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 6416d40f7f..ef2e23f258 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -593,6 +593,10 @@ class CodeGeneratorARM : public CodeGenerator { uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. Label frame_entry_label_; @@ -609,12 +613,12 @@ class CodeGeneratorARM : public CodeGenerator { MethodToLiteralMap call_patches_; // Relative call patch info. // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). 
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4f7f36bb5a..969d653f97 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -329,6 +329,55 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); }; +class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ Mov(calling_convention.GetRegisterAt(0).W(), string_index); + arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + Primitive::Type type = instruction_->GetType(); + arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. 
+ UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); + Register temp = temps.AcquireX(); + const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile(); + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary + // for the ADRP in the fast path, so that we can avoid the ADRP here. + vixl::aarch64::Label* adrp_label = + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); + arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp); + vixl::aarch64::Label* strp_label = + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + { + SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); + __ Bind(strp_label); + __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), + MemOperand(temp, /* offset placeholder */ 0)); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); +}; + class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} @@ -3631,19 +3680,11 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok const DexFile& dex_file = invoke->GetDexFile(); uint32_t element_offset = invoke->GetDexCacheArrayOffset(); vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0); - } + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add LDR with its PC-relative DexCache access patch. 
vixl::aarch64::Label* ldr_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(ldr_label); - __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0)); - } + EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -3676,7 +3717,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Bl(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); vixl::aarch64::Label* label = &relative_call_patches_.back().label; SingleEmissionCheckScope guard(GetVIXLAssembler()); __ Bind(label); @@ -3798,6 +3840,45 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddress return DeduplicateUint64Literal(address); } +void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register reg) { + DCHECK(reg.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ adrp(reg, /* offset placeholder */ 0); +} + +void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base) { + DCHECK(out.IsX()); + DCHECK(base.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ add(out, base, Operand(/* offset placeholder */ 0)); +} + +void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base) { + DCHECK(base.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ ldr(out, MemOperand(base, /* offset 
placeholder */ 0)); +} + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + linker_patches->push_back(Factory(info.label.GetLocation(), + &info.target_dex_file, + info.pc_insn_label->GetLocation(), + info.offset_or_index)); + } +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -3825,10 +3906,9 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_method.dex_file, target_method.dex_method_index)); } - for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { - linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(), - info.target_method.dex_file, - info.target_method.dex_method_index)); + for (const PatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(info.label.GetLocation(), &info.dex_file, info.index)); } for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(), @@ -3843,11 +3923,12 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_string.dex_file, target_string.string_index)); } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + 
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } for (const auto& entry : boot_image_type_patches_) { const TypeReference& target_type = entry.first; @@ -3856,12 +3937,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_type.dex_file, target_type.type_index)); } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); - } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); vixl::aarch64::Literal<uint32_t>* literal = entry.second; @@ -4018,19 +4095,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { const DexFile& dex_file = cls->GetDexFile(); uint32_t type_index = cls->GetTypeIndex(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative type patch. 
vixl::aarch64::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(add_label); - __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); - } + codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -4067,11 +4136,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { uint32_t element_offset = cls->GetDexCacheElementOffset(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative DexCache access patch. vixl::aarch64::Label* ldr_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); @@ -4156,7 +4221,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kDexCacheViaMethod: @@ -4167,7 +4232,9 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { @@ -4191,20 +4258,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); uint32_t string_index = load->GetStringIndex(); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. vixl::aarch64::Label* add_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(add_label); - __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); - } + codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -4212,6 +4272,28 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress())); return; // No dex cache slow path. } + case HLoadString::LoadKind::kBssEntry: { + // Add ADRP with its PC-relative String .bss entry patch. + const DexFile& dex_file = load->GetDexFile(); + uint32_t string_index = load->GetStringIndex(); + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + // Add LDR with its PC-relative String patch. 
+ vixl::aarch64::Label* ldr_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ + GenerateGcRootFieldLoad(load, + load->GetLocations()->Out(), + out.X(), + /* placeholder */ 0u, + ldr_label); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + codegen_->AddSlowPath(slow_path); + __ Cbz(out.X(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } @@ -4981,6 +5063,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru uint32_t offset, vixl::aarch64::Label* fixup_label, bool requires_read_barrier) { + DCHECK(fixup_label == nullptr || offset == 0u); Register root_reg = RegisterFrom(root, Primitive::kPrimNot); if (requires_read_barrier) { DCHECK(kEmitCompilerReadBarrier); @@ -4997,9 +5080,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ ldr(root_reg, MemOperand(obj, offset)); + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); } static_assert( sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), @@ -5028,9 +5109,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ add(root_reg.X(), obj.X(), offset); + codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); @@ -5041,9 +5120,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr)
{ __ Ldr(root_reg, MemOperand(obj, offset)); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ ldr(root_reg, MemOperand(obj, offset)); + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index a15224578d..eb28ecb427 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -564,6 +564,14 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address); + void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg); + void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base); + void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base); + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; // Fast path implementation of ReadBarrier::Barrier for a heap @@ -691,6 +699,10 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitJumpTables(); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory. ArenaDeque<vixl::aarch64::Label> block_labels_; // Indexed by block id. @@ -713,12 +725,12 @@ class CodeGeneratorARM64 : public CodeGenerator { MethodToLiteralMap call_patches_; // Relative call patch info. 
// Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_; + ArenaDeque<PatchInfo<vixl::aarch64::Label>> relative_call_patches_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5c0ca85c78..1c540c20ed 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -279,7 +279,8 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -289,6 +290,19 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { type); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. 
+ bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + DCHECK_NE(out, AT); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base); + __ StoreToOffset(kStoreWord, out, TMP, 0); + __ B(GetExitLabel()); } @@ -957,6 +971,24 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative addressing. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? 
__ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(Factory(high_offset, &dex_file, pc_rel_offset, offset_or_index)); + } +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -987,48 +1019,17 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch target_method.dex_file, target_method.dex_method_index)); } - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t base_element_offset = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - DCHECK(info.pc_rel_label.IsBound()); - uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset, - &dex_file, - pc_rel_offset, - base_element_offset)); - } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t string_index = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative literals. - uint32_t pc_rel_offset = info.pc_rel_label.IsBound() - ? 
__ GetLabelLocation(&info.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset, - &dex_file, - pc_rel_offset, - string_index)); - } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t type_index = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative literals. - uint32_t pc_rel_offset = info.pc_rel_label.IsBound() - ? __ GetLabelLocation(&info.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset, - &dex_file, - pc_rel_offset, - type_index)); + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_string_patches_) { const StringReference& target_string = entry.first; Literal* literal = entry.second; @@ -1118,6 +1119,36 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); } +void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder( + PcRelativePatchInfo* info, Register out, Register base) { + bool reordering = __ SetReorder(false); + if (GetInstructionSetFeatures().IsR6()) { + DCHECK_EQ(base, ZERO); + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit 
offset to PC. + __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); + } else { + // If base is ZERO, emit NAL to obtain the actual base. + if (base == ZERO) { + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + } + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding + // the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler. + if (base == ZERO) { + __ Bind(&info->pc_rel_label); + } + __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, (base == ZERO) ? RA : base); + } + __ SetReorder(reordering); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -4229,6 +4260,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( } // We disable PC-relative load when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool fallback_load = has_irreducible_loops; switch (desired_string_load_kind) { @@ -4244,10 +4277,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); - // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods - // with irreducible loops. 
break; case HLoadString::LoadKind::kDexCacheViaMethod: fallback_load = false; @@ -4627,23 +4658,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { DCHECK(!kEmitCompilerReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); - bool reordering = __ SetReorder(false); - if (isR6) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); - __ Addiu(out, out, /* placeholder */ 0x5678); - } else { - __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); - // We do not bind info->pc_rel_label here, we'll use the assembler's label - // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. - __ Ori(out, out, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(out, out, base_or_current_method_reg); - } - __ SetReorder(reordering); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -4732,7 +4747,9 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); @@ -4741,12 +4758,12 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: if (codegen_->GetInstructionSetFeatures().IsR6()) { break; } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCachePcRelative: case HLoadString::LoadKind::kDexCacheViaMethod: locations->SetInAt(0, Location::RequiresRegister()); break; @@ -4768,6 +4785,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; default: @@ -4785,25 +4803,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(!kEmitCompilerReadBarrier); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - bool reordering = __ SetReorder(false); - if (isR6) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. 
- __ Auipc(out, /* placeholder */ 0x1234); - __ Addiu(out, out, /* placeholder */ 0x5678); - } else { - __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); - // We do not bind info->pc_rel_label here, we'll use the assembler's label - // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. - __ Ori(out, out, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(out, out, base_or_current_method_reg); - } - __ SetReorder(reordering); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -4815,15 +4818,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); + __ LoadFromOffset(kLoadWord, out, out, 0); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } // TODO: Re-add the compiler code to do string dex cache lookup again. 
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { @@ -6011,25 +6027,8 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra Register reg = base->GetLocations()->Out().AsRegister<Register>(); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - bool reordering = __ SetReorder(false); - if (codegen_->GetInstructionSetFeatures().IsR6()) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(reg, /* placeholder */ 0x1234); - __ Addiu(reg, reg, /* placeholder */ 0x5678); - } else { - // Generate a dummy PC-relative call to obtain PC. - __ Nal(); - __ Bind(&info->high_label); - __ Lui(reg, /* placeholder */ 0x1234); - __ Bind(&info->pc_rel_label); - __ Ori(reg, reg, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(reg, reg, RA); - // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()? - } - __ SetReorder(reordering); + // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL. 
+ codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO); } void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index f943978b3b..0e8d8d40cf 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -435,6 +435,8 @@ class CodeGeneratorMIPS : public CodeGenerator { Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); + void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -455,6 +457,10 @@ class CodeGeneratorMIPS : public CodeGenerator { uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. MipsLabel* block_labels_; MipsLabel frame_entry_label_; @@ -473,7 +479,7 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. 
BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a7051aeeb1..0b23599665 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -212,6 +212,42 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86); }; +class LoadStringSlowPathX86 : public SlowPathCode { + public: + explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index)); + x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. 
+ Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset), + locations->Out().AsRegister<Register>()); + Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); + __ Bind(fixup_label); + + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); +}; + class LoadClassSlowPathX86 : public SlowPathCode { public: LoadClassSlowPathX86(HLoadClass* cls, @@ -4294,7 +4330,8 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: __ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0)); - method_patches_.emplace_back(invoke->GetTargetMethod()); + method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { @@ -4339,7 +4376,8 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ call(GetFrameEntryLabel()); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); Label* label = &relative_call_patches_.back().label; __ call(label); // Bind to the patch label, override at link time. __ Bind(label); // Bind the label at the end of the "call" insn. 
@@ -4398,7 +4436,8 @@ void CodeGeneratorX86::RecordSimplePatch() { } } -void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) { +void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); __ Bind(&string_patches_.back().label); } @@ -4408,6 +4447,12 @@ void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) { __ Bind(&type_patches_.back().label); } +Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { + DCHECK(!GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); + return &string_patches_.back().label; +} + Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add the patch entry and bind its label at the end of the instruction. @@ -4415,6 +4460,21 @@ Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file return &pc_relative_dex_cache_patches_.back().label; } +// The label points to the end of the "movl" or another instruction but the literal offset +// for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
+constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( + const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PatchInfo<Label>& info : infos) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back( + Factory(literal_offset, &info.dex_file, GetMethodAddressOffset(), info.index)); + } +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4425,59 +4485,38 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - // The label points to the end of the "movl" insn but the literal offset for method - // patch needs to point to the embedded constant which occupies the last 4 bytes. 
- constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; - for (const MethodPatchInfo<Label>& info : method_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : method_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); } - for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, - &info.target_dex_file, - GetMethodAddressOffset(), - info.element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const Label& label : simple_patches_) { uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); } - if (GetCompilerOptions().GetCompilePic()) { - for (const StringPatchInfo<Label>& info : string_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset, - &info.dex_file, - GetMethodAddressOffset(), - 
info.string_index)); - } - for (const TypePatchInfo<Label>& info : type_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, - &info.dex_file, - GetMethodAddressOffset(), - info.type_index)); - } + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + } else if (GetCompilerOptions().GetCompilePic()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { - for (const StringPatchInfo<Label>& info : string_patches_) { + for (const PatchInfo<Label>& info : string_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - &info.dex_file, - info.string_index)); + linker_patches->push_back( + LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index)); } - for (const TypePatchInfo<Label>& info : type_patches_) { + } + if (GetCompilerOptions().GetCompilePic()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches); + } else { + for (const PatchInfo<Label>& info : type_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - &info.dex_file, - info.type_index)); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index)); } } } @@ -5991,7 +6030,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( case HLoadString::LoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().GetCompilePic()); FALLTHROUGH_INTENDED; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image 
is also non-JIT. // We disable pc-relative load when there is an irreducible loop, as the optimization // is incompatible with it. @@ -6014,13 +6053,15 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kDexCachePcRelative) { + load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { @@ -6038,13 +6079,13 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: { __ movl(out, Immediate(/* placeholder */ 0)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -6054,6 +6095,19 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. 
} + case HLoadString::LoadKind::kBssEntry: { + Register method_address = locations->InAt(0).AsRegister<Register>(); + Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset); + Label* fixup_label = codegen_->NewStringBssEntryPatch(load); + // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 1bd28da178..25f5c2a58f 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -411,8 +411,9 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void RecordSimplePatch(); - void RecordStringPatch(HLoadString* load_string); + void RecordBootStringPatch(HLoadString* load_string); void RecordTypePatch(HLoadClass* load_class); + Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -580,15 +581,9 @@ class CodeGeneratorX86 : public CodeGenerator { private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); - struct PcRelativeDexCacheAccessInfo { - PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) - : target_dex_file(dex_file), element_offset(element_off), label() { } - - const DexFile& target_dex_file; - uint32_t element_offset; - // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset. 
- Label label; - }; + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. @@ -600,16 +595,16 @@ class CodeGeneratorX86 : public CodeGenerator { const X86InstructionSetFeatures& isa_features_; // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> method_patches_; - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> method_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; + ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. ArenaDeque<Label> simple_patches_; - // String patch locations. - ArenaDeque<StringPatchInfo<Label>> string_patches_; + // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC). + ArenaDeque<PatchInfo<Label>> string_patches_; // Type patch locations. - ArenaDeque<TypePatchInfo<Label>> type_patches_; + ArenaDeque<PatchInfo<Label>> type_patches_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index b243ee0c59..28638d721d 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -287,6 +287,44 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64); }; +class LoadStringSlowPathX86_64 : public SlowPathCode { + public: + explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index)); + x86_64_codegen->InvokeRuntime(kQuickResolveString, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. 
+ __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false), + locations->Out().AsRegister<CpuRegister>()); + Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); + __ Bind(fixup_label); + + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64); +}; + class TypeCheckSlowPathX86_64 : public SlowPathCode { public: TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal) @@ -772,7 +810,8 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: __ movl(temp.AsRegister<CpuRegister>(), Immediate(0)); // Placeholder. - method_patches_.emplace_back(invoke->GetTargetMethod()); + method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { @@ -819,7 +858,8 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ call(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); Label* label = &relative_call_patches_.back().label; __ call(label); // Bind to the patch label, override at link time. __ Bind(label); // Bind the label at the end of the "call" insn. 
@@ -879,7 +919,8 @@ void CodeGeneratorX86_64::RecordSimplePatch() { } } -void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) { +void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); __ Bind(&string_patches_.back().label); } @@ -889,6 +930,12 @@ void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) { __ Bind(&type_patches_.back().label); } +Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { + DCHECK(!GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); + return &string_patches_.back().label; +} + Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add a patch entry and return the label. @@ -896,6 +943,21 @@ Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_f return &pc_relative_dex_cache_patches_.back().label; } +// The label points to the end of the "movl" or another instruction but the literal offset +// for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
+constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PatchInfo<Label>& info : infos) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back( + Factory(literal_offset, &info.dex_file, info.label.Position(), info.index)); + } +} + void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -906,48 +968,29 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - // The label points to the end of the "movl" insn but the literal offset for method - // patch needs to point to the embedded constant which occupies the last 4 bytes. 
- constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; - for (const MethodPatchInfo<Label>& info : method_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : method_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); } - for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, - &info.target_dex_file, - info.label.Position(), - info.element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const Label& label : simple_patches_) { uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); } - for (const StringPatchInfo<Label>& info : string_patches_) { + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + } else { // These are always PC-relative, see GetSupportedLoadStringKind(). 
- uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset, - &info.dex_file, - info.label.Position(), - info.string_index)); - } - for (const TypePatchInfo<Label>& info : type_patches_) { - // These are always PC-relative, see GetSupportedLoadClassKind(). - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, - &info.dex_file, - info.label.Position(), - info.type_index)); + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } + // These are always PC-relative, see GetSupportedLoadClassKind(). + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches); } void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const { @@ -5390,7 +5433,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kDexCacheViaMethod: @@ -5401,7 +5444,9 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { @@ -5420,7 +5465,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -5430,6 +5475,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. } + case HLoadString::LoadKind::kBssEntry: { + Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, + /* no_rip */ false); + Label* fixup_label = codegen_->NewStringBssEntryPatch(load); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 8dec44eb03..57ef83f621 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -406,8 +406,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void RecordSimplePatch(); - void RecordStringPatch(HLoadString* load_string); + void RecordBootStringPatch(HLoadString* load_string); void 
RecordTypePatch(HLoadClass* load_class); + Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -555,14 +556,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { static constexpr int32_t kDummy32BitOffset = 256; private: - struct PcRelativeDexCacheAccessInfo { - PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) - : target_dex_file(dex_file), element_offset(element_off), label() { } - - const DexFile& target_dex_file; - uint32_t element_offset; - Label label; - }; + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. @@ -578,16 +574,16 @@ class CodeGeneratorX86_64 : public CodeGenerator { int constant_area_start_; // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> method_patches_; - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> method_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; + ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. ArenaDeque<Label> simple_patches_; - // String patch locations. - ArenaDeque<StringPatchInfo<Label>> string_patches_; + // String patch locations; type depends on configuration (app .bss or boot image PIC). + ArenaDeque<PatchInfo<Label>> string_patches_; // Type patch locations. 
- ArenaDeque<TypePatchInfo<Label>> type_patches_; + ArenaDeque<PatchInfo<Label>> type_patches_; // Fixups for jump tables need to be handled specially. ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_; diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc index 7010171c80..82b81238ab 100644 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc @@ -62,21 +62,6 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { - // If this is a load with PC-relative access to the dex cache strings array, - // we need to add the dex cache arrays base as the special input. - if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { - // Initialize base for target dex file if needed. - const DexFile& dex_file = load_string->GetDexFile(); - HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); - // Update the element offset in base. - DexCacheArraysLayout layout(kArmPointerSize, &dex_file); - base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex())); - // Add the special argument base to the load. - load_string->AddSpecialInput(base); - } - } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. 
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc index 4456b49e87..31fff26dd5 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.cc +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -68,21 +68,6 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { } } - void VisitLoadString(HLoadString* load_string) OVERRIDE { - // If this is a load with PC-relative access to the dex cache strings array, - // we need to add the dex cache arrays base as the special input. - if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { - // Initialize base for target dex file if needed. - const DexFile& dex_file = load_string->GetDexFile(); - HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); - // Update the element offset in base. - DexCacheArraysLayout layout(kMipsPointerSize, &dex_file); - base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex())); - // Add the special argument base to the load. - load_string->AddSpecialInput(base); - } - } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc new file mode 100644 index 0000000000..7dfa4f160b --- /dev/null +++ b/compiler/optimizing/loop_optimization.cc @@ -0,0 +1,317 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_optimization.h" + +#include "base/arena_containers.h" +#include "induction_var_range.h" +#include "ssa_liveness_analysis.h" +#include "nodes.h" + +namespace art { + +// TODO: Generalize to cycles, as found by induction analysis? +static bool IsPhiAddSub(HPhi* phi, /*out*/ HInstruction** addsub_out) { + HInputsRef inputs = phi->GetInputs(); + if (inputs.size() == 2 && (inputs[1]->IsAdd() || inputs[1]->IsSub())) { + HInstruction* addsub = inputs[1]; + if (addsub->InputAt(0) == phi || addsub->InputAt(1) == phi) { + if (addsub->GetUses().HasExactlyOneElement()) { + *addsub_out = addsub; + return true; + } + } + } + return false; +} + +static bool IsOnlyUsedAfterLoop(const HLoopInformation& loop_info, + HPhi* phi, HInstruction* addsub) { + for (const HUseListNode<HInstruction*>& use : phi->GetUses()) { + if (use.GetUser() != addsub) { + HLoopInformation* other_loop_info = use.GetUser()->GetBlock()->GetLoopInformation(); + if (other_loop_info != nullptr && other_loop_info->IsIn(loop_info)) { + return false; + } + } + } + return true; +} + +// Find: phi: Phi(init, addsub) +// s: SuspendCheck +// c: Condition(phi, bound) +// i: If(c) +// TODO: Find a less pattern matching approach? 
+static bool IsEmptyHeader(HBasicBlock* block, /*out*/ HInstruction** addsub) { + HInstruction* phi = block->GetFirstPhi(); + if (phi != nullptr && phi->GetNext() == nullptr && IsPhiAddSub(phi->AsPhi(), addsub)) { + HInstruction* s = block->GetFirstInstruction(); + if (s != nullptr && s->IsSuspendCheck()) { + HInstruction* c = s->GetNext(); + if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) { + HInstruction* i = c->GetNext(); + if (i != nullptr && i->IsIf() && i->InputAt(0) == c) { + // Check that phi is only used inside loop as expected. + for (const HUseListNode<HInstruction*>& use : phi->GetUses()) { + if (use.GetUser() != *addsub && use.GetUser() != c) { + return false; + } + } + return true; + } + } + } + } + return false; +} + +static bool IsEmptyBody(HBasicBlock* block, HInstruction* addsub) { + HInstruction* phi = block->GetFirstPhi(); + HInstruction* i = block->GetFirstInstruction(); + return phi == nullptr && i == addsub && i->GetNext() != nullptr && i->GetNext()->IsGoto(); +} + +static HBasicBlock* TryRemovePreHeader(HBasicBlock* preheader, HBasicBlock* entry_block) { + if (preheader->GetPredecessors().size() == 1) { + HBasicBlock* entry = preheader->GetSinglePredecessor(); + HInstruction* anchor = entry->GetLastInstruction(); + // If the pre-header has a single predecessor we can remove it too if + // either the pre-header just contains a goto, or if the predecessor + // is not the entry block so we can push instructions backward + // (moving computation into the entry block is too dangerous!). + if (preheader->GetFirstInstruction() == nullptr || + preheader->GetFirstInstruction()->IsGoto() || + (entry != entry_block && anchor->IsGoto())) { + // Push non-goto statements backward to empty the pre-header. 
+ for (HInstructionIterator it(preheader->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (!instruction->IsGoto()) { + if (!instruction->CanBeMoved()) { + return nullptr; // pushing failed to move all + } + it.Current()->MoveBefore(anchor); + } + } + return entry; + } + } + return nullptr; +} + +static void RemoveFromCycle(HInstruction* instruction) { + // A bit more elaborate than the usual instruction removal, + // since there may be a cycle in the use structure. + instruction->RemoveAsUserOfAllInputs(); + instruction->RemoveEnvironmentUsers(); + instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false); +} + +// +// Class methods. +// + +HLoopOptimization::HLoopOptimization(HGraph* graph, + HInductionVarAnalysis* induction_analysis) + : HOptimization(graph, kLoopOptimizationPassName), + induction_range_(induction_analysis), + loop_allocator_(graph_->GetArena()->GetArenaPool()), // phase-local allocator on global pool + top_loop_(nullptr), + last_loop_(nullptr) { +} + +void HLoopOptimization::Run() { + // Well-behaved loops only. + // TODO: make this less of a sledgehammer. + if (graph_-> HasTryCatch() || graph_->HasIrreducibleLoops()) { + return; + } + + // Build the linear order. This step enables building a loop hierarchy that + // properly reflects the outer-inner and previous-next relation. + graph_->Linearize(); + // Build the loop hierarchy. + for (HLinearOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) { + HBasicBlock* block = it_graph.Current(); + if (block->IsLoopHeader()) { + AddLoop(block->GetLoopInformation()); + } + } + if (top_loop_ == nullptr) { + return; // no loops + } + // Traverse the loop hierarchy inner-to-outer and optimize. 
+ TraverseLoopsInnerToOuter(top_loop_); +} + +void HLoopOptimization::AddLoop(HLoopInformation* loop_info) { + DCHECK(loop_info != nullptr); + LoopNode* node = new (&loop_allocator_) LoopNode(loop_info); // phase-local allocator + if (last_loop_ == nullptr) { + // First loop. + DCHECK(top_loop_ == nullptr); + last_loop_ = top_loop_ = node; + } else if (loop_info->IsIn(*last_loop_->loop_info)) { + // Inner loop. + node->outer = last_loop_; + DCHECK(last_loop_->inner == nullptr); + last_loop_ = last_loop_->inner = node; + } else { + // Subsequent loop. + while (last_loop_->outer != nullptr && !loop_info->IsIn(*last_loop_->outer->loop_info)) { + last_loop_ = last_loop_->outer; + } + node->outer = last_loop_->outer; + node->previous = last_loop_; + DCHECK(last_loop_->next == nullptr); + last_loop_ = last_loop_->next = node; + } +} + +void HLoopOptimization::RemoveLoop(LoopNode* node) { + DCHECK(node != nullptr); + // TODO: implement when needed (for current set of optimizations, we don't + // need to keep recorded loop hierarchy up to date, but as we get different + // traversal, we may want to remove the node from the hierarchy here. +} + +void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { + for ( ; node != nullptr; node = node->next) { + if (node->inner != nullptr) { + TraverseLoopsInnerToOuter(node->inner); + } + // Visit loop after its inner loops have been visited. + SimplifyInduction(node); + RemoveIfEmptyLoop(node); + } +} + +void HLoopOptimization::SimplifyInduction(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + // Scan the phis in the header to find opportunities to optimize induction. + for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + HInstruction* addsub = nullptr; + // Find phi-add/sub cycle. + if (IsPhiAddSub(phi, &addsub)) { + // Simple case, the induction is only used by itself. 
Although redundant, + // later phases do not easily detect this property. Thus, eliminate here. + // Example: for (int i = 0; x != null; i++) { .... no i .... } + if (phi->GetUses().HasExactlyOneElement()) { + // Remove the cycle, including all uses. Even environment uses can be removed, + // since these computations have no effect at all. + RemoveFromCycle(phi); // removes environment uses too + RemoveFromCycle(addsub); + continue; + } + // Closed form case. Only the last value of the induction is needed. Remove all + // overhead from the loop, and replace subsequent uses with the last value. + // Example: for (int i = 0; i < 10; i++, k++) { .... no k .... } return k; + if (IsOnlyUsedAfterLoop(*node->loop_info, phi, addsub) && + induction_range_.CanGenerateLastValue(phi)) { + HInstruction* last = induction_range_.GenerateLastValue(phi, graph_, preheader); + // Remove the cycle, replacing all uses. Even environment uses can consume the final + // value, since any first real use is outside the loop (although this may imply + // that deopting may look "ahead" a bit on the phi value). + ReplaceAllUses(phi, last, addsub); + RemoveFromCycle(phi); // removes environment uses too + RemoveFromCycle(addsub); + } + } + } +} + +void HLoopOptimization::RemoveIfEmptyLoop(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + // Ensure there is only a single loop-body (besides the header). + HBasicBlock* body = nullptr; + for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { + if (it.Current() != header) { + if (body != nullptr) { + return; + } + body = it.Current(); + } + } + // Ensure there is only a single exit point. + if (header->GetSuccessors().size() != 2) { + return; + } + HBasicBlock* exit = (header->GetSuccessors()[0] == body) + ? 
header->GetSuccessors()[1] + : header->GetSuccessors()[0]; + // Ensure exit can only be reached by exiting loop (this seems typically the + // case anyway, and simplifies code generation below; TODO: perhaps relax?). + if (exit->GetPredecessors().size() != 1) { + return; + } + // Detect an empty loop: no side effects other than plain iteration. + HInstruction* addsub = nullptr; + if (IsEmptyHeader(header, &addsub) && IsEmptyBody(body, addsub)) { + HBasicBlock* entry = TryRemovePreHeader(preheader, graph_->GetEntryBlock()); + body->DisconnectAndDelete(); + exit->RemovePredecessor(header); + header->RemoveSuccessor(exit); + header->ClearDominanceInformation(); + header->SetDominator(preheader); // needed by next disconnect. + header->DisconnectAndDelete(); + // If allowed, remove preheader too, which may expose next outer empty loop + // Otherwise, link preheader directly to exit to restore the flow graph. + if (entry != nullptr) { + entry->ReplaceSuccessor(preheader, exit); + entry->AddDominatedBlock(exit); + exit->SetDominator(entry); + preheader->DisconnectAndDelete(); + } else { + preheader->AddSuccessor(exit); + preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator + preheader->AddDominatedBlock(exit); + exit->SetDominator(preheader); + } + // Update hierarchy. 
+ RemoveLoop(node); + } +} + +void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, + HInstruction* replacement, + HInstruction* exclusion) { + const HUseList<HInstruction*>& uses = instruction->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + ++it; // increment before replacing + if (user != exclusion) { + user->ReplaceInput(replacement, index); + induction_range_.Replace(user, instruction, replacement); // update induction + } + } + const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); + for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + ++it; // increment before replacing + if (user->GetHolder() != exclusion) { + user->RemoveAsUserOfInput(index); + user->SetRawEnvAt(index, replacement); + replacement->AddEnvUseAt(user, index); + } + } +} + +} // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h new file mode 100644 index 0000000000..e7980ce89e --- /dev/null +++ b/compiler/optimizing/loop_optimization.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ +#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ + +#include <string> + +#include "induction_var_range.h" +#include "nodes.h" +#include "optimization.h" + +namespace art { + +/** + * Loop optimizations. Builds a loop hierarchy and applies optimizations to + * the detected nested loops, such as removal of dead induction and empty loops. + */ +class HLoopOptimization : public HOptimization { + public: + HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis); + + void Run() OVERRIDE; + + static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; + + private: + /** + * A single loop inside the loop hierarchy representation. + */ + struct LoopNode : public ArenaObject<kArenaAllocInductionVarAnalysis> { + explicit LoopNode(HLoopInformation* lp_info) + : loop_info(lp_info), + outer(nullptr), + inner(nullptr), + previous(nullptr), + next(nullptr) {} + const HLoopInformation* const loop_info; + LoopNode* outer; + LoopNode* inner; + LoopNode* previous; + LoopNode* next; + }; + + void AddLoop(HLoopInformation* loop_info); + void RemoveLoop(LoopNode* node); + + void TraverseLoopsInnerToOuter(LoopNode* node); + + void SimplifyInduction(LoopNode* node); + void RemoveIfEmptyLoop(LoopNode* node); + + void ReplaceAllUses(HInstruction* instruction, + HInstruction* replacement, + HInstruction* exclusion); + + // Range analysis based on induction variables. + InductionVarRange induction_range_; + + // Phase-local heap memory allocator for the loop optimizer. Storage obtained + // through this allocator is released when the loop optimizer is done. + ArenaAllocator loop_allocator_; + + // Entries into the loop hierarchy representation. 
+ LoopNode* top_loop_; + LoopNode* last_loop_; + + friend class LoopOptimizationTest; + + DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc new file mode 100644 index 0000000000..4e007d4e9a --- /dev/null +++ b/compiler/optimizing/loop_optimization_test.cc @@ -0,0 +1,193 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "loop_optimization.h" +#include "optimizing_unit_test.h" + +namespace art { + +/** + * Fixture class for the loop optimization tests. These unit tests focus + * constructing the loop hierarchy. Actual optimizations are tested + * through the checker tests. + */ +class LoopOptimizationTest : public CommonCompilerTest { + public: + LoopOptimizationTest() + : pool_(), + allocator_(&pool_), + graph_(CreateGraph(&allocator_)), + iva_(new (&allocator_) HInductionVarAnalysis(graph_)), + loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) { + BuildGraph(); + } + + ~LoopOptimizationTest() { } + + /** Constructs bare minimum graph. 
*/ + void BuildGraph() { + graph_->SetNumberOfVRegs(1); + entry_block_ = new (&allocator_) HBasicBlock(graph_); + return_block_ = new (&allocator_) HBasicBlock(graph_); + exit_block_ = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(entry_block_); + graph_->AddBlock(return_block_); + graph_->AddBlock(exit_block_); + graph_->SetEntryBlock(entry_block_); + graph_->SetExitBlock(exit_block_); + parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt); + entry_block_->AddInstruction(parameter_); + return_block_->AddInstruction(new (&allocator_) HReturnVoid()); + exit_block_->AddInstruction(new (&allocator_) HExit()); + entry_block_->AddSuccessor(return_block_); + return_block_->AddSuccessor(exit_block_); + } + + /** Adds a loop nest at given position before successor. */ + HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) { + HBasicBlock* header = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* body = new (&allocator_) HBasicBlock(graph_); + graph_->AddBlock(header); + graph_->AddBlock(body); + // Control flow. + position->ReplaceSuccessor(successor, header); + header->AddSuccessor(body); + header->AddSuccessor(successor); + header->AddInstruction(new (&allocator_) HIf(parameter_)); + body->AddSuccessor(header); + body->AddInstruction(new (&allocator_) HGoto()); + return header; + } + + /** Performs analysis. */ + void PerformAnalysis() { + graph_->BuildDominatorTree(); + iva_->Run(); + loop_opt_->Run(); + } + + /** Constructs string representation of computed loop hierarchy. */ + std::string LoopStructure() { + return LoopStructureRecurse(loop_opt_->top_loop_); + } + + // Helper method + std::string LoopStructureRecurse(HLoopOptimization::LoopNode* node) { + std::string s; + for ( ; node != nullptr; node = node->next) { + s.append("["); + s.append(LoopStructureRecurse(node->inner)); + s.append("]"); + } + return s; + } + + // General building fields. 
+ ArenaPool pool_; + ArenaAllocator allocator_; + HGraph* graph_; + HInductionVarAnalysis* iva_; + HLoopOptimization* loop_opt_; + + HBasicBlock* entry_block_; + HBasicBlock* return_block_; + HBasicBlock* exit_block_; + + HInstruction* parameter_; +}; + +// +// The actual tests. +// + +TEST_F(LoopOptimizationTest, NoLoops) { + PerformAnalysis(); + EXPECT_EQ("", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, SingleLoop) { + AddLoop(entry_block_, return_block_); + PerformAnalysis(); + EXPECT_EQ("[]", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopNest10) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + s = AddLoop(b, s); + b = s->GetSuccessors()[0]; + } + PerformAnalysis(); + EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopSequence10) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + b = AddLoop(b, s); + s = b->GetSuccessors()[1]; + } + PerformAnalysis(); + EXPECT_EQ("[][][][][][][][][][]", LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + b = AddLoop(b, s); + s = b->GetSuccessors()[1]; + HBasicBlock* bi = b->GetSuccessors()[0]; + HBasicBlock* si = b; + for (int j = 0; j < i; j++) { + si = AddLoop(bi, si); + bi = si->GetSuccessors()[0]; + } + } + PerformAnalysis(); + EXPECT_EQ("[]" + "[[]]" + "[[[]]]" + "[[[[]]]]" + "[[[[[]]]]]" + "[[[[[[]]]]]]" + "[[[[[[[]]]]]]]" + "[[[[[[[[]]]]]]]]" + "[[[[[[[[[]]]]]]]]]" + "[[[[[[[[[[]]]]]]]]]]", + LoopStructure()); +} + +TEST_F(LoopOptimizationTest, LoopNestWithSequence) { + HBasicBlock* b = entry_block_; + HBasicBlock* s = return_block_; + for (int i = 0; i < 10; i++) { + s = AddLoop(b, s); + b = s->GetSuccessors()[0]; + } + b = s; + s = b->GetSuccessors()[1]; + for (int i = 0; i < 9; i++) { + b = AddLoop(b, s); + s = 
b->GetSuccessors()[1]; + } + PerformAnalysis(); + EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure()); +} + +} // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ef9bf23a17..1ff2252348 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -522,7 +522,10 @@ static bool IsLinearOrderWellFormed(const HGraph& graph) { return true; } +// TODO: return order, and give only liveness analysis ownership of graph's linear_order_? void HGraph::Linearize() { + linear_order_.clear(); + // Create a reverse post ordering with the following properties: // - Blocks in a loop are consecutive, // - Back-edge is the last block before loop exits. @@ -2607,12 +2610,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { LoadKind load_kind = GetLoadKind(); if (HasAddress(load_kind)) { return GetAddress() == other_load_string->GetAddress(); - } else if (HasStringReference(load_kind)) { - return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile()); } else { - DCHECK(HasDexCacheReference(load_kind)) << load_kind; - // If the string indexes and dex files are the same, dex cache element offsets - // must also be the same, so we don't need to compare them. 
+ DCHECK(HasStringReference(load_kind)) << load_kind; return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile()); } } @@ -2642,8 +2641,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BootImageAddress"; case HLoadString::LoadKind::kDexCacheAddress: return os << "DexCacheAddress"; - case HLoadString::LoadKind::kDexCachePcRelative: - return os << "DexCachePcRelative"; + case HLoadString::LoadKind::kBssEntry: + return os << "BssEntry"; case HLoadString::LoadKind::kDexCacheViaMethod: return os << "DexCacheViaMethod"; default: diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 397abded27..5cfbf4249e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -366,8 +366,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // is a throw-catch loop, i.e. the header is a catch block. GraphAnalysisResult AnalyzeLoops() const; - // Computes the linear order (should be called before using HLinearOrderIterator). - // Linearizes the graph such that: + // Computes a linear order for the current graph (should be called before + // using HLinearOrderIterator). Linearizes the graph such that: // (1): a block is always after its dominator, // (2): blocks of loops are contiguous. // This creates a natural and efficient ordering when visualizing live ranges. @@ -586,7 +586,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // List of blocks to perform a reverse post order tree traversal. ArenaVector<HBasicBlock*> reverse_post_order_; - // List of blocks to perform a linear order tree traversal. + // List of blocks to perform a linear order tree traversal. Unlike the reverse + // post order, this order is not incrementally kept up-to-date. ArenaVector<HBasicBlock*> linear_order_; HBasicBlock* entry_block_; @@ -5650,10 +5651,9 @@ class HLoadString FINAL : public HInstruction { // Used for strings outside the boot image referenced by JIT-compiled code. 
kDexCacheAddress, - // Load from resolved strings array in the dex cache using a PC-relative load. - // Used for strings outside boot image when we know that we can access - // the dex cache arrays using a PC-relative load. - kDexCachePcRelative, + // Load from an entry in the .bss section using a PC-relative load. + // Used for strings outside boot image when .bss is accessible with a PC-relative load. + kBssEntry, // Load from resolved strings array accessed through the class loaded from // the compiled method's own ArtMethod*. This is the default access type when @@ -5672,7 +5672,7 @@ class HLoadString FINAL : public HInstruction { string_index_(string_index) { SetPackedFlag<kFlagIsInDexCache>(false); SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod); - load_data_.ref.dex_file = &dex_file; + load_data_.dex_file_ = &dex_file; } void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) { @@ -5685,20 +5685,11 @@ class HLoadString FINAL : public HInstruction { const DexFile& dex_file, uint32_t string_index) { DCHECK(HasStringReference(load_kind)); - load_data_.ref.dex_file = &dex_file; + load_data_.dex_file_ = &dex_file; string_index_ = string_index; SetLoadKindInternal(load_kind); } - void SetLoadKindWithDexCacheReference(LoadKind load_kind, - const DexFile& dex_file, - uint32_t element_index) { - DCHECK(HasDexCacheReference(load_kind)); - load_data_.ref.dex_file = &dex_file; - load_data_.ref.dex_cache_element_index = element_index; - SetLoadKindInternal(load_kind); - } - LoadKind GetLoadKind() const { return GetPackedField<LoadKindField>(); } @@ -5710,8 +5701,6 @@ class HLoadString FINAL : public HInstruction { return string_index_; } - uint32_t GetDexCacheElementOffset() const; - uint64_t GetAddress() const { DCHECK(HasAddress(GetLoadKind())); return load_data_.address; @@ -5781,6 +5770,7 @@ class HLoadString FINAL : public HInstruction { static bool HasStringReference(LoadKind load_kind) { return load_kind == LoadKind::kBootImageLinkTimeAddress 
|| load_kind == LoadKind::kBootImageLinkTimePcRelative || + load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kDexCacheViaMethod; } @@ -5788,10 +5778,6 @@ class HLoadString FINAL : public HInstruction { return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress; } - static bool HasDexCacheReference(LoadKind load_kind) { - return load_kind == LoadKind::kDexCachePcRelative; - } - void SetLoadKindInternal(LoadKind load_kind); // The special input is the HCurrentMethod for kDexCacheViaMethod. @@ -5804,10 +5790,7 @@ class HLoadString FINAL : public HInstruction { uint32_t string_index_; union { - struct { - const DexFile* dex_file; // For string reference and dex cache reference. - uint32_t dex_cache_element_index; // Only for dex cache reference. - } ref; + const DexFile* dex_file_; // For string reference. uint64_t address; // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets. } load_data_; @@ -5817,15 +5800,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs); // Note: defined outside class to see operator<<(., HLoadString::LoadKind). inline const DexFile& HLoadString::GetDexFile() const { - DCHECK(HasStringReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind())) - << GetLoadKind(); - return *load_data_.ref.dex_file; -} - -// Note: defined outside class to see operator<<(., HLoadString::LoadKind). -inline uint32_t HLoadString::GetDexCacheElementOffset() const { - DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind(); - return load_data_.ref.dex_cache_element_index; + DCHECK(HasStringReference(GetLoadKind())) << GetLoadKind(); + return *load_data_.dex_file_; } // Note: defined outside class to see operator<<(., HLoadString::LoadKind). @@ -5833,7 +5809,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. 
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kDexCachePcRelative || + GetLoadKind() == LoadKind::kBssEntry || GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index d3a55dd365..c2fe1b144b 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -76,6 +76,7 @@ #include "jni/quick/jni_compiler.h" #include "licm.h" #include "load_store_elimination.h" +#include "loop_optimization.h" #include "nodes.h" #include "oat_quick_method_header.h" #include "prepare_for_register_allocation.h" @@ -737,6 +738,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects); HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction); + HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction); HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver); InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( graph, stats, "instruction_simplifier$after_bce"); @@ -765,6 +767,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, licm, induction, bce, + loop, fold3, // evaluates code generated by dynamic bce simplify2, lse, diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 6006e6cf5d..82feb95a2f 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -83,6 +83,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { case 
HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); load_string->AddSpecialInput(base_); diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 75587af7a1..b1fdb1792d 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -92,7 +92,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kDexCachePcRelative) { + load_kind == HLoadString::LoadKind::kBssEntry) { InitializePCRelativeBasePointer(); load_string->AddSpecialInput(base_); } diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index a4a3e0695d..c1cfe8d00f 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -163,7 +163,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) { : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); mirror::Class* klass = dex_cache->GetResolvedType(type_index); - if (compiler_driver_->IsBootImage()) { + if (codegen_->GetCompilerOptions().IsBootImage()) { // Compiling boot image. Check if the class is a boot image class. DCHECK(!runtime->UseJitCompilation()); if (!compiler_driver_->GetSupportBootImageFixup()) { @@ -281,7 +281,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { ? compilation_unit_.GetDexCache() : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); - if (compiler_driver_->IsBootImage()) { + if (codegen_->GetCompilerOptions().IsBootImage()) { // Compiling boot image. 
Resolve the string and allocate it if needed. DCHECK(!runtime->UseJitCompilation()); mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache); @@ -311,6 +311,8 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { !codegen_->GetCompilerOptions().GetCompilePic()) { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; address = reinterpret_cast64<uint64_t>(string); + } else { + desired_load_kind = HLoadString::LoadKind::kBssEntry; } } } @@ -319,6 +321,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { switch (load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: case HLoadString::LoadKind::kDexCacheViaMethod: load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index); break; @@ -327,13 +330,6 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { DCHECK_NE(address, 0u); load_string->SetLoadKindWithAddress(load_kind, address); break; - case HLoadString::LoadKind::kDexCachePcRelative: { - PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet()); - DexCacheArraysLayout layout(pointer_size, &dex_file); - size_t element_index = layout.StringOffset(string_index); - load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index); - break; - } } } diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index ee5811c3c0..0ed8a35338 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -262,12 +262,6 @@ class Address : public ValueObject { CHECK_NE(rm, PC); } - // LDR(literal) - pc relative load. 
- explicit Address(int32_t offset) : - rn_(PC), rm_(R0), offset_(offset), - am_(Offset), is_immed_offset_(false), shift_(LSL) { - } - static bool CanHoldLoadOffsetArm(LoadOperandType type, int offset); static bool CanHoldStoreOffsetArm(StoreOperandType type, int offset); diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 2269ba2d20..61b7f08518 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -2461,58 +2461,36 @@ void Thumb2Assembler::EmitLoadStore(Condition cond, } } else { // Register shift. - if (ad.GetRegister() == PC) { - // PC relative literal encoding. - int32_t offset = ad.GetOffset(); - if (must_be_32bit || offset < 0 || offset >= (1 << 10) || !load) { - int32_t up = B23; - if (offset < 0) { - offset = -offset; - up = 0; - } - CHECK_LT(offset, (1 << 12)); - int32_t encoding = 0x1f << 27 | 0xf << 16 | B22 | (load ? B20 : 0) | - offset | up | - static_cast<uint32_t>(rd) << 12; - Emit32(encoding); - } else { - // 16 bit literal load. - CHECK_GE(offset, 0); - CHECK_LT(offset, (1 << 10)); - int32_t encoding = B14 | (load ? B11 : 0) | static_cast<uint32_t>(rd) << 8 | offset >> 2; - Emit16(encoding); + CHECK_NE(ad.GetRegister(), PC); + if (ad.GetShiftCount() != 0) { + // If there is a shift count this must be 32 bit. + must_be_32bit = true; + } else if (IsHighRegister(ad.GetRegisterOffset())) { + must_be_32bit = true; + } + + if (must_be_32bit) { + int32_t encoding = 0x1f << 27 | (load ? B20 : 0) | static_cast<uint32_t>(rd) << 12 | + ad.encodingThumb(true); + if (half) { + encoding |= B21; + } else if (!byte) { + encoding |= B22; } - } else { - if (ad.GetShiftCount() != 0) { - // If there is a shift count this must be 32 bit. - must_be_32bit = true; - } else if (IsHighRegister(ad.GetRegisterOffset())) { - must_be_32bit = true; + if (load && is_signed && (byte || half)) { + encoding |= B24; } - - if (must_be_32bit) { - int32_t encoding = 0x1f << 27 | (load ? 
B20 : 0) | static_cast<uint32_t>(rd) << 12 | - ad.encodingThumb(true); - if (half) { - encoding |= B21; - } else if (!byte) { - encoding |= B22; - } - if (load && is_signed && (byte || half)) { - encoding |= B24; - } - Emit32(encoding); - } else { - // 16 bit register offset. - int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) | - ad.encodingThumb(false); - if (byte) { - encoding |= B10; - } else if (half) { - encoding |= B9; - } - Emit16(encoding); + Emit32(encoding); + } else { + // 16 bit register offset. + int32_t encoding = B14 | B12 | (load ? B11 : 0) | static_cast<uint32_t>(rd) | + ad.encodingThumb(false); + if (byte) { + encoding |= B10; + } else if (half) { + encoding |= B9; } + Emit16(encoding); } } } diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 3b05173d88..86a4aa2245 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -1245,22 +1245,6 @@ TEST_F(Thumb2AssemblerTest, LoadStoreRegOffset) { EmitAndCheck(&assembler, "LoadStoreRegOffset"); } -TEST_F(Thumb2AssemblerTest, LoadStoreLiteral) { - __ ldr(R0, Address(4)); - __ str(R0, Address(4)); - - __ ldr(R0, Address(-8)); - __ str(R0, Address(-8)); - - // Limits. - __ ldr(R0, Address(0x3ff)); // 10 bits (16 bit). - __ ldr(R0, Address(0x7ff)); // 11 bits (32 bit). - __ str(R0, Address(0x3ff)); // 32 bit (no 16 bit str(literal)). - __ str(R0, Address(0x7ff)); // 11 bits (32 bit). - - EmitAndCheck(&assembler, "LoadStoreLiteral"); -} - TEST_F(Thumb2AssemblerTest, LoadStoreLimits) { __ ldr(R0, Address(R4, 124)); // 16 bit. __ ldr(R0, Address(R4, 128)); // 32 bit. 
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc index 81c6ec5fac..91f397087c 100644 --- a/compiler/utils/assembler_thumb_test_expected.cc.inc +++ b/compiler/utils/assembler_thumb_test_expected.cc.inc @@ -5012,17 +5012,6 @@ const char* const LoadStoreRegOffsetResults[] = { " 28: f841 0008 str.w r0, [r1, r8]\n", nullptr }; -const char* const LoadStoreLiteralResults[] = { - " 0: 4801 ldr r0, [pc, #4] ; (8 <LoadStoreLiteral+0x8>)\n", - " 2: f8cf 0004 str.w r0, [pc, #4] ; 8 <LoadStoreLiteral+0x8>\n", - " 6: f85f 0008 ldr.w r0, [pc, #-8] ; 0 <LoadStoreLiteral>\n", - " a: f84f 0008 str.w r0, [pc, #-8] ; 4 <LoadStoreLiteral+0x4>\n", - " e: 48ff ldr r0, [pc, #1020] ; (40c <LoadStoreLiteral+0x40c>)\n", - " 10: f8df 07ff ldr.w r0, [pc, #2047] ; 813 <LoadStoreLiteral+0x813>\n", - " 14: f8cf 03ff str.w r0, [pc, #1023] ; 417 <LoadStoreLiteral+0x417>\n", - " 18: f8cf 07ff str.w r0, [pc, #2047] ; 81b <LoadStoreLiteral+0x81b>\n", - nullptr -}; const char* const LoadStoreLimitsResults[] = { " 0: 6fe0 ldr r0, [r4, #124] ; 0x7c\n", " 2: f8d4 0080 ldr.w r0, [r4, #128] ; 0x80\n", @@ -5708,7 +5697,6 @@ void setup_results() { test_results["MixedBranch32"] = MixedBranch32Results; test_results["Shifts"] = ShiftsResults; test_results["LoadStoreRegOffset"] = LoadStoreRegOffsetResults; - test_results["LoadStoreLiteral"] = LoadStoreLiteralResults; test_results["LoadStoreLimits"] = LoadStoreLimitsResults; test_results["CompareAndBranch"] = CompareAndBranchResults; test_results["AddConstant"] = AddConstantResults; diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 59c4681c0f..292aff43e9 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -523,8 +523,6 @@ class Dex2Oat FINAL { compiled_methods_zip_filename_(nullptr), compiled_methods_filename_(nullptr), passes_to_run_filename_(nullptr), - app_image_(false), - boot_image_(false), multi_image_(false), is_host_(false), class_loader_(nullptr), @@ -691,8 +689,8 @@ 
class Dex2Oat FINAL { } void ProcessOptions(ParserOptions* parser_options) { - boot_image_ = !image_filenames_.empty(); - app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty(); + compiler_options_->boot_image_ = !image_filenames_.empty(); + compiler_options_->app_image_ = app_image_fd_ != -1 || !app_image_file_name_.empty(); if (IsAppImage() && IsBootImage()) { Usage("Can't have both --image and (--app-image-fd or --app-image-file)"); @@ -744,7 +742,7 @@ class Dex2Oat FINAL { android_root_ += android_root_env_var; } - if (!boot_image_ && parser_options->boot_image_filename.empty()) { + if (!IsBootImage() && parser_options->boot_image_filename.empty()) { parser_options->boot_image_filename += android_root_; parser_options->boot_image_filename += "/framework/boot.art"; } @@ -1327,7 +1325,7 @@ class Dex2Oat FINAL { } void LoadClassProfileDescriptors() { - if (profile_compilation_info_ != nullptr && app_image_) { + if (profile_compilation_info_ != nullptr && IsAppImage()) { Runtime* runtime = Runtime::Current(); CHECK(runtime != nullptr); std::set<DexCacheResolvedClasses> resolved_classes( @@ -1634,8 +1632,6 @@ class Dex2Oat FINAL { compiler_kind_, instruction_set_, instruction_set_features_.get(), - IsBootImage(), - IsAppImage(), image_classes_.release(), compiled_classes_.release(), compiled_methods_.release(), @@ -1726,7 +1722,7 @@ class Dex2Oat FINAL { } if (IsImage()) { - if (app_image_ && image_base_ == 0) { + if (IsAppImage() && image_base_ == 0) { gc::Heap* const heap = Runtime::Current()->GetHeap(); for (gc::space::ImageSpace* image_space : heap->GetBootImageSpaces()) { image_base_ = std::max(image_base_, RoundUp( @@ -1794,7 +1790,10 @@ class Dex2Oat FINAL { size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset(); size_t text_size = oat_writer->GetOatSize() - rodata_size; - elf_writer->SetLoadedSectionSizes(rodata_size, text_size, oat_writer->GetBssSize()); + elf_writer->PrepareDynamicSection(rodata_size, + text_size, + 
oat_writer->GetBssSize(), + oat_writer->GetBssRootsOffset()); if (IsImage()) { // Update oat layout. @@ -1977,11 +1976,11 @@ class Dex2Oat FINAL { } bool IsAppImage() const { - return app_image_; + return compiler_options_->IsAppImage(); } bool IsBootImage() const { - return boot_image_; + return compiler_options_->IsBootImage(); } bool IsHost() const { @@ -2575,8 +2574,6 @@ class Dex2Oat FINAL { std::unique_ptr<std::unordered_set<std::string>> compiled_classes_; std::unique_ptr<std::unordered_set<std::string>> compiled_methods_; std::unique_ptr<std::vector<std::string>> passes_to_run_; - bool app_image_; - bool boot_image_; bool multi_image_; bool is_host_; std::string android_root_; diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp index 296cdb6140..0987df76dc 100644 --- a/dexlayout/Android.bp +++ b/dexlayout/Android.bp @@ -20,6 +20,7 @@ art_cc_binary { "dexlayout.cc", "dex_ir.cc", "dex_ir_builder.cc", + "dex_visualize.cc", ], cflags: ["-Wall"], shared_libs: [ diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h index 6ae9f1c938..f3d2c900e3 100644 --- a/dexlayout/dex_ir.h +++ b/dexlayout/dex_ir.h @@ -217,6 +217,17 @@ class Collections { uint32_t MethodIdsSize() const { return method_ids_.Size(); } uint32_t ClassDefsSize() const { return class_defs_.Size(); } + uint32_t StringDatasSize() const { return string_datas_.Size(); } + uint32_t TypeListsSize() const { return type_lists_.Size(); } + uint32_t EncodedArraySize() const { return encoded_array_items_.Size(); } + uint32_t AnnotationSize() const { return annotation_items_.Size(); } + uint32_t AnnotationSetSize() const { return annotation_set_items_.Size(); } + uint32_t AnnotationSetRefListsSize() const { return annotation_set_ref_lists_.Size(); } + uint32_t AnnotationsDirectorySize() const { return annotations_directory_items_.Size(); } + uint32_t DebugInfoSize() const { return debug_info_items_.Size(); } + uint32_t CodeItemsSize() const { return code_items_.Size(); } + uint32_t ClassDatasSize() const { 
return class_datas_.Size(); } + private: EncodedValue* ReadEncodedValue(const uint8_t** data); EncodedValue* ReadEncodedValue(const uint8_t** data, uint8_t type, uint8_t length); diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc index e6868d74bc..599f48b3b6 100644 --- a/dexlayout/dex_ir_builder.cc +++ b/dexlayout/dex_ir_builder.cc @@ -24,6 +24,8 @@ namespace art { namespace dex_ir { +static void CheckAndSetRemainingOffsets(const DexFile& dex_file, Collections* collections); + Header* DexIrBuilder(const DexFile& dex_file) { const DexFile::Header& disk_header = dex_file.GetHeader(); Header* header = new Header(disk_header.magic_, @@ -69,8 +71,87 @@ Header* DexIrBuilder(const DexFile& dex_file) { collections.CreateClassDef(dex_file, i); } + CheckAndSetRemainingOffsets(dex_file, &collections); + return header; } +static void CheckAndSetRemainingOffsets(const DexFile& dex_file, Collections* collections) { + const DexFile::Header& disk_header = dex_file.GetHeader(); + // Read MapItems and validate/set remaining offsets. 
+ const DexFile::MapList* map = + reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + disk_header.map_off_); + const uint32_t count = map->size_; + for (uint32_t i = 0; i < count; ++i) { + const DexFile::MapItem* item = map->list_ + i; + switch (item->type_) { + case DexFile::kDexTypeHeaderItem: + CHECK_EQ(item->size_, 1u); + CHECK_EQ(item->offset_, 0u); + break; + case DexFile::kDexTypeStringIdItem: + CHECK_EQ(item->size_, collections->StringIdsSize()); + CHECK_EQ(item->offset_, collections->StringIdsOffset()); + break; + case DexFile::kDexTypeTypeIdItem: + CHECK_EQ(item->size_, collections->TypeIdsSize()); + CHECK_EQ(item->offset_, collections->TypeIdsOffset()); + break; + case DexFile::kDexTypeProtoIdItem: + CHECK_EQ(item->size_, collections->ProtoIdsSize()); + CHECK_EQ(item->offset_, collections->ProtoIdsOffset()); + break; + case DexFile::kDexTypeFieldIdItem: + CHECK_EQ(item->size_, collections->FieldIdsSize()); + CHECK_EQ(item->offset_, collections->FieldIdsOffset()); + break; + case DexFile::kDexTypeMethodIdItem: + CHECK_EQ(item->size_, collections->MethodIdsSize()); + CHECK_EQ(item->offset_, collections->MethodIdsOffset()); + break; + case DexFile::kDexTypeClassDefItem: + CHECK_EQ(item->size_, collections->ClassDefsSize()); + CHECK_EQ(item->offset_, collections->ClassDefsOffset()); + break; + case DexFile::kDexTypeMapList: + CHECK_EQ(item->size_, 1u); + CHECK_EQ(item->offset_, disk_header.map_off_); + break; + case DexFile::kDexTypeTypeList: + collections->SetTypeListsOffset(item->offset_); + break; + case DexFile::kDexTypeAnnotationSetRefList: + collections->SetAnnotationSetRefListsOffset(item->offset_); + break; + case DexFile::kDexTypeAnnotationSetItem: + collections->SetAnnotationSetOffset(item->offset_); + break; + case DexFile::kDexTypeClassDataItem: + collections->SetClassDatasOffset(item->offset_); + break; + case DexFile::kDexTypeCodeItem: + collections->SetCodeItemsOffset(item->offset_); + break; + case DexFile::kDexTypeStringDataItem: + 
collections->SetStringDatasOffset(item->offset_); + break; + case DexFile::kDexTypeDebugInfoItem: + collections->SetDebugInfoOffset(item->offset_); + break; + case DexFile::kDexTypeAnnotationItem: + collections->SetAnnotationOffset(item->offset_); + break; + case DexFile::kDexTypeEncodedArrayItem: + collections->SetEncodedArrayOffset(item->offset_); + break; + case DexFile::kDexTypeAnnotationsDirectoryItem: + collections->SetAnnotationsDirectoryOffset(item->offset_); + break; + default: + LOG(ERROR) << "Unknown map list item type."; + } + } +} + } // namespace dex_ir } // namespace art diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc new file mode 100644 index 0000000000..be7bade8ef --- /dev/null +++ b/dexlayout/dex_visualize.cc @@ -0,0 +1,377 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Implementation file of the dex layout visualization. + * + * This is a tool to read dex files into an internal representation, + * reorganize the representation, and emit dex files with a better + * file layout. 
+ */ + +#include "dex_visualize.h" + +#include <inttypes.h> +#include <stdio.h> + +#include <functional> +#include <memory> +#include <vector> + +#include "dex_ir.h" +#include "dexlayout.h" +#include "jit/offline_profiling_info.h" + +namespace art { + +struct FileSection { + public: + std::string name_; + uint16_t type_; + std::function<uint32_t(const dex_ir::Collections&)> size_fn_; + std::function<uint32_t(const dex_ir::Collections&)> offset_fn_; +}; + +static const std::vector<FileSection> kFileSections = { + { + "StringId", + DexFile::kDexTypeStringIdItem, + &dex_ir::Collections::StringIdsSize, + &dex_ir::Collections::StringIdsOffset + }, { + "TypeId", + DexFile::kDexTypeTypeIdItem, + &dex_ir::Collections::TypeIdsSize, + &dex_ir::Collections::TypeIdsOffset + }, { + "ProtoId", + DexFile::kDexTypeProtoIdItem, + &dex_ir::Collections::ProtoIdsSize, + &dex_ir::Collections::ProtoIdsOffset + }, { + "FieldId", + DexFile::kDexTypeFieldIdItem, + &dex_ir::Collections::FieldIdsSize, + &dex_ir::Collections::FieldIdsOffset + }, { + "MethodId", + DexFile::kDexTypeMethodIdItem, + &dex_ir::Collections::MethodIdsSize, + &dex_ir::Collections::MethodIdsOffset + }, { + "ClassDef", + DexFile::kDexTypeClassDefItem, + &dex_ir::Collections::ClassDefsSize, + &dex_ir::Collections::ClassDefsOffset + }, { + "StringData", + DexFile::kDexTypeStringDataItem, + &dex_ir::Collections::StringDatasSize, + &dex_ir::Collections::StringDatasOffset + }, { + "TypeList", + DexFile::kDexTypeTypeList, + &dex_ir::Collections::TypeListsSize, + &dex_ir::Collections::TypeListsOffset + }, { + "EncArr", + DexFile::kDexTypeEncodedArrayItem, + &dex_ir::Collections::EncodedArraySize, + &dex_ir::Collections::EncodedArrayOffset + }, { + "Annotation", + DexFile::kDexTypeAnnotationItem, + &dex_ir::Collections::AnnotationSize, + &dex_ir::Collections::AnnotationOffset + }, { + "AnnoSet", + DexFile::kDexTypeAnnotationSetItem, + &dex_ir::Collections::AnnotationSetSize, + &dex_ir::Collections::AnnotationSetOffset + }, { + 
"AnnoSetRL", + DexFile::kDexTypeAnnotationSetRefList, + &dex_ir::Collections::AnnotationSetRefListsSize, + &dex_ir::Collections::AnnotationSetRefListsOffset + }, { + "AnnoDir", + DexFile::kDexTypeAnnotationsDirectoryItem, + &dex_ir::Collections::AnnotationsDirectorySize, + &dex_ir::Collections::AnnotationsDirectoryOffset + }, { + "DebugInfo", + DexFile::kDexTypeDebugInfoItem, + &dex_ir::Collections::DebugInfoSize, + &dex_ir::Collections::DebugInfoOffset + }, { + "CodeItem", + DexFile::kDexTypeCodeItem, + &dex_ir::Collections::CodeItemsSize, + &dex_ir::Collections::CodeItemsOffset + }, { + "ClassData", + DexFile::kDexTypeClassDataItem, + &dex_ir::Collections::ClassDatasSize, + &dex_ir::Collections::ClassDatasOffset + } +}; + +class Dumper { + public: + // Colors are based on the type of the section in MapList. + Dumper(const dex_ir::Collections& collections, size_t dex_file_index) { + // Build the table that will map from offset to color + table_.emplace_back(DexFile::kDexTypeHeaderItem, 0u); + for (const FileSection& s : kFileSections) { + table_.emplace_back(s.type_, s.offset_fn_(collections)); + } + // Sort into descending order by offset. + std::sort(table_.begin(), + table_.end(), + [](const SectionColor& a, const SectionColor& b) { return a.offset_ > b.offset_; }); + // Open the file and emit the gnuplot prologue. 
+ std::string dex_file_name("classes"); + std::string out_file_base_name("layout"); + if (dex_file_index > 0) { + out_file_base_name += std::to_string(dex_file_index + 1); + dex_file_name += std::to_string(dex_file_index + 1); + } + dex_file_name += ".dex"; + std::string out_file_name(out_file_base_name + ".gnuplot"); + std::string png_file_name(out_file_base_name + ".png"); + out_file_ = fopen(out_file_name.c_str(), "w"); + fprintf(out_file_, "set terminal png size 1920,1080\n"); + fprintf(out_file_, "set output \"%s\"\n", png_file_name.c_str()); + fprintf(out_file_, "set title \"%s\"\n", dex_file_name.c_str()); + fprintf(out_file_, "set xlabel \"Page offset into dex\"\n"); + fprintf(out_file_, "set ylabel \"ClassDef index\"\n"); + fprintf(out_file_, "set xtics rotate out ("); + fprintf(out_file_, "\"Header\" %d, ", 0); + bool printed_one = false; + for (const FileSection& s : kFileSections) { + if (s.size_fn_(collections) > 0) { + if (printed_one) { + fprintf(out_file_, ", "); + } + fprintf(out_file_, "\"%s\" %d", s.name_.c_str(), s.offset_fn_(collections) / kPageSize); + printed_one = true; + } + } + fprintf(out_file_, ")\n"); + fprintf(out_file_, + "plot \"-\" using 1:2:3:4:5 with vector nohead linewidth 1 lc variable notitle\n"); + } + + int GetColor(uint32_t offset) const { + // The dread linear search to find the right section for the reference. + uint16_t section = 0; + for (uint16_t i = 0; i < table_.size(); ++i) { + if (table_[i].offset_ < offset) { + section = table_[i].type_; + break; + } + } + // And a lookup table from type to color. + ColorMapType::const_iterator iter = kColorMap.find(section); + if (iter != kColorMap.end()) { + return iter->second; + } + return 0; + } + + void DumpAddressRange(uint32_t from, uint32_t size, int class_index) { + const uint32_t low_page = from / kPageSize; + const uint32_t high_page = (size > 0) ? 
(from + size - 1) / kPageSize : low_page; + const uint32_t size_delta = high_page - low_page; + fprintf(out_file_, "%d %d %d 0 %d\n", low_page, class_index, size_delta, GetColor(from)); + } + + void DumpAddressRange(const dex_ir::Item* item, int class_index) { + if (item != nullptr) { + DumpAddressRange(item->GetOffset(), item->GetSize(), class_index); + } + } + + void DumpStringData(const dex_ir::StringData* string_data, int class_index) { + DumpAddressRange(string_data, class_index); + } + + void DumpStringId(const dex_ir::StringId* string_id, int class_index) { + DumpAddressRange(string_id, class_index); + if (string_id == nullptr) { + return; + } + DumpStringData(string_id->DataItem(), class_index); + } + + void DumpTypeId(const dex_ir::TypeId* type_id, int class_index) { + DumpAddressRange(type_id, class_index); + DumpStringId(type_id->GetStringId(), class_index); + } + + void DumpFieldId(const dex_ir::FieldId* field_id, int class_index) { + DumpAddressRange(field_id, class_index); + if (field_id == nullptr) { + return; + } + DumpTypeId(field_id->Class(), class_index); + DumpTypeId(field_id->Type(), class_index); + DumpStringId(field_id->Name(), class_index); + } + + void DumpFieldItem(const dex_ir::FieldItem* field, int class_index) { + DumpAddressRange(field, class_index); + if (field == nullptr) { + return; + } + DumpFieldId(field->GetFieldId(), class_index); + } + + void DumpProtoId(const dex_ir::ProtoId* proto_id, int class_index) { + DumpAddressRange(proto_id, class_index); + if (proto_id == nullptr) { + return; + } + DumpStringId(proto_id->Shorty(), class_index); + const dex_ir::TypeIdVector& parameters = proto_id->Parameters(); + for (const dex_ir::TypeId* t : parameters) { + DumpTypeId(t, class_index); + } + DumpTypeId(proto_id->ReturnType(), class_index); + } + + void DumpMethodId(const dex_ir::MethodId* method_id, int class_index) { + DumpAddressRange(method_id, class_index); + if (method_id == nullptr) { + return; + } + 
DumpTypeId(method_id->Class(), class_index); + DumpProtoId(method_id->Proto(), class_index); + DumpStringId(method_id->Name(), class_index); + } + + void DumpMethodItem(const dex_ir::MethodItem* method, const DexFile* dex_file, int class_index) { + if (profile_info_ != nullptr) { + uint32_t method_idx = method->GetMethodId()->GetIndex(); + MethodReference mr(dex_file, method_idx); + if (!profile_info_->ContainsMethod(mr)) { + return; + } + } + DumpAddressRange(method, class_index); + if (method == nullptr) { + return; + } + DumpMethodId(method->GetMethodId(), class_index); + const dex_ir::CodeItem* code_item = method->GetCodeItem(); + if (code_item != nullptr) { + DumpAddressRange(code_item, class_index); + } + } + + ~Dumper() { + fclose(out_file_); + } + + private: + struct SectionColor { + public: + SectionColor(uint16_t type, uint32_t offset) : type_(type), offset_(offset) { } + uint16_t type_; + uint32_t offset_; + }; + + using ColorMapType = std::map<uint16_t, int>; + const ColorMapType kColorMap = { + { DexFile::kDexTypeHeaderItem, 1 }, + { DexFile::kDexTypeStringIdItem, 2 }, + { DexFile::kDexTypeTypeIdItem, 3 }, + { DexFile::kDexTypeProtoIdItem, 4 }, + { DexFile::kDexTypeFieldIdItem, 5 }, + { DexFile::kDexTypeMethodIdItem, 6 }, + { DexFile::kDexTypeClassDefItem, 7 }, + { DexFile::kDexTypeTypeList, 8 }, + { DexFile::kDexTypeAnnotationSetRefList, 9 }, + { DexFile::kDexTypeAnnotationSetItem, 10 }, + { DexFile::kDexTypeClassDataItem, 11 }, + { DexFile::kDexTypeCodeItem, 12 }, + { DexFile::kDexTypeStringDataItem, 13 }, + { DexFile::kDexTypeDebugInfoItem, 14 }, + { DexFile::kDexTypeAnnotationItem, 15 }, + { DexFile::kDexTypeEncodedArrayItem, 16 }, + { DexFile::kDexTypeAnnotationsDirectoryItem, 16 } + }; + + std::vector<SectionColor> table_; + FILE* out_file_; + + DISALLOW_COPY_AND_ASSIGN(Dumper); +}; + +/* + * Dumps a gnuplot data file showing the parts of the dex_file that belong to each class. 
+ * If profiling information is present, it dumps only those classes that are marked as hot. + */ +void VisualizeDexLayout(dex_ir::Header* header, const DexFile* dex_file, size_t dex_file_index) { + std::unique_ptr<Dumper> dumper(new Dumper(header->GetCollections(), dex_file_index)); + + const uint32_t class_defs_size = header->GetCollections().ClassDefsSize(); + for (uint32_t class_index = 0; class_index < class_defs_size; class_index++) { + dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(class_index); + if (profile_info_ != nullptr && !profile_info_->ContainsClass(*dex_file, class_index)) { + continue; + } + dumper->DumpAddressRange(class_def, class_index); + // Type id. + dumper->DumpTypeId(class_def->ClassType(), class_index); + // Superclass type id. + dumper->DumpTypeId(class_def->Superclass(), class_index); + // Interfaces. + // TODO(jeffhao): get TypeList from class_def to use Item interface. + static constexpr uint32_t kInterfaceSizeKludge = 8; + dumper->DumpAddressRange(class_def->InterfacesOffset(), kInterfaceSizeKludge, class_index); + // Source file info. + dumper->DumpStringId(class_def->SourceFile(), class_index); + // Annotations. + dumper->DumpAddressRange(class_def->Annotations(), class_index); + // TODO(sehr): walk the annotations and dump them. + // Class data. 
+ dex_ir::ClassData* class_data = class_def->GetClassData(); + if (class_data != nullptr) { + dumper->DumpAddressRange(class_data, class_index); + if (class_data->StaticFields()) { + for (auto& field_item : *class_data->StaticFields()) { + dumper->DumpFieldItem(field_item.get(), class_index); + } + } + if (class_data->InstanceFields()) { + for (auto& field_item : *class_data->InstanceFields()) { + dumper->DumpFieldItem(field_item.get(), class_index); + } + } + if (class_data->DirectMethods()) { + for (auto& method_item : *class_data->DirectMethods()) { + dumper->DumpMethodItem(method_item.get(), dex_file, class_index); + } + } + if (class_data->VirtualMethods()) { + for (auto& method_item : *class_data->VirtualMethods()) { + dumper->DumpMethodItem(method_item.get(), dex_file, class_index); + } + } + } + } // for +} + +} // namespace art diff --git a/dexlayout/dex_visualize.h b/dexlayout/dex_visualize.h new file mode 100644 index 0000000000..b1d2ed79aa --- /dev/null +++ b/dexlayout/dex_visualize.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Header file of the dexlayout utility. + * + * This is a tool to read dex files into an internal representation, + * reorganize the representation, and emit dex files with a better + * file layout. 
+ */ + +#ifndef ART_DEXLAYOUT_DEX_VISUALIZE_H_ +#define ART_DEXLAYOUT_DEX_VISUALIZE_H_ + +#include <stddef.h> + +namespace art { + +class DexFile; +namespace dex_ir { +class Header; +} // namespace dex_ir + +void VisualizeDexLayout(dex_ir::Header* header, const DexFile* dex_file, size_t dex_file_index); + +} // namespace art + +#endif // ART_DEXLAYOUT_DEX_VISUALIZE_H_ diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc index 6f34a33ed7..e6141372a6 100644 --- a/dexlayout/dexlayout.cc +++ b/dexlayout/dexlayout.cc @@ -34,6 +34,8 @@ #include "dex_ir_builder.h" #include "dex_file-inl.h" #include "dex_instruction-inl.h" +#include "dex_visualize.h" +#include "jit/offline_profiling_info.h" #include "os.h" #include "utils.h" @@ -50,6 +52,11 @@ struct Options options_; FILE* out_file_ = stdout; /* + * Profile information file. + */ +ProfileCompilationInfo* profile_info_ = nullptr; + +/* * Flags for use with createAccessFlagStr(). */ enum AccessFor { @@ -1587,13 +1594,18 @@ static void OutputDexFile(dex_ir::Header& header, const char* file_name) { /* * Dumps the requested sections of the file. */ -static void ProcessDexFile(const char* file_name, const DexFile* dex_file) { +static void ProcessDexFile(const char* file_name, const DexFile* dex_file, size_t dex_file_index) { if (options_.verbose_) { fprintf(out_file_, "Opened '%s', DEX version '%.3s'\n", file_name, dex_file->GetHeader().magic_ + 4); } std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file)); + if (options_.visualize_pattern_) { + VisualizeDexLayout(header.get(), dex_file, dex_file_index); + return; + } + // Headers. 
if (options_.show_file_headers_) { DumpFileHeader(header.get()); @@ -1658,7 +1670,7 @@ int ProcessFile(const char* file_name) { fprintf(out_file_, "Checksum verified\n"); } else { for (size_t i = 0; i < dex_files.size(); i++) { - ProcessDexFile(file_name, dex_files[i].get()); + ProcessDexFile(file_name, dex_files[i].get(), i); } } return 0; diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h index 736d230a99..c4892d278b 100644 --- a/dexlayout/dexlayout.h +++ b/dexlayout/dexlayout.h @@ -28,6 +28,8 @@ namespace art { +class ProfileCompilationInfo; + /* Supported output formats. */ enum OutputFormat { kOutputPlain = 0, // default @@ -47,13 +49,16 @@ struct Options { bool show_file_headers_; bool show_section_headers_; bool verbose_; + bool visualize_pattern_; OutputFormat output_format_; const char* output_file_name_; + const char* profile_file_name_; }; /* Prototypes. */ extern struct Options options_; extern FILE* out_file_; +extern ProfileCompilationInfo* profile_info_; int ProcessFile(const char* file_name); } // namespace art diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc index ec5edf4065..f385b09119 100644 --- a/dexlayout/dexlayout_main.cc +++ b/dexlayout/dexlayout_main.cc @@ -25,8 +25,12 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> #include "base/logging.h" +#include "jit/offline_profiling_info.h" #include "mem_map.h" namespace art { @@ -37,9 +41,9 @@ static const char* kProgramName = "dexlayout"; * Shows usage. 
*/ static void Usage(void) { - fprintf(stderr, "Copyright (C) 2007 The Android Open Source Project\n\n"); - fprintf(stderr, "%s: [-a] [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile] [-w]" - " dexfile...\n\n", kProgramName); + fprintf(stderr, "Copyright (C) 2016 The Android Open Source Project\n\n"); + fprintf(stderr, "%s: [-a] [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile] [-p profile]" + " [-s] [-w] dexfile...\n\n", kProgramName); fprintf(stderr, " -a : display annotations\n"); fprintf(stderr, " -b : build dex_ir\n"); fprintf(stderr, " -c : verify checksum and exit\n"); @@ -51,6 +55,8 @@ static void Usage(void) { fprintf(stderr, " -i : ignore checksum failures\n"); fprintf(stderr, " -l : output layout, either 'plain' or 'xml'\n"); fprintf(stderr, " -o : output file name (defaults to stdout)\n"); + fprintf(stderr, " -p : profile file name (defaults to no profile)\n"); + fprintf(stderr, " -s : visualize reference pattern\n"); fprintf(stderr, " -w : output dex files\n"); } @@ -69,7 +75,7 @@ int DexlayoutDriver(int argc, char** argv) { // Parse all arguments. while (1) { - const int ic = getopt(argc, argv, "abcdefghil:o:w"); + const int ic = getopt(argc, argv, "abcdefghil:o:p:sw"); if (ic < 0) { break; // done } @@ -114,6 +120,13 @@ int DexlayoutDriver(int argc, char** argv) { case 'o': // output file options_.output_file_name_ = optarg; break; + case 'p': // profile file + options_.profile_file_name_ = optarg; + break; + case 's': // visualize access pattern + options_.visualize_pattern_ = true; + options_.verbose_ = false; + break; case 'w': // output dex files options_.output_dex_files_ = true; break; @@ -146,6 +159,20 @@ int DexlayoutDriver(int argc, char** argv) { } } + // Open profile file. 
+ if (options_.profile_file_name_) { + int profile_fd = open(options_.profile_file_name_, O_RDONLY); + if (profile_fd < 0) { + fprintf(stderr, "Can't open %s\n", options_.profile_file_name_); + return 1; + } + profile_info_ = new ProfileCompilationInfo(); + if (!profile_info_->Load(profile_fd)) { + fprintf(stderr, "Can't read profile info from %s\n", options_.profile_file_name_); + return 1; + } + } + // Process all files supplied on command line. int result = 0; while (optind < argc) { diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index a5dc5930af..d6006b2424 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -155,8 +155,11 @@ class OatSymbolizer FINAL { if (isa == kMips || isa == kMips64) { builder_->WriteMIPSabiflagsSection(); } - builder_->PrepareDynamicSection( - elf_file->GetPath(), rodata_size, text_size, oat_file_->BssSize()); + builder_->PrepareDynamicSection(elf_file->GetPath(), + rodata_size, + text_size, + oat_file_->BssSize(), + oat_file_->BssRootsOffset()); builder_->WriteDynamicSection(); Walk(); diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc index d09e66f3c4..08c036e60e 100644 --- a/runtime/base/logging.cc +++ b/runtime/base/logging.cc @@ -64,7 +64,7 @@ static void RuntimeAborter(const char* abort_message) { #else UNUSED(abort_message); #endif - Runtime::Abort(); + Runtime::Abort(abort_message); } void InitLogging(char* argv[]) { diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h index d52365df6d..3e54a647dc 100644 --- a/runtime/class_table-inl.h +++ b/runtime/class_table-inl.h @@ -18,6 +18,7 @@ #define ART_RUNTIME_CLASS_TABLE_INL_H_ #include "class_table.h" +#include "oat_file.h" namespace art { @@ -32,6 +33,11 @@ void ClassTable::VisitRoots(Visitor& visitor) { for (GcRoot<mirror::Object>& root : strong_roots_) { visitor.VisitRoot(root.AddressWithoutBarrier()); } + for (const OatFile* oat_file : oat_files_) { + for (GcRoot<mirror::Object>& root : oat_file->GetBssGcRoots()) { + 
visitor.VisitRootIfNonNull(root.AddressWithoutBarrier()); + } + } } template<class Visitor> @@ -45,6 +51,11 @@ void ClassTable::VisitRoots(const Visitor& visitor) { for (GcRoot<mirror::Object>& root : strong_roots_) { visitor.VisitRoot(root.AddressWithoutBarrier()); } + for (const OatFile* oat_file : oat_files_) { + for (GcRoot<mirror::Object>& root : oat_file->GetBssGcRoots()) { + visitor.VisitRootIfNonNull(root.AddressWithoutBarrier()); + } + } } template <typename Visitor> diff --git a/runtime/class_table.cc b/runtime/class_table.cc index 0600876122..2ae7e8cc30 100644 --- a/runtime/class_table.cc +++ b/runtime/class_table.cc @@ -165,6 +165,16 @@ bool ClassTable::InsertStrongRoot(mirror::Object* obj) { } } strong_roots_.push_back(GcRoot<mirror::Object>(obj)); + // If `obj` is a dex cache associated with a new oat file with GC roots, add it to oat_files_. + if (obj->IsDexCache()) { + const DexFile* dex_file = down_cast<mirror::DexCache*>(obj)->GetDexFile(); + if (dex_file != nullptr && dex_file->GetOatDexFile() != nullptr) { + const OatFile* oat_file = dex_file->GetOatDexFile()->GetOatFile(); + if (!oat_file->GetBssGcRoots().empty() && !ContainsElement(oat_files_, oat_file)) { + oat_files_.push_back(oat_file); + } + } + } return true; } @@ -201,6 +211,7 @@ void ClassTable::AddClassSet(ClassSet&& set) { void ClassTable::ClearStrongRoots() { WriterMutexLock mu(Thread::Current(), lock_); + oat_files_.clear(); strong_roots_.clear(); } } // namespace art diff --git a/runtime/class_table.h b/runtime/class_table.h index 8c91806ae2..acb15c7879 100644 --- a/runtime/class_table.h +++ b/runtime/class_table.h @@ -32,6 +32,8 @@ namespace art { +class OatFile; + namespace mirror { class ClassLoader; } // namespace mirror @@ -173,6 +175,8 @@ class ClassTable { // loader which may not be owned by the class loader must be held strongly live. Also dex caches // are held live to prevent them being unloading once they have classes in them. 
std::vector<GcRoot<mirror::Object>> strong_roots_ GUARDED_BY(lock_); + // Keep track of oat files with GC roots associated with dex caches in `strong_roots_`. + std::vector<const OatFile*> oat_files_ GUARDED_BY(lock_); friend class ImageWriter; // for InsertWithoutLocks. }; diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc index 2cd0331a2c..4311d19216 100644 --- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc @@ -18,10 +18,15 @@ #include "callee_save_frame.h" #include "entrypoints/entrypoint_utils-inl.h" #include "class_linker-inl.h" +#include "class_table-inl.h" #include "dex_file-inl.h" -#include "gc/accounting/card_table-inl.h" +#include "gc/heap.h" +#include "mirror/class-inl.h" +#include "mirror/class_loader.h" #include "mirror/object_array-inl.h" #include "mirror/object-inl.h" +#include "oat_file.h" +#include "runtime.h" namespace art { @@ -56,7 +61,20 @@ extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly); - return ResolveStringFromCode(caller, string_idx); + mirror::String* result = ResolveStringFromCode(caller, string_idx); + if (LIKELY(result != nullptr)) { + // For AOT code, we need a write barrier for the dex cache that holds the GC roots in the .bss. + const DexFile* dex_file = caller->GetDexFile(); + if (dex_file != nullptr && + dex_file->GetOatDexFile() != nullptr && + !dex_file->GetOatDexFile()->GetOatFile()->GetBssGcRoots().empty()) { + mirror::ClassLoader* class_loader = caller->GetDeclaringClass()->GetClassLoader(); + // Note that we emit the barrier before the compiled code stores the string as GC root. + // This is OK as there is no suspend point point in between. 
+ Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader); + } + } + return result; } } // namespace art diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h index f70503d62a..5cc1de209d 100644 --- a/runtime/indirect_reference_table-inl.h +++ b/runtime/indirect_reference_table-inl.h @@ -36,21 +36,27 @@ inline bool IndirectReferenceTable::GetChecked(IndirectRef iref) const { return false; } if (UNLIKELY(GetIndirectRefKind(iref) == kHandleScopeOrInvalid)) { - LOG(ERROR) << "JNI ERROR (app bug): invalid " << kind_ << " " << iref; - AbortIfNoCheckJNI(); + AbortIfNoCheckJNI(StringPrintf("JNI ERROR (app bug): invalid %s %p", + GetIndirectRefKindString(kind_), + iref)); return false; } const int topIndex = segment_state_.parts.topIndex; int idx = ExtractIndex(iref); if (UNLIKELY(idx >= topIndex)) { - LOG(ERROR) << "JNI ERROR (app bug): accessed stale " << kind_ << " " - << iref << " (index " << idx << " in a table of size " << topIndex << ")"; - AbortIfNoCheckJNI(); + std::string msg = StringPrintf( + "JNI ERROR (app bug): accessed stale %s %p (index %d in a table of size %d)", + GetIndirectRefKindString(kind_), + iref, + idx, + topIndex); + AbortIfNoCheckJNI(msg); return false; } if (UNLIKELY(table_[idx].GetReference()->IsNull())) { - LOG(ERROR) << "JNI ERROR (app bug): accessed deleted " << kind_ << " " << iref; - AbortIfNoCheckJNI(); + AbortIfNoCheckJNI(StringPrintf("JNI ERROR (app bug): accessed deleted %s %p", + GetIndirectRefKindString(kind_), + iref)); return false; } if (UNLIKELY(!CheckEntry("use", iref, idx))) { @@ -63,10 +69,13 @@ inline bool IndirectReferenceTable::GetChecked(IndirectRef iref) const { inline bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const { IndirectRef checkRef = ToIndirectRef(idx); if (UNLIKELY(checkRef != iref)) { - LOG(ERROR) << "JNI ERROR (app bug): attempt to " << what - << " stale " << kind_ << " " << iref - << " (should be " << checkRef << 
")"; - AbortIfNoCheckJNI(); + std::string msg = StringPrintf( + "JNI ERROR (app bug): attempt to %s stale %s %p (should be %p)", + what, + GetIndirectRefKindString(kind_), + iref, + checkRef); + AbortIfNoCheckJNI(msg); return false; } return true; diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc index 202e472685..b742ccc4df 100644 --- a/runtime/indirect_reference_table.cc +++ b/runtime/indirect_reference_table.cc @@ -32,6 +32,20 @@ namespace art { static constexpr bool kDumpStackOnNonLocalReference = false; +const char* GetIndirectRefKindString(const IndirectRefKind& kind) { + switch (kind) { + case kHandleScopeOrInvalid: + return "HandleScopeOrInvalid"; + case kLocal: + return "Local"; + case kGlobal: + return "Global"; + case kWeakGlobal: + return "WeakGlobal"; + } + return "IndirectRefKind Error"; +} + template<typename T> class MutatorLockedDumpable { public: @@ -58,12 +72,14 @@ std::ostream& operator<<(std::ostream& os, const MutatorLockedDumpable<T>& rhs) return os; } -void IndirectReferenceTable::AbortIfNoCheckJNI() { +void IndirectReferenceTable::AbortIfNoCheckJNI(const std::string& msg) { // If -Xcheck:jni is on, it'll give a more detailed error before aborting. JavaVMExt* vm = Runtime::Current()->GetJavaVM(); if (!vm->IsCheckJniEnabled()) { // Otherwise, we want to abort rather than hand back a bad reference. - LOG(FATAL) << "JNI ERROR (app bug): see above."; + LOG(FATAL) << msg; + } else { + LOG(ERROR) << msg; } } diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h index 13c622583e..e194f792b5 100644 --- a/runtime/indirect_reference_table.h +++ b/runtime/indirect_reference_table.h @@ -119,6 +119,7 @@ enum IndirectRefKind { kWeakGlobal = 3 // <<weak global reference>> }; std::ostream& operator<<(std::ostream& os, const IndirectRefKind& rhs); +const char* GetIndirectRefKindString(const IndirectRefKind& kind); /* * Determine what kind of indirect reference this is. 
@@ -372,8 +373,8 @@ class IndirectReferenceTable { return reinterpret_cast<IndirectRef>(uref); } - // Abort if check_jni is not enabled. - static void AbortIfNoCheckJNI(); + // Abort if check_jni is not enabled. Otherwise, just log as an error. + static void AbortIfNoCheckJNI(const std::string& msg); /* extra debugging checks */ bool GetChecked(IndirectRef) const; diff --git a/runtime/monitor.cc b/runtime/monitor.cc index 3bc1b06741..debbdd508d 100644 --- a/runtime/monitor.cc +++ b/runtime/monitor.cc @@ -1123,6 +1123,13 @@ void Monitor::DescribeWait(std::ostream& os, const Thread* thread) { wait_message = " - waiting to lock "; pretty_object = thread->GetMonitorEnterObject(); if (pretty_object != nullptr) { + if (kUseReadBarrier && Thread::Current()->GetIsGcMarking()) { + // We may call Thread::Dump() in the middle of the CC thread flip and this thread's stack + // may have not been flipped yet and "pretty_object" may be a from-space (stale) ref, in + // which case the GetLockOwnerThreadId() call below will crash. So explicitly mark/forward + // it here. + pretty_object = ReadBarrier::Mark(pretty_object); + } lock_owner = pretty_object->GetLockOwnerThreadId(); } } diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index ea692cdaae..5f37b82a98 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -40,6 +40,7 @@ #include "base/unix_file/fd_file.h" #include "elf_file.h" #include "elf_utils.h" +#include "gc_root.h" #include "oat.h" #include "mem_map.h" #include "mirror/class.h" @@ -239,6 +240,8 @@ bool OatFileBase::ComputeFields(uint8_t* requested_base, } // Readjust to be non-inclusive upper bound. bss_end_ += sizeof(uint32_t); + // Find bss roots if present. 
+ bss_roots_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbssroots", &symbol_error_msg)); } return true; @@ -291,8 +294,31 @@ bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) { return false; } + if (!IsAligned<alignof(GcRoot<mirror::Object>)>(bss_begin_) || + !IsAligned<alignof(GcRoot<mirror::Object>)>(bss_roots_) || + !IsAligned<alignof(GcRoot<mirror::Object>)>(bss_end_)) { + *error_msg = StringPrintf("In oat file '%s' found unaligned bss symbol(s): " + "begin = %p, roots = %p, end = %p", + GetLocation().c_str(), + bss_begin_, + bss_roots_, + bss_end_); + return false; + } + + if (bss_roots_ != nullptr && (bss_roots_ < bss_begin_ || bss_roots_ > bss_end_)) { + *error_msg = StringPrintf("In oat file '%s' found bss roots outside .bss: " + "%p is outside range [%p, %p]", + GetLocation().c_str(), + bss_roots_, + bss_begin_, + bss_end_); + return false; + } + PointerSize pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet()); uint8_t* dex_cache_arrays = bss_begin_; + uint8_t* dex_cache_arrays_end = (bss_roots_ != nullptr) ? 
bss_roots_ : bss_end_; uint32_t dex_file_count = GetOatHeader().GetDexFileCount(); oat_dex_files_storage_.reserve(dex_file_count); for (size_t i = 0; i < dex_file_count; i++) { @@ -469,13 +495,13 @@ bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) { if (dex_cache_arrays != nullptr) { DexCacheArraysLayout layout(pointer_size, *header); if (layout.Size() != 0u) { - if (static_cast<size_t>(bss_end_ - dex_cache_arrays) < layout.Size()) { + if (static_cast<size_t>(dex_cache_arrays_end - dex_cache_arrays) < layout.Size()) { *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with " "truncated dex cache arrays, %zu < %zu.", GetLocation().c_str(), i, dex_file_location.c_str(), - static_cast<size_t>(bss_end_ - dex_cache_arrays), + static_cast<size_t>(dex_cache_arrays_end - dex_cache_arrays), layout.Size()); return false; } @@ -506,9 +532,9 @@ bool OatFileBase::Setup(const char* abs_dex_location, std::string* error_msg) { } } - if (dex_cache_arrays != bss_end_) { + if (dex_cache_arrays != dex_cache_arrays_end) { // We expect the bss section to be either empty (dex_cache_arrays and bss_end_ - // both null) or contain just the dex cache arrays and nothing else. + // both null) or contain just the dex cache arrays and optionally some GC roots. *error_msg = StringPrintf("In oat file '%s' found unexpected bss size bigger by %zu bytes.", GetLocation().c_str(), static_cast<size_t>(bss_end_ - dex_cache_arrays)); @@ -1082,6 +1108,7 @@ OatFile::OatFile(const std::string& location, bool is_executable) end_(nullptr), bss_begin_(nullptr), bss_end_(nullptr), + bss_roots_(nullptr), is_executable_(is_executable), secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) { CHECK(!location_.empty()); @@ -1121,6 +1148,16 @@ const uint8_t* OatFile::DexEnd() const { return kIsVdexEnabled ? 
vdex_->End() : End(); } +ArrayRef<GcRoot<mirror::Object>> OatFile::GetBssGcRoots() const { + if (bss_roots_ != nullptr) { + auto* roots = reinterpret_cast<GcRoot<mirror::Object>*>(bss_roots_); + auto* roots_end = reinterpret_cast<GcRoot<mirror::Object>*>(bss_end_); + return ArrayRef<GcRoot<mirror::Object>>(roots, roots_end - roots); + } else { + return ArrayRef<GcRoot<mirror::Object>>(); + } +} + const OatFile::OatDexFile* OatFile::GetOatDexFile(const char* dex_location, const uint32_t* dex_location_checksum, std::string* error_msg) const { diff --git a/runtime/oat_file.h b/runtime/oat_file.h index a61b941862..c3188cbb09 100644 --- a/runtime/oat_file.h +++ b/runtime/oat_file.h @@ -21,6 +21,7 @@ #include <string> #include <vector> +#include "base/array_ref.h" #include "base/mutex.h" #include "base/stringpiece.h" #include "dex_file.h" @@ -38,6 +39,7 @@ namespace art { class BitVector; class ElfFile; +template <class MirrorType> class GcRoot; class MemMap; class OatMethodOffsets; class OatHeader; @@ -253,6 +255,10 @@ class OatFile { return BssEnd() - BssBegin(); } + size_t BssRootsOffset() const { + return bss_roots_ - BssBegin(); + } + size_t DexSize() const { return DexEnd() - DexBegin(); } @@ -266,6 +272,8 @@ class OatFile { const uint8_t* DexBegin() const; const uint8_t* DexEnd() const; + ArrayRef<GcRoot<mirror::Object>> GetBssGcRoots() const; + // Returns the absolute dex location for the encoded relative dex location. // // If not null, abs_dex_location is used to resolve the absolute dex @@ -314,6 +322,9 @@ class OatFile { // Pointer to the end of the .bss section, if present, otherwise null. uint8_t* bss_end_; + // Pointer to the beginning of the GC roots in .bss section, if present, otherwise null. + uint8_t* bss_roots_; + // Was this oat_file loaded executable? 
const bool is_executable_; diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 7032565dab..d5f592078b 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -422,7 +422,7 @@ struct AbortState { } }; -void Runtime::Abort() { +void Runtime::Abort(const char* msg) { gAborting++; // set before taking any locks // Ensure that we don't have multiple threads trying to abort at once, @@ -437,6 +437,12 @@ void Runtime::Abort() { AbortState state; LOG(FATAL_WITHOUT_ABORT) << Dumpable<AbortState>(state); + // Sometimes we dump long messages, and the Android abort message only retains the first line. + // In those cases, just log the message again, to avoid logcat limits. + if (msg != nullptr && strchr(msg, '\n') != nullptr) { + LOG(FATAL_WITHOUT_ABORT) << msg; + } + // Call the abort hook if we have one. if (Runtime::Current() != nullptr && Runtime::Current()->abort_ != nullptr) { LOG(FATAL_WITHOUT_ABORT) << "Calling abort hook..."; diff --git a/runtime/runtime.h b/runtime/runtime.h index 30f1b4a431..84c6b6f247 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -225,7 +225,7 @@ class Runtime { // Aborts semi-cleanly. Used in the implementation of LOG(FATAL), which most // callers should prefer. - NO_RETURN static void Abort() REQUIRES(!Locks::abort_lock_); + NO_RETURN static void Abort(const char* msg) REQUIRES(!Locks::abort_lock_); // Returns the "main" ThreadGroup, used when attaching user threads. jobject GetMainThreadGroup() const; diff --git a/test/482-checker-loop-back-edge-use/src/Main.java b/test/482-checker-loop-back-edge-use/src/Main.java index f8f0aa3f0a..65dfd411fd 100644 --- a/test/482-checker-loop-back-edge-use/src/Main.java +++ b/test/482-checker-loop-back-edge-use/src/Main.java @@ -115,7 +115,9 @@ public class Main { // 'incoming' must have a use only at the first loop's back edge. 
for (long i = System.nanoTime(); i < 42; ++i) { System.out.println(incoming); - for (long j = System.currentTimeMillis(); j != 42; ++j) {} + for (long j = System.currentTimeMillis(); j != 42; ++j) { + System.out.print(j); // non-empty body + } } } diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java index 2232ff43d2..3c053cf5ea 100644 --- a/test/552-checker-sharpening/src/Main.java +++ b/test/552-checker-sharpening/src/Main.java @@ -285,31 +285,27 @@ public class Main { /// CHECK: LoadString load_kind:DexCacheViaMethod /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) - /// CHECK: LoadString load_kind:DexCachePcRelative + /// CHECK: LoadString load_kind:BssEntry /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_x86 (after) /// CHECK-DAG: X86ComputeBaseMethodAddress - /// CHECK-DAG: LoadString load_kind:DexCachePcRelative + /// CHECK-DAG: LoadString load_kind:BssEntry /// CHECK-START-X86_64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) - /// CHECK: LoadString load_kind:DexCachePcRelative + /// CHECK: LoadString load_kind:BssEntry /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) - /// CHECK: LoadString load_kind:DexCachePcRelative - - /// CHECK-START-ARM: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_arm (after) - /// CHECK-DAG: ArmDexCacheArraysBase - /// CHECK-DAG: LoadString load_kind:DexCachePcRelative + /// CHECK: LoadString load_kind:BssEntry /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) - /// CHECK: LoadString load_kind:DexCachePcRelative + /// CHECK: LoadString load_kind:BssEntry /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) - /// CHECK: LoadString load_kind:DexCachePcRelative + /// CHECK: LoadString 
load_kind:BssEntry - /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_mips (after) - /// CHECK-DAG: MipsDexCacheArraysBase - /// CHECK-DAG: LoadString load_kind:DexCachePcRelative + /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_mips (after) + /// CHECK-DAG: MipsComputeBaseMethodAddress + /// CHECK-DAG: LoadString load_kind:BssEntry public static String $noinline$getNonBootImageString() { // Prevent inlining to avoid the string comparison being optimized away. diff --git a/test/618-checker-induction/expected.txt b/test/618-checker-induction/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/618-checker-induction/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/618-checker-induction/info.txt b/test/618-checker-induction/info.txt new file mode 100644 index 0000000000..0c5ea55dde --- /dev/null +++ b/test/618-checker-induction/info.txt @@ -0,0 +1 @@ +Test on loop optimizations on induction. diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java new file mode 100644 index 0000000000..a68c383c0a --- /dev/null +++ b/test/618-checker-induction/src/Main.java @@ -0,0 +1,422 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests on loop optimizations related to induction. 
+ */ +public class Main { + + static int[] a = new int[10]; + + /// CHECK-START: void Main.deadSingleLoop() loop_optimization (before) + /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none + // + /// CHECK-START: void Main.deadSingleLoop() loop_optimization (after) + /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none + static void deadSingleLoop() { + for (int i = 0; i < 4; i++) { + } + } + + /// CHECK-START: void Main.deadNestedLoops() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:<<Loop>> + // + /// CHECK-START: void Main.deadNestedLoops() loop_optimization (after) + /// CHECK-NOT: Phi loop:{{B\d+}} + static void deadNestedLoops() { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + } + } + } + + /// CHECK-START: void Main.deadNestedAndFollowingLoops() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>> + /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:<<Loop2>> + /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:<<Loop2>> + /// CHECK-DAG: Phi loop:<<Loop3:B\d+>> outer_loop:<<Loop1>> + /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:<<Loop3>> + /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none + // + /// CHECK-START: void Main.deadNestedAndFollowingLoops() loop_optimization (after) + /// CHECK-NOT: Phi loop:{{B\d+}} + static void deadNestedAndFollowingLoops() { + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + for (int k = 0; k < 4; k++) { + } + for (int k = 0; k < 4; k++) { + } + } + for (int j = 0; j < 4; j++) { + for (int k = 0; k < 4; k++) { + } + } + } + for (int i = 0; i < 4; i++) { + } + } + + /// CHECK-START: void Main.deadInduction() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.deadInduction() 
loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-NOT: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + static void deadInduction() { + int dead = 0; + for (int i = 0; i < a.length; i++) { + a[i] = 1; + dead += 5; + } + } + + /// CHECK-START: void Main.deadManyInduction() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.deadManyInduction() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-NOT: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + static void deadManyInduction() { + int dead1 = 0, dead2 = 1, dead3 = 3; + for (int i = 0; i < a.length; i++) { + dead1 += 5; + a[i] = 2; + dead2 += 10; + dead3 += 100; + } + } + + /// CHECK-START: void Main.deadSequence() loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.deadSequence() loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-NOT: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + static void deadSequence() { + int dead = 0; + for (int i = 0; i < a.length; i++) { + a[i] = 3; + // Increment value defined inside loop, + // but sequence itself not used anywhere. 
+ dead += i; + } + } + + /// CHECK-START: void Main.deadCycleWithException(int) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.deadCycleWithException(int) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-NOT: Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + static void deadCycleWithException(int k) { + int dead = 0; + for (int i = 0; i < a.length; i++) { + a[i] = 4; + // Increment value of dead cycle may throw exception. + dead += a[k]; + } + } + + /// CHECK-START: int Main.closedFormInductionUp() loop_optimization (before) + /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Return [<<Phi1>>] loop:none + // + /// CHECK-START: int Main.closedFormInductionUp() loop_optimization (after) + /// CHECK-NOT: Phi loop:B\d+ outer_loop:none + /// CHECK-DAG: Return loop:none + static int closedFormInductionUp() { + int closed = 12345; + for (int i = 0; i < 10; i++) { + closed += 5; + } + return closed; // only needs last value + } + + /// CHECK-START: int Main.closedFormInductionInAndDown(int) loop_optimization (before) + /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Return [<<Phi2>>] loop:none + // + /// CHECK-START: int Main.closedFormInductionInAndDown(int) loop_optimization (after) + /// CHECK-NOT: Phi loop:B\d+ outer_loop:none + /// CHECK-DAG: Return loop:none + static int closedFormInductionInAndDown(int closed) { + for (int i = 0; i < 10; i++) { + closed -= 5; + } + return closed; // only needs last 
value + } + + // TODO: taken test around closed form? + static int closedFormInductionUpN(int n) { + int closed = 12345; + for (int i = 0; i < n; i++) { + closed += 5; + } + return closed; // only needs last value + } + + // TODO: taken test around closed form? + static int closedFormInductionInAndDownN(int closed, int n) { + for (int i = 0; i < n; i++) { + closed -= 5; + } + return closed; // only needs last value + } + + // TODO: move closed form even further out? + static int closedFormNested(int n) { + int closed = 0; + for (int i = 0; i < n; i++) { + for (int j = 0; j < 10; j++) { + closed++; + } + } + return closed; // only needs last-value + } + + // TODO: handle as closed/empty eventually? + static int mainIndexReturned(int n) { + int i; + for (i = 0; i < n; i++); + return i; + } + + // If ever replaced by closed form, last value should be correct! + static int periodicReturned(int n) { + int k = 0; + for (int i = 0; i < n; i++) { + k = 1 - k; + } + return k; + } + + // Same here. + private static int getSum(int n) { + int k = 0; + int sum = 0; + for (int i = 0; i < n; i++) { + k++; + sum += k; + } + return sum; + } + + // Same here. + private static int getSum21() { + int k = 0; + int sum = 0; + for (int i = 0; i < 6; i++) { + k++; + sum += k; + } + return sum; + } + + // Same here. + private static int closedTwice() { + int closed = 0; + for (int i = 0; i < 10; i++) { + closed++; + } + // Closed form of first loop defines trip count of second loop. 
+ int other_closed = 0; + for (int i = 0; i < closed; i++) { + other_closed++; + } + return other_closed; + } + + /// CHECK-START: int Main.closedFeed() loop_optimization (before) + /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi4:i\d+>> Phi loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: Return [<<Phi3>>] loop:none + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + // + /// CHECK-START: int Main.closedFeed() loop_optimization (after) + /// CHECK-NOT: Phi loop:B\d+ outer_loop:none + /// CHECK-DAG: Return loop:none + private static int closedFeed() { + int closed = 0; + for (int i = 0; i < 10; i++) { + closed++; + } + // Closed form of first loop feeds into initial value of second loop, + // used when generating closed form for the latter. + for (int i = 0; i < 10; i++) { + closed++; + } + return closed; + } + + /// CHECK-START: int Main.closedLargeUp() loop_optimization (before) + /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Return [<<Phi1>>] loop:none + // + /// CHECK-START: int Main.closedLargeUp() loop_optimization (after) + /// CHECK-NOT: Phi loop:B\d+ outer_loop:none + /// CHECK-DAG: Return loop:none + private static int closedLargeUp() { + int closed = 0; + for (int i = 0; i < 10; i++) { + closed += 0x7fffffff; + } + return closed; + } + + /// CHECK-START: int Main.closedLargeDown() loop_optimization (before) + /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Return [<<Phi1>>] loop:none + // + /// CHECK-START: int Main.closedLargeDown() loop_optimization (after) + /// CHECK-NOT: Phi loop:B\d+ outer_loop:none + /// CHECK-DAG: Return loop:none + private static int closedLargeDown() { + 
int closed = 0; + for (int i = 0; i < 10; i++) { + closed -= 0x7fffffff; + } + return closed; + } + + private static int exceptionExitBeforeAdd() { + int k = 0; + try { + for (int i = 0; i < 10; i++) { + a[i] = 0; + k += 10; // increment last + } + } catch(Exception e) { + // Flag error by returning current + // value of k negated. + return -k-1; + } + return k; + } + + private static int exceptionExitAfterAdd() { + int k = 0; + try { + for (int i = 0; i < 10; i++) { + k += 10; // increment first + a[i] = 0; + } + } catch(Exception e) { + // Flag error by returning current + // value of k negated. + return -k-1; + } + return k; + } + + public static void main(String[] args) { + deadSingleLoop(); + deadNestedLoops(); + deadNestedAndFollowingLoops(); + + deadInduction(); + for (int i = 0; i < a.length; i++) { + expectEquals(1, a[i]); + } + deadManyInduction(); + for (int i = 0; i < a.length; i++) { + expectEquals(2, a[i]); + } + deadSequence(); + for (int i = 0; i < a.length; i++) { + expectEquals(3, a[i]); + } + try { + deadCycleWithException(-1); + throw new Error("Expected: IOOB exception"); + } catch (IndexOutOfBoundsException e) { + } + for (int i = 0; i < a.length; i++) { + expectEquals(i == 0 ? 4 : 3, a[i]); + } + deadCycleWithException(0); + for (int i = 0; i < a.length; i++) { + expectEquals(4, a[i]); + } + + int c = closedFormInductionUp(); + expectEquals(12395, c); + c = closedFormInductionInAndDown(12345); + expectEquals(12295, c); + for (int n = -4; n < 10; n++) { + int tc = (n <= 0) ? 0 : n; + c = closedFormInductionUpN(n); + expectEquals(12345 + tc * 5, c); + c = closedFormInductionInAndDownN(12345, n); + expectEquals(12345 - tc * 5, c); + c = closedFormNested(n); + expectEquals(tc * 10, c); + } + + for (int n = -4; n < 4; n++) { + int tc = (n <= 0) ? 
0 : n; + expectEquals(tc, mainIndexReturned(n)); + expectEquals(tc & 1, periodicReturned(n)); + expectEquals((tc * (tc + 1)) / 2, getSum(n)); + } + expectEquals(21, getSum21()); + expectEquals(10, closedTwice()); + expectEquals(20, closedFeed()); + expectEquals(-10, closedLargeUp()); + expectEquals(10, closedLargeDown()); + + expectEquals(100, exceptionExitBeforeAdd()); + expectEquals(100, exceptionExitAfterAdd()); + a = null; + expectEquals(-1, exceptionExitBeforeAdd()); + expectEquals(-11, exceptionExitAfterAdd()); + a = new int[4]; + expectEquals(-41, exceptionExitBeforeAdd()); + expectEquals(-51, exceptionExitAfterAdd()); + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk index 33108ddc43..64643218ea 100644 --- a/test/Android.run-test.mk +++ b/test/Android.run-test.mk @@ -233,11 +233,9 @@ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), # Disable 149-suspend-all-stress, its output is flaky (b/28988206). # Disable 577-profile-foreign-dex (b/27454772). -# Disable 552-checker-sharpening, until compiler component of new string dex cache is added (@cwadsworth, @vmarko) TEST_ART_BROKEN_ALL_TARGET_TESTS := \ 149-suspend-all-stress \ 577-profile-foreign-dex \ - 552-checker-sharpening \ ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \ $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \ |