242 files changed, 9866 insertions, 2854 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 11af1c0ca8..0a465c4c22 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -83,16 +83,16 @@ ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_HOST_GTEST_Main_DE
 ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
 
 $(ART_TEST_HOST_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	$(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	$(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_TARGET_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	$(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	$(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	$(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	$(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 $(ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
-	$(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+	$(HOST_OUT_EXECUTABLES)/smali assemble --output $@ $(filter %.smali,$^)
 
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
@@ -171,6 +171,12 @@ ART_GTEST_dex2oat_test_TARGET_DEPS := \
 # TODO: document why this is needed.
 ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_DEFAULT_64) $(HOST_CORE_IMAGE_DEFAULT_32)
 
+# The dexdiag test requires the dexdiag utility.
+ART_GTEST_dexdiag_test_HOST_DEPS := \
+  $(HOST_OUT_EXECUTABLES)/dexdiag
+ART_GTEST_dexdiag_test_TARGET_DEPS := \
+  dexdiag
+
 # The dexdump test requires an image and the dexdump utility.
 # TODO: rename into dexdump when migration completes
 ART_GTEST_dexdump_test_HOST_DEPS := \
@@ -227,6 +233,8 @@ ART_GTEST_oatdump_test_TARGET_DEPS := \
   $(TARGET_CORE_IMAGE_DEFAULT_64) \
   $(TARGET_CORE_IMAGE_DEFAULT_32) \
   oatdump
+ART_GTEST_oatdump_image_test_HOST_DEPS := $(ART_GTEST_oatdump_test_HOST_DEPS)
+ART_GTEST_oatdump_image_test_TARGET_DEPS := $(ART_GTEST_oatdump_test_TARGET_DEPS)
 
 # Profile assistant tests requires profman utility.
 ART_GTEST_profile_assistant_test_HOST_DEPS := \
@@ -242,6 +250,7 @@ ART_TEST_MODULES := \
     art_compiler_tests \
     art_compiler_host_tests \
     art_dex2oat_tests \
+    art_dexdiag_tests \
     art_dexdump_tests \
     art_dexlayout_tests \
     art_dexlist_tests \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index c733febd06..3f9ea15fb3 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -56,11 +56,11 @@ define create-core-oat-host-rules
     core_dex2oat_dependency := $(DEX2OAT)
   endif
   ifeq ($(1),interpreter)
-    core_compile_options += --compiler-filter=interpret-only
+    core_compile_options += --compiler-filter=quicken
    core_infix := -interpreter
   endif
   ifeq ($(1),interp-ac)
-    core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
+    core_compile_options += --compiler-filter=extract --runtime-arg -Xverify:softfail
     core_infix := -interp-ac
   endif
   ifneq ($(filter-out interpreter interp-ac optimizing,$(1)),)
@@ -166,11 +166,11 @@ define create-core-oat-target-rules
     core_dex2oat_dependency := $(DEX2OAT)
   endif
   ifeq ($(1),interpreter)
-    core_compile_options += --compiler-filter=interpret-only
+    core_compile_options += --compiler-filter=quicken
     core_infix := -interpreter
   endif
   ifeq ($(1),interp-ac)
-    core_compile_options += --compiler-filter=verify-at-runtime --runtime-arg -Xverify:softfail
+    core_compile_options += --compiler-filter=extract --runtime-arg -Xverify:softfail
     core_infix := -interp-ac
   endif
   ifneq ($(filter-out interpreter interp-ac optimizing,$(1)),)
diff --git a/cmdline/cmdline_parser.h b/cmdline/cmdline_parser.h
index d82fd488e9..32480dd915 100644
--- a/cmdline/cmdline_parser.h
+++ b/cmdline/cmdline_parser.h
@@ -612,7 +612,7 @@ struct CmdlineParser {
 
 template <typename TVariantMap, template <typename TKeyValue> class TVariantMapKey>
 template <typename TArg>
-CmdlineParser<TVariantMap, TVariantMapKey>::ArgumentBuilder<TArg>
+typename CmdlineParser<TVariantMap, TVariantMapKey>::template ArgumentBuilder<TArg>
 CmdlineParser<TVariantMap, TVariantMapKey>::CreateArgumentBuilder(
     CmdlineParser<TVariantMap, TVariantMapKey>::Builder& parent) {
   return CmdlineParser<TVariantMap, TVariantMapKey>::ArgumentBuilder<TArg>(
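Note: the cmdline_parser.h hunk fixes a dependent-name issue, not a behavior change. A minimal standalone sketch of the rule (hypothetical Parser/Builder names, not the ART types): in the out-of-line definition, the member template's return type must be spelled with both `typename` (it names a type) and `template` (it names a member template), because Parser<T> is a dependent type.

    template <typename T>
    struct Parser {
      template <typename U>
      struct Builder {};

      template <typename U>
      static Builder<U> Make();
    };

    // Out-of-line definition: both keywords are required here.
    template <typename T>
    template <typename U>
    typename Parser<T>::template Builder<U> Parser<T>::Make() {
      return {};
    }
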
diff --git a/compiler/Android.bp b/compiler/Android.bp
index dec8b577d8..a2b07af810 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -28,7 +28,6 @@ art_cc_defaults {
         "compiled_method.cc",
         "debug/elf_debug_writer.cc",
         "dex/dex_to_dex_compiler.cc",
-        "dex/dex_to_dex_decompiler.cc",
         "dex/inline_method_analyser.cc",
         "dex/verified_method.cc",
         "dex/verification_results.cc",
@@ -339,6 +338,7 @@ art_cc_test {
         "elf_writer_test.cc",
         "exception_test.cc",
         "image_test.cc",
+        "image_write_read_test.cc",
         "jni/jni_compiler_test.cc",
         "linker/multi_oat_relative_patcher_test.cc",
         "linker/output_stream_test.cc",
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
index 9a8d27cd03..e486e2e6ec 100644
--- a/compiler/dex/dex_to_dex_decompiler_test.cc
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "dex/dex_to_dex_decompiler.h"
+#include "dex_to_dex_decompiler.h"
 
 #include "class_linker.h"
 #include "compiler/common_compiler_test.h"
@@ -38,7 +38,7 @@ class DexToDexDecompilerTest : public CommonCompilerTest {
     TimingLogger timings("CompilerDriverTest::CompileAll", false, false);
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_options_->boot_image_ = false;
-    compiler_options_->SetCompilerFilter(CompilerFilter::kInterpretOnly);
+    compiler_options_->SetCompilerFilter(CompilerFilter::kQuicken);
     compiler_driver_->CompileAll(class_loader,
                                  GetDexFiles(class_loader),
                                  /* verifier_deps */ nullptr,
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 00a7d44bac..3f0df3b2c8 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -104,11 +104,12 @@ void VerificationResults::CreateVerifiedMethodFor(MethodReference ref) {
   // This method should only be called for classes verified at compile time,
   // which have no verifier error, nor has methods that we know will throw
   // at runtime.
-  AtomicMap::InsertResult result = atomic_verified_methods_.Insert(
+  atomic_verified_methods_.Insert(
       ref,
       /*expected*/ nullptr,
       new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false));
-  DCHECK_EQ(result, AtomicMap::kInsertResultSuccess);
+  // We don't check the result of `Insert` as we could insert twice for the same
+  // MethodReference in the presence of duplicate methods.
 }
 
 void VerificationResults::AddRejectedClass(ClassReference ref) {
@@ -126,7 +127,7 @@ bool VerificationResults::IsClassRejected(ClassReference ref) {
 
 bool VerificationResults::IsCandidateForCompilation(MethodReference&,
                                                     const uint32_t access_flags) {
-  if (!compiler_options_->IsBytecodeCompilationEnabled()) {
+  if (!compiler_options_->IsAotCompilationEnabled()) {
     return false;
   }
   // Don't compile class initializers unless kEverything.
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index f77b3ddfe0..a8ab7c6091 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -45,7 +45,6 @@
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "dex/dex_to_dex_compiler.h"
-#include "dex/dex_to_dex_decompiler.h"
 #include "dex/verification_results.h"
 #include "dex/verified_method.h"
 #include "driver/compiler_options.h"
@@ -421,7 +420,7 @@ INTRINSICS_LIST(SETUP_INTRINSICS)
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
   //    compilation.
-  if (GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
+  if (GetCompilerOptions().IsAnyCompilationEnabled()) {
     Compile(class_loader, dex_files, timings);
   }
   if (dump_stats_) {
@@ -431,61 +430,6 @@ INTRINSICS_LIST(SETUP_INTRINSICS)
   FreeThreadPools();
 }
 
-// In-place unquicken the given `dex_files` based on `quickening_info`.
-static void Unquicken(const std::vector<const DexFile*>& dex_files,
-                      const ArrayRef<const uint8_t>& quickening_info,
-                      bool decompile_return_instruction) {
-  const uint8_t* quickening_info_ptr = quickening_info.data();
-  const uint8_t* const quickening_info_end = quickening_info.data() + quickening_info.size();
-  for (const DexFile* dex_file : dex_files) {
-    for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
-      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
-      const uint8_t* class_data = dex_file->GetClassData(class_def);
-      if (class_data == nullptr) {
-        continue;
-      }
-      ClassDataItemIterator it(*dex_file, class_data);
-      // Skip fields
-      while (it.HasNextStaticField()) {
-        it.Next();
-      }
-      while (it.HasNextInstanceField()) {
-        it.Next();
-      }
-
-      while (it.HasNextDirectMethod()) {
-        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
-        if (code_item != nullptr) {
-          uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
-          quickening_info_ptr += sizeof(uint32_t);
-          optimizer::ArtDecompileDEX(*code_item,
-                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
-                                     decompile_return_instruction);
-          quickening_info_ptr += quickening_size;
-        }
-        it.Next();
-      }
-
-      while (it.HasNextVirtualMethod()) {
-        const DexFile::CodeItem* code_item = it.GetMethodCodeItem();
-        if (code_item != nullptr) {
-          uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
-          quickening_info_ptr += sizeof(uint32_t);
-          optimizer::ArtDecompileDEX(*code_item,
-                                     ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
-                                     decompile_return_instruction);
-          quickening_info_ptr += quickening_size;
-        }
-        it.Next();
-      }
-      DCHECK(!it.HasNext());
-    }
-  }
-  if (quickening_info_ptr != quickening_info_end) {
-    LOG(FATAL) << "Failed to use all quickening info";
-  }
-}
-
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
                                 VdexFile* vdex_file,
@@ -494,15 +438,12 @@ void CompilerDriver::CompileAll(jobject class_loader,
   // TODO: we unquicken unconditionnally, as we don't know
   // if the boot image has changed. How exactly we'll know is under
   // experimentation.
-  if (vdex_file->GetQuickeningInfo().size() != 0) {
-    TimingLogger::ScopedTiming t("Unquicken", timings);
-    // We do not decompile a RETURN_VOID_NO_BARRIER into a RETURN_VOID, as the quickening
-    // optimization does not depend on the boot image (the optimization relies on not
-    // having final fields in a class, which does not change for an app).
-    Unquicken(dex_files,
-              vdex_file->GetQuickeningInfo(),
-              /* decompile_return_instruction */ false);
-  }
+  TimingLogger::ScopedTiming t("Unquicken", timings);
+  // We do not decompile a RETURN_VOID_NO_BARRIER into a RETURN_VOID, as the quickening
+  // optimization does not depend on the boot image (the optimization relies on not
+  // having final fields in a class, which does not change for an app).
+  VdexFile::Unquicken(dex_files, vdex_file->GetQuickeningInfo());
+
   Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
       new verifier::VerifierDeps(dex_files, vdex_file->GetVerifierDepsData()));
 }
@@ -514,7 +455,7 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
     const DexFile& dex_file, const DexFile::ClassDef& class_def)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   auto* const runtime = Runtime::Current();
-  DCHECK(driver.GetCompilerOptions().IsAnyMethodCompilationEnabled());
+  DCHECK(driver.GetCompilerOptions().IsQuickeningCompilationEnabled());
   const char* descriptor = dex_file.GetClassDescriptor(class_def);
   ClassLinker* class_linker = runtime->GetClassLinker();
   mirror::Class* klass = class_linker->FindClass(self, descriptor, class_loader);
@@ -986,7 +927,8 @@ void CompilerDriver::PreCompile(jobject class_loader,
     LoadImageClasses(timings);
     VLOG(compiler) << "LoadImageClasses: " << GetMemoryUsageString(false);
 
-  if (compiler_options_->IsAnyMethodCompilationEnabled()) {
+  if (compiler_options_->IsAnyCompilationEnabled()) {
+    // Resolve eagerly to prepare for compilation.
     Resolve(class_loader, dex_files, timings);
     VLOG(compiler) << "Resolve: " << GetMemoryUsageString(false);
   }
@@ -1014,7 +956,7 @@ void CompilerDriver::PreCompile(jobject class_loader,
                  << "situations. Please check the log.";
   }
 
-  if (compiler_options_->IsAnyMethodCompilationEnabled()) {
+  if (compiler_options_->IsAnyCompilationEnabled()) {
     if (kIsDebugBuild) {
       EnsureVerifiedOrVerifyAtRuntime(class_loader, dex_files);
     }
@@ -2017,7 +1959,7 @@ bool CompilerDriver::FastVerify(jobject jclass_loader,
     return false;
   }
 
-  bool compiler_only_verifies = !GetCompilerOptions().IsAnyMethodCompilationEnabled();
+  bool compiler_only_verifies = !GetCompilerOptions().IsAnyCompilationEnabled();
 
   // We successfully validated the dependencies, now update class status
   // of verified classes. Note that the dependencies also record which classes
@@ -2088,16 +2030,18 @@ void CompilerDriver::Verify(jobject jclass_loader,
     }
   }
 
-  // Note: verification should not be pulling in classes anymore when compiling the boot image,
-  // as all should have been resolved before. As such, doing this in parallel should still
-  // be deterministic.
+  // Verification updates VerifierDeps and needs to run single-threaded to be deterministic.
+  bool force_determinism = GetCompilerOptions().IsForceDeterminism();
+  ThreadPool* verify_thread_pool =
+      force_determinism ? single_thread_pool_.get() : parallel_thread_pool_.get();
+  size_t verify_thread_count = force_determinism ? 1U : parallel_thread_count_;
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
     VerifyDexFile(jclass_loader,
                   *dex_file,
                   dex_files,
-                  parallel_thread_pool_.get(),
-                  parallel_thread_count_,
+                  verify_thread_pool,
+                  verify_thread_count,
                   timings);
   }
 }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 874e35716c..fbab9dfbaf 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -179,6 +179,40 @@ class CompilerDriver {
                                      uint16_t class_def_index,
                                      bool requires)
       REQUIRES(!requires_constructor_barrier_lock_);
+
+  // Do the <init> methods for this class require a constructor barrier (prior to the return)?
+  // The answer is "yes", if and only if this class has any instance final fields.
+  // (This must not be called for any non-<init> methods; the answer would be "no").
+  //
+  // ---
+  //
+  // JLS 17.5.1 "Semantics of final fields" mandates that all final fields are frozen at the end
+  // of the invoked constructor. The constructor barrier is a conservative implementation means of
+  // enforcing the freezes happen-before the object being constructed is observable by another
+  // thread.
+  //
+  // Note: This question only makes sense for instance constructors;
+  // static constructors (despite possibly having finals) never need
+  // a barrier.
+  //
+  // JLS 12.4.2 "Detailed Initialization Procedure" approximately describes
+  // class initialization as:
+  //
+  //   lock(class.lock)
+  //   class.state = initializing
+  //   unlock(class.lock)
+  //
+  //   invoke <clinit>
+  //
+  //   lock(class.lock)
+  //   class.state = initialized
+  //   unlock(class.lock)              <-- acts as a release
+  //
+  // The last operation in the above example acts as an atomic release
+  // for any stores in <clinit>, which ends up being stricter
+  // than what a constructor barrier needs.
+  //
+  // See also QuasiAtomic::ThreadFenceForConstructor().
   bool RequiresConstructorBarrier(Thread* self,
                                   const DexFile* dex_file,
                                   uint16_t class_def_index)
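Note: the new compiler_driver.h comment is dense, so here is a rough analogy in portable C++ terms (invented names, not ART code, offered only under that assumption): the constructor barrier amounts to a release fence at the end of a constructor of a class with final instance fields, pairing with an acquire load on the reader side so the frozen final values happen-before any use of the published object.

    #include <atomic>

    struct HasFinalField {
      int final_field;
      explicit HasFinalField(int v) : final_field(v) {
        // Analogue of the constructor barrier: order the field stores
        // before any subsequent publication of `this`.
        std::atomic_thread_fence(std::memory_order_release);
      }
    };

    std::atomic<HasFinalField*> shared{nullptr};

    void Publisher() {
      shared.store(new HasFinalField(42), std::memory_order_relaxed);
    }

    void Reader() {
      HasFinalField* p = shared.load(std::memory_order_acquire);
      if (p != nullptr) {
        (void)p->final_field;  // Guaranteed to observe 42.
      }
    }
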
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 42ff1e748a..17854fd61a 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -325,7 +325,7 @@ TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
 class CompilerDriverVerifyTest : public CompilerDriverTest {
  protected:
   CompilerFilter::Filter GetCompilerFilter() const OVERRIDE {
-    return CompilerFilter::kVerifyProfile;
+    return CompilerFilter::kVerify;
   }
 
   void CheckVerifiedClass(jobject class_loader, const std::string& clazz) const {
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index a0c0a2acf6..a4e2083fe4 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -200,7 +200,7 @@ bool CompilerOptions::ParseCompilerOption(const StringPiece& option, UsageFn Usa
     ParseDumpInitFailures(option, Usage);
   } else if (option.starts_with("--dump-cfg=")) {
     dump_cfg_file_name_ = option.substr(strlen("--dump-cfg=")).data();
-  } else if (option.starts_with("--dump-cfg-append")) {
+  } else if (option == "--dump-cfg-append") {
     dump_cfg_append_ = true;
   } else if (option.starts_with("--register-allocation-strategy=")) {
     ParseRegisterAllocationStrategy(option, Usage);
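Note: the compiler_options.cc hunk tightens a boolean flag from a prefix match to an exact match. The pitfall in isolation (standalone sketch using std::string rather than ART's StringPiece):

    #include <cassert>
    #include <string>

    // Prefix match: silently accepts trailing junk on a boolean flag.
    bool MatchesPrefix(const std::string& opt) {
      return opt.rfind("--dump-cfg-append", 0) == 0;
    }

    // Exact match: what the fixed code does.
    bool MatchesExact(const std::string& opt) {
      return opt == "--dump-cfg-append";
    }

    int main() {
      assert(MatchesPrefix("--dump-cfg-append-typo"));  // accepted: the bug
      assert(!MatchesExact("--dump-cfg-append-typo"));  // rejected: fixed behavior
      return 0;
    }
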
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 2376fbf5f5..957ea99c49 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -84,32 +84,32 @@ class CompilerOptions FINAL {
     compiler_filter_ = compiler_filter;
   }
 
-  bool VerifyAtRuntime() const {
-    return compiler_filter_ == CompilerFilter::kVerifyAtRuntime;
-  }
-
-  bool IsBytecodeCompilationEnabled() const {
-    return CompilerFilter::IsBytecodeCompilationEnabled(compiler_filter_);
+  bool IsAotCompilationEnabled() const {
+    return CompilerFilter::IsAotCompilationEnabled(compiler_filter_);
   }
 
   bool IsJniCompilationEnabled() const {
     return CompilerFilter::IsJniCompilationEnabled(compiler_filter_);
   }
 
+  bool IsQuickeningCompilationEnabled() const {
+    return CompilerFilter::IsQuickeningCompilationEnabled(compiler_filter_);
+  }
+
   bool IsVerificationEnabled() const {
     return CompilerFilter::IsVerificationEnabled(compiler_filter_);
   }
 
   bool AssumeClassesAreVerified() const {
-    return compiler_filter_ == CompilerFilter::kVerifyNone;
+    return compiler_filter_ == CompilerFilter::kAssumeVerified;
   }
 
-  bool VerifyOnlyProfile() const {
-    return compiler_filter_ == CompilerFilter::kVerifyProfile;
+  bool VerifyAtRuntime() const {
+    return compiler_filter_ == CompilerFilter::kExtract;
   }
 
-  bool IsAnyMethodCompilationEnabled() const {
-    return CompilerFilter::IsAnyMethodCompilationEnabled(compiler_filter_);
+  bool IsAnyCompilationEnabled() const {
+    return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_);
   }
 
   size_t GetHugeMethodThreshold() const {
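Note: for orientation, the compiler-filter renaming that runs through these hunks (and the Android.oat.mk change earlier) maps old names to new ones as follows; the mapping below is read directly off the hunks in this diff.

    // Old CompilerFilter name  ->  new name used by this change
    // kVerifyNone              ->  kAssumeVerified  (assume classes are verified)
    // kVerifyAtRuntime         ->  kExtract         (extract only, verify at runtime)
    // kVerifyProfile           ->  kVerify          (verify only)
    // kInterpretOnly           ->  kQuicken         (dex-to-dex quickening, no AOT code)
    //
    // Command-line spellings follow suit:
    //   --compiler-filter=interpret-only    becomes  --compiler-filter=quicken
    //   --compiler-filter=verify-at-runtime becomes  --compiler-filter=extract
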
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 7e53d8d2ab..9d7aff769b 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -14,492 +14,17 @@
  * limitations under the License.
  */
 
-#include "image.h"
-
-#include <memory>
-#include <string>
+#include <string.h>
 #include <vector>
 
-#include "android-base/stringprintf.h"
+#include "image_test.h"
 
-#include "art_method-inl.h"
-#include "base/unix_file/fd_file.h"
-#include "class_linker-inl.h"
-#include "compiler_callbacks.h"
-#include "common_compiler_test.h"
-#include "debug/method_debug_info.h"
-#include "dex/quick_compiler_callbacks.h"
-#include "driver/compiler_options.h"
-#include "elf_writer.h"
-#include "elf_writer_quick.h"
-#include "gc/space/image_space.h"
-#include "image_writer.h"
-#include "linker/buffered_output_stream.h"
-#include "linker/file_output_stream.h"
-#include "linker/multi_oat_relative_patcher.h"
-#include "lock_word.h"
-#include "mirror/object-inl.h"
-#include "oat_writer.h"
+#include "image.h"
 #include "scoped_thread_state_change-inl.h"
-#include "signal_catcher.h"
-#include "utils.h"
+#include "thread.h"
 
 namespace art {
 
-static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
-
-struct CompilationHelper {
-  std::vector<std::string> dex_file_locations;
-  std::vector<ScratchFile> image_locations;
-  std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
-  std::vector<ScratchFile> image_files;
-  std::vector<ScratchFile> oat_files;
-  std::vector<ScratchFile> vdex_files;
-  std::string image_dir;
-
-  void Compile(CompilerDriver* driver,
-               ImageHeader::StorageMode storage_mode);
-
-  std::vector<size_t> GetImageObjectSectionSizes();
-
-  ~CompilationHelper();
-};
-
-class ImageTest : public CommonCompilerTest {
- protected:
-  virtual void SetUp() {
-    ReserveImageSpace();
-    CommonCompilerTest::SetUp();
-  }
-
-  void TestWriteRead(ImageHeader::StorageMode storage_mode);
-
-  void Compile(ImageHeader::StorageMode storage_mode,
-               CompilationHelper& out_helper,
-               const std::string& extra_dex = "",
-               const std::initializer_list<std::string>& image_classes = {});
-
-  void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
-    CommonCompilerTest::SetUpRuntimeOptions(options);
-    callbacks_.reset(new QuickCompilerCallbacks(
-        verification_results_.get(),
-        CompilerCallbacks::CallbackMode::kCompileBootImage));
-    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
-  }
-
-  std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
-    return new std::unordered_set<std::string>(image_classes_);
-  }
-
-  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    PointerSize pointer_size = class_linker_->GetImagePointerSize();
-    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
-      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
-          origin->GetSignature() == m.GetSignature()) {
-        return &m;
-      }
-    }
-    return nullptr;
-  }
-
- private:
-  std::unordered_set<std::string> image_classes_;
-};
-
-CompilationHelper::~CompilationHelper() {
-  for (ScratchFile& image_file : image_files) {
-    image_file.Unlink();
-  }
-  for (ScratchFile& oat_file : oat_files) {
-    oat_file.Unlink();
-  }
-  for (ScratchFile& vdex_file : vdex_files) {
-    vdex_file.Unlink();
-  }
-  const int rmdir_result = rmdir(image_dir.c_str());
-  CHECK_EQ(0, rmdir_result);
-}
-
-std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
-  std::vector<size_t> ret;
-  for (ScratchFile& image_file : image_files) {
-    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
-    CHECK(file.get() != nullptr);
-    ImageHeader image_header;
-    CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
-    CHECK(image_header.IsValid());
-    ret.push_back(image_header.GetImageSize());
-  }
-  return ret;
-}
-
-void CompilationHelper::Compile(CompilerDriver* driver,
-                                ImageHeader::StorageMode storage_mode) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
-
-  for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
-    {
-      ScopedObjectAccess soa(Thread::Current());
-      // Inject in boot class path so that the compiler driver can see it.
-      class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
-    }
-    class_path.push_back(dex_file.get());
-  }
-
-  // Enable write for dex2dex.
-  for (const DexFile* dex_file : class_path) {
-    dex_file_locations.push_back(dex_file->GetLocation());
-    if (dex_file->IsReadOnly()) {
-      dex_file->EnableWrite();
-    }
-  }
-  {
-    // Create a generic tmp file, to be the base of the .art and .oat temporary files.
-    ScratchFile location;
-    for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
-      std::string cur_location =
-          android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
-      image_locations.push_back(ScratchFile(cur_location));
-    }
-  }
-  std::vector<std::string> image_filenames;
-  for (ScratchFile& file : image_locations) {
-    std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
-    image_filenames.push_back(image_filename);
-    size_t pos = image_filename.rfind('/');
-    CHECK_NE(pos, std::string::npos) << image_filename;
-    if (image_dir.empty()) {
-      image_dir = image_filename.substr(0, pos);
-      int mkdir_result = mkdir(image_dir.c_str(), 0700);
-      CHECK_EQ(0, mkdir_result) << image_dir;
-    }
-    image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
-  }
-
-  std::vector<std::string> oat_filenames;
-  std::vector<std::string> vdex_filenames;
-  for (const std::string& image_filename : image_filenames) {
-    std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
-    oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
-    oat_filenames.push_back(oat_filename);
-    std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
-    vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
-    vdex_filenames.push_back(vdex_filename);
-  }
-
-  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
-  std::vector<const char*> oat_filename_vector;
-  for (const std::string& file : oat_filenames) {
-    oat_filename_vector.push_back(file.c_str());
-  }
-  std::vector<const char*> image_filename_vector;
-  for (const std::string& file : image_filenames) {
-    image_filename_vector.push_back(file.c_str());
-  }
-  size_t image_idx = 0;
-  for (const DexFile* dex_file : class_path) {
-    dex_file_to_oat_index_map.emplace(dex_file, image_idx);
-    ++image_idx;
-  }
-  // TODO: compile_pic should be a test argument.
-  std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
-                                                      kRequestedImageBase,
-                                                      /*compile_pic*/false,
-                                                      /*compile_app_image*/false,
-                                                      storage_mode,
-                                                      oat_filename_vector,
-                                                      dex_file_to_oat_index_map));
-  {
-    {
-      jobject class_loader = nullptr;
-      TimingLogger timings("ImageTest::WriteRead", false, false);
-      TimingLogger::ScopedTiming t("CompileAll", &timings);
-      driver->SetDexFilesForOatFile(class_path);
-      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
-
-      t.NewTiming("WriteElf");
-      SafeMap<std::string, std::string> key_value_store;
-      std::vector<const char*> dex_filename_vector;
-      for (size_t i = 0; i < class_path.size(); ++i) {
-        dex_filename_vector.push_back("");
-      }
-      key_value_store.Put(OatHeader::kBootClassPathKey,
-                          gc::space::ImageSpace::GetMultiImageBootClassPath(
-                              dex_filename_vector,
-                              oat_filename_vector,
-                              image_filename_vector));
-
-      std::vector<std::unique_ptr<ElfWriter>> elf_writers;
-      std::vector<std::unique_ptr<OatWriter>> oat_writers;
-      for (ScratchFile& oat_file : oat_files) {
-        elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
-                                                      driver->GetInstructionSetFeatures(),
-                                                      &driver->GetCompilerOptions(),
-                                                      oat_file.GetFile()));
-        elf_writers.back()->Start();
-        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
-                                               &timings,
-                                               /*profile_compilation_info*/nullptr));
-      }
-
-      std::vector<OutputStream*> rodata;
-      std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
-      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
-      // Now that we have finalized key_value_store_, start writing the oat file.
-      for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
-        const DexFile* dex_file = class_path[i];
-        rodata.push_back(elf_writers[i]->StartRoData());
-        ArrayRef<const uint8_t> raw_dex_file(
-            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
-            dex_file->GetHeader().file_size_);
-        oat_writers[i]->AddRawDexFileSource(raw_dex_file,
-                                            dex_file->GetLocation().c_str(),
-                                            dex_file->GetLocationChecksum());
-
-        std::unique_ptr<MemMap> cur_opened_dex_files_map;
-        std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
-        bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
-            kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
-            rodata.back(),
-            driver->GetInstructionSet(),
-            driver->GetInstructionSetFeatures(),
-            &key_value_store,
-            /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
-            /* update_input_vdex */ false,
-            &cur_opened_dex_files_map,
-            &cur_opened_dex_files);
-        ASSERT_TRUE(dex_files_ok);
-
-        if (cur_opened_dex_files_map != nullptr) {
-          opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
-          for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
-            // dex_file_oat_index_map_.emplace(dex_file.get(), i);
-            opened_dex_files.push_back(std::move(cur_dex_file));
-          }
-        } else {
-          ASSERT_TRUE(cur_opened_dex_files.empty());
-        }
-      }
-      bool image_space_ok = writer->PrepareImageAddressSpace();
-      ASSERT_TRUE(image_space_ok);
-
-      if (kIsVdexEnabled) {
-        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
-          std::unique_ptr<BufferedOutputStream> vdex_out(
-              MakeUnique<BufferedOutputStream>(
-                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
-          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
-          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
-        }
-      }
-
-      for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
-        linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
-                                                driver->GetInstructionSetFeatures());
-        OatWriter* const oat_writer = oat_writers[i].get();
-        ElfWriter* const elf_writer = elf_writers[i].get();
-        std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
-        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
-        oat_writer->PrepareLayout(&patcher);
-        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
-        size_t text_size = oat_writer->GetOatSize() - rodata_size;
-        elf_writer->PrepareDynamicSection(rodata_size,
-                                          text_size,
-                                          oat_writer->GetBssSize(),
-                                          oat_writer->GetBssRootsOffset());
-
-        writer->UpdateOatFileLayout(i,
-                                    elf_writer->GetLoadedSize(),
-                                    oat_writer->GetOatDataOffset(),
-                                    oat_writer->GetOatSize());
-
-        bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
-        ASSERT_TRUE(rodata_ok);
-        elf_writer->EndRoData(rodata[i]);
-
-        OutputStream* text = elf_writer->StartText();
-        bool text_ok = oat_writer->WriteCode(text);
-        ASSERT_TRUE(text_ok);
-        elf_writer->EndText(text);
-
-        bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
-        ASSERT_TRUE(header_ok);
-
-        writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
-
-        elf_writer->WriteDynamicSection();
-        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-
-        bool success = elf_writer->End();
-        ASSERT_TRUE(success);
-      }
-    }
-
-    bool success_image = writer->Write(kInvalidFd,
-                                       image_filename_vector,
-                                       oat_filename_vector);
-    ASSERT_TRUE(success_image);
-
-    for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
-      const char* oat_filename = oat_filenames[i].c_str();
-      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
-      ASSERT_TRUE(oat_file != nullptr);
-      bool success_fixup = ElfWriter::Fixup(oat_file.get(),
-                                            writer->GetOatDataBegin(i));
-      ASSERT_TRUE(success_fixup);
-      ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
-                                                  << oat_filename;
-    }
-  }
-}
-
-void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
-                        CompilationHelper& helper,
-                        const std::string& extra_dex,
-                        const std::initializer_list<std::string>& image_classes) {
-  for (const std::string& image_class : image_classes) {
-    image_classes_.insert(image_class);
-  }
-  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
-  // Set inline filter values.
-  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
-  image_classes_.clear();
-  if (!extra_dex.empty()) {
-    helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
-  }
-  helper.Compile(compiler_driver_.get(), storage_mode);
-  if (image_classes.begin() != image_classes.end()) {
-    // Make sure the class got initialized.
-    ScopedObjectAccess soa(Thread::Current());
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    for (const std::string& image_class : image_classes) {
-      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
-      EXPECT_TRUE(klass != nullptr);
-      EXPECT_TRUE(klass->IsInitialized());
-    }
-  }
-}
-
-void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
-  CompilationHelper helper;
-  Compile(storage_mode, /*out*/ helper);
-  std::vector<uint64_t> image_file_sizes;
-  for (ScratchFile& image_file : helper.image_files) {
-    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
-    ASSERT_TRUE(file.get() != nullptr);
-    ImageHeader image_header;
-    ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
-    ASSERT_TRUE(image_header.IsValid());
-    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
-    ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
-    ASSERT_NE(0U, bitmap_section.Size());
-
-    gc::Heap* heap = Runtime::Current()->GetHeap();
-    ASSERT_TRUE(heap->HaveContinuousSpaces());
-    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
-    ASSERT_FALSE(space->IsImageSpace());
-    ASSERT_TRUE(space != nullptr);
-    ASSERT_TRUE(space->IsMallocSpace());
-    image_file_sizes.push_back(file->GetLength());
-  }
-
-  ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
-  std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
-
-  // Need to delete the compiler since it has worker threads which are attached to runtime.
-  compiler_driver_.reset();
-
-  // Tear down old runtime before making a new one, clearing out misc state.
-
-  // Remove the reservation of the memory for use to load the image.
-  // Need to do this before we reset the runtime.
-  UnreserveImageSpace();
-
-  helper.extra_dex_files.clear();
-  runtime_.reset();
-  java_lang_dex_file_ = nullptr;
-
-  MemMap::Init();
-
-  RuntimeOptions options;
-  std::string image("-Ximage:");
-  image.append(helper.image_locations[0].GetFilename());
-  options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
-  // By default the compiler this creates will not include patch information.
-  options.push_back(std::make_pair("-Xnorelocate", nullptr));
-
-  if (!Runtime::Create(options, false)) {
-    LOG(FATAL) << "Failed to create runtime";
-    return;
-  }
-  runtime_.reset(Runtime::Current());
-  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
-  // give it away now and then switch to a more managable ScopedObjectAccess.
-  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
-  ScopedObjectAccess soa(Thread::Current());
-  ASSERT_TRUE(runtime_.get() != nullptr);
-  class_linker_ = runtime_->GetClassLinker();
-
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  ASSERT_TRUE(heap->HasBootImageSpace());
-  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
-
-  // We loaded the runtime with an explicit image, so it must exist.
-  ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
-  for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
-    std::unique_ptr<const DexFile> dex(
-        LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
-    ASSERT_TRUE(dex != nullptr);
-    uint64_t image_file_size = image_file_sizes[i];
-    gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
-    ASSERT_TRUE(image_space != nullptr);
-    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
-      // Uncompressed, image should be smaller than file.
-      ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
-    } else if (image_file_size > 16 * KB) {
-      // Compressed, file should be smaller than image. Not really valid for small images.
-      ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
-    }
-
-    image_space->VerifyImageAllocations();
-    uint8_t* image_begin = image_space->Begin();
-    uint8_t* image_end = image_space->End();
-    if (i == 0) {
-      // This check is only valid for image 0.
-      CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
-    }
-    for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
-      const DexFile::ClassDef& class_def = dex->GetClassDef(j);
-      const char* descriptor = dex->GetClassDescriptor(class_def);
-      mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
-      EXPECT_TRUE(klass != nullptr) << descriptor;
-      if (image_classes.find(descriptor) == image_classes.end()) {
-        EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
-                    reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
-      } else {
-        // Image classes should be located inside the image.
-        EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
-        EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
-      }
-      EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
-    }
-  }
-}
-
-TEST_F(ImageTest, WriteReadUncompressed) {
-  TestWriteRead(ImageHeader::kStorageModeUncompressed);
-}
-
-TEST_F(ImageTest, WriteReadLZ4) {
-  TestWriteRead(ImageHeader::kStorageModeLZ4);
-}
-
-TEST_F(ImageTest, WriteReadLZ4HC) {
-  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
-}
-
 TEST_F(ImageTest, TestImageLayout) {
   std::vector<size_t> image_sizes;
   std::vector<size_t> image_sizes_extra;
diff --git a/compiler/image_test.h b/compiler/image_test.h
new file mode 100644
index 0000000000..2f15ff4815
--- /dev/null
+++ b/compiler/image_test.h
@@ -0,0 +1,497 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_IMAGE_TEST_H_
+#define ART_COMPILER_IMAGE_TEST_H_
+
+#include "image.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "android-base/stringprintf.h"
+
+#include "art_method-inl.h"
+#include "base/unix_file/fd_file.h"
+#include "class_linker-inl.h"
+#include "compiler_callbacks.h"
+#include "common_compiler_test.h"
+#include "debug/method_debug_info.h"
+#include "dex/quick_compiler_callbacks.h"
+#include "driver/compiler_options.h"
+#include "elf_writer.h"
+#include "elf_writer_quick.h"
+#include "gc/space/image_space.h"
+#include "image_writer.h"
+#include "linker/buffered_output_stream.h"
+#include "linker/file_output_stream.h"
+#include "linker/multi_oat_relative_patcher.h"
+#include "lock_word.h"
+#include "mirror/object-inl.h"
+#include "oat_writer.h"
+#include "scoped_thread_state_change-inl.h"
+#include "signal_catcher.h"
+#include "utils.h"
+
+namespace art {
+
+static const uintptr_t kRequestedImageBase = ART_BASE_ADDRESS;
+
+struct CompilationHelper {
+  std::vector<std::string> dex_file_locations;
+  std::vector<ScratchFile> image_locations;
+  std::vector<std::unique_ptr<const DexFile>> extra_dex_files;
+  std::vector<ScratchFile> image_files;
+  std::vector<ScratchFile> oat_files;
+  std::vector<ScratchFile> vdex_files;
+  std::string image_dir;
+
+  void Compile(CompilerDriver* driver,
+               ImageHeader::StorageMode storage_mode);
+
+  std::vector<size_t> GetImageObjectSectionSizes();
+
+  ~CompilationHelper();
+};
+
+class ImageTest : public CommonCompilerTest {
+ protected:
+  virtual void SetUp() {
+    ReserveImageSpace();
+    CommonCompilerTest::SetUp();
+  }
+
+  void TestWriteRead(ImageHeader::StorageMode storage_mode);
+
+  void Compile(ImageHeader::StorageMode storage_mode,
+               CompilationHelper& out_helper,
+               const std::string& extra_dex = "",
+               const std::initializer_list<std::string>& image_classes = {});
+
+  void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
+    CommonCompilerTest::SetUpRuntimeOptions(options);
+    callbacks_.reset(new QuickCompilerCallbacks(
+        verification_results_.get(),
+        CompilerCallbacks::CallbackMode::kCompileBootImage));
+    options->push_back(std::make_pair("compilercallbacks", callbacks_.get()));
+  }
+
+  std::unordered_set<std::string>* GetImageClasses() OVERRIDE {
+    return new std::unordered_set<std::string>(image_classes_);
+  }
+
+  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
+    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+          origin->GetSignature() == m.GetSignature()) {
+        return &m;
+      }
+    }
+    return nullptr;
+  }
+
+ private:
+  std::unordered_set<std::string> image_classes_;
+};
+
+inline CompilationHelper::~CompilationHelper() {
+  for (ScratchFile& image_file : image_files) {
+    image_file.Unlink();
+  }
+  for (ScratchFile& oat_file : oat_files) {
+    oat_file.Unlink();
+  }
+  for (ScratchFile& vdex_file : vdex_files) {
+    vdex_file.Unlink();
+  }
+  const int rmdir_result = rmdir(image_dir.c_str());
+  CHECK_EQ(0, rmdir_result);
+}
+
+inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
+  std::vector<size_t> ret;
+  for (ScratchFile& image_file : image_files) {
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+    CHECK(file.get() != nullptr);
+    ImageHeader image_header;
+    CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+    CHECK(image_header.IsValid());
+    ret.push_back(image_header.GetImageSize());
+  }
+  return ret;
+}
+
+inline void CompilationHelper::Compile(CompilerDriver* driver,
+                                       ImageHeader::StorageMode storage_mode) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  std::vector<const DexFile*> class_path = class_linker->GetBootClassPath();
+
+  for (const std::unique_ptr<const DexFile>& dex_file : extra_dex_files) {
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      // Inject in boot class path so that the compiler driver can see it.
+      class_linker->AppendToBootClassPath(soa.Self(), *dex_file.get());
+    }
+    class_path.push_back(dex_file.get());
+  }
+
+  // Enable write for dex2dex.
+  for (const DexFile* dex_file : class_path) {
+    dex_file_locations.push_back(dex_file->GetLocation());
+    if (dex_file->IsReadOnly()) {
+      dex_file->EnableWrite();
+    }
+  }
+  {
+    // Create a generic tmp file, to be the base of the .art and .oat temporary files.
+    ScratchFile location;
+    for (int i = 0; i < static_cast<int>(class_path.size()); ++i) {
+      std::string cur_location =
+          android::base::StringPrintf("%s-%d.art", location.GetFilename().c_str(), i);
+      image_locations.push_back(ScratchFile(cur_location));
+    }
+  }
+  std::vector<std::string> image_filenames;
+  for (ScratchFile& file : image_locations) {
+    std::string image_filename(GetSystemImageFilename(file.GetFilename().c_str(), kRuntimeISA));
+    image_filenames.push_back(image_filename);
+    size_t pos = image_filename.rfind('/');
+    CHECK_NE(pos, std::string::npos) << image_filename;
+    if (image_dir.empty()) {
+      image_dir = image_filename.substr(0, pos);
+      int mkdir_result = mkdir(image_dir.c_str(), 0700);
+      CHECK_EQ(0, mkdir_result) << image_dir;
+    }
+    image_files.push_back(ScratchFile(OS::CreateEmptyFile(image_filename.c_str())));
+  }
+
+  std::vector<std::string> oat_filenames;
+  std::vector<std::string> vdex_filenames;
+  for (const std::string& image_filename : image_filenames) {
+    std::string oat_filename = ReplaceFileExtension(image_filename, "oat");
+    oat_files.push_back(ScratchFile(OS::CreateEmptyFile(oat_filename.c_str())));
+    oat_filenames.push_back(oat_filename);
+    std::string vdex_filename = ReplaceFileExtension(image_filename, "vdex");
+    vdex_files.push_back(ScratchFile(OS::CreateEmptyFile(vdex_filename.c_str())));
+    vdex_filenames.push_back(vdex_filename);
+  }
+
+  std::unordered_map<const DexFile*, size_t> dex_file_to_oat_index_map;
+  std::vector<const char*> oat_filename_vector;
+  for (const std::string& file : oat_filenames) {
+    oat_filename_vector.push_back(file.c_str());
+  }
+  std::vector<const char*> image_filename_vector;
+  for (const std::string& file : image_filenames) {
+    image_filename_vector.push_back(file.c_str());
+  }
+  size_t image_idx = 0;
+  for (const DexFile* dex_file : class_path) {
+    dex_file_to_oat_index_map.emplace(dex_file, image_idx);
+    ++image_idx;
+  }
+  // TODO: compile_pic should be a test argument.
+  std::unique_ptr<ImageWriter> writer(new ImageWriter(*driver,
+                                                      kRequestedImageBase,
+                                                      /*compile_pic*/false,
+                                                      /*compile_app_image*/false,
+                                                      storage_mode,
+                                                      oat_filename_vector,
+                                                      dex_file_to_oat_index_map));
+  {
+    {
+      jobject class_loader = nullptr;
+      TimingLogger timings("ImageTest::WriteRead", false, false);
+      TimingLogger::ScopedTiming t("CompileAll", &timings);
+      driver->SetDexFilesForOatFile(class_path);
+      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
+
+      t.NewTiming("WriteElf");
+      SafeMap<std::string, std::string> key_value_store;
+      std::vector<const char*> dex_filename_vector;
+      for (size_t i = 0; i < class_path.size(); ++i) {
+        dex_filename_vector.push_back("");
+      }
+      key_value_store.Put(OatHeader::kBootClassPathKey,
+                          gc::space::ImageSpace::GetMultiImageBootClassPath(
+                              dex_filename_vector,
+                              oat_filename_vector,
+                              image_filename_vector));
+
+      std::vector<std::unique_ptr<ElfWriter>> elf_writers;
+      std::vector<std::unique_ptr<OatWriter>> oat_writers;
+      for (ScratchFile& oat_file : oat_files) {
+        elf_writers.emplace_back(CreateElfWriterQuick(driver->GetInstructionSet(),
+                                                      driver->GetInstructionSetFeatures(),
+                                                      &driver->GetCompilerOptions(),
+                                                      oat_file.GetFile()));
+        elf_writers.back()->Start();
+        oat_writers.emplace_back(new OatWriter(/*compiling_boot_image*/true,
+                                               &timings,
+                                               /*profile_compilation_info*/nullptr));
+      }
+
+      std::vector<OutputStream*> rodata;
+      std::vector<std::unique_ptr<MemMap>> opened_dex_files_map;
+      std::vector<std::unique_ptr<const DexFile>> opened_dex_files;
+      // Now that we have finalized key_value_store_, start writing the oat file.
+      for (size_t i = 0, size = oat_writers.size(); i != size; ++i) {
+        const DexFile* dex_file = class_path[i];
+        rodata.push_back(elf_writers[i]->StartRoData());
+        ArrayRef<const uint8_t> raw_dex_file(
+            reinterpret_cast<const uint8_t*>(&dex_file->GetHeader()),
+            dex_file->GetHeader().file_size_);
+        oat_writers[i]->AddRawDexFileSource(raw_dex_file,
+                                            dex_file->GetLocation().c_str(),
+                                            dex_file->GetLocationChecksum());
+
+        std::unique_ptr<MemMap> cur_opened_dex_files_map;
+        std::vector<std::unique_ptr<const DexFile>> cur_opened_dex_files;
+        bool dex_files_ok = oat_writers[i]->WriteAndOpenDexFiles(
+            kIsVdexEnabled ? vdex_files[i].GetFile() : oat_files[i].GetFile(),
+            rodata.back(),
+            driver->GetInstructionSet(),
+            driver->GetInstructionSetFeatures(),
+            &key_value_store,
+            /* verify */ false,           // Dex files may be dex-to-dex-ed, don't verify.
+            /* update_input_vdex */ false,
+            &cur_opened_dex_files_map,
+            &cur_opened_dex_files);
+        ASSERT_TRUE(dex_files_ok);
+
+        if (cur_opened_dex_files_map != nullptr) {
+          opened_dex_files_map.push_back(std::move(cur_opened_dex_files_map));
+          for (std::unique_ptr<const DexFile>& cur_dex_file : cur_opened_dex_files) {
+            // dex_file_oat_index_map_.emplace(dex_file.get(), i);
+            opened_dex_files.push_back(std::move(cur_dex_file));
+          }
+        } else {
+          ASSERT_TRUE(cur_opened_dex_files.empty());
+        }
+      }
+      bool image_space_ok = writer->PrepareImageAddressSpace();
+      ASSERT_TRUE(image_space_ok);
+
+      if (kIsVdexEnabled) {
+        for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
+          std::unique_ptr<BufferedOutputStream> vdex_out(
+              MakeUnique<BufferedOutputStream>(
+                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+          oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
+          oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
+        }
+      }
+
+      for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
+        linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
+                                                driver->GetInstructionSetFeatures());
+        OatWriter* const oat_writer = oat_writers[i].get();
+        ElfWriter* const elf_writer = elf_writers[i].get();
+        std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
+        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
+        oat_writer->PrepareLayout(&patcher);
+        size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
+        size_t text_size = oat_writer->GetOatSize() - rodata_size;
+        elf_writer->PrepareDynamicSection(rodata_size,
+                                          text_size,
+                                          oat_writer->GetBssSize(),
+                                          oat_writer->GetBssRootsOffset());
+
+        writer->UpdateOatFileLayout(i,
+                                    elf_writer->GetLoadedSize(),
+                                    oat_writer->GetOatDataOffset(),
+                                    oat_writer->GetOatSize());
+
+        bool rodata_ok = oat_writer->WriteRodata(rodata[i]);
+        ASSERT_TRUE(rodata_ok);
+        elf_writer->EndRoData(rodata[i]);
+
+        OutputStream* text = elf_writer->StartText();
+        bool text_ok = oat_writer->WriteCode(text);
+        ASSERT_TRUE(text_ok);
+        elf_writer->EndText(text);
+
+        bool header_ok = oat_writer->WriteHeader(elf_writer->GetStream(), 0u, 0u, 0u);
+        ASSERT_TRUE(header_ok);
+
+        writer->UpdateOatFileHeader(i, oat_writer->GetOatHeader());
+
+        elf_writer->WriteDynamicSection();
+        elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
+
+        bool success = elf_writer->End();
+        ASSERT_TRUE(success);
+      }
+    }
+
+    bool success_image = writer->Write(kInvalidFd,
+                                       image_filename_vector,
+                                       oat_filename_vector);
+    ASSERT_TRUE(success_image);
+
+    for (size_t i = 0, size = oat_filenames.size(); i != size; ++i) {
+      const char* oat_filename = oat_filenames[i].c_str();
+      std::unique_ptr<File> oat_file(OS::OpenFileReadWrite(oat_filename));
+      ASSERT_TRUE(oat_file != nullptr);
+      bool success_fixup = ElfWriter::Fixup(oat_file.get(),
+                                            writer->GetOatDataBegin(i));
+      ASSERT_TRUE(success_fixup);
+      ASSERT_EQ(oat_file->FlushCloseOrErase(), 0) << "Could not flush and close oat file "
+                                                  << oat_filename;
+    }
+  }
+}
+
+inline void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
+                               CompilationHelper& helper,
+                               const std::string& extra_dex,
+                               const std::initializer_list<std::string>& image_classes) {
+  for (const std::string& image_class : image_classes) {
+    image_classes_.insert(image_class);
+  }
+  CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
+  // Set inline filter values.
+  compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
+  image_classes_.clear();
+  if (!extra_dex.empty()) {
+    helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
+  }
+  helper.Compile(compiler_driver_.get(), storage_mode);
+  if (image_classes.begin() != image_classes.end()) {
+    // Make sure the class got initialized.
+    ScopedObjectAccess soa(Thread::Current());
+    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+    for (const std::string& image_class : image_classes) {
+      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+      EXPECT_TRUE(klass != nullptr);
+      EXPECT_TRUE(klass->IsInitialized());
+    }
+  }
+}
+
+inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
+  CompilationHelper helper;
+  Compile(storage_mode, /*out*/ helper);
+  std::vector<uint64_t> image_file_sizes;
+  for (ScratchFile& image_file : helper.image_files) {
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file.GetFilename().c_str()));
+    ASSERT_TRUE(file.get() != nullptr);
+    ImageHeader image_header;
+    ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
+    ASSERT_TRUE(image_header.IsValid());
+    const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+    ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
+    ASSERT_NE(0U, bitmap_section.Size());
+
+    gc::Heap* heap = Runtime::Current()->GetHeap();
+    ASSERT_TRUE(heap->HaveContinuousSpaces());
+    gc::space::ContinuousSpace* space = heap->GetNonMovingSpace();
+    ASSERT_FALSE(space->IsImageSpace());
+    ASSERT_TRUE(space != nullptr);
+    ASSERT_TRUE(space->IsMallocSpace());
+    image_file_sizes.push_back(file->GetLength());
+  }
+
+  ASSERT_TRUE(compiler_driver_->GetImageClasses() != nullptr);
+  std::unordered_set<std::string> image_classes(*compiler_driver_->GetImageClasses());
+
+  // Need to delete the compiler since it has worker threads which are attached to runtime.
+  compiler_driver_.reset();
+
+  // Tear down old runtime before making a new one, clearing out misc state.
+
+  // Remove the reservation of the memory for use to load the image.
+  // Need to do this before we reset the runtime.
+  UnreserveImageSpace();
+
+  helper.extra_dex_files.clear();
+  runtime_.reset();
+  java_lang_dex_file_ = nullptr;
+
+  MemMap::Init();
+
+  RuntimeOptions options;
+  std::string image("-Ximage:");
+  image.append(helper.image_locations[0].GetFilename());
+  options.push_back(std::make_pair(image.c_str(), static_cast<void*>(nullptr)));
+  // By default the compiler this creates will not include patch information.
+  options.push_back(std::make_pair("-Xnorelocate", nullptr));
+
+  if (!Runtime::Create(options, false)) {
+    LOG(FATAL) << "Failed to create runtime";
+    return;
+  }
+  runtime_.reset(Runtime::Current());
+  // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start,
+  // give it away now and then switch to a more managable ScopedObjectAccess.
+  Thread::Current()->TransitionFromRunnableToSuspended(kNative);
+  ScopedObjectAccess soa(Thread::Current());
+  ASSERT_TRUE(runtime_.get() != nullptr);
+  class_linker_ = runtime_->GetClassLinker();
+
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  ASSERT_TRUE(heap->HasBootImageSpace());
+  ASSERT_TRUE(heap->GetNonMovingSpace()->IsMallocSpace());
+
+  // We loaded the runtime with an explicit image, so it must exist.
+  ASSERT_EQ(heap->GetBootImageSpaces().size(), image_file_sizes.size());
+  for (size_t i = 0; i < helper.dex_file_locations.size(); ++i) {
+    std::unique_ptr<const DexFile> dex(
+        LoadExpectSingleDexFile(helper.dex_file_locations[i].c_str()));
+    ASSERT_TRUE(dex != nullptr);
+    uint64_t image_file_size = image_file_sizes[i];
+    gc::space::ImageSpace* image_space = heap->GetBootImageSpaces()[i];
+    ASSERT_TRUE(image_space != nullptr);
+    if (storage_mode == ImageHeader::kStorageModeUncompressed) {
+      // Uncompressed, image should be smaller than file.
+      ASSERT_LE(image_space->GetImageHeader().GetImageSize(), image_file_size);
+    } else if (image_file_size > 16 * KB) {
+      // Compressed, file should be smaller than image. Not really valid for small images.
+      ASSERT_LE(image_file_size, image_space->GetImageHeader().GetImageSize());
+    }
+
+    image_space->VerifyImageAllocations();
+    uint8_t* image_begin = image_space->Begin();
+    uint8_t* image_end = image_space->End();
+    if (i == 0) {
+      // This check is only valid for image 0.
+      CHECK_EQ(kRequestedImageBase, reinterpret_cast<uintptr_t>(image_begin));
+    }
+    for (size_t j = 0; j < dex->NumClassDefs(); ++j) {
+      const DexFile::ClassDef& class_def = dex->GetClassDef(j);
+      const char* descriptor = dex->GetClassDescriptor(class_def);
+      mirror::Class* klass = class_linker_->FindSystemClass(soa.Self(), descriptor);
+      EXPECT_TRUE(klass != nullptr) << descriptor;
+      if (image_classes.find(descriptor) == image_classes.end()) {
+        EXPECT_TRUE(reinterpret_cast<uint8_t*>(klass) >= image_end ||
+                    reinterpret_cast<uint8_t*>(klass) < image_begin) << descriptor;
+      } else {
+        // Image classes should be located inside the image.
+        EXPECT_LT(image_begin, reinterpret_cast<uint8_t*>(klass)) << descriptor;
+        EXPECT_LT(reinterpret_cast<uint8_t*>(klass), image_end) << descriptor;
+      }
+      EXPECT_TRUE(Monitor::IsValidLockWord(klass->GetLockWord(false)));
+    }
+  }
+}
+
+
+}  // namespace art
+
+#endif  // ART_COMPILER_IMAGE_TEST_H_
diff --git a/compiler/image_write_read_test.cc b/compiler/image_write_read_test.cc
new file mode 100644
index 0000000000..32c0b06766
--- /dev/null
+++ b/compiler/image_write_read_test.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "image_test.h"
+
+namespace art {
+
+TEST_F(ImageTest, WriteReadUncompressed) {
+  TestWriteRead(ImageHeader::kStorageModeUncompressed);
+}
+
+TEST_F(ImageTest, WriteReadLZ4) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4);
+}
+
+TEST_F(ImageTest, WriteReadLZ4HC) {
+  TestWriteRead(ImageHeader::kStorageModeLZ4HC);
+}
+
+}  // namespace art
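Note: the image_test.cc/image_test.h/image_write_read_test.cc split moves the shared fixture into a header whose definitions are `inline`, so multiple test translation units can include it without ODR violations while the slow WriteRead cases get their own .cc file. A reduced sketch of the pattern (hypothetical names, not the ART files):

    // shared_fixture_test.h
    #include <gtest/gtest.h>

    class SharedFixtureTest : public ::testing::Test {
     protected:
      void SharedHelper();
    };

    // Defined inline so several test .cc files may include this header.
    inline void SharedFixtureTest::SharedHelper() {
      // ... expensive setup/verification shared by multiple suites ...
    }

    // write_read_test.cc would then contain only:
    //   #include "shared_fixture_test.h"
    //   TEST_F(SharedFixtureTest, WriteRead) { SharedHelper(); }
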
@@ -299,29 +299,42 @@ uint32_t ArmBaseRelativePatcher::GetThunkTargetOffset(const ThunkKey& key, uint3 if (data.HasWrittenOffset()) { uint32_t offset = data.LastWrittenOffset(); DCHECK_LT(offset, patch_offset); - if (patch_offset - offset <= MaxNegativeDisplacement(key.GetType())) { + if (patch_offset - offset <= MaxNegativeDisplacement(key)) { return offset; } } DCHECK(data.HasPendingOffset()); uint32_t offset = data.GetPendingOffset(); DCHECK_GT(offset, patch_offset); - DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key.GetType())); + DCHECK_LE(offset - patch_offset, MaxPositiveDisplacement(key)); return offset; } +ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetMethodCallKey() { + return ThunkKey(ThunkType::kMethodCall, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) +} + +ArmBaseRelativePatcher::ThunkKey ArmBaseRelativePatcher::GetBakerThunkKey( + const LinkerPatch& patch) { + DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); + ThunkParams params; + params.baker_params.custom_value1 = patch.GetBakerCustomValue1(); + params.baker_params.custom_value2 = patch.GetBakerCustomValue2(); + ThunkKey key(ThunkType::kBakerReadBarrier, params); + return key; +} + void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_method, uint32_t code_offset) { for (const LinkerPatch& patch : compiled_method->GetPatches()) { uint32_t patch_offset = code_offset + patch.LiteralOffset(); - ThunkType key_type = static_cast<ThunkType>(-1); + ThunkKey key(static_cast<ThunkType>(-1), ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) ThunkData* old_data = nullptr; if (patch.GetType() == LinkerPatch::Type::kCallRelative) { - key_type = ThunkType::kMethodCall; + key = GetMethodCallKey(); unprocessed_method_call_patches_.emplace_back(patch_offset, patch.TargetMethod()); if (method_call_thunk_ == nullptr) { - ThunkKey key(key_type, ThunkParams{{ 0u, 0u }}); // NOLINT(whitespace/braces) - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); auto it = thunks_.Put(key, ThunkData(CompileThunk(key), max_next_offset)); method_call_thunk_ = &it->second; AddUnreservedThunk(method_call_thunk_); @@ -329,11 +342,10 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho old_data = method_call_thunk_; } } else if (patch.GetType() == LinkerPatch::Type::kBakerReadBarrierBranch) { - ThunkKey key = GetBakerReadBarrierKey(patch); - key_type = key.GetType(); + key = GetBakerThunkKey(patch); auto lb = thunks_.lower_bound(key); if (lb == thunks_.end() || thunks_.key_comp()(key, lb->first)) { - uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key_type); + uint32_t max_next_offset = CalculateMaxNextOffset(patch_offset, key); auto it = thunks_.PutBefore(lb, key, ThunkData(CompileThunk(key), max_next_offset)); AddUnreservedThunk(&it->second); } else { @@ -342,16 +354,16 @@ void ArmBaseRelativePatcher::ProcessPatches(const CompiledMethod* compiled_metho } if (old_data != nullptr) { // Shared path where an old thunk may need an update. - DCHECK(key_type != static_cast<ThunkType>(-1)); + DCHECK(key.GetType() != static_cast<ThunkType>(-1)); DCHECK(!old_data->HasReservedOffset() || old_data->LastReservedOffset() < patch_offset); if (old_data->NeedsNextThunk()) { // Patches for a method are ordered by literal offset, so if we still need to place // this thunk for a previous patch, that thunk shall be in range for this patch. 
- DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key_type)); + DCHECK_LE(old_data->MaxNextOffset(), CalculateMaxNextOffset(patch_offset, key)); } else { if (!old_data->HasReservedOffset() || - patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key_type)) { - old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key_type)); + patch_offset - old_data->LastReservedOffset() > MaxNegativeDisplacement(key)) { + old_data->SetMaxNextOffset(CalculateMaxNextOffset(patch_offset, key)); AddUnreservedThunk(old_data); } } @@ -385,8 +397,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, DCHECK(!unreserved_thunks_.empty()); DCHECK(!unprocessed_method_call_patches_.empty()); DCHECK(method_call_thunk_ != nullptr); - uint32_t max_positive_displacement = MaxPositiveDisplacement(ThunkType::kMethodCall); - uint32_t max_negative_displacement = MaxNegativeDisplacement(ThunkType::kMethodCall); + uint32_t max_positive_displacement = MaxPositiveDisplacement(GetMethodCallKey()); + uint32_t max_negative_displacement = MaxNegativeDisplacement(GetMethodCallKey()); // Process as many patches as possible, stop only on unresolved targets or calls too far back. while (!unprocessed_method_call_patches_.empty()) { MethodReference target_method = unprocessed_method_call_patches_.front().GetTargetMethod(); @@ -439,8 +451,8 @@ void ArmBaseRelativePatcher::ResolveMethodCalls(uint32_t quick_code_offset, } inline uint32_t ArmBaseRelativePatcher::CalculateMaxNextOffset(uint32_t patch_offset, - ThunkType type) { - return RoundDown(patch_offset + MaxPositiveDisplacement(type), + const ThunkKey& key) { + return RoundDown(patch_offset + MaxPositiveDisplacement(key), GetInstructionSetAlignment(instruction_set_)); } diff --git a/compiler/linker/arm/relative_patcher_arm_base.h b/compiler/linker/arm/relative_patcher_arm_base.h index 2cb1b6c535..fd204c05a6 100644 --- a/compiler/linker/arm/relative_patcher_arm_base.h +++ b/compiler/linker/arm/relative_patcher_arm_base.h @@ -42,21 +42,12 @@ class ArmBaseRelativePatcher : public RelativePatcher { enum class ThunkType { kMethodCall, // Method call thunk. - kBakerReadBarrierField, // Baker read barrier, load field or array element at known offset. - kBakerReadBarrierRoot, // Baker read barrier, GC root load. + kBakerReadBarrier, // Baker read barrier. }; - struct BakerReadBarrierOffsetParams { - uint32_t holder_reg; // Holder object for reading lock word. - uint32_t base_reg; // Base register, different from holder for large offset. - // If base differs from holder, it should be a pre-defined - // register to limit the number of thunks we need to emit. - // The offset is retrieved using introspection. - }; - - struct BakerReadBarrierRootParams { - uint32_t root_reg; // The register holding the GC root. 
- uint32_t dummy; + struct BakerReadBarrierParams { + uint32_t custom_value1; + uint32_t custom_value2; }; struct RawThunkParams { @@ -66,8 +57,8 @@ class ArmBaseRelativePatcher : public RelativePatcher { union ThunkParams { RawThunkParams raw_params; - BakerReadBarrierOffsetParams offset_params; - BakerReadBarrierRootParams root_params; + BakerReadBarrierParams baker_params; + static_assert(sizeof(raw_params) == sizeof(baker_params), "baker_params size check"); }; class ThunkKey { @@ -78,14 +69,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { return type_; } - BakerReadBarrierOffsetParams GetOffsetParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierField); - return params_.offset_params; - } - - BakerReadBarrierRootParams GetRootParams() const { - DCHECK(type_ == ThunkType::kBakerReadBarrierRoot); - return params_.root_params; + BakerReadBarrierParams GetBakerReadBarrierParams() const { + DCHECK(type_ == ThunkType::kBakerReadBarrier); + return params_.baker_params; } RawThunkParams GetRawParams() const { @@ -110,6 +96,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { } }; + static ThunkKey GetMethodCallKey(); + static ThunkKey GetBakerThunkKey(const LinkerPatch& patch); + uint32_t ReserveSpaceInternal(uint32_t offset, const CompiledMethod* compiled_method, MethodReference method_ref, @@ -119,10 +108,9 @@ class ArmBaseRelativePatcher : public RelativePatcher { uint32_t CalculateMethodCallDisplacement(uint32_t patch_offset, uint32_t target_offset); - virtual ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) = 0; virtual std::vector<uint8_t> CompileThunk(const ThunkKey& key) = 0; - virtual uint32_t MaxPositiveDisplacement(ThunkType type) = 0; - virtual uint32_t MaxNegativeDisplacement(ThunkType type) = 0; + virtual uint32_t MaxPositiveDisplacement(const ThunkKey& key) = 0; + virtual uint32_t MaxNegativeDisplacement(const ThunkKey& key) = 0; private: class ThunkData; @@ -132,7 +120,7 @@ class ArmBaseRelativePatcher : public RelativePatcher { void ResolveMethodCalls(uint32_t quick_code_offset, MethodReference method_ref); - uint32_t CalculateMaxNextOffset(uint32_t patch_offset, ThunkType type); + uint32_t CalculateMaxNextOffset(uint32_t patch_offset, const ThunkKey& key); RelativePatcherTargetProvider* const provider_; const InstructionSet instruction_set_; diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc index 1a5d79ce70..ced52ff07a 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.cc +++ b/compiler/linker/arm/relative_patcher_thumb2.cc @@ -16,9 +16,15 @@ #include "linker/arm/relative_patcher_thumb2.h" +#include "arch/arm/asm_support_arm.h" #include "art_method.h" #include "compiled_method.h" -#include "utils/arm/assembler_thumb2.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "lock_word.h" +#include "mirror/object.h" +#include "mirror/array-inl.h" +#include "read_barrier.h" +#include "utils/arm/assembler_arm_vixl.h" namespace art { namespace linker { @@ -32,6 +38,12 @@ static constexpr int32_t kPcDisplacement = 4; constexpr uint32_t kMaxMethodCallPositiveDisplacement = (1u << 24) - 2 + kPcDisplacement; constexpr uint32_t kMaxMethodCallNegativeDisplacement = (1u << 24) - kPcDisplacement; +// Maximum positive and negative displacement for a conditional branch measured from the patch +// location. (Signed 21 bit displacement with the last bit 0 has range [-2^20, 2^20-2] measured +// from the Thumb2 PC pointing right after the B.cond, i.e. 
4 bytes later than the patch location.)
+constexpr uint32_t kMaxBcondPositiveDisplacement = (1u << 20) - 2u + kPcDisplacement;
+constexpr uint32_t kMaxBcondNegativeDisplacement = (1u << 20) - kPcDisplacement;
+
 Thumb2RelativePatcher::Thumb2RelativePatcher(RelativePatcherTargetProvider* provider)
     : ArmBaseRelativePatcher(provider, kThumb2) {
 }
@@ -84,29 +96,225 @@ void Thumb2RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
   SetInsn32(code, literal_offset, insn);
 }
 
-void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code ATTRIBUTE_UNUSED,
-                                                        const LinkerPatch& patch ATTRIBUTE_UNUSED,
-                                                        uint32_t patch_offset ATTRIBUTE_UNUSED) {
-  LOG(FATAL) << "UNIMPLEMENTED";
+void Thumb2RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* code,
+                                                        const LinkerPatch& patch,
+                                                        uint32_t patch_offset) {
+  DCHECK_ALIGNED(patch_offset, 2u);
+  uint32_t literal_offset = patch.LiteralOffset();
+  DCHECK_ALIGNED(literal_offset, 2u);
+  DCHECK_LT(literal_offset, code->size());
+  uint32_t insn = GetInsn32(code, literal_offset);
+  DCHECK_EQ(insn, 0xf0408000);  // BNE +0 (unpatched)
+  ThunkKey key = GetBakerThunkKey(patch);
+  if (kIsDebugBuild) {
+    const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1;
+    BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+    // Check that the next instruction matches the expected LDR.
+    switch (kind) {
+      case BakerReadBarrierKind::kField: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+        // LDR (immediate) with correct base_reg.
+        CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+        break;
+      }
+      case BakerReadBarrierKind::kArray: {
+        DCHECK_GE(code->size() - literal_offset, 8u);
+        uint32_t next_insn = GetInsn32(code, literal_offset + 4u);
+        // LDR (register) with correct base_reg, S=1 and option=011 (LDR Rt, [Rn, Rm, LSL #2]).
+        CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+        const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+        CheckValidReg(next_insn & 0xf);  // Check index register.
+        break;
+      }
+      case BakerReadBarrierKind::kGcRoot: {
+        DCHECK_GE(literal_offset, 4u);
+        uint32_t prev_insn = GetInsn32(code, literal_offset - 4u);
+        // LDR (immediate) with correct root_reg.
+        const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+        CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+        break;
+      }
+      default:
+        LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+        UNREACHABLE();
+    }
+  }
+  uint32_t target_offset = GetThunkTargetOffset(key, patch_offset);
+  DCHECK_ALIGNED(target_offset, 4u);
+  uint32_t disp = target_offset - (patch_offset + kPcDisplacement);
+  DCHECK((disp >> 20) == 0u || (disp >> 20) == 0xfffu);  // 21-bit signed.
+  insn |= ((disp << (26 - 20)) & 0x04000000u) |  // Shift bit 20 to 26, "S".
+          ((disp >> (19 - 11)) & 0x00000800u) |  // Shift bit 19 to 11, "J2".
+          ((disp >> (18 - 13)) & 0x00002000u) |  // Shift bit 18 to 13, "J1".
+          ((disp << (16 - 12)) & 0x003f0000u) |  // Shift bits 12-17 to 16-21, "imm6".
+          ((disp >> (1 - 0)) & 0x000007ffu);     // Shift bits 1-11 to 0-10, "imm11".
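+  // For example, the maximum forward displacement disp = 0xffffeu yields imm11 = 0x7ff,
+  // imm6 = 0x3f, J1 = 1, J2 = 1 and S = 0, so the unpatched 0xf0408000u BNE becomes
+  // 0xf0408000u | 0x003f2fffu = 0xf07fafffu (cf. bne_max_forward in the tests).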
+ SetInsn32(code, literal_offset, insn); +} + +#define __ assembler.GetVIXLAssembler()-> + +static void EmitGrayCheckAndFastPath(arm::ArmVIXLAssembler& assembler, + vixl::aarch32::Register base_reg, + vixl::aarch32::MemOperand& lock_word, + vixl::aarch32::Label* slow_path) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + // Load the lock word containing the rb_state. + __ Ldr(ip, lock_word); + // Given the numeric representation, it's enough to check the low bit of the rb_state. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted)); + __ B(ne, slow_path, /* is_far_target */ false); + static_assert( + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, + "Field and array LDR offsets must be the same to reuse the same code."); + // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); + // Introduce a dependency on the lock_word including rb_state, + // to prevent load-load reordering, and without using + // a memory barrier (which would be more expensive). + __ Add(base_reg, base_reg, Operand(ip, LSR, 32)); + __ Bx(lr); // And return back to the function. + // Note: The fake dependency is unnecessary for the slow path. } -ArmBaseRelativePatcher::ThunkKey Thumb2RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch ATTRIBUTE_UNUSED) { - LOG(FATAL) << "UNIMPLEMENTED"; - UNREACHABLE(); +void Thumb2RelativePatcher::CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, + uint32_t encoded_data) { + using namespace vixl::aarch32; // NOLINT(build/namespaces) + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); + switch (kind) { + case BakerReadBarrierKind::kField: { + // Check if the holder is gray and, if not, add fake dependency to the base register + // and return to the LDR instruction to load the reference. Otherwise, use introspection + // to load the reference and call the entrypoint (in kBakerCcEntrypointRegister) + // that performs further checks on the reference and marks it if needed. + Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(base_reg.GetCode()); + Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data)); + CheckValidReg(holder_reg.GetCode()); + UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); + temps.Exclude(ip); + // If base_reg differs from holder_reg, the offset was too large and we must have + // emitted an explicit null check before the load. Otherwise, we need to null-check + // the holder as we do not necessarily do that check before going to the thunk. + vixl::aarch32::Label throw_npe; + if (holder_reg.Is(base_reg)) { + __ CompareAndBranchIfZero(holder_reg, &throw_npe, /* is_far_target */ false); + } + vixl::aarch32::Label slow_path; + MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); + EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); + __ Bind(&slow_path); + const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). 
*/ -1 +
+          BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET;
+      MemOperand ldr_half_address(lr, ldr_offset + 2);
+      __ Ldrh(ip, ldr_half_address);        // Load the LDR immediate half-word with "Rt | imm12".
+      __ Ubfx(ip, ip, 0, 12);               // Extract the offset imm12.
+      __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+      // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+      __ Bx(Register(kBakerCcEntrypointRegister));  // Jump to the entrypoint.
+      if (holder_reg.Is(base_reg)) {
+        // Add null check slow path. The stack map is at the address pointed to by LR.
+        __ Bind(&throw_npe);
+        int32_t offset = GetThreadOffset<kArmPointerSize>(kQuickThrowNullPointer).Int32Value();
+        __ Ldr(ip, MemOperand(/* Thread* */ vixl::aarch32::r9, offset));
+        __ Bx(ip);
+      }
+      break;
+    }
+    case BakerReadBarrierKind::kArray: {
+      Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(base_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl::aarch32::Label slow_path;
+      int32_t data_offset =
+          mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+      MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+      DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+      EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+      __ Bind(&slow_path);
+      const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+          BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+      MemOperand ldr_address(lr, ldr_offset + 2);
+      __ Ldrb(ip, ldr_address);          // Load the LDR (register) byte with "00 | imm2 | Rm",
+                                         // i.e. Rm+32 because the scale in imm2 is 2.
+      Register ep_reg(kBakerCcEntrypointRegister);  // Insert ip into the entrypoint address to create
+      __ Bfi(ep_reg, ip, 3, 6);          // a switch case target based on the index register.
+      __ Mov(ip, base_reg);              // Move the base register to ip.
+      __ Bx(ep_reg);                     // Jump to the entrypoint's array switch case.
+      break;
+    }
+    case BakerReadBarrierKind::kGcRoot: {
+      // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+      // and it does not have a forwarding address), call the correct introspection entrypoint;
+      // otherwise return the reference (or the extracted forwarding address).
+      // There is no gray bit check for GC roots.
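+      // Illustrative example (assuming 8-byte object alignment, kForwardingAddressShift == 3):
+      // an object forwarded to 0x12345678 stores lock word 0xc0000000u | (0x12345678u >> 3)
+      // = 0xc2468acfu; the 'CMP ip, #0xc0000000; BHS' below detects it and the final
+      // 'LSL #3' recovers the address as the two state bits shift out.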
+      Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+      CheckValidReg(root_reg.GetCode());
+      DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+      UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+      temps.Exclude(ip);
+      vixl::aarch32::Label return_label, not_marked, forwarding_address;
+      __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target */ false);
+      MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+      __ Ldr(ip, lock_word);
+      __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+      __ B(eq, &not_marked);
+      __ Bind(&return_label);
+      __ Bx(lr);
+      __ Bind(&not_marked);
+      static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+                    "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+                    " the highest bits and the 'forwarding address' state to have all bits set");
+      __ Cmp(ip, Operand(0xc0000000));
+      __ B(hs, &forwarding_address);
+      // Adjust the art_quick_read_barrier_mark_introspection address in kBakerCcEntrypointRegister
+      // to art_quick_read_barrier_mark_introspection_gc_roots.
+      Register ep_reg(kBakerCcEntrypointRegister);
+      __ Add(ep_reg, ep_reg, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+      __ Mov(ip, root_reg);
+      __ Bx(ep_reg);
+      __ Bind(&forwarding_address);
+      __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
+      __ Bx(lr);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+      UNREACHABLE();
+  }
+}
 
 std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) {
-  DCHECK(key.GetType() == ThunkType::kMethodCall);
-  // The thunk just uses the entry point in the ArtMethod. This works even for calls
-  // to the generic JNI and interpreter trampolines.
   ArenaPool pool;
   ArenaAllocator arena(&pool);
-  arm::Thumb2Assembler assembler(&arena);
-  assembler.LoadFromOffset(
-      arm::kLoadWord, arm::PC, arm::R0,
-      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
-  assembler.bkpt(0);
+  arm::ArmVIXLAssembler assembler(&arena);
+
+  switch (key.GetType()) {
+    case ThunkType::kMethodCall:
+      // The thunk just uses the entry point in the ArtMethod. This works even for calls
+      // to the generic JNI and interpreter trampolines.
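+      // (R0 is assumed to hold the callee ArtMethod* here, per ART's quick calling
+      // convention, so this emits in effect 'LDR PC, [R0, #entry_point_offset]';
+      // the BKPT below is never reached.)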
+ assembler.LoadFromOffset( + arm::kLoadWord, + vixl::aarch32::pc, + vixl::aarch32::r0, + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); + __ Bkpt(0); + break; + case ThunkType::kBakerReadBarrier: + CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1); + break; + } + assembler.FinalizeCode(); std::vector<uint8_t> thunk_code(assembler.CodeSize()); MemoryRegion code(thunk_code.data(), thunk_code.size()); @@ -114,14 +322,24 @@ std::vector<uint8_t> Thumb2RelativePatcher::CompileThunk(const ThunkKey& key) { return thunk_code; } -uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallPositiveDisplacement; +#undef __ + +uint32_t Thumb2RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { + switch (key.GetType()) { + case ThunkType::kMethodCall: + return kMaxMethodCallPositiveDisplacement; + case ThunkType::kBakerReadBarrier: + return kMaxBcondPositiveDisplacement; + } } -uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - DCHECK(type == ThunkType::kMethodCall); - return kMaxMethodCallNegativeDisplacement; +uint32_t Thumb2RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { + switch (key.GetType()) { + case ThunkType::kMethodCall: + return kMaxMethodCallNegativeDisplacement; + case ThunkType::kBakerReadBarrier: + return kMaxBcondNegativeDisplacement; + } } void Thumb2RelativePatcher::SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value) { diff --git a/compiler/linker/arm/relative_patcher_thumb2.h b/compiler/linker/arm/relative_patcher_thumb2.h index ab37802d0f..7fad245856 100644 --- a/compiler/linker/arm/relative_patcher_thumb2.h +++ b/compiler/linker/arm/relative_patcher_thumb2.h @@ -17,13 +17,46 @@ #ifndef ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ #define ART_COMPILER_LINKER_ARM_RELATIVE_PATCHER_THUMB2_H_ +#include "arch/arm/registers_arm.h" +#include "base/array_ref.h" +#include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { + +namespace arm { +class ArmVIXLAssembler; +} // namespace arm + namespace linker { class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { public: + static constexpr uint32_t kBakerCcEntrypointRegister = 4u; + + static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { + CheckValidReg(base_reg); + CheckValidReg(holder_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(holder_reg); + } + + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { + CheckValidReg(root_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | + BakerReadBarrierFirstRegField::Encode(root_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + explicit Thumb2RelativePatcher(RelativePatcherTargetProvider* provider); void PatchCall(std::vector<uint8_t>* code, @@ -39,12 +72,36 @@ class Thumb2RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - ThunkKey 
GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; - uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; + uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* pc is invalid */ 15u; + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kLast + }; + + static constexpr size_t kBitsForBakerReadBarrierKind = + MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); + static constexpr size_t kBitsForRegister = 4u; + using BakerReadBarrierKindField = + BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>; + using BakerReadBarrierFirstRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind, kBitsForRegister>; + using BakerReadBarrierSecondRegField = + BitField<uint32_t, kBitsForBakerReadBarrierKind + kBitsForRegister, kBitsForRegister>; + + static void CheckValidReg(uint32_t reg) { + DCHECK(reg < 12u && reg != kBakerCcEntrypointRegister); + } + + void CompileBakerReadBarrierThunk(arm::ArmVIXLAssembler& assembler, uint32_t encoded_data); + void SetInsn32(std::vector<uint8_t>* code, uint32_t offset, uint32_t value); static uint32_t GetInsn32(ArrayRef<const uint8_t> code, uint32_t offset); diff --git a/compiler/linker/arm/relative_patcher_thumb2_test.cc b/compiler/linker/arm/relative_patcher_thumb2_test.cc index f08270d934..2e28349231 100644 --- a/compiler/linker/arm/relative_patcher_thumb2_test.cc +++ b/compiler/linker/arm/relative_patcher_thumb2_test.cc @@ -14,8 +14,12 @@ * limitations under the License. */ +#include "base/casts.h" #include "linker/relative_patcher_test.h" #include "linker/arm/relative_patcher_thumb2.h" +#include "lock_word.h" +#include "mirror/array-inl.h" +#include "mirror/object.h" #include "oat_quick_method_header.h" namespace art { @@ -34,13 +38,99 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { static const ArrayRef<const uint8_t> kUnpatchedPcRelativeCode; static const uint32_t kPcInsnOffset; + // The PC in Thumb mode is 4 bytes after the instruction location. + static constexpr uint32_t kPcAdjustment = 4u; + // Branches within range [-256, 256) can be created from these by adding the low 8 bits. - static constexpr uint32_t kBlPlus0 = 0xf000f800; - static constexpr uint32_t kBlMinus256 = 0xf7ffff00; + static constexpr uint32_t kBlPlus0 = 0xf000f800u; + static constexpr uint32_t kBlMinus256 = 0xf7ffff00u; // Special BL values. - static constexpr uint32_t kBlPlusMax = 0xf3ffd7ff; - static constexpr uint32_t kBlMinusMax = 0xf400d000; + static constexpr uint32_t kBlPlusMax = 0xf3ffd7ffu; + static constexpr uint32_t kBlMinusMax = 0xf400d000u; + + // BNE +0, 32-bit, encoding T3. Bits 0-10, 11, 13, 16-21, 26 are placeholder for target offset. + static constexpr uint32_t kBneWPlus0 = 0xf0408000u; + + // LDR immediate, 32-bit, encoding T3. Bits 0-11 are offset, 12-15 are Rt, 16-20 are Rn. + static constexpr uint32_t kLdrWInsn = 0xf8d00000u; + + // LDR immediate, negative offset, encoding T4. Bits 0-7 are the offset to subtract. + static constexpr uint32_t kLdrNegativeOffset = 0xf8500c00u; + + // LDR register, lsl #2. Bits 4-5 are the imm2, i.e. the lsl shift. 
+ static constexpr uint32_t kLdrRegLsl2 = 0xf8500020u; + + // NOP instructions. + static constexpr uint32_t kNopInsn = 0xbf00u; + static constexpr uint32_t kNopWInsn = 0xf3af8000u; + + void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { + CHECK_LE(pos, code->size()); + if (IsUint<16>(insn)) { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 2u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } else { + const uint8_t insn_code[] = { + static_cast<uint8_t>(insn >> 16), + static_cast<uint8_t>(insn >> 24), + static_cast<uint8_t>(insn), + static_cast<uint8_t>(insn >> 8), + }; + static_assert(sizeof(insn_code) == 4u, "Invalid sizeof(insn_code)."); + code->insert(code->begin() + pos, insn_code, insn_code + sizeof(insn_code)); + } + } + + void PushBackInsn(std::vector<uint8_t>* code, uint32_t insn) { + InsertInsn(code, code->size(), insn); + } + + std::vector<uint8_t> GenNops(size_t num_nops) { + std::vector<uint8_t> result; + result.reserve(num_nops * 2u); + for (size_t i = 0; i != num_nops; ++i) { + PushBackInsn(&result, kNopInsn); + } + return result; + } + + std::vector<uint8_t> RawCode(std::initializer_list<uint32_t> insns) { + std::vector<uint8_t> raw_code; + size_t number_of_16_bit_insns = + std::count_if(insns.begin(), insns.end(), [](uint32_t x) { return IsUint<16>(x); }); + raw_code.reserve(insns.size() * 4u - number_of_16_bit_insns * 2u); + for (uint32_t insn : insns) { + PushBackInsn(&raw_code, insn); + } + return raw_code; + } + + uint32_t BneWWithOffset(uint32_t bne_offset, uint32_t target_offset) { + if (!IsAligned<2u>(bne_offset)) { + LOG(ERROR) << "Unaligned bne_offset: " << bne_offset; + return 0xffffffffu; // Fails code diff later. + } + if (!IsAligned<2u>(target_offset)) { + LOG(ERROR) << "Unaligned target_offset: " << target_offset; + return 0xffffffffu; // Fails code diff later. + } + uint32_t diff = target_offset - bne_offset - kPcAdjustment; + DCHECK_ALIGNED(diff, 2u); + if ((diff >> 20) != 0 && (diff >> 20) != 0xfffu) { + LOG(ERROR) << "Target out of range: " << diff; + return 0xffffffffu; // Fails code diff later. 
+ } + return kBneWPlus0 | ((diff >> 1) & 0x7ffu) // imm11 + | (((diff >> 12) & 0x3fu) << 16) // imm6 + | (((diff >> 18) & 1) << 13) // J1 + | (((diff >> 19) & 1) << 11) // J2 + | (((diff >> 20) & 1) << 26); // S + } bool Create2MethodsWithGap(const ArrayRef<const uint8_t>& method1_code, const ArrayRef<const LinkerPatch>& method1_patches, @@ -95,9 +185,7 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { } std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key( - ArmBaseRelativePatcher::ThunkType::kMethodCall, - ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); return static_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -125,19 +213,54 @@ class Thumb2RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> result; result.reserve(num_nops * 2u + 4u); for (size_t i = 0; i != num_nops; ++i) { - result.push_back(0x00); - result.push_back(0xbf); + PushBackInsn(&result, kNopInsn); } - result.push_back(static_cast<uint8_t>(bl >> 16)); - result.push_back(static_cast<uint8_t>(bl >> 24)); - result.push_back(static_cast<uint8_t>(bl)); - result.push_back(static_cast<uint8_t>(bl >> 8)); + PushBackInsn(&result, bl); return result; } void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); void TestStringReference(uint32_t string_offset); void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); + + std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { + const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Thumb2RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + uint32_t GetOutputInsn32(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 4u); + return (static_cast<uint32_t>(output_[offset]) << 16) | + (static_cast<uint32_t>(output_[offset + 1]) << 24) | + (static_cast<uint32_t>(output_[offset + 2]) << 0) | + (static_cast<uint32_t>(output_[offset + 3]) << 8); + } + + uint16_t GetOutputInsn16(uint32_t offset) { + CHECK_LE(offset, output_.size()); + CHECK_GE(output_.size() - offset, 2u); + return (static_cast<uint32_t>(output_[offset]) << 0) | + (static_cast<uint32_t>(output_[offset + 1]) << 8); + } + + void TestBakerField(uint32_t offset, uint32_t ref_reg); }; const uint8_t Thumb2RelativePatcherTest::kCallRawCode[] = { @@ -164,7 +287,7 @@ const uint32_t Thumb2RelativePatcherTest::kPcInsnOffset = 8u; void 
Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset) { dex_cache_arrays_begin_ = dex_cache_arrays_begin; - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::DexCacheArrayPatch(0u, nullptr, kPcInsnOffset, element_offset), LinkerPatch::DexCacheArrayPatch(4u, nullptr, kPcInsnOffset, element_offset), }; @@ -175,7 +298,7 @@ void Thumb2RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_ void Thumb2RelativePatcherTest::TestStringReference(uint32_t string_offset) { constexpr uint32_t kStringIndex = 1u; string_index_to_offset_map_.Put(kStringIndex, string_offset); - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeStringPatch(0u, nullptr, kPcInsnOffset, kStringIndex), LinkerPatch::RelativeStringPatch(4u, nullptr, kPcInsnOffset, kStringIndex), }; @@ -214,7 +337,7 @@ void Thumb2RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const Linker } TEST_F(Thumb2RelativePatcherTest, CallSelf) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -227,11 +350,11 @@ TEST_F(Thumb2RelativePatcherTest, CallSelf) { } TEST_F(Thumb2RelativePatcherTest, CallOther) { - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(method1_patches)); - LinkerPatch method2_patches[] = { + const LinkerPatch method2_patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 1u), }; AddCompiledMethod(MethodRef(2u), kCallCode, ArrayRef<const LinkerPatch>(method2_patches)); @@ -254,7 +377,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOther) { } TEST_F(Thumb2RelativePatcherTest, CallTrampoline) { - LinkerPatch patches[] = { + const LinkerPatch patches[] = { LinkerPatch::RelativeCodePatch(0u, nullptr, 2u), }; AddCompiledMethod(MethodRef(1u), kCallCode, ArrayRef<const LinkerPatch>(patches)); @@ -274,7 +397,7 @@ TEST_F(Thumb2RelativePatcherTest, CallTrampolineTooFar) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, missing_method_index), }; @@ -303,7 +426,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -325,7 +448,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherAlmostTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 2u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -347,7 +470,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarAfter) { constexpr uint32_t bl_offset_in_method1 = 2u * 2u; // After NOPs. 
ArrayRef<const uint8_t> method1_code(method1_raw_code); ASSERT_EQ(bl_offset_in_method1 + 4u, method1_code.size()); - LinkerPatch method1_patches[] = { + const LinkerPatch method1_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method1, nullptr, 3u), }; @@ -382,7 +505,7 @@ TEST_F(Thumb2RelativePatcherTest, CallOtherJustTooFarBefore) { constexpr uint32_t bl_offset_in_method3 = 3u * 2u; // After NOPs. ArrayRef<const uint8_t> method3_code(method3_raw_code); ASSERT_EQ(bl_offset_in_method3 + 4u, method3_code.size()); - LinkerPatch method3_patches[] = { + const LinkerPatch method3_patches[] = { LinkerPatch::RelativeCodePatch(bl_offset_in_method3, nullptr, 1u), }; @@ -445,5 +568,535 @@ TEST_F(Thumb2RelativePatcherTest, StringReference4) { ASSERT_LT(GetMethodOffset(1u), 0xfcu); } +void Thumb2RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + DCHECK_ALIGNED(offset, 4u); + DCHECK_LT(offset, 4 * KB); + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset, encoded_data), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + for (uint32_t holder_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | offset | (base_reg << 16) | (ref_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()) << "bne=0x" << std::hex << bne; + ASSERT_TRUE( + CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerOffsetThunk(base_reg, holder_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + size_t gray_check_offset = thunk_offset; + if (holder_reg == base_reg) { + // Verify that the null-check uses the correct register, i.e. holder_reg. 
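+      // Low registers use a 16-bit CBZ (encoding T1): 0xb100 | Rn, with the mask 0xfd07u
+      // keeping the opcode and Rn bits while ignoring the i:imm5 branch offset, e.g.
+      // holder_reg = 5 must read back as 0xb105 after masking. High registers need a
+      // 32-bit 'CMP Rn, #0' (0xf1b00f00u | Rn << 16) followed by a 16-bit BEQ instead.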
+ if (holder_reg < 8) { + ASSERT_GE(output_.size() - gray_check_offset, 2u); + ASSERT_EQ(0xb100 | holder_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + gray_check_offset +=2u; + } else { + ASSERT_GE(output_.size() - gray_check_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (holder_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + gray_check_offset += 6u; + } + } + // Verify that the lock word for gray bit check is loaded from the holder address. + ASSERT_GE(output_.size() - gray_check_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + const uint32_t load_lock_word = + kLdrWInsn | + (holder_reg << 16) | + (/* IP */ 12 << 12) | + mirror::Object::MonitorOffset().Uint32Value(); + ASSERT_EQ(load_lock_word, GetOutputInsn32(gray_check_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(gray_check_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(gray_check_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency (skip "ADD LR, LR, #ldr_offset"). + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(gray_check_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } + } +} + +#define TEST_BAKER_FIELD(offset, ref_reg) \ + TEST_F(Thumb2RelativePatcherTest, \ + BakerOffset##offset##_##ref_reg) { \ + TestBakerField(offset, ref_reg); \ + } + +TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 7) +TEST_BAKER_FIELD(/* offset */ 0xffc, /* ref_reg */ 11) + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddle) { + // One thunk in the middle with maximum distance branches to it from both sides. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. 
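+  // (kLdrWInsn = 0xf8d00000u decodes as 'LDR.W r0, [r0, #0]': Rn = Rt = 0, imm12 = 0.)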
+ constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kLiteralOffset2 = 4; + static_assert(IsAligned<kArmAlignment>(kLiteralOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // Allow reaching the thunk from the very beginning of a method almost 1MiB away. Backward branch + // reaches the full 1MiB but we need to take PC adjustment into account. Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). + size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t filler2_size = + 1 * MB - (kLiteralOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_max_backward = kBneWPlus0 | 0x04000000; + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = RawCode({kNopWInsn, bne_max_backward, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkBeforeFiller) { + // Based on the first part of BakerOffsetThunkInTheMiddle but the BNE is one instruction + // earlier, so the thunk is emitted before the filler. 
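+  // With kLiteralOffset1 = 4u the BNE can reach at most offset kLiteralOffset1 +
+  // kMaxBcondPositiveDisplacement = (1u << 20) + 6u, while the first aligned slot
+  // after the filler is at (1u << 20) + 8u, so the thunk lands right after this method.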
+ // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 4u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kBneWPlus0, kLdrWInsn, kNopInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement + 2 */ (1u << 20); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + Link(); + + const uint32_t bne = BneWWithOffset(kLiteralOffset1, RoundUp(raw_code1.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = RawCode({kNopWInsn, bne, kLdrWInsn, kNopInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerOffsetThunkInTheMiddleUnreachableFromLast) { + // Based on the BakerOffsetThunkInTheMiddle but the BNE in the last method is preceded + // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. + constexpr uint32_t kLiteralOffset1 = 6u; + const std::vector<uint8_t> raw_code1 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code1(raw_code1); + uint32_t encoded_data = + Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(/* base_reg */ 0, /* holder_reg */ 0); + const LinkerPatch patches1[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset1, encoded_data), + }; + AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(patches1)); + + constexpr uint32_t expected_thunk_offset = + kLiteralOffset1 + kPcAdjustment + /* kMaxBcondPositiveDisplacement */ ((1 << 20) - 2u); + static_assert(IsAligned<kArmAlignment>(expected_thunk_offset), "Target offset must be aligned."); + size_t filler1_size = expected_thunk_offset - + RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment); + std::vector<uint8_t> raw_filler1_code = GenNops(filler1_size / 2u); + ArrayRef<const uint8_t> filler1_code(raw_filler1_code); + AddCompiledMethod(MethodRef(2u), filler1_code); + + // Enforce thunk reservation with a tiny method. + AddCompiledMethod(MethodRef(3u), kNopCode); + + constexpr uint32_t kReachableFromOffset2 = 4; + constexpr uint32_t kLiteralOffset2 = kReachableFromOffset2 + 2; + static_assert(IsAligned<kArmAlignment>(kReachableFromOffset2 + kPcAdjustment), + "PC for BNE must be aligned."); + + // If not for the extra NOP, this would allow reaching the thunk from the BNE + // of a method 1MiB away. Backward branch reaches the full 1MiB but we need to take + // PC adjustment into account. 
Things to subtract: + // - thunk size and method 3 pre-header, rounded up (padding in between if needed) + // - method 3 code and method 4 pre-header, rounded up (padding in between if needed) + // - method 4 header (let there be no padding between method 4 code and method 5 pre-header). + size_t thunk_size = CompileBakerOffsetThunk(/* base_reg */ 0, /* holder_reg */ 0).size(); + size_t filler2_size = + 1 * MB - (kReachableFromOffset2 + kPcAdjustment) + - RoundUp(thunk_size + sizeof(OatQuickMethodHeader), kArmAlignment) + - RoundUp(kNopCode.size() + sizeof(OatQuickMethodHeader), kArmAlignment) + - sizeof(OatQuickMethodHeader); + std::vector<uint8_t> raw_filler2_code = GenNops(filler2_size / 2u); + ArrayRef<const uint8_t> filler2_code(raw_filler2_code); + AddCompiledMethod(MethodRef(4u), filler2_code); + + // Extra 16-bit NOP compared to BakerOffsetThunkInTheMiddle. + const std::vector<uint8_t> raw_code2 = RawCode({kNopWInsn, kNopInsn, kBneWPlus0, kLdrWInsn}); + ArrayRef<const uint8_t> code2(raw_code2); + const LinkerPatch patches2[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kLiteralOffset2, encoded_data), + }; + AddCompiledMethod(MethodRef(5u), code2, ArrayRef<const LinkerPatch>(patches2)); + + Link(); + + uint32_t first_method_offset = GetMethodOffset(1u); + uint32_t last_method_offset = GetMethodOffset(5u); + EXPECT_EQ(2 * MB, last_method_offset - first_method_offset); + + const uint32_t bne_max_forward = kBneWPlus0 | 0x003f2fff; + const uint32_t bne_last = + BneWWithOffset(kLiteralOffset2, RoundUp(raw_code2.size(), kArmAlignment)); + const std::vector<uint8_t> expected_code1 = + RawCode({kNopWInsn, kNopInsn, bne_max_forward, kLdrWInsn}); + const std::vector<uint8_t> expected_code2 = + RawCode({kNopWInsn, kNopInsn, bne_last, kLdrWInsn}); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(1), ArrayRef<const uint8_t>(expected_code1))); + ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); +} + +TEST_F(Thumb2RelativePatcherTest, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 3u : 2u; + return kLdrRegLsl2 | index_reg | (base_reg << 16) | (ref_reg << 12); + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kBneWPlus0, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. 
+ uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + const std::vector<uint8_t> expected_code = RawCode({bne, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded from the correct address + // before the base_reg which points to the array data. + ASSERT_GE(output_.size() - thunk_offset, + 4u * /* 32-bit instructions */ 4u + 2u * /* 16-bit instructions */ 2u); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + ASSERT_GT(offset, -256); + const uint32_t load_lock_word = + kLdrNegativeOffset | + (-offset & 0xffu) | + (base_reg << 16) | + (/* IP */ 12 << 12); + EXPECT_EQ(load_lock_word, GetOutputInsn32(thunk_offset)); + // Verify the gray bit check. + DCHECK_GE(LockWord::kReadBarrierStateShift, 8u); // ROR modified immediate. + uint32_t ror_shift = 7 + (32 - LockWord::kReadBarrierStateShift); + const uint32_t tst_gray_bit_without_offset = + 0xf0100f00 | (/* IP */ 12 << 16) + | (((ror_shift >> 4) & 1) << 26) // i + | (((ror_shift >> 1) & 7) << 12) // imm3 + | ((ror_shift & 1) << 7); // imm8, ROR('1':imm8<7:0>, ror_shift). + EXPECT_EQ(tst_gray_bit_without_offset, GetOutputInsn32(thunk_offset + 4u)); + EXPECT_EQ(0xd100u, GetOutputInsn16(thunk_offset + 8u) & 0xff00u); // BNE + // Verify the fake dependency. + const uint32_t fake_dependency = + 0xeb000010 | // ADD Rd, Rn, Rm, LSR 32 (type=01, imm3=000, imm2=00) + (/* IP */ 12) | // Rm = IP + (base_reg << 16) | // Rn = base_reg + (base_reg << 8); // Rd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn32(thunk_offset + 14u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRoot) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 5, 6, 7, // R4 is reserved for entrypoint address. + 8, 9, 10, 11, // IP, SP, LR and PC are reserved. 
+ }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 4u; + uint32_t method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> raw_code = RawCode({ldr, kBneWPlus0}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArmAlignment); + method_idx = 0u; + for (uint32_t root_reg : valid_regs) { + ++method_idx; + uint32_t bne = BneWWithOffset(GetMethodOffset(method_idx) + kLiteralOffset, thunk_offset); + uint32_t ldr = kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (root_reg << 12); + const std::vector<uint8_t> expected_code = RawCode({ldr, bne}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerGcRootThunk(root_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. + if (root_reg < 8) { + ASSERT_GE(output_.size() - thunk_offset, 2u); + ASSERT_EQ(0xb100 | root_reg, GetOutputInsn16(thunk_offset) & 0xfd07u); + } else { + ASSERT_GE(output_.size() - thunk_offset, 6u); + ASSERT_EQ(0xf1b00f00u | (root_reg << 16), GetOutputInsn32(thunk_offset) & 0xfbff8f00u); + ASSERT_EQ(0xd000u, GetOutputInsn16(thunk_offset + 4u) & 0xff00u); // BEQ + } + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArmAlignment); + } +} + +TEST_F(Thumb2RelativePatcherTest, BakerGcRootOffsetBits) { + // Test 1MiB of patches to the same thunk to stress-test different large offsets. + // (The low bits are not that important but the location of the high bits is easy to get wrong.) + std::vector<uint8_t> code; + code.reserve(1 * MB); + const size_t num_patches = 1 * MB / 8u; + std::vector<LinkerPatch> patches; + patches.reserve(num_patches); + const uint32_t ldr = + kLdrWInsn | (/* offset */ 8) | (/* base_reg */ 0 << 16) | (/* root_reg */ 0 << 12); + uint32_t encoded_data = Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 0); + for (size_t i = 0; i != num_patches; ++i) { + PushBackInsn(&code, ldr); + PushBackInsn(&code, kBneWPlus0); + patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data)); + } + ASSERT_EQ(1 * MB, code.size()); + ASSERT_EQ(num_patches, patches.size()); + AddCompiledMethod(MethodRef(1u), + ArrayRef<const uint8_t>(code), + ArrayRef<const LinkerPatch>(patches)); + Link(); + + // The thunk is right after the method code. 
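+  // The BNE at offset 8u * i + 4u targets the thunk at 1 * MB, so the encoded
+  // displacements sweep from 0xffff8u (i = 0) down to 0u (the last patch),
+  // exercising every position of the S, J1, J2, imm6 and imm11 fields.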
+ DCHECK_ALIGNED(1 * MB, kArmAlignment);
+ std::vector<uint8_t> expected_code;
+ for (size_t i = 0; i != num_patches; ++i) {
+ PushBackInsn(&expected_code, ldr);
+ PushBackInsn(&expected_code, BneWWithOffset(8u * i + 4u, 1 * MB));
+ patches.push_back(LinkerPatch::BakerReadBarrierBranchPatch(8u * i + 4u, encoded_data));
+ }
+ EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code)));
+}
+
+TEST_F(Thumb2RelativePatcherTest, BakerAndMethodCallInteraction) {
+ // During development, there was a `DCHECK_LE(MaxNextOffset(), next_thunk.MaxNextOffset());`
+ // in `ArmBaseRelativePatcher::ThunkData::MakeSpaceBefore()` which does not necessarily
+ // hold when we're reserving thunks of different sizes. This test exposes the situation
+ // by using Baker thunks and a method call thunk.
+
+ // Add a method call patch that can reach up to method 1 offset + 16MiB.
+ uint32_t method_idx = 0u;
+ constexpr size_t kMethodCallLiteralOffset = 2u;
+ constexpr uint32_t kMissingMethodIdx = 2u;
+ const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kBlPlus0});
+ const LinkerPatch method1_patches[] = {
+ LinkerPatch::RelativeCodePatch(kMethodCallLiteralOffset, nullptr, 2u),
+ };
+ ArrayRef<const uint8_t> code1(raw_code1);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(1u), code1, ArrayRef<const LinkerPatch>(method1_patches));
+
+ // Skip kMissingMethodIdx.
+ ++method_idx;
+ ASSERT_EQ(kMissingMethodIdx, method_idx);
+ // Add a method of just the right size so that the method code for the next one starts
+ // 1MiB after the code for method 1.
+ size_t filler_size =
+ 1 * MB - RoundUp(raw_code1.size() + sizeof(OatQuickMethodHeader), kArmAlignment)
+ - sizeof(OatQuickMethodHeader);
+ std::vector<uint8_t> filler_code = GenNops(filler_size / 2u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ // Add 14 methods with 1MiB code+header, making the code for the next method start 1MiB
+ // before the currently scheduled MaxNextOffset() for the method call thunk.
+ for (uint32_t i = 0; i != 14; ++i) {
+ filler_size = 1 * MB - sizeof(OatQuickMethodHeader);
+ filler_code = GenNops(filler_size / 2u);
+ ++method_idx;
+ AddCompiledMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(filler_code));
+ }
+
+ // Add 2 Baker GC root patches to the last method, one that would allow the thunk at
+ // 1MiB + kArmAlignment, i.e. kArmAlignment after the method call thunk, and a
+ // second one that needs it kArmAlignment after that. Given the size of the GC root thunk
+ // is more than the space required by the method call thunk plus kArmAlignment,
+ // this pushes the first GC root thunk's pending MaxNextOffset() before the method call
+ // thunk's pending MaxNextOffset(), which needs to be adjusted.
+ ASSERT_LT(RoundUp(CompileMethodCallThunk().size(), kArmAlignment) + kArmAlignment,
+ CompileBakerGcRootThunk(/* root_reg */ 0).size());
+ static_assert(kArmAlignment == 8, "Code below assumes kArmAlignment == 8");
+ constexpr size_t kBakerLiteralOffset1 = kArmAlignment + 2u - kPcAdjustment;
+ constexpr size_t kBakerLiteralOffset2 = kBakerLiteralOffset1 + kArmAlignment;
+ // With offset = 0 and base_reg = 0, the LDR is simply `kLdrWInsn | (root_reg << 12)`.
+ const uint32_t ldr1 = kLdrWInsn | (/* root_reg */ 1 << 12);
+ const uint32_t ldr2 = kLdrWInsn | (/* root_reg */ 2 << 12);
+ const std::vector<uint8_t> last_method_raw_code = RawCode({
+ kNopInsn, // Padding before first GC root read barrier.
+ ldr1, kBneWPlus0, // First GC root LDR with read barrier.
+ ldr2, kBneWPlus0, // Second GC root LDR with read barrier. + }); + uint32_t encoded_data1 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 1); + uint32_t encoded_data2 = + Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(/* root_reg */ 2); + const LinkerPatch last_method_patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset1, encoded_data1), + LinkerPatch::BakerReadBarrierBranchPatch(kBakerLiteralOffset2, encoded_data2), + }; + ++method_idx; + AddCompiledMethod(MethodRef(method_idx), + ArrayRef<const uint8_t>(last_method_raw_code), + ArrayRef<const LinkerPatch>(last_method_patches)); + + // The main purpose of the test is to check that Link() does not cause a crash. + Link(); + + ASSERT_EQ(15 * MB, GetMethodOffset(method_idx) - GetMethodOffset(1u)); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc index 551c73b2a4..2b06e3f649 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.cc +++ b/compiler/linker/arm64/relative_patcher_arm64.cc @@ -29,6 +29,7 @@ #include "mirror/array-inl.h" #include "oat.h" #include "oat_quick_method_header.h" +#include "read_barrier.h" #include "utils/arm64/assembler_arm64.h" namespace art { @@ -304,27 +305,42 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod DCHECK_LT(literal_offset, code->size()); uint32_t insn = GetInsn(code, literal_offset); DCHECK_EQ(insn & 0xffffffe0u, 0xb5000000); // CBNZ Xt, +0 (unpatched) - ThunkKey key = GetBakerReadBarrierKey(patch); + ThunkKey key = GetBakerThunkKey(patch); if (kIsDebugBuild) { + const uint32_t encoded_data = key.GetBakerReadBarrierParams().custom_value1; + BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); // Check that the next instruction matches the expected LDR. - switch (key.GetType()) { - case ThunkType::kBakerReadBarrierField: { + switch (kind) { + case BakerReadBarrierKind::kField: { DCHECK_GE(code->size() - literal_offset, 8u); uint32_t next_insn = GetInsn(code, literal_offset + 4u); // LDR (immediate) with correct base_reg. CheckValidReg(next_insn & 0x1fu); // Check destination register. - CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (key.GetOffsetParams().base_reg << 5)); + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); break; } - case ThunkType::kBakerReadBarrierRoot: { + case BakerReadBarrierKind::kArray: { + DCHECK_GE(code->size() - literal_offset, 8u); + uint32_t next_insn = GetInsn(code, literal_offset + 4u); + // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), + // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. + CheckValidReg(next_insn & 0x1fu); // Check destination register. + const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); + CheckValidReg((next_insn >> 16) & 0x1f); // Check index register + break; + } + case BakerReadBarrierKind::kGcRoot: { DCHECK_GE(literal_offset, 4u); uint32_t prev_insn = GetInsn(code, literal_offset - 4u); // LDR (immediate) with correct root_reg. 
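      // (0xb9400000u is the A64 LDR (immediate, unsigned offset) 32-bit opcode; the mask
      // 0xffc0001fu keeps the opcode and the Rt field while ignoring imm12 and Rn.)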
- CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | key.GetRootParams().root_reg); + const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); + CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); break; } default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(key.GetType()); + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); UNREACHABLE(); } } @@ -336,40 +352,6 @@ void Arm64RelativePatcher::PatchBakerReadBarrierBranch(std::vector<uint8_t>* cod SetInsn(code, literal_offset, insn); } -ArmBaseRelativePatcher::ThunkKey Arm64RelativePatcher::GetBakerReadBarrierKey( - const LinkerPatch& patch) { - DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kBakerReadBarrierBranch); - uint32_t value = patch.GetBakerCustomValue1(); - BakerReadBarrierKind type = BakerReadBarrierKindField::Decode(value); - ThunkParams params; - switch (type) { - case BakerReadBarrierKind::kField: - params.offset_params.base_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.offset_params.base_reg); - params.offset_params.holder_reg = BakerReadBarrierSecondRegField::Decode(value); - CheckValidReg(params.offset_params.holder_reg); - break; - case BakerReadBarrierKind::kGcRoot: - params.root_params.root_reg = BakerReadBarrierFirstRegField::Decode(value); - CheckValidReg(params.root_params.root_reg); - params.root_params.dummy = 0u; - DCHECK_EQ(BakerReadBarrierSecondRegField::Decode(value), kInvalidEncodedReg); - break; - default: - LOG(FATAL) << "Unexpected type: " << static_cast<uint32_t>(type); - UNREACHABLE(); - } - constexpr uint8_t kTypeTranslationOffset = 1u; - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kField) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierField), - "Thunk type translation check."); - static_assert(static_cast<uint32_t>(BakerReadBarrierKind::kGcRoot) + kTypeTranslationOffset == - static_cast<uint32_t>(ThunkType::kBakerReadBarrierRoot), - "Thunk type translation check."); - return ThunkKey(static_cast<ThunkType>(static_cast<uint32_t>(type) + kTypeTranslationOffset), - params); -} - #define __ assembler.GetVIXLAssembler()-> static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, @@ -394,33 +376,27 @@ static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, // Introduce a dependency on the lock_word including rb_state, // to prevent load-load reordering, and without using // a memory barrier (which would be more expensive). - __ Add(base_reg, base_reg, Operand(vixl::aarch64::ip0, LSR, 32)); + __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); __ Br(lr); // And return back to the function. // Note: The fake dependency is unnecessary for the slow path. } -std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { +void Arm64RelativePatcher::CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, + uint32_t encoded_data) { using namespace vixl::aarch64; // NOLINT(build/namespaces) - ArenaPool pool; - ArenaAllocator arena(&pool); - arm64::Arm64Assembler assembler(&arena); - - switch (key.GetType()) { - case ThunkType::kMethodCall: { - // The thunk just uses the entry point in the ArtMethod. This works even for calls - // to the generic JNI and interpreter trampolines. 
- Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64PointerSize).Int32Value());
- assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
- break;
- }
- case ThunkType::kBakerReadBarrierField: {
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
 // Check if the holder is gray and, if not, add a fake dependency to the base register
 // and return to the LDR instruction to load the reference. Otherwise, use introspection
 // to load the reference and call the entrypoint (in IP1) that performs further checks
 // on the reference and marks it if needed.
- auto holder_reg = Register::GetXRegFromCode(key.GetOffsetParams().holder_reg);
- auto base_reg = Register::GetXRegFromCode(key.GetOffsetParams().base_reg);
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ auto holder_reg =
+ Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
 UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
 temps.Exclude(ip0, ip1);
 // If base_reg differs from holder_reg, the offset was too large and we must have
@@ -444,17 +420,43 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) {
 // Add null check slow path. The stack map is at the address pointed to by LR.
 __ Bind(&throw_npe);
 int32_t offset = GetThreadOffset<kArm64PointerSize>(kQuickThrowNullPointer).Int32Value();
- __ Ldr(ip0, MemOperand(vixl::aarch64::x19, offset));
+ __ Ldr(ip0, MemOperand(/* Thread* */ vixl::aarch64::x19, offset));
 __ Br(ip0);
 }
 break;
 }
- case ThunkType::kBakerReadBarrierRoot: {
+ case BakerReadBarrierKind::kArray: {
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffset(), 0);
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
+ __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
+ __ Bfi(ip1, ip0, 3, 6); // Insert ip0 into the entrypoint address to create
+ // a switch case target based on the index register.
+ __ Mov(ip0, base_reg); // Move the base register to ip0.
+ __ Br(ip1); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
 // and it does not have a forwarding address), call the correct introspection entrypoint;
 // otherwise return the reference (or the extracted forwarding address).
 // There is no gray bit check for GC roots.
- auto root_reg = Register::GetWRegFromCode(key.GetRootParams().root_reg); + auto root_reg = + Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); + CheckValidReg(root_reg.GetCode()); + DCHECK_EQ(kInvalidEncodedReg, BakerReadBarrierSecondRegField::Decode(encoded_data)); UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); temps.Exclude(ip0, ip1); vixl::aarch64::Label return_label, not_marked, forwarding_address; @@ -477,6 +479,30 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { __ Br(lr); break; } + default: + LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); + UNREACHABLE(); + } +} + +std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { + ArenaPool pool; + ArenaAllocator arena(&pool); + arm64::Arm64Assembler assembler(&arena); + + switch (key.GetType()) { + case ThunkType::kMethodCall: { + // The thunk just uses the entry point in the ArtMethod. This works even for calls + // to the generic JNI and interpreter trampolines. + Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( + kArm64PointerSize).Int32Value()); + assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); + break; + } + case ThunkType::kBakerReadBarrier: { + CompileBakerReadBarrierThunk(assembler, key.GetBakerReadBarrierParams().custom_value1); + break; + } } // Ensure we emit the literal pool. @@ -489,22 +515,20 @@ std::vector<uint8_t> Arm64RelativePatcher::CompileThunk(const ThunkKey& key) { #undef __ -uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(ThunkType type) { - switch (type) { +uint32_t Arm64RelativePatcher::MaxPositiveDisplacement(const ThunkKey& key) { + switch (key.GetType()) { case ThunkType::kMethodCall: return kMaxMethodCallPositiveDisplacement; - case ThunkType::kBakerReadBarrierField: - case ThunkType::kBakerReadBarrierRoot: + case ThunkType::kBakerReadBarrier: return kMaxBcondPositiveDisplacement; } } -uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(ThunkType type) { - switch (type) { +uint32_t Arm64RelativePatcher::MaxNegativeDisplacement(const ThunkKey& key) { + switch (key.GetType()) { case ThunkType::kMethodCall: return kMaxMethodCallNegativeDisplacement; - case ThunkType::kBakerReadBarrierField: - case ThunkType::kBakerReadBarrierRoot: + case ThunkType::kBakerReadBarrier: return kMaxBcondNegativeDisplacement; } } diff --git a/compiler/linker/arm64/relative_patcher_arm64.h b/compiler/linker/arm64/relative_patcher_arm64.h index 7887cea5e6..d1ab410a7e 100644 --- a/compiler/linker/arm64/relative_patcher_arm64.h +++ b/compiler/linker/arm64/relative_patcher_arm64.h @@ -19,19 +19,19 @@ #include "base/array_ref.h" #include "base/bit_field.h" +#include "base/bit_utils.h" #include "linker/arm/relative_patcher_arm_base.h" namespace art { + +namespace arm64 { +class Arm64Assembler; +} // namespace arm64 + namespace linker { class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { public: - enum class BakerReadBarrierKind : uint8_t { - kField, // Field get or array get with constant offset (i.e. constant index). - kGcRoot, // GC root load. 
- kLast - }; - static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) { CheckValidReg(base_reg); CheckValidReg(holder_reg); @@ -40,6 +40,13 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { BakerReadBarrierSecondRegField::Encode(holder_reg); } + static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) { + CheckValidReg(base_reg); + return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) | + BakerReadBarrierFirstRegField::Encode(base_reg) | + BakerReadBarrierSecondRegField::Encode(kInvalidEncodedReg); + } + static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) { CheckValidReg(root_reg); return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) | @@ -68,14 +75,20 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { uint32_t patch_offset) OVERRIDE; protected: - static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; - - ThunkKey GetBakerReadBarrierKey(const LinkerPatch& patch) OVERRIDE; std::vector<uint8_t> CompileThunk(const ThunkKey& key) OVERRIDE; - uint32_t MaxPositiveDisplacement(ThunkType type) OVERRIDE; - uint32_t MaxNegativeDisplacement(ThunkType type) OVERRIDE; + uint32_t MaxPositiveDisplacement(const ThunkKey& key) OVERRIDE; + uint32_t MaxNegativeDisplacement(const ThunkKey& key) OVERRIDE; private: + static constexpr uint32_t kInvalidEncodedReg = /* sp/zr is invalid */ 31u; + + enum class BakerReadBarrierKind : uint8_t { + kField, // Field get or array get with constant offset (i.e. constant index). + kArray, // Array get with index in register. + kGcRoot, // GC root load. + kLast + }; + static constexpr size_t kBitsForBakerReadBarrierKind = MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast)); static constexpr size_t kBitsForRegister = 5u; @@ -90,6 +103,8 @@ class Arm64RelativePatcher FINAL : public ArmBaseRelativePatcher { DCHECK(reg < 30u && reg != 16u && reg != 17u); } + void CompileBakerReadBarrierThunk(arm64::Arm64Assembler& assembler, uint32_t encoded_data); + static uint32_t PatchAdrp(uint32_t adrp, uint32_t disp); static bool NeedsErratum843419Thunk(ArrayRef<const uint8_t> code, uint32_t literal_offset, diff --git a/compiler/linker/arm64/relative_patcher_arm64_test.cc b/compiler/linker/arm64/relative_patcher_arm64_test.cc index b4d35ab2a7..b6549eefb3 100644 --- a/compiler/linker/arm64/relative_patcher_arm64_test.cc +++ b/compiler/linker/arm64/relative_patcher_arm64_test.cc @@ -18,6 +18,7 @@ #include "linker/relative_patcher_test.h" #include "linker/arm64/relative_patcher_arm64.h" #include "lock_word.h" +#include "mirror/array-inl.h" #include "mirror/object.h" #include "oat_quick_method_header.h" @@ -46,9 +47,15 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kBlPlusMax = 0x95ffffffu; static constexpr uint32_t kBlMinusMax = 0x96000000u; - // LDR immediate, unsigned offset. + // LDR immediate, 32-bit, unsigned offset. static constexpr uint32_t kLdrWInsn = 0xb9400000u; + // LDR register, 32-bit, LSL #2. + static constexpr uint32_t kLdrWLsl2Insn = 0xb8607800u; + + // LDUR, 32-bit. + static constexpr uint32_t kLdurWInsn = 0xb8400000u; + // ADD/ADDS/SUB/SUBS immediate, 64-bit. static constexpr uint32_t kAddXInsn = 0x91000000u; static constexpr uint32_t kAddsXInsn = 0xb1000000u; @@ -68,7 +75,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { static constexpr uint32_t kLdrXSpRelInsn = 0xf94003edu; // CBNZ x17, +0. 
Bits 5-23 are a placeholder for target offset from PC in units of 4-bytes. - static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011; + static constexpr uint32_t kCbnzIP1Plus0Insn = 0xb5000011u; void InsertInsn(std::vector<uint8_t>* code, size_t pos, uint32_t insn) { CHECK_LE(pos, code->size()); @@ -160,9 +167,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } std::vector<uint8_t> CompileMethodCallThunk() { - ArmBaseRelativePatcher::ThunkKey key( - ArmBaseRelativePatcher::ThunkType::kMethodCall, - ArmBaseRelativePatcher::ThunkParams{{ 0, 0 }}); // NOLINT(whitespace/braces) + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetMethodCallKey(); return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } @@ -188,7 +193,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> GenNops(size_t num_nops) { std::vector<uint8_t> result; - result.reserve(num_nops * 4u + 4u); + result.reserve(num_nops * 4u); for (size_t i = 0; i != num_nops; ++i) { PushBackInsn(&result, kNopInsn); } @@ -228,7 +233,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { } else { LOG(FATAL) << "Unexpected instruction: 0x" << std::hex << use_insn; } - uint32_t adrp = 0x90000000 | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) + uint32_t adrp = 0x90000000u | // ADRP x0, +SignExtend(immhi:immlo:Zeros(12), 64) ((disp & 0x3000u) << (29 - 12)) | // immlo = ((disp & 0x3000u) >> 12) is at bit 29, ((disp & 0xffffc000) >> (14 - 5)) | // immhi = (disp >> 14) is at bit 5, // We take the sign bit from the disp, limiting disp to +- 2GiB. @@ -466,17 +471,22 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { std::vector<uint8_t> CompileBakerOffsetThunk(uint32_t base_reg, uint32_t holder_reg) { const LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(base_reg, holder_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); + } + + std::vector<uint8_t> CompileBakerArrayThunk(uint32_t base_reg) { + LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( + 0u, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } std::vector<uint8_t> CompileBakerGcRootThunk(uint32_t root_reg) { LinkerPatch patch = LinkerPatch::BakerReadBarrierBranchPatch( 0u, Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg)); - auto* patcher = down_cast<Arm64RelativePatcher*>(patcher_.get()); - ArmBaseRelativePatcher::ThunkKey key = patcher->GetBakerReadBarrierKey(patch); - return patcher->CompileThunk(key); + ArmBaseRelativePatcher::ThunkKey key = ArmBaseRelativePatcher::GetBakerThunkKey(patch); + return down_cast<Arm64RelativePatcher*>(patcher_.get())->CompileThunk(key); } uint32_t GetOutputInsn(uint32_t offset) { @@ -488,7 +498,7 @@ class Arm64RelativePatcherTest : public RelativePatcherTest { (static_cast<uint32_t>(output_[offset + 3]) << 24); } - void TestBakerField(uint32_t offset, uint32_t root_reg); + void TestBakerField(uint32_t offset, uint32_t ref_reg); }; const uint8_t 
Arm64RelativePatcherTest::kCallRawCode[] = { @@ -885,7 +895,7 @@ TEST_FOR_OFFSETS(LDRW_SPREL_ADD_TEST, 0, 4) TEST_FOR_OFFSETS(LDRX_SPREL_ADD_TEST, 0, 8) -void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg) { +void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t ref_reg) { uint32_t valid_regs[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. @@ -899,7 +909,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg uint32_t method_idx = 0u; for (uint32_t base_reg : valid_regs) { for (uint32_t holder_reg : valid_regs) { - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr}); ASSERT_EQ(kMethodCodeSize, raw_code.size()); ArrayRef<const uint8_t> code(raw_code); @@ -922,7 +932,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg ++method_idx; uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); - uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | root_reg; + uint32_t ldr = kLdrWInsn | (offset << (10 - 2)) | (base_reg << 5) | ref_reg; const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr}); ASSERT_EQ(kMethodCodeSize, expected_code.size()); ASSERT_TRUE( @@ -942,7 +952,7 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg if (holder_reg == base_reg) { // Verify that the null-check CBZ uses the correct register, i.e. holder_reg. ASSERT_GE(output_.size() - gray_check_offset, 4u); - ASSERT_EQ(0x34000000 | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + ASSERT_EQ(0x34000000u | holder_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); gray_check_offset +=4u; } // Verify that the lock word for gray bit check is loaded from the holder address. @@ -955,12 +965,12 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg /* ip0 */ 16; EXPECT_EQ(load_lock_word, GetOutputInsn(gray_check_offset)); // Verify the gray bit check. - const uint32_t check_gray_bit_witout_offset = - 0x37000000 | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; - EXPECT_EQ(check_gray_bit_witout_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001f); + const uint32_t check_gray_bit_without_offset = + 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(gray_check_offset + 4u) & 0xfff8001fu); // Verify the fake dependency. 
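      // (0x8b408000u is ADD Xd, Xn, Xm, LSR #32: the A64 ADD (shifted register) opcode
      // 0x8b000000u with shift type LSR (01 in bits 23:22) and shift amount 32 in imm6
      // (bits 15:10). The upper half of the 32-bit lock word load is zero, so the ADD
      // only creates an address dependency on that load, ordering it before the
      // reference load without a memory barrier.)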
const uint32_t fake_dependency = - 0x8b408000 | // ADD Xd, Xn, Xm, LSR 32 + 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 (/* ip0 */ 16 << 16) | // Xm = ip0 (base_reg << 5) | // Xn = base_reg base_reg; // Xd = base_reg @@ -973,19 +983,19 @@ void Arm64RelativePatcherTest::TestBakerField(uint32_t offset, uint32_t root_reg } } -#define TEST_BAKER_FIELD(offset, root_reg) \ +#define TEST_BAKER_FIELD(offset, ref_reg) \ TEST_F(Arm64RelativePatcherTestDefault, \ - BakerOffset##offset##_##root_reg) { \ - TestBakerField(offset, root_reg); \ + BakerOffset##offset##_##ref_reg) { \ + TestBakerField(offset, ref_reg); \ } -TEST_BAKER_FIELD(/* offset */ 0, /* root_reg */ 0) -TEST_BAKER_FIELD(/* offset */ 8, /* root_reg */ 15) -TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* root_reg */ 29) +TEST_BAKER_FIELD(/* offset */ 0, /* ref_reg */ 0) +TEST_BAKER_FIELD(/* offset */ 8, /* ref_reg */ 15) +TEST_BAKER_FIELD(/* offset */ 0x3ffc, /* ref_reg */ 29) TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { // One thunk in the middle with maximum distance branches to it from both sides. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 4; const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1046,7 +1056,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddle) { TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { // Based on the first part of BakerOffsetThunkInTheMiddle but the CBNZ is one instruction // earlier, so the thunk is emitted before the filler. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 0; const std::vector<uint8_t> raw_code1 = RawCode({kCbnzIP1Plus0Insn, kLdrWInsn, kNopInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1076,7 +1086,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkBeforeFiller) { TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFromLast) { // Based on the BakerOffsetThunkInTheMiddle but the CBNZ in the last method is preceded // by NOP and cannot reach the thunk in the middle, so we emit an extra thunk at the end. - // Use offset = 0, base_reg = 0, root_reg = 0, the LDR is simply `kLdrWInsn`. + // Use offset = 0, base_reg = 0, ref_reg = 0, the LDR is simply `kLdrWInsn`. constexpr uint32_t kLiteralOffset1 = 4; const std::vector<uint8_t> raw_code1 = RawCode({kNopInsn, kCbnzIP1Plus0Insn, kLdrWInsn}); ArrayRef<const uint8_t> code1(raw_code1); @@ -1132,7 +1142,88 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerOffsetThunkInTheMiddleUnreachableFr ASSERT_TRUE(CheckLinkedMethod(MethodRef(5), ArrayRef<const uint8_t>(expected_code2))); } -TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { +TEST_F(Arm64RelativePatcherTestDefault, BakerArray) { + uint32_t valid_regs[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + // LR and SP/ZR are reserved. + }; + auto ldr = [](uint32_t base_reg) { + uint32_t index_reg = (base_reg == 0u) ? 1u : 0u; + uint32_t ref_reg = (base_reg == 2) ? 
3u : 2u; + return kLdrWLsl2Insn | (index_reg << 16) | (base_reg << 5) | ref_reg; + }; + constexpr size_t kMethodCodeSize = 8u; + constexpr size_t kLiteralOffset = 0u; + uint32_t method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + const std::vector<uint8_t> raw_code = RawCode({kCbnzIP1Plus0Insn, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, raw_code.size()); + ArrayRef<const uint8_t> code(raw_code); + const LinkerPatch patches[] = { + LinkerPatch::BakerReadBarrierBranchPatch( + kLiteralOffset, Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(base_reg)), + }; + AddCompiledMethod(MethodRef(method_idx), code, ArrayRef<const LinkerPatch>(patches)); + } + Link(); + + // All thunks are at the end. + uint32_t thunk_offset = GetMethodOffset(method_idx) + RoundUp(kMethodCodeSize, kArm64Alignment); + method_idx = 0u; + for (uint32_t base_reg : valid_regs) { + ++method_idx; + uint32_t cbnz_offset = thunk_offset - (GetMethodOffset(method_idx) + kLiteralOffset); + uint32_t cbnz = kCbnzIP1Plus0Insn | (cbnz_offset << (5 - 2)); + const std::vector<uint8_t> expected_code = RawCode({cbnz, ldr(base_reg)}); + ASSERT_EQ(kMethodCodeSize, expected_code.size()); + EXPECT_TRUE(CheckLinkedMethod(MethodRef(method_idx), ArrayRef<const uint8_t>(expected_code))); + + std::vector<uint8_t> expected_thunk = CompileBakerArrayThunk(base_reg); + ASSERT_GT(output_.size(), thunk_offset); + ASSERT_GE(output_.size() - thunk_offset, expected_thunk.size()); + ArrayRef<const uint8_t> compiled_thunk(output_.data() + thunk_offset, + expected_thunk.size()); + if (ArrayRef<const uint8_t>(expected_thunk) != compiled_thunk) { + DumpDiff(ArrayRef<const uint8_t>(expected_thunk), compiled_thunk); + ASSERT_TRUE(false); + } + + // Verify that the lock word for gray bit check is loaded from the correct address + // before the base_reg which points to the array data. + static constexpr size_t kGrayCheckInsns = 5; + ASSERT_GE(output_.size() - thunk_offset, 4u * kGrayCheckInsns); + int32_t data_offset = + mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); + int32_t offset = mirror::Object::MonitorOffset().Int32Value() - data_offset; + ASSERT_LT(offset, 0); + const uint32_t load_lock_word = + kLdurWInsn | + ((offset & 0x1ffu) << 12) | + (base_reg << 5) | + /* ip0 */ 16; + EXPECT_EQ(load_lock_word, GetOutputInsn(thunk_offset)); + // Verify the gray bit check. + const uint32_t check_gray_bit_without_offset = + 0x37000000u | (LockWord::kReadBarrierStateShift << 19) | /* ip0 */ 16; + EXPECT_EQ(check_gray_bit_without_offset, GetOutputInsn(thunk_offset + 4u) & 0xfff8001fu); + // Verify the fake dependency. + const uint32_t fake_dependency = + 0x8b408000u | // ADD Xd, Xn, Xm, LSR 32 + (/* ip0 */ 16 << 16) | // Xm = ip0 + (base_reg << 5) | // Xn = base_reg + base_reg; // Xd = base_reg + EXPECT_EQ(fake_dependency, GetOutputInsn(thunk_offset + 12u)); + // Do not check the rest of the implementation. + + // The next thunk follows on the next aligned offset. + thunk_offset += RoundUp(expected_thunk.size(), kArm64Alignment); + } +} + +TEST_F(Arm64RelativePatcherTestDefault, BakerGcRoot) { uint32_t valid_regs[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, // IP0 and IP1 are reserved. @@ -1180,7 +1271,7 @@ TEST_F(Arm64RelativePatcherTestDefault, BakerRootGcRoot) { // Verify that the fast-path null-check CBZ uses the correct register, i.e. root_reg. 
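    // (0x34000000u is the A64 CBZ (32-bit) opcode with Rt in bits 4:0; the mask
    // 0xff00001fu keeps the opcode and Rt while ignoring the imm19 branch offset.)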
ASSERT_GE(output_.size() - thunk_offset, 4u); - ASSERT_EQ(0x34000000 | root_reg, GetOutputInsn(thunk_offset) & 0xff00001f); + ASSERT_EQ(0x34000000u | root_reg, GetOutputInsn(thunk_offset) & 0xff00001fu); // Do not check the rest of the implementation. // The next thunk follows on the next aligned offset. diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index ead41240c2..1578c0cd3e 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -519,7 +519,7 @@ TEST_F(OatTest, EmptyTextSection) { if (insn_set == kArm) insn_set = kThumb2; std::string error_msg; std::vector<std::string> compiler_options; - compiler_options.push_back("--compiler-filter=verify-at-runtime"); + compiler_options.push_back("--compiler-filter=extract"); SetupCompiler(compiler_kind, insn_set, compiler_options, /*out*/ &error_msg); jobject class_loader; diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc index af60def11e..6b5387ae19 100644 --- a/compiler/oat_writer.cc +++ b/compiler/oat_writer.cc @@ -1675,7 +1675,7 @@ bool OatWriter::VisitDexMethods(DexMethodVisitor* visitor) { if (UNLIKELY(!visitor->StartClass(dex_file, class_def_index))) { return false; } - if (compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) { + if (compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) { const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index); const uint8_t* class_data = dex_file->GetClassData(class_def); if (class_data != nullptr) { // ie not an empty class, such as a marker interface @@ -1757,7 +1757,7 @@ size_t OatWriter::InitOatClasses(size_t offset) { } size_t OatWriter::InitOatMaps(size_t offset) { - if (!compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) { + if (!compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) { return offset; } { @@ -1813,7 +1813,7 @@ size_t OatWriter::InitOatCode(size_t offset) { } size_t OatWriter::InitOatCodeDexFiles(size_t offset) { - if (!compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) { + if (!compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) { return offset; } InitCodeMethodVisitor code_visitor(this, offset, vdex_quickening_info_offset_); @@ -1982,7 +1982,7 @@ bool OatWriter::WriteQuickeningInfo(OutputStream* vdex_out) { return false; } - if (compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) { + if (compiler_driver_->GetCompilerOptions().IsAnyCompilationEnabled()) { WriteQuickeningInfoMethodVisitor visitor(this, vdex_out, start_offset); if (!VisitDexMethods(&visitor)) { PLOG(ERROR) << "Failed to write the vdex quickening info. File: " << vdex_out->GetLocation(); @@ -2474,11 +2474,28 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil /* verify */ true, /* verify_checksum */ true, &error_msg); - } else { - CHECK(oat_dex_file->source_.IsRawFile()) - << static_cast<size_t>(oat_dex_file->source_.GetType()); + } else if (oat_dex_file->source_.IsRawFile()) { File* raw_file = oat_dex_file->source_.GetRawFile(); dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg); + } else { + // The source data is a vdex file. + CHECK(oat_dex_file->source_.IsRawData()) + << static_cast<size_t>(oat_dex_file->source_.GetType()); + const uint8_t* raw_dex_file = oat_dex_file->source_.GetRawData(); + // Note: The raw data has already been checked to contain the header + // and all the data that the header specifies as the file size. 
+ DCHECK(raw_dex_file != nullptr); + DCHECK(ValidateDexFileHeader(raw_dex_file, oat_dex_file->GetLocation())); + const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file); + // Since the source may have had its layout changed, or may be quickened, don't verify it. + dex_file = DexFile::Open(raw_dex_file, + header->file_size_, + location, + oat_dex_file->dex_file_location_checksum_, + nullptr, + /* verify */ false, + /* verify_checksum */ false, + &error_msg); } if (dex_file == nullptr) { LOG(ERROR) << "Failed to open dex file for layout: " << error_msg; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 476906a768..ed630cda91 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1550,7 +1550,7 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); - InsertDeoptInLoop(loop, block, cond); + InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true); ReplaceInstruction(check, array); return true; } @@ -1616,11 +1616,16 @@ class BCEVisitor : public HGraphVisitor { } /** Inserts a deoptimization test in a loop preheader. */ - void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { + void InsertDeoptInLoop(HLoopInformation* loop, + HBasicBlock* block, + HInstruction* condition, + bool is_null_check = false) { HInstruction* suspend = loop->GetSuspendCheck(); block->InsertInstructionBefore(condition, block->GetLastInstruction()); + DeoptimizationKind kind = + is_null_check ? DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc()); + GetGraph()->GetArena(), condition, kind, suspend->GetDexPc()); block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); if (suspend->HasEnvironment()) { deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( @@ -1633,7 +1638,7 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* block = bounds_check->GetBlock(); block->InsertInstructionBefore(condition, bounds_check); HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc()); + GetGraph()->GetArena(), condition, DeoptimizationKind::kBlockBCE, bounds_check->GetDexPc()); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); } @@ -1749,6 +1754,7 @@ class BCEVisitor : public HGraphVisitor { phi = NewPhi(new_preheader, instruction, type); } user->ReplaceInput(phi, index); // Removes the use node from the list. + induction_range_.Replace(user, instruction, phi); // update induction } } // Scan all environment uses of an instruction and replace each later use with a phi node. diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index 048073e37a..c806dbfef6 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -203,7 +203,7 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. 
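  // The deoptimization kind is passed to the runtime by the deopt slow path, so using
  // kCHA here lets the runtime distinguish CHA guard failures from BCE deoptimizations.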
HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
- GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
+ GetGraph()->GetArena(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc());
 pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
 deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
 suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d7cc577580..f5f40fc686 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -16,6 +16,7 @@
 #include "code_generator_arm.h"
+#include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
 #include "code_generator_utils.h"
@@ -25,6 +26,7 @@
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_arm.h"
+#include "linker/arm/relative_patcher_thumb2.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -60,10 +62,41 @@ static constexpr DRegister DTMP = D31;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+// A reference load (except an object array load) uses LDR Rt, [Rn, #offset], which can
+// handle offsets < 4KiB. For offsets >= 4KiB, the load must be emitted as two or more
+// instructions. For the Baker read barrier implementation using link-time generated
+// thunks, we need to split the offset explicitly.
+constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
+
+// Flags controlling the use of link-time generated thunks for Baker read barriers.
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
+constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
+
+// The reserved entrypoint register for link-time generated thunks.
+const Register kBakerCcEntrypointRegister = R4;
+
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())-> // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
+static inline void CheckLastTempIsBakerCcEntrypointRegister(HInstruction* instruction) {
+ DCHECK_EQ(static_cast<uint32_t>(kBakerCcEntrypointRegister),
+ linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
+ DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
+ DCHECK_EQ(kBakerCcEntrypointRegister,
+ instruction->GetLocations()->GetTemp(
+ instruction->GetLocations()->GetTempCount() - 1u).AsRegister<Register>());
+}
+
+static inline void EmitPlaceholderBne(CodeGeneratorARM* codegen, Label* bne_label) {
+ DCHECK(down_cast<Thumb2Assembler*>(codegen->GetAssembler())->IsForced32Bit());
+ __ BindTrackedLabel(bne_label);
+ Label placeholder_label;
+ __ b(&placeholder_label, NE); // Placeholder, patched at link-time.
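+ // Binding the target label right after the branch encodes it as BNE.W +0 (a branch
+ // to the next instruction); the patcher later retargets it to the actual thunk.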
+ __ Bind(&placeholder_label); +} + static constexpr int kRegListThreshold = 4; // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, @@ -585,8 +618,13 @@ class DeoptimizationSlowPathARM : public SlowPathCodeARM { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadImmediate(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; } @@ -819,7 +857,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM : public ReadBarrierMarkSlowP // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -954,6 +992,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset); @@ -1962,6 +2012,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2672,7 +2723,10 @@ void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) { void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3067,6 +3121,15 @@ void InstructionCodeGeneratorARM::VisitDoubleConstant(HDoubleConstant* constant // Will be generated at use site. } +void LocationsBuilderARM::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARM::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5272,7 +5335,18 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5738,11 +5812,35 @@ void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. 
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
+ !Runtime::Current()->UseJitCompilation() &&
+ !instruction->GetIndex()->IsConstant()) {
+ // We need a non-scratch temporary for the array data pointer.
+ locations->AddTemp(Location::RequiresRegister());
+ // And we always need the reserved entrypoint register.
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+ // Also need a temporary for String compression feature.
+ locations->AddTemp(Location::RequiresRegister());
 }
}
@@ -5854,8 +5952,20 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
 Location temp = locations->GetTemp(0);
 // Note that a potential implicit null check is handled in this
 // CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier call.
- codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true);
+ DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
+ if (index.IsConstant()) {
+ // Array load with a constant index can be treated as a field load.
+ data_offset += helpers::Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type);
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ locations->GetTemp(0),
+ /* needs_null_check */ false);
+ } else {
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(
+ instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ }
 } else {
 Register out = out_loc.AsRegister<Register>();
 if (index.IsConstant()) {
@@ -6692,6 +6802,13 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
 // For non-Baker read barrier we have a temp-clobbering call.
 }
 }
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ if (load_kind == HLoadClass::LoadKind::kBssEntry ||
+ (load_kind == HLoadClass::LoadKind::kReferrersClass &&
+ !Runtime::Current()->UseJitCompilation())) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
+ }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -6871,6 +6988,9 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
 // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
 // that the kPrimNot result register is the same as the first argument register.
 locations->SetCustomSlowPathCallerSaves(caller_saves);
+ if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister));
+ }
 } else {
 // For non-Baker read barrier we have a temp-clobbering call.
 }
@@ -7041,6 +7161,9 @@ void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
 // Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); + } } void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) { @@ -7914,48 +8037,93 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: + + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg); + Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + static_assert( + BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. 
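+ // The thunk locates this LDR at a fixed offset from the return address, so we
+ // force 32-bit encodings below to keep that distance constant.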
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset); + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. + Location temp = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. 
+ __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -7973,6 +8141,16 @@ void InstructionCodeGeneratorARM::GenerateGcRootFieldLoad(HInstruction* instruct } } +void CodeGeneratorARM::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister)); + } + } +} + void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -7982,6 +8160,69 @@ void CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instr DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // GcRoot<mirror::Object> reference = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = temp.AsRegister<Register>(); + DCHECK_NE(base, kBakerCcEntrypointRegister); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ AddConstant(base, obj, offset & ~(kReferenceLoadMinFarOffset - 1u)); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(base, obj); + Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + EmitPlaceholderBne(this, bne_label); + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? 
-8 : -4), "Field LDR must be 1 32-bit instruction (4B) before the return address label; " " 2 32-bit instructions (8B) for heap poisoning."); + Register ref_reg = ref.AsRegister<Register>(); + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + __ LoadFromOffset(kLoadWord, ref_reg, base, offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -8002,9 +8243,67 @@ void CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instr static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // GcRoot<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + Register index_reg = index.AsRegister<Register>(); + Register ref_reg = ref.AsRegister<Register>(); + Register data_reg = temp.AsRegister<Register>(); + DCHECK_NE(data_reg, kBakerCcEntrypointRegister); + + CheckLastTempIsBakerCcEntrypointRegister(instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg); + Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(IP, 12); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(IP); + __ LoadFromOffset(kLoadWord, kBakerCcEntrypointRegister, TR, entry_point_offset); + __ AddConstant(data_reg, obj, data_offset); + + Label return_address; + __ AdrCode(LR, &return_address); + __ CmpConstant(kBakerCcEntrypointRegister, 0); + ScopedForce32Bit force_32bit(down_cast<Thumb2Assembler*>(GetAssembler())); + EmitPlaceholderBne(this, bne_label); + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 32-bit instruction (4B) before the return address label; " + " 2 32-bit instructions (8B) for heap poisoning."); + __ ldr(ref_reg, Address(data_reg, index_reg, LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check.
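// Why the null check can simply be dropped here (an inference from the
// DCHECK above, not stated in this change): a non-constant-index array
// access is always preceded by a bounds check whose length load performs
// the implicit null check, so the null check is never attached to this
// data load, and the thunk only knows how to redo the reference load.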
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8016,9 +8315,7 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8029,6 +8326,73 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } + + Register temp_reg = temp.AsRegister<Register>(); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = Location::RegisterLocation(LR); + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp2.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(temp2.AsRegister<Register>(), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARM::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. 
Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8041,52 +8405,30 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // HeapReference<mirror::Object> ref = *src; // Original reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } Register temp_reg = temp.AsRegister<Register>(); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + + // field_offset` when the GC is marking. The entrypoint will already + // be loaded in `temp3`. Location temp3 = Location::RegisterLocation(LR); - SlowPathCodeARM* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM only - // supports address of the form `obj + field_offset`, where `obj` - // is a register and `field_offset` is a register pair (of which - // only the lower half is used). Thus `offset` and `scale_factor` - // above are expected to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARM* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8098,8 +8440,8 @@ void CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(temp3.AsRegister<Register>(), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). 
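// A hypothetical call site for the new method, e.g. from an UnsafeCASObject
// intrinsic implementation (a sketch: the register and location names are
// illustrative, only the parameter list matches the definition above):
//
//   codegen->UpdateReferenceFieldWithBakerReadBarrier(
//       invoke,
//       /* ref */ Location::RegisterLocation(tmp_ref),  // Clobbered; a temp only.
//       /* obj */ base,
//       /* field_offset */ offset_loc,  // Register (pair) holding the offset.
//       /* temp */ Location::RegisterLocation(tmp1),
//       /* needs_null_check */ false,
//       /* temp2 */ tmp2);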
__ Bind(slow_path->GetExitLabel()); } @@ -8370,6 +8712,11 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch( return &patches->back(); } +Label* CodeGeneratorARM::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + Literal* CodeGeneratorARM::DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index) { return boot_image_string_patches_.GetOrCreate( @@ -8436,7 +8783,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); @@ -8470,6 +8818,10 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.Position(), + info.custom_data)); + } DCHECK_EQ(size, linker_patches->size()); } diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 86f2f21df7..b94ee20d9d 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -488,6 +488,11 @@ class CodeGeneratorARM : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. + Label* NewBakerReadBarrierPatch(uint32_t custom_data); + Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index); Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); @@ -503,6 +508,10 @@ class CodeGeneratorARM : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Maybe add the reserved entrypoint register as a temporary for field load. This temp + // is added only for AOT compilation if link-time generated thunks for fields are enabled. + void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -526,11 +535,6 @@ class CodeGeneratorARM : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). This operation - // requires an extra temporary register, which must be provided as a - // non-null pointer (`temp2`). 
void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, Register obj, @@ -538,9 +542,27 @@ class CodeGeneratorARM : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field = false, - Register* temp2 = nullptr); + bool needs_null_check); + + // Generate code checking whether the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only used as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + Register temp2); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, @@ -616,6 +638,13 @@ class CodeGeneratorARM : public CodeGenerator { Literal*, TypeReferenceValueComparator>; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + Label label; + uint32_t custom_data; + }; + Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, @@ -648,6 +677,8 @@ class CodeGeneratorARM : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4629c54a17..7d9778a4e7 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -91,6 +91,7 @@ constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; // Flags controlling the use of link-time generated thunks for Baker read barriers.
constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; // Some instructions have special requirements for a temporary, for example @@ -586,8 +587,13 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; } @@ -850,7 +856,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlo // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -1001,6 +1007,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // /* int32_t */ monitor = obj->monitor_ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); __ Ldr(temp_, HeapOperand(obj_, monitor_offset)); @@ -2759,6 +2777,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. 
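// For example (with illustrative numbers): element 3 of an object array,
// assuming the usual 12-byte data offset and 4-byte heap references, yields
//   offset = 12 + (3 << 2) = 24,
// so the access below becomes a plain field load at [obj, #24].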
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); @@ -2769,12 +2788,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { obj.W(), offset, maybe_temp, - /* needs_null_check */ true, + /* needs_null_check */ false, /* use_load_acquire */ false); } else { Register temp = WRegisterFrom(locations->GetTemp(0)); codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ true); + instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false); } } else { // General case. @@ -3691,7 +3710,10 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -5477,6 +5499,15 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { } } +void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARM64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5928,9 +5959,9 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( !Runtime::Current()->UseJitCompilation()) { // Note that we do not actually check the value of `GetIsGcMarking()` // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. // // We use link-time generated thunks for the slow path. That thunk // checks the reference and jumps to the entrypoint if needed. @@ -6054,24 +6085,24 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins !use_load_acquire && !Runtime::Current()->UseJitCompilation()) { // Note that we do not actually check the value of `GetIsGcMarking()` - // to decide whether to mark the loaded GC root or not. Instead, we - // load into `temp` the read barrier mark introspection entrypoint. - // If `temp` is null, it means that `GetIsGcMarking()` is false, and - // vice versa. + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. // // We use link-time generated thunks for the slow path. That thunk checks // the holder and jumps to the entrypoint if needed. If the holder is not // gray, it creates a fake dependency and returns to the LDR instruction. 
// // temp = Thread::Current()->pReadBarrierMarkIntrospection - // lr = &return_address; + // lr = &gray_return_address; // if (temp != nullptr) { // goto field_thunk<holder_reg, base_reg>(lr) // } // not_gray_return_address: // // Original reference load. If the offset is too large to fit // // into LDR, we use an adjusted base register here. - // GcRoot<mirror::Object> root = *(obj+offset); + // GcRoot<mirror::Object> reference = *(obj+offset); // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); @@ -6141,16 +6172,74 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually IP1) the read barrier mark introspection + // entrypoint. If `temp` is null, it means that `GetIsGcMarking()` is + // false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // GcRoot<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + Register index_reg = RegisterFrom(index, Primitive::kPrimInt); + Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + DCHECK(temps.IsAvailable(ip0)); + DCHECK(temps.IsAvailable(ip1)); + temps.Exclude(ip0, ip1); + uint32_t custom_data = + linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode()); + vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); + + // ip1 = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip0.GetCode(), 16u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); + __ Ldr(ip1, MemOperand(tr, entry_point_offset)); + __ Add(temp.X(), obj.X(), Operand(data_offset)); + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(ip1, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. 
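// The "fake dependency" mentioned in the comments above is the usual Baker
// read barrier device for ordering the lock word load before the reference
// load without a memory barrier: the thunk makes the load address depend on
// the just-loaded lock word. A sketch of the idiom in AArch64 terms
// (illustrative, not the thunk's literal code):
//
//   ldr wIP0, [holder, #monitor_offset]  // Load the lock word.
//   add xBase, xBase, xIP0, lsr #32      // Adds zero (the 32-bit load was
//                                        // zero-extended), but creates an
//                                        // address dependency on it.
//   // The reference LDR before `gray_return_address` then cannot be
//   // reordered ahead of the lock word load.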
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + return; + } + // Array cells are never volatile variables, therefore array loads // never use Load-Acquire instructions on ARM64. const bool use_load_acquire = false; - static_assert( - sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -6170,8 +6259,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* size_t scale_factor, Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field) { + bool use_load_acquire) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); // If we are emitting an array load, we should not be using a @@ -6208,41 +6296,18 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // entrypoint will already be loaded in `temp2`. Register temp2 = lr; Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* slow_path; - if (always_update_field) { - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64 - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register. Thus - // `offset` and `scale_factor` above are expected to be null in - // this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, 0u); /* "times 1" */ - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - use_load_acquire, - temp, - /* entrypoint */ temp2_loc); - } + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); AddSlowPath(slow_path); // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -6254,12 +6319,83 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ Cbnz(temp2, slow_path->GetEntryLabel()); - // Fast path: just load the reference. + // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); } +void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp, + bool needs_null_check, + bool use_load_acquire) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + // If we are emitting an array load, we should not be using a + // Load Acquire instruction. In other words: + // `instruction->IsArrayGet()` => `!use_load_acquire`. 
+ DCHECK(!instruction->IsArrayGet() || !use_load_acquire); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + // } + + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp2`. + Register temp2 = lr; + Location temp2_loc = LocationFrom(temp2); + SlowPathCodeARM64* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ 0u /* "times 1" */, + needs_null_check, + use_load_acquire, + temp, + /* entrypoint */ temp2_loc); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Ldr(temp2, MemOperand(tr, entry_point_offset)); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ Cbnz(temp2, slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). + __ Bind(slow_path->GetExitLabel()); +} + void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, Location ref, Register obj, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 332ab49153..f16f625b6c 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -318,12 +318,13 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); - vixl::aarch64::MemOperand CreateVecMemRegisters( + vixl::aarch64::MemOperand VecAddress( HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load, // This function may acquire a scratch register. 
- vixl::aarch64::UseScratchRegisterScope* temps_scope); + vixl::aarch64::UseScratchRegisterScope* temps_scope, + size_t size, + bool is_string_char_at, + /*out*/ vixl::aarch64::Register* scratch); Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; @@ -634,9 +635,6 @@ class CodeGeneratorARM64 : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch64::Register obj, @@ -645,8 +643,27 @@ class CodeGeneratorARM64 : public CodeGenerator { size_t scale_factor, vixl::aarch64::Register temp, bool needs_null_check, - bool use_load_acquire, - bool always_update_field = false); + bool use_load_acquire); + + // Generate code checking whether the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only used as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch64::Register obj, + Location field_offset, + vixl::aarch64::Register temp, + bool needs_null_check, + bool use_load_acquire); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b6678b03ef..3ad2b129b0 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -16,6 +16,7 @@ #include "code_generator_arm_vixl.h" +#include "arch/arm/asm_support_arm.h" #include "arch/arm/instruction_set_features_arm.h" #include "art_method.h" #include "code_generator_utils.h" @@ -24,6 +25,7 @@ #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "intrinsics_arm_vixl.h" +#include "linker/arm/relative_patcher_thumb2.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -77,6 +79,20 @@ static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; static constexpr int kCurrentMethodStackOffset = 0; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; +// A reference load (except an object array load) uses LDR Rt, [Rn, #offset], which can handle +// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions; +// e.g. a field at offset 0x1234 becomes ADD base, obj, #0x1000 followed by an LDR with the +// remaining offset 0x234. For the Baker read barrier implementation using link-time generated +// thunks we need to split the offset explicitly. +constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB; + +// Flags controlling the use of link-time generated thunks for Baker read barriers. +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; + +// The reserved entrypoint register for link-time generated thunks.
+const vixl32::Register kBakerCcEntrypointRegister = r4; + #ifdef __ #error "ARM Codegen VIXL macro-assembler macro already defined." #endif @@ -88,6 +104,56 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Marker that code is yet to be, and must, be implemented. #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented " +static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps, + HInstruction* instruction) { + DCHECK(temps->IsAvailable(ip)); + temps->Exclude(ip); + DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister)); + DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(), + linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister); + DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u); + DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp( + instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister)); +} + +static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) { + ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes); + __ bind(patch_label); + vixl32::Label placeholder_label; + __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time. + __ bind(&placeholder_label); +} + +class EmitAdrCode { + public: + EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label) + : assembler_(assembler), rd_(rd), label_(label) { + ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes); + adr_location_ = assembler->GetCursorOffset(); + assembler->adr(EncodingSize(Wide), rd, label); + } + + ~EmitAdrCode() { + DCHECK(label_->IsBound()); + // The ADR emitted by the assembler does not set the Thumb mode bit we need. + // TODO: Maybe extend VIXL to allow ADR for return address? + uint8_t* raw_adr = assembler_->GetBuffer()->GetOffsetAddress<uint8_t*>(adr_location_); + // Expecting ADR encoding T3 with `(offset & 1) == 0`. + DCHECK_EQ(raw_adr[1] & 0xfbu, 0xf2u); // Check bits 24-31, except 26. + DCHECK_EQ(raw_adr[0] & 0xffu, 0x0fu); // Check bits 16-23. + DCHECK_EQ(raw_adr[3] & 0x8fu, rd_.GetCode()); // Check bits 8-11 and 15. + DCHECK_EQ(raw_adr[2] & 0x01u, 0x00u); // Check bit 0, i.e. the `offset & 1`. + // Add the Thumb mode bit. + raw_adr[2] |= 0x01u; + } + + private: + ArmVIXLMacroAssembler* const assembler_; + vixl32::Register rd_; + vixl32::Label* const label_; + int32_t adr_location_; +}; + // SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers, // for each live D registers they treat two corresponding S registers as live ones. 
// @@ -608,8 +674,14 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConventionARMVIXL calling_convention; + __ Mov(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); + arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; } @@ -845,7 +917,7 @@ class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkS // Baker's read barriers, we need to perform the load of // mirror::Object::monitor_ *before* the original reference load. // This load-load ordering is required by the read barrier. - // The fast path/slow path (for Baker's algorithm) should look like: + // The slow path (for Baker's algorithm) should look like: // // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering @@ -987,6 +1059,18 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL __ Bind(GetEntryLabel()); + // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // old_ref = ref; + // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); + // } + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); // /* int32_t */ monitor = obj->monitor_ @@ -2012,6 +2096,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), @@ -2704,7 +2789,10 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ InvokeRuntimeCallingConventionARMVIXL calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3103,6 +3191,15 @@ void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( // Will be generated at use site. } +void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } @@ -5280,7 +5377,18 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // If link-time thunks for the Baker read barrier are enabled, for AOT + // loads we need a temporary only if the offset is too big. + if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -5747,11 +5855,35 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location::RequiresRegister(), object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - // We need a temporary register for the read barrier marking slow - // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. - // Also need for String compression feature. - if ((object_array_get_with_read_barrier && kUseBakerReadBarrier) - || (mirror::kUseStringCompression && instruction->IsStringCharAt())) { + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier. + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation() && + instruction->GetIndex()->IsConstant()) { + // Array loads with constant index are treated as field loads. + // If link-time thunks for the Baker read barrier are enabled, for AOT + // constant index loads we need a temporary only if the offset is too big. + uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); + uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); + offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + if (offset >= kReferenceLoadMinFarOffset) { + locations->AddTemp(Location::RequiresRegister()); + } + // And we always need the reserved entrypoint register. 
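// (The temp must be the fixed kBakerCcEntrypointRegister rather than a
// plain Location::RequiresRegister() because a link-time thunk is shared
// across call sites and expects the entrypoint in that one register; this
// reading follows from the DCHECKs in ExcludeIPAndBakerCcEntrypointRegister
// above.)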
+ locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation() && + !instruction->GetIndex()->IsConstant()) { + // We need a non-scratch temporary for the array data pointer. + locations->AddTemp(Location::RequiresRegister()); + // And we always need the reserved entrypoint register. + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } else { + locations->AddTemp(Location::RequiresRegister()); + } + } else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + // Also need a temporary for String compression feature. locations->AddTemp(Location::RequiresRegister()); } } @@ -5862,8 +5994,20 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location temp = locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier( - instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type); + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + locations->GetTemp(0), + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier( + instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false); + } } else { vixl32::Register out = OutputRegister(instruction); if (index.IsConstant()) { @@ -6753,6 +6897,13 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { // For non-Baker read barrier we have a temp-clobbering call. } } + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + (load_kind == HLoadClass::LoadKind::kReferrersClass && + !Runtime::Current()->UseJitCompilation())) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -6929,6 +7080,9 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK() // that the the kPrimNot result register is the same as the first argument register. locations->SetCustomSlowPathCallerSaves(caller_saves); + if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } } else { // For non-Baker read barrier we have a temp-clobbering call. } @@ -7091,6 +7245,9 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { // Note that TypeCheckSlowPathARM uses this register too. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + codegen_->MaybeAddBakerCcEntrypointTempForFields(locations); + } } void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { @@ -7989,48 +8146,96 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used. - // - // Note that we do not actually check the value of - // `GetIsGcMarking()` to decide whether to mark the loaded GC - // root or not. Instead, we load into `temp` the read barrier - // mark entry point corresponding to register `root`. If `temp` - // is null, it means that `GetIsGcMarking()` is false, and vice - // versa. - // - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. - // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() - // // Slow path. - // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. - // } - - // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. - Location temp = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( - instruction, root, /* entrypoint */ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk + // checks the reference and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &return_address; + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // goto gc_root_thunk<root_reg>(lr) + // } + // return_address: - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); + vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. 
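// (r12/IP never carries a managed reference in compiled code, so its
// pReadBarrierMarkReg12 slot in the thread's entrypoint table can be
// repurposed to hold the introspection entrypoint; this reading is implied
// by the DCHECK on ip.GetCode() below.)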
+ DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard(GetVIXLAssembler(), + 4 * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + static_assert( + BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, + "GC root LDR must be 2 32-bit instructions (8B) before the return address label."); + // Currently the offset is always within range. If that changes, + // we shall have to split the load the same way as for fields. + DCHECK_LT(offset, kReferenceLoadMinFarOffset); + __ ldr(EncodingSize(Wide), root_reg, MemOperand(obj, offset)); + EmitPlaceholderBne(codegen_, bne_label); + __ Bind(&return_address); + } else { + // Note that we do not actually check the value of + // `GetIsGcMarking()` to decide whether to mark the loaded GC + // root or not. Instead, we load into `temp` the read barrier + // mark entry point corresponding to register `root`. If `temp` + // is null, it means that `GetIsGcMarking()` is false, and vice + // versa. + // + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // root = temp(root); // root = ReadBarrier::Mark(root); // Runtime entry point call. + // } + + // Slow path marking the GC root `root`. The entrypoint will already be loaded in `temp`. + Location temp = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL( + instruction, root, /* entrypoint */ temp); + codegen_->AddSlowPath(slow_path); - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
+ GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), tr, entry_point_offset); + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { // GC root loaded through a slow path for read barriers other // than Baker's. @@ -8048,6 +8253,16 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( } } +void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields) { + if (!Runtime::Current()->UseJitCompilation()) { + locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode())); + } + } +} + void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl32::Register obj, @@ -8057,6 +8272,75 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierLinkTimeThunksEnableForFields && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. If the offset is too large to fit + // // into LDR, we use an adjusted base register here. 
+ // GcRoot<mirror::Object> reference = *(obj+offset); + // gray_return_address: + + DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); + vixl32::Register base = obj; + if (offset >= kReferenceLoadMinFarOffset) { + base = RegisterFrom(temp); + DCHECK(!base.Is(kBakerCcEntrypointRegister)); + static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); + __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); + offset &= (kReferenceLoadMinFarOffset - 1u); + } + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData( + base.GetCode(), + obj.GetCode()); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + EmitPlaceholderBne(this, bne_label); + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Field LDR must be 1 32-bit instruction (4B) before the return address label; " + " 2 32-bit instructions (8B) for heap poisoning."); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + __ ldr(EncodingSize(Wide), ref_reg, MemOperand(base, offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Note: We need a Wide NEG for the unpoisoning. + if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -8077,9 +8361,73 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierLinkTimeThunksEnableForArrays && + !Runtime::Current()->UseJitCompilation()) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (actually kBakerCcEntrypointRegister) the read + // barrier mark introspection entrypoint. If `temp` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // We use link-time generated thunks for the slow path. That thunk checks + // the holder and jumps to the entrypoint if needed. If the holder is not + // gray, it creates a fake dependency and returns to the LDR instruction. + // + // temp = Thread::Current()->pReadBarrierMarkIntrospection + // lr = &gray_return_address; + // if (temp != nullptr) { + // goto field_thunk<holder_reg, base_reg>(lr) + // } + // not_gray_return_address: + // // Original reference load. 
If the offset is too large to fit + // // into LDR, we use an adjusted base register here. + // GcRoot<mirror::Object> reference = data[index]; + // gray_return_address: + + DCHECK(index.IsValid()); + vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt); + vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer. + DCHECK(!data_reg.Is(kBakerCcEntrypointRegister)); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction); + uint32_t custom_data = + linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode()); + vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); + + // entrypoint_reg = + // Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection. + DCHECK_EQ(ip.GetCode(), 12u); + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode()); + __ Ldr(kBakerCcEntrypointRegister, MemOperand(tr, entry_point_offset)); + __ Add(data_reg, obj, Operand(data_offset)); + + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(kBakerCcEntrypointRegister, Operand(0)); + EmitPlaceholderBne(this, bne_label); + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 32-bit instruction (4B) before the return address label; " + " 2 32-bit instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. + if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + return; + } + + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier( instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check); } @@ -8091,9 +8439,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field, - vixl32::Register* temp2) { + bool needs_null_check) { DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); @@ -8104,6 +8450,73 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // not. // // Note that we do not actually check the value of `GetIsGcMarking()`; + // instead, we load into `temp2` the read barrier mark entry point + // corresponding to register `ref`. If `temp2` is null, it means + // that `GetIsGcMarking()` is false, and vice versa. + // + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + // if (temp2 != nullptr) { // <=> Thread::Current()->GetIsGcMarking() + // // Slow path. + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<mirror::Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = temp2(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // } + // } else { + // HeapReference<mirror::Object> ref = *src; // Original reference load. + // } + + vixl32::Register temp_reg = RegisterFrom(temp); + + // Slow path marking the object `ref` when the GC is marking. The + // entrypoint will already be loaded in `temp2`. + Location temp2 = LocationFrom(lr); + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( + instruction, + ref, + obj, + offset, + index, + scale_factor, + needs_null_check, + temp_reg, + /* entrypoint */ temp2); + AddSlowPath(slow_path); + + // temp2 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref.reg()); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp2), tr, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ CompareAndBranchIfNonZero(RegisterFrom(temp2), slow_path->GetEntryLabel()); + // Fast path: the GC is not marking: just load the reference. + GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl32::Register temp2) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // Query `art::Thread::Current()->GetIsGcMarking()` to decide + // whether we need to enter the slow path to update the reference + // field within `obj`. Then, in the slow path, check the gray bit + // in the lock word of the reference's holder (`obj`) to decide + // whether to mark `ref` and update the field or not. + // + // Note that we do not actually check the value of `GetIsGcMarking()`; // instead, we load into `temp3` the read barrier mark entry point // corresponding to register `ref`. If `temp3` is null, it means // that `GetIsGcMarking()` is false, and vice versa. @@ -8113,55 +8526,32 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // // Slow path. // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); // lfence; // Load fence or artificial data dependency to prevent load-load reordering - // HeapReference<mirror::Object> ref = *src; // Original reference load. + // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load. // bool is_gray = (rb_state == ReadBarrier::GrayState()); // if (is_gray) { + // old_ref = ref; // ref = temp3(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call. + // compareAndSwapObject(obj, field_offset, old_ref, ref); // } - // } else { - // HeapReference<mirror::Object> ref = *src; // Original reference load. // } vixl32::Register temp_reg = RegisterFrom(temp); - // Slow path marking the object `ref` when the GC is marking. The - // entrypoint will already be loaded in `temp3`. + // Slow path updating the object reference at address `obj + field_offset` + // when the GC is marking. The entrypoint will already be loaded in `temp3`. 
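(A compressed restatement, ours, of the protocol above as straight-line pseudo-C++; `Mark` stands for the runtime mark entrypoint and `CasField` for the field update done by the slow path, both names illustrative:

    if (entrypoint != nullptr) {    // Equivalent to GetIsGcMarking().
      if (IsGray(obj)) {
        mirror::Object* old_ref = ref;
        ref = Mark(ref);                            // May return a to-space reference.
        CasField(obj, field_offset, old_ref, ref);  // Publish the marked value.
      }
    }

Updating the field in place keeps it consistent with the marked reference that the UnsafeCASObject intrinsic's own compare-and-swap subsequently operates on; `ref` itself serves only as a temporary here.)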
Location temp3 = LocationFrom(lr); - SlowPathCodeARMVIXL* slow_path; - if (always_update_field) { - DCHECK(temp2 != nullptr); - // LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL - // only supports address of the form `obj + field_offset`, where - // `obj` is a register and `field_offset` is a register pair (of - // which only the lower half is used). Thus `offset` and - // `scale_factor` above are expected to be null in this code path. - DCHECK_EQ(offset, 0u); - DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - Location field_offset = index; - slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - /* index */ field_offset, - scale_factor, - needs_null_check, - temp_reg, - *temp2, - /* entrypoint */ temp3); - } else { - slow_path = new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( - instruction, - ref, - obj, - offset, - index, - scale_factor, - needs_null_check, - temp_reg, - /* entrypoint */ temp3); - } + SlowPathCodeARMVIXL* slow_path = + new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( + instruction, + ref, + obj, + /* offset */ 0u, + /* index */ field_offset, + /* scale_factor */ ScaleFactor::TIMES_1, + needs_null_check, + temp_reg, + temp2, + /* entrypoint */ temp3); AddSlowPath(slow_path); // temp3 = Thread::Current()->pReadBarrierMarkReg ## ref.reg() @@ -8173,8 +8563,8 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // The entrypoint is null when the GC is not marking, this prevents one load compared to // checking GetIsGcMarking. __ CompareAndBranchIfNonZero(RegisterFrom(temp3), slow_path->GetEntryLabel()); - // Fast path: just load the reference. - GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); + // Fast path: the GC is not marking: nothing to do (the field is + // up-to-date, and we don't need to load the reference). 
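(Contrast note, ours: the load variant above must still produce `ref` on its fast path via GenerateRawReferenceLoad, whereas this update variant produces no value at all when the GC is not marking, so its fast path is empty.)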
__ Bind(slow_path->GetExitLabel()); } @@ -8488,6 +8878,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa return &patches->back(); } +vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) { + baker_read_barrier_patches_.emplace_back(custom_data); + return &baker_read_barrier_patches_.back().label; +} + VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageStringLiteral( const DexFile& dex_file, dex::StringIndex string_index) { @@ -8512,10 +8907,6 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_); } -VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) { - return DeduplicateUint32Literal(address, &uint32_literals_); -} - VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( const DexFile& dex_file, dex::StringIndex string_index, @@ -8569,7 +8960,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size(); + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + + baker_read_barrier_patches_.size(); linker_patches->reserve(size); EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, linker_patches); @@ -8603,6 +8995,10 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa target_type.dex_file, target_type.type_index.index_)); } + for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { + linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), + info.custom_data)); + } DCHECK_EQ(size, linker_patches->size()); } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 1e9669dc38..657d3c134f 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -572,12 +572,16 @@ class CodeGeneratorARMVIXL : public CodeGenerator { PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + + // Add a new baker read barrier patch and return the label to be bound + // before the BNE instruction. + vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data); + VIXLUInt32Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, dex::StringIndex string_index); VIXLUInt32Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index); VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address); - VIXLUInt32Literal* DeduplicateDexCacheAddressLiteral(uint32_t address); VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle); @@ -589,6 +593,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Maybe add the reserved entrypoint register as a temporary for field load. This temp + // is added only for AOT compilation if link-time generated thunks for fields are enabled. 
+ void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations); + // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -612,11 +620,6 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Load the object reference located at the address // `obj + offset + (index << scale_factor)`, held by object `obj`, into // `ref`, and mark it if needed. - // - // If `always_update_field` is true, the value of the reference is - // atomically updated in the holder (`obj`). This operation - // requires an extra temporary register, which must be provided as a - // non-null pointer (`temp2`). void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, Location ref, vixl::aarch32::Register obj, @@ -624,9 +627,27 @@ class CodeGeneratorARMVIXL : public CodeGenerator { Location index, ScaleFactor scale_factor, Location temp, - bool needs_null_check, - bool always_update_field = false, - vixl::aarch32::Register* temp2 = nullptr); + bool needs_null_check); + + // Generate code checking whether the reference field at the + // address `obj + field_offset`, held by object `obj`, needs to be + // marked, and if so, marking it and updating the field within `obj` + // with the marked value. + // + // This routine is used for the implementation of the + // UnsafeCASObject intrinsic with Baker read barriers. + // + // This method has a structure similar to + // GenerateReferenceLoadWithBakerReadBarrier, but note that argument + // `ref` is only used as a temporary here, and thus its value should not + // be used afterwards. + void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, + Location ref, + vixl::aarch32::Register obj, + Location field_offset, + Location temp, + bool needs_null_check, + vixl::aarch32::Register temp2); // Generate a heap reference load (with no read barrier). void GenerateRawReferenceLoad(HInstruction* instruction, @@ -713,6 +734,13 @@ class CodeGeneratorARMVIXL : public CodeGenerator { VIXLUInt32Literal*, TypeReferenceValueComparator>; + struct BakerReadBarrierPatchInfo { + explicit BakerReadBarrierPatchInfo(uint32_t data) : label(), custom_data(data) { } + + vixl::aarch32::Label label; + uint32_t custom_data; + }; + VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); VIXLUInt32Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); @@ -750,6 +778,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // Baker read barrier patch info. + ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; // Patches for string literals in JIT compiled code.
StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index aa030b279c..e9870acff4 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -219,15 +219,33 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - + const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier); + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); + const bool is_load_class_bss_entry = + (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; + // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. + Register entry_address = kNoRegister; + if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); + // In the unlucky case that `temp` is A0, we preserve the address in `out` across the + // kSaveEverything call. + entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (temp_is_a0) { + __ Move(entry_address, temp); + } + } + dex::TypeIndex type_index = cls_->GetTypeIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); - QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType; mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this); @@ -237,25 +255,27 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } + // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. + if (is_load_class_bss_entry && r2_baker_or_no_read_barriers) { + // The class entry address was preserved in `entry_address` thanks to kSaveEverything. + __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0); + } + // Move the class to the desired location. - Location out = locations->Out(); if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = instruction_->GetType(); - mips_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + mips_codegen->MoveLocation(out, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + type); } - RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - DCHECK(out.IsValid()); - // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to - // kSaveEverything and use a temporary for the .bss entry address in the fast path, - // so that we can avoid another calculation here. 
- bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + + // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. + if (is_load_class_bss_entry && !r2_baker_or_no_read_barriers) { + // For non-Baker read barriers (or on R6), we need to re-calculate the address of + // the class entry. Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); - DCHECK_NE(out.AsRegister<Register>(), AT); CodeGeneratorMIPS::PcRelativePatchInfo* info = mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); bool reordering = __ SetReorder(false); @@ -286,40 +306,62 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { explicit LoadStringSlowPathMIPS(HLoadString* instruction) : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + DCHECK(instruction_->IsLoadString()); + DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + HLoadString* load = instruction_->AsLoadString(); + const dex::StringIndex string_index = load->GetStringIndex(); + Register out = locations->Out().AsRegister<Register>(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - + const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + const bool r2_baker_or_no_read_barriers = !isR6 && (!kUseReadBarrier || kUseBakerReadBarrier); + InvokeRuntimeCallingConvention calling_convention; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - InvokeRuntimeCallingConvention calling_convention; - HLoadString* load = instruction_->AsLoadString(); - const dex::StringIndex string_index = load->GetStringIndex(); + // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. + Register entry_address = kNoRegister; + if (r2_baker_or_no_read_barriers) { + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); + // In the unlucky case that `temp` is A0, we preserve the address in `out` across the + // kSaveEverything call. + entry_address = temp_is_a0 ? out : temp; + DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); + if (temp_is_a0) { + __ Move(entry_address, temp); + } + } + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + // Store the resolved string to the BSS entry. + if (r2_baker_or_no_read_barriers) { + // The string entry address was preserved in `entry_address` thanks to kSaveEverything. + __ StoreToOffset(kStoreWord, calling_convention.GetRegisterAt(0), entry_address, 0); + } + Primitive::Type type = instruction_->GetType(); mips_codegen->MoveLocation(locations->Out(), - calling_convention.GetReturnLocation(type), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); - RestoreLiveRegisters(codegen, locations); - // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. - bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); - Register base = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - DCHECK_NE(out, AT); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base); - __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - + // Store the resolved string to the BSS entry. + if (!r2_baker_or_no_read_barriers) { + // For non-Baker read barriers (or on R6), we need to re-calculate the address of + // the string entry. + Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + bool reordering = __ SetReorder(false); + mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base); + __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); + __ SetReorder(reordering); + } __ B(GetExitLabel()); } @@ -451,8 +493,13 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); mips_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; } @@ -1719,15 +1766,14 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, DCHECK_EQ(code[literal_offset + 1], 0x12); DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00); DCHECK_EQ(code[literal_offset + 3], 0x3C); - // lw reg, reg, addr32_low + // instr reg, reg, addr32_low DCHECK_EQ(code[literal_offset + 4], 0x78); DCHECK_EQ(code[literal_offset + 5], 0x56); - DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C); - addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "lw reg, reg, addr32_low". + addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low". // lui reg, addr32_high code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); - // lw reg, reg, addr32_low + // instr reg, reg, addr32_low code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0); code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8); } @@ -2436,6 +2482,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
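(Rationale, ours, for the empty caller-save set above and its twin occurrences later in this change: with Baker read barriers the mark entrypoints are register-preserving, rewriting only the single register that holds the reference, so the read-barrier slow path clobbers nothing and the default caller-save spills can be dropped.)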
+ } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(type)) { @@ -3438,8 +3487,6 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { Primitive::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); - Register dst = locations->Out().AsRegister<Register>(); - MipsLabel true_label; switch (type) { default: @@ -3448,27 +3495,14 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { return; case Primitive::kPrimLong: - // TODO: don't use branches. - GenerateLongCompareAndBranch(instruction->GetCondition(), locations, &true_label); - break; + GenerateLongCompare(instruction->GetCondition(), locations); + return; case Primitive::kPrimFloat: case Primitive::kPrimDouble: GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); return; } - - // Convert the branches into the result. - MipsLabel done; - - // False case: result = 0. - __ LoadConst32(dst, 0); - __ B(&done); - - // True case: result = 1. - __ Bind(&true_label); - __ LoadConst32(dst, 1); - __ Bind(&done); } void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { @@ -4238,6 +4272,221 @@ void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond, } } +void InstructionCodeGeneratorMIPS::GenerateLongCompare(IfCondition cond, + LocationSummary* locations) { + Register dst = locations->Out().AsRegister<Register>(); + Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); + Location rhs_location = locations->InAt(1); + Register rhs_high = ZERO; + Register rhs_low = ZERO; + int64_t imm = 0; + uint32_t imm_high = 0; + uint32_t imm_low = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + imm = rhs_location.GetConstant()->AsLongConstant()->GetValue(); + imm_high = High32Bits(imm); + imm_low = Low32Bits(imm); + } else { + rhs_high = rhs_location.AsRegisterPairHigh<Register>(); + rhs_low = rhs_location.AsRegisterPairLow<Register>(); + } + if (use_imm && imm == 0) { + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Or(dst, lhs_high, lhs_low); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Or(dst, lhs_high, lhs_low); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + __ Slt(dst, lhs_high, ZERO); + break; + case kCondGE: + __ Slt(dst, lhs_high, ZERO); + __ Xori(dst, dst, 1); + break; + case kCondLE: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Sltu(dst, AT, TMP); + __ Xori(dst, dst, 1); + break; + case kCondGT: + __ Or(TMP, lhs_high, lhs_low); + __ Sra(AT, lhs_high, 31); + __ Sltu(dst, AT, TMP); + break; + case kCondB: // always false + __ Andi(dst, dst, 0); + break; + case kCondAE: // always true + __ Ori(dst, ZERO, 1); + break; + } + } else if (use_imm) { + // TODO: more efficient comparison with constants without loading them into TMP/AT. 
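(Explanatory sketch, ours, of the branch-free pattern used throughout the switch below, shown for kCondEQ against a constant:

    // TMP = lhs_high ^ imm_high;  AT = lhs_low ^ imm_low;
    // dst = TMP | AT;             // Zero iff the full 64-bit values match.
    // dst = (dst < 1);            // Sltiu materializes the boolean into dst.

The ordered conditions follow the same shape: a signed or unsigned compare of the high words, an unsigned compare of the low words to break ties, and a final Xori to flip the result for the complementary condition.)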
+ switch (cond) { + case kCondEQ: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(dst, TMP, AT); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + __ LoadConst32(TMP, imm_high); + __ Xor(TMP, TMP, lhs_high); + __ LoadConst32(AT, imm_low); + __ Xor(AT, AT, lhs_low); + __ Or(dst, TMP, AT); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + case kCondGE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, lhs_low, TMP); + } + __ LoadConst32(TMP, imm_high); + __ Slt(AT, lhs_high, TMP); + __ Slt(TMP, TMP, lhs_high); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, lhs_low, dst); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondGE) { + __ Xori(dst, dst, 1); + } + break; + case kCondGT: + case kCondLE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, TMP, lhs_low); + } + __ LoadConst32(TMP, imm_high); + __ Slt(AT, TMP, lhs_high); + __ Slt(TMP, lhs_high, TMP); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, dst, lhs_low); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondLE) { + __ Xori(dst, dst, 1); + } + break; + case kCondB: + case kCondAE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, lhs_low, TMP); + } + __ LoadConst32(TMP, imm_high); + __ Sltu(AT, lhs_high, TMP); + __ Sltu(TMP, TMP, lhs_high); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, lhs_low, dst); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondAE) { + __ Xori(dst, dst, 1); + } + break; + case kCondA: + case kCondBE: + if (dst == lhs_low) { + __ LoadConst32(TMP, imm_low); + __ Sltu(dst, TMP, lhs_low); + } + __ LoadConst32(TMP, imm_high); + __ Sltu(AT, TMP, lhs_high); + __ Sltu(TMP, lhs_high, TMP); + if (dst != lhs_low) { + __ LoadConst32(dst, imm_low); + __ Sltu(dst, dst, lhs_low); + } + __ Slt(dst, TMP, dst); + __ Or(dst, dst, AT); + if (cond == kCondBE) { + __ Xori(dst, dst, 1); + } + break; + } + } else { + switch (cond) { + case kCondEQ: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(dst, TMP, AT); + __ Sltiu(dst, dst, 1); + break; + case kCondNE: + __ Xor(TMP, lhs_high, rhs_high); + __ Xor(AT, lhs_low, rhs_low); + __ Or(dst, TMP, AT); + __ Sltu(dst, ZERO, dst); + break; + case kCondLT: + case kCondGE: + __ Slt(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Slt(TMP, TMP, AT); + __ Slt(AT, lhs_high, rhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondGE) { + __ Xori(dst, dst, 1); + } + break; + case kCondGT: + case kCondLE: + __ Slt(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Slt(TMP, TMP, AT); + __ Slt(AT, rhs_high, lhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondLE) { + __ Xori(dst, dst, 1); + } + break; + case kCondB: + case kCondAE: + __ Sltu(TMP, rhs_high, lhs_high); + __ Sltu(AT, lhs_low, rhs_low); + __ Slt(TMP, TMP, AT); + __ Sltu(AT, lhs_high, rhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondAE) { + __ Xori(dst, dst, 1); + } + break; + case kCondA: + case kCondBE: + __ Sltu(TMP, lhs_high, rhs_high); + __ Sltu(AT, rhs_low, lhs_low); + __ Slt(TMP, TMP, AT); + __ Sltu(AT, rhs_high, lhs_high); + __ Or(dst, AT, TMP); + if (cond == kCondBE) { + __ Xori(dst, dst, 1); + } + break; + } + } +} + void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label) { @@ -5155,7 +5404,10 @@ void 
InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -5767,6 +6019,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall)); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (generate_volatile) { InvokeRuntimeCallingConvention calling_convention; @@ -6445,6 +6700,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -6452,6 +6708,7 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -6461,6 +6718,9 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. @@ -7048,26 +7308,27 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(Primitive::kPrimNot)); + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); return; } DCHECK(!cls->NeedsAccessCheck()); - + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimeAddress: case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kBssEntry: - if (codegen_->GetInstructionSetFeatures().IsR6()) { + if (isR6) { break; } FALLTHROUGH_INTENDED; @@ -7078,6 +7339,22 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { break; } locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + // Request a temp to hold the BSS entry location for the slow path on R2 + // (no benefit for R6). + if (!isR6) { + locations->AddTemp(Location::RequiresRegister()); + } + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -7160,10 +7437,22 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF case HLoadClass::LoadKind::kBssEntry: { CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - bool reordering = __ SetReorder(false); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); - __ SetReorder(reordering); + constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (isR6 || non_baker_read_barrier) { + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); + __ SetReorder(reordering); + } else { + // On R2 save the BSS entry address in a temporary register instead of + // recalculating it in the slow path. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg); + __ Addiu(temp, temp, /* placeholder */ 0x5678); + __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option); + } generate_null_check = true; break; } @@ -7227,13 +7516,14 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. 
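(Background, ours: MIPS32R6 has truly PC-relative address-forming instructions such as AUIPC and ADDIUPC, so it can materialize these addresses with no extra input; R2 has none, which is why the PC-relative load kinds below fall through to the variant that also reserves an input register for the PC-relative base.)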
case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: case HLoadString::LoadKind::kBssEntry: - if (codegen_->GetInstructionSetFeatures().IsR6()) { + if (isR6) { break; } FALLTHROUGH_INTENDED; @@ -7246,9 +7536,25 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { } if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + // Request a temp to hold the BSS entry location for the slow path on R2 + // (no benefit for R6). + if (!isR6) { + locations->AddTemp(Location::RequiresRegister()); + } + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } } } @@ -7305,14 +7611,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - bool reordering = __ SetReorder(false); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(load, - out_loc, - out, - /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; + if (isR6 || non_baker_read_barrier) { + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); + __ SetReorder(reordering); + } else { + // On R2 save the BSS entry address in a temporary register instead of + // recalculating it in the slow path. + Register temp = locations->GetTemp(0).AsRegister<Register>(); + bool reordering = __ SetReorder(false); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info, temp, base_or_current_method_reg); + __ Addiu(temp, temp, /* placeholder */ 0x5678); + __ SetReorder(reordering); + GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption); + } SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); __ Beqz(out, slow_path->GetEntryLabel()); @@ -7342,6 +7660,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ // TODO: Re-add the compiler code to do string dex cache lookup again. 
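(Clarifying note, ours: the matching LocationsBuilderMIPS::VisitLoadString above now places the output of this kDexCacheViaMethod path in A0, the first runtime-call argument register, so the DCHECK_EQ on `out` below both documents and enforces that the resolved String produced by the call already sits where this path expects it, with no extra move needed after InvokeRuntime.)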
DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -7766,6 +8085,15 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { } } +void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 3875c4bdba..5ad1f12f8a 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -229,9 +229,10 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { // We switch to the table-based method starting with 7 cases. static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + void GenerateMemoryBarrier(MemBarrierKind kind); + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); @@ -294,6 +295,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); + void GenerateLongCompare(IfCondition cond, LocationSummary* locations); void GenerateLongCompareAndBranch(IfCondition cond, LocationSummary* locations, MipsLabel* label); diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 19250c64e3..f04e3841f5 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -141,7 +141,8 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) : SlowPathCodeMIPS64(instruction) {} + explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction) + : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); @@ -192,7 +193,9 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); Primitive::Type type = instruction_->GetType(); - mips64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + mips64_codegen->MoveLocation(out, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + type); } RestoreLiveRegisters(codegen, locations); @@ -200,10 +203,6 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { 
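(Contrast note, ours: unlike the MIPS32 slow path earlier in this change, which preserves the BSS entry address in a temp across the save-everything call, this MIPS64 path still recomputes the entry address after the call, via AT and a fresh PC-relative placeholder pair.)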
DCHECK(out.IsValid()); - // TODO: Change art_quick_initialize_type/art_quick_initialize_static_storage to - // kSaveEverything and use a temporary for the .bss entry address in the fast path, - // so that we can avoid another calculation here. - DCHECK_NE(out.AsRegister<GpuRegister>(), AT); CodeGeneratorMIPS64::PcRelativePatchInfo* info = mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -250,16 +249,13 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); Primitive::Type type = instruction_->GetType(); mips64_codegen->MoveLocation(locations->Out(), - calling_convention.GetReturnLocation(type), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); RestoreLiveRegisters(codegen, locations); // Store the resolved String to the BSS entry. - // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the - // .bss entry address in the fast path, so that we can avoid another calculation here. GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - DCHECK_NE(out, AT); CodeGeneratorMIPS64::PcRelativePatchInfo* info = mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -397,8 +393,13 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); mips64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; } @@ -1986,6 +1987,9 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (Primitive::IsFloatingPointType(type)) { @@ -3906,7 +3910,10 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3982,6 +3989,9 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); @@ -4544,6 +4554,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instructi void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; switch (type_check_kind) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: @@ -4551,6 +4562,7 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: call_kind = kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -4560,6 +4572,9 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // The output does overlap inputs. @@ -5077,10 +5092,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - calling_convention.GetReturnLocation(Primitive::kPrimNot)); + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -5090,10 +5103,24 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
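(Rationale, ours: a save-everything runtime call preserves every register across the slow path, so for the kBssEntry case set up just below, only A0, the argument register that also carries the result back, has to appear in the custom caller-save set.)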
+ } if (load_kind == HLoadClass::LoadKind::kReferrersClass) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not @@ -5224,9 +5251,20 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); + locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!kUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + } else { + // For non-Baker read barrier we have a temp-clobbering call. + } + } } } @@ -5294,6 +5332,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA // TODO: Re-add the compiler code to do string dex cache lookup again. DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -5653,6 +5692,15 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { } } +void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorMIPS64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index fd1a174608..200e884c09 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -226,9 +226,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { // We switch to the table-based method starting with 7 cases. 
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6; + void GenerateMemoryBarrier(MemBarrierKind kind); + private: void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg); - void GenerateMemoryBarrier(MemBarrierKind kind); void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); void HandleBinaryOp(HBinaryOperation* operation); void HandleCondition(HCondition* instruction); diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 93befa439c..57f7e6b25c 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -22,6 +22,7 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) namespace art { namespace arm64 { +using helpers::DRegisterFrom; using helpers::VRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; @@ -771,20 +772,22 @@ static void CreateVecMemLocations(ArenaAllocator* arena, } } -// Helper to set up registers and address for vector memory operations. -MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters( +// Helper to set up locations for vector memory operations. Returns the memory operand and, +// if used, sets the output parameter scratch to a temporary register used in this operand, +// so that the client can release it right after the memory operand use. +MemOperand InstructionCodeGeneratorARM64::VecAddress( HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load, - UseScratchRegisterScope* temps_scope) { + UseScratchRegisterScope* temps_scope, + size_t size, + bool is_string_char_at, + /*out*/ Register* scratch) { LocationSummary* locations = instruction->GetLocations(); Register base = InputRegisterAt(instruction, 0); Location index = locations->InAt(1); - *reg_loc = is_load ? locations->Out() : locations->InAt(2); - - Primitive::Type packed_type = instruction->GetPackedType(); - uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value(); - size_t shift = Primitive::ComponentSizeShift(packed_type); + uint32_t offset = is_string_char_at + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(size).Uint32Value(); + size_t shift = ComponentSizeShiftWidth(size); // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet. 
DCHECK(!instruction->InputAt(0)->IsIntermediateAddress()); @@ -793,10 +796,9 @@ MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters( offset += Int64ConstantFrom(index) << shift; return HeapOperand(base, offset); } else { - Register temp = temps_scope->AcquireSameSizeAs(base); - __ Add(temp, base, Operand(WRegisterFrom(index), LSL, shift)); - - return HeapOperand(temp, offset); + *scratch = temps_scope->AcquireSameSizeAs(base); + __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift)); + return HeapOperand(*scratch, offset); } } @@ -805,15 +807,43 @@ void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) { } void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { - Location reg_loc = Location::NoLocation(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VRegister reg = VRegisterFrom(locations->Out()); UseScratchRegisterScope temps(GetVIXLAssembler()); - MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true, &temps); - VRegister reg = VRegisterFrom(reg_loc); + Register scratch; switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + DCHECK_EQ(8u, instruction->GetVectorLength()); + // Special handling of compressed/uncompressed string load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + vixl::aarch64::Label uncompressed_load, done; + // Test compression bit. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Register length = temps.AcquireW(); + __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset)); + __ Tbnz(length.W(), 0, &uncompressed_load); + temps.Release(length); // no longer needed + // Zero extend 8 compressed bytes into 8 chars. + __ Ldr(DRegisterFrom(locations->Out()).V8B(), + VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch)); + __ Uxtl(reg.V8H(), reg.V8B()); + __ B(&done); + if (scratch.IsValid()) { + temps.Release(scratch); // if used, no longer needed + } + // Load 8 direct uncompressed chars.
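// Editorial sketch (not part of the patch): the scalar equivalent of the two string paths
// emitted above, under the layout asserted by the static_assert (count bit 0 is 0 for
// compressed 8-bit data, 1 for uncompressed 16-bit data). All names here are illustrative,
// not ART APIs.
inline uint16_t StringCharAtSketch(int32_t count, const uint8_t* compressed,
                                   const uint16_t* uncompressed, size_t i) {
  if ((count & 1) == 0) {
    return compressed[i];    // Zero-extend one byte per char (the Ldr d-reg + Uxtl above).
  }
  return uncompressed[i];    // Load the 16-bit char directly (the full-width Ldr below).
}
// The vector code makes that choice once for 8 chars; the uncompressed path is bound next: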
+ __ Bind(&uncompressed_load); + __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch)); + __ Bind(&done); + return; + } + FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: - case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimFloat: @@ -821,7 +851,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { case Primitive::kPrimDouble: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ Ldr(reg, mem); + __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -834,10 +864,11 @@ void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) { } void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { - Location reg_loc = Location::NoLocation(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VRegister reg = VRegisterFrom(locations->InAt(2)); UseScratchRegisterScope temps(GetVIXLAssembler()); - MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false, &temps); - VRegister reg = VRegisterFrom(reg_loc); + Register scratch; switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: @@ -850,7 +881,7 @@ case Primitive::kPrimDouble: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); - __ Str(reg, mem); + __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 013b092b5a..5bb19c193c 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -201,6 +201,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Integral-abs requires a temporary for the comparison. if (instruction->GetPackedType() == Primitive::kPrimInt) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } @@ -766,16 +767,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena, } } -// Helper to set up registers and address for vector memory operations. -static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load) { - LocationSummary* locations = instruction->GetLocations(); +// Helper to construct address for vector memory operations. +static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { Location base = locations->InAt(0); Location index = locations->InAt(1); - *reg_loc = is_load ? locations->Out() : locations->InAt(2); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); - uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); ScaleFactor scale = TIMES_1; switch (size) { case 2: scale = TIMES_2; break; @@ -783,22 +778,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, case 8: scale = TIMES_8; break; default: break; } + uint32_t offset = is_string_char_at + ?
mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(size).Uint32Value(); return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset); } void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + // String load requires a temporary for the compressed load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } } void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, instruction->IsStringCharAt()); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + DCHECK_EQ(8u, instruction->GetVectorLength()); + // Special handling of compressed/uncompressed string load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + NearLabel done, not_compressed; + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // Test compression bit. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1)); + __ j(kNotZero, &not_compressed); + // Zero extend 8 compressed bytes into 8 chars. + __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ pxor(tmp, tmp); + __ punpcklbw(reg, tmp); + __ jmp(&done); + // Load 8 direct uncompressed chars. + __ Bind(&not_compressed); + is_aligned16 ?
__ movdqa(reg, address) : __ movdqu(reg, address); + __ Bind(&done); + return; + } + FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: - case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: @@ -825,9 +851,10 @@ void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { } void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, /*is_string_char_at*/ false); + XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 66f19a4376..6d4aae86e6 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -194,6 +194,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Integral-abs requires a temporary for the comparison. if (instruction->GetPackedType() == Primitive::kPrimInt) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } @@ -755,16 +756,10 @@ static void CreateVecMemLocations(ArenaAllocator* arena, } } -// Helper to set up registers and address for vector memory operations. -static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, - Location* reg_loc, - bool is_load) { - LocationSummary* locations = instruction->GetLocations(); +// Helper to construct address for vector memory operations. +static Address VecAddress(LocationSummary* locations, size_t size, bool is_string_char_at) { Location base = locations->InAt(0); Location index = locations->InAt(1); - *reg_loc = is_load ? locations->Out() : locations->InAt(2); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); - uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); ScaleFactor scale = TIMES_1; switch (size) { case 2: scale = TIMES_2; break; @@ -772,22 +767,53 @@ static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, case 8: scale = TIMES_8; break; default: break; } + uint32_t offset = is_string_char_at + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(size).Uint32Value(); return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); } void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + // String load requires a temporary for the compressed load.
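// Editorial note (not part of the patch): the compressed path widens 8 bytes into 8 chars by
// interleaving with a zeroed register, which is why an extra XMM temporary is reserved below:
//
//   reg = [b7 b6 b5 b4 b3 b2 b1 b0]                   // movsd: 8 compressed bytes
//   tmp = [ 0  0  0  0  0  0  0  0]                   // pxor tmp, tmp
//   punpcklbw reg, tmp                                // interleave low bytes of reg with tmp
//   reg = [0,b7 0,b6 0,b5 0,b4 0,b3 0,b2 0,b1 0,b0]   // 8 zero-extended 16-bit chars
//
// The temp registration follows: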
+ if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } } void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, instruction->IsStringCharAt()); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + DCHECK_EQ(8u, instruction->GetVectorLength()); + // Special handling of compressed/uncompressed string load. + if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { + NearLabel done, not_compressed; + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + // Test compression bit. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1)); + __ j(kNotZero, &not_compressed); + // Zero extend 8 compressed bytes into 8 chars. + __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ pxor(tmp, tmp); + __ punpcklbw(reg, tmp); + __ jmp(&done); + // Load 8 direct uncompressed chars. + __ Bind(&not_compressed); + is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); + __ Bind(&done); + return; + } + FALLTHROUGH_INTENDED; case Primitive::kPrimBoolean: case Primitive::kPrimByte: - case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: @@ -814,9 +840,10 @@ void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { } void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { - Location reg_loc = Location::NoLocation(); - Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); - XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + Address address = VecAddress(locations, size, /*is_string_char_at*/ false); + XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 08a752f1d2..cf2d5cbee3 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -384,8 +384,14 @@ class DeoptimizationSlowPathX86 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + x86_codegen->Load32BitValue( + calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
x86_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; } @@ -1688,7 +1694,10 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -2057,6 +2066,15 @@ void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant // Will be generated at use site. } +void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderX86::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index ff6e099d12..f2ed52b5a5 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -397,8 +397,14 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + x86_64_codegen->Load32BitValue( + CpuRegister(calling_convention.GetRegisterAt(0)), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); - CheckEntrypointTypes<kQuickDeoptimize, void, void>(); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); } const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; } @@ -1710,7 +1716,10 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); - locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
+ InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -2165,6 +2174,15 @@ void InstructionCodeGeneratorX86_64::VisitDoubleConstant( // Will be generated at use site. } +void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { + constructor_fence->SetLocations(nullptr); +} + +void InstructionCodeGeneratorX86_64::VisitConstructorFence( + HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { memory_barrier->SetLocations(nullptr); } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 34b52a87b5..aea901dec7 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -338,14 +338,21 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { // Ensure the inputs of `instruction` are defined in a block of the graph. for (HInstruction* input : instruction->GetInputs()) { - const HInstructionList& list = input->IsPhi() - ? input->GetBlock()->GetPhis() - : input->GetBlock()->GetInstructions(); - if (!list.Contains(input)) { - AddError(StringPrintf("Input %d of instruction %d is not defined " - "in a basic block of the control-flow graph.", + if (input->GetBlock() == nullptr) { + AddError(StringPrintf("Input %d of instruction %d is not in any " + "basic block of the control-flow graph.", input->GetId(), instruction->GetId())); + } else { + const HInstructionList& list = input->IsPhi() + ? input->GetBlock()->GetPhis() + : input->GetBlock()->GetInstructions(); + if (!list.Contains(input)) { + AddError(StringPrintf("Input %d of instruction %d is not defined " + "in a basic block of the control-flow graph.", + input->GetId(), + instruction->GetId())); + } } } @@ -497,8 +504,7 @@ void GraphChecker::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { "has a null pointer as last input.", invoke->DebugName(), invoke->GetId())); - } - if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) { + } else if (!last_input->IsClinitCheck() && !last_input->IsLoadClass()) { AddError(StringPrintf("Static invoke %s:%d marked as having an explicit clinit check " "has a last instruction (%s:%d) which is neither a clinit check " "nor a load class instruction.", diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 1f8a58cdaa..8674e727bb 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -140,6 +140,14 @@ void HInliner::Run() { DCHECK_NE(total_number_of_instructions_, 0u); DCHECK_NE(inlining_budget_, 0u); + // If we're compiling with a core image (which is only used for + // test purposes), honor inlining directives in method names: + // - if a method's name contains the substring "$inline$", ensure + // that this method is actually inlined; + // - if a method's name contains the substring "$noinline$", do not + // inline that method. + const bool honor_inlining_directives = IsCompilingWithCoreImage(); + // Keep a copy of all blocks when starting the visit. 
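// Editorial sketch (not part of the patch): the substring checks implied by the directive
// comment above could look as follows; HasDirective is a hypothetical helper, not an ART API.
inline bool HasDirective(const std::string& callee_name, const char* directive) {
  return callee_name.find(directive) != std::string::npos;
}
// With it, "$noinline$" in a callee name forbids inlining and "$inline$" asserts that the
// callee really was inlined, but only when compiling a core image. The block snapshot
// mentioned in the comment above is taken next: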
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder(); DCHECK(!blocks.empty()); @@ -152,7 +160,7 @@ void HInliner::Run() { HInvoke* call = instruction->AsInvoke(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) { - if (kIsDebugBuild && IsCompilingWithCoreImage()) { + if (honor_inlining_directives) { // Debugging case: directives in method names control or assert on inlining. std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod( call->GetDexMethodIndex(), /* with_signature */ false); @@ -775,7 +783,7 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); HInstruction* deopt = new (graph_->GetArena()) HDeoptimize( - graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc); + graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); @@ -809,7 +817,17 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, } const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); - bool is_referrer = (klass.Get() == outermost_graph_->GetArtMethod()->GetDeclaringClass()); + bool is_referrer; + ArtMethod* outermost_art_method = outermost_graph_->GetArtMethod(); + if (outermost_art_method == nullptr) { + DCHECK(Runtime::Current()->IsAotCompiler()); + // We are in AOT mode and we don't have an ART method to determine + // if the inlined method belongs to the referrer. Assume it doesn't. + is_referrer = false; + } else { + is_referrer = klass.Get() == outermost_art_method->GetDeclaringClass(); + } + // Note that we will just compare the classes, so we don't need Java semantics access checks. // Note that the type index and the dex file are relative to the method this type guard is // inlined into. @@ -842,7 +860,9 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, graph_->GetArena(), compare, receiver, - HDeoptimize::Kind::kInline, + Runtime::Current()->IsAotCompiler() + ? DeoptimizationKind::kAotInlineCache + : DeoptimizationKind::kJitInlineCache, invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -1129,7 +1149,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( graph_->GetArena(), compare, receiver, - HDeoptimize::Kind::kInline, + DeoptimizationKind::kJitSameTarget, invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); @@ -1462,8 +1482,13 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, } } if (needs_constructor_barrier) { - HMemoryBarrier* barrier = new (graph_->GetArena()) HMemoryBarrier(kStoreStore, kNoDexPc); - invoke_instruction->GetBlock()->InsertInstructionBefore(barrier, invoke_instruction); + // See CompilerDriver::RequiresConstructorBarrier for more details. 
+ DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence"; + + HConstructorFence* constructor_fence = + new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena()); + invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence, + invoke_instruction); } *return_replacement = nullptr; break; } diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 978c6a2d71..8b79da8c73 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -451,10 +451,13 @@ void HInstructionBuilder::InitializeParameters() { referrer_method_id.class_idx_, parameter_index++, Primitive::kPrimNot, - true); + /* is_this */ true); AppendInstruction(parameter); UpdateLocal(locals_index++, parameter); number_of_parameters--; + current_this_parameter_ = parameter; + } else { + DCHECK(current_this_parameter_ == nullptr); } const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); @@ -465,7 +468,7 @@ arg_types->GetTypeItem(shorty_pos - 1).type_idx_, parameter_index++, Primitive::GetType(shorty[shorty_pos]), - false); + /* is_this */ false); ++shorty_pos; AppendInstruction(parameter); // Store the parameter value in the local that the dex code will use @@ -588,6 +591,8 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } +// Does the method being compiled need any constructor barriers inserted? +// (Always 'false' for methods that aren't <init>.) static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) { // Can be null in unit tests only. if (UNLIKELY(cu == nullptr)) { return false; } Thread* self = Thread::Current(); return cu->IsConstructor() + && !cu->IsStatic() + // RequiresConstructorBarrier must only be queried for <init> methods; + // it's effectively "false" for every other method. + // + // See CompilerDriver::RequiresConstructorBarrier for more explanation. && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex()); } @@ -639,13 +649,24 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc) { if (type == Primitive::kPrimVoid) { + // Only <init> (which ends in a return-void) could possibly have a constructor fence. // This may insert additional redundant constructor fences from the super constructors. // TODO: remove redundant constructor fences (b/36656456). if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) { - AppendInstruction(new (arena_) HMemoryBarrier(kStoreStore, dex_pc)); + // Compiling instance constructor.
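// Editorial sketch (not part of the patch): for an instance constructor such as
//
//   class C { int x; C() { x = 1; } }
//
// BuildReturn now ends the <init> graph roughly as
//
//   HInstanceFieldSet(this, x, 1)
//   HConstructorFence(this)   // was: HMemoryBarrier(kStoreStore)
//   HReturnVoid
//
// Keeping `this` as an input of the fence is what later lets load-store elimination call
// HConstructorFence::RemoveConstructorFences when the object provably never escapes.
// The debug-only check that the method really is <init> follows: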
+ if (kIsDebugBuild) { + std::string method_name = graph_->GetMethodName(); + CHECK_EQ(std::string("<init>"), method_name); + } + + HInstruction* fence_target = current_this_parameter_; + DCHECK(fence_target != nullptr); + + AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_)); } AppendInstruction(new (arena_) HReturnVoid(dex_pc)); } else { + DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); AppendInstruction(new (arena_) HReturn(value, dex_pc)); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 7fdc1883ca..2fb5c7b94d 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -62,6 +62,7 @@ class HInstructionBuilder : public ValueObject { current_block_(nullptr), current_locals_(nullptr), latest_result_(nullptr), + current_this_parameter_(nullptr), compiler_driver_(driver), code_generator_(code_generator), dex_compilation_unit_(dex_compilation_unit), @@ -325,6 +326,11 @@ class HInstructionBuilder : public ValueObject { HBasicBlock* current_block_; ArenaVector<HInstruction*>* current_locals_; HInstruction* latest_result_; + // Current "this" parameter. + // Valid only after InitializeParameters() finishes. + // * Null for static methods. + // * Non-null for instance methods. + HParameterValue* current_this_parameter_; CompilerDriver* const compiler_driver_; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 2dcc12e294..2cedde900e 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -257,7 +257,8 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { if (shift_amount->IsConstant()) { int64_t cst = Int64FromConstant(shift_amount->AsConstant()); - if ((cst & implicit_mask) == 0) { + int64_t masked_cst = cst & implicit_mask; + if (masked_cst == 0) { // Replace code looking like // SHL dst, value, 0 // with @@ -266,6 +267,17 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); return; + } else if (masked_cst != cst) { + // Replace code looking like + // SHL dst, value, cst + // where cst exceeds maximum distance with the equivalent + // SHL dst, value, cst & implicit_mask + // (as defined by shift semantics). This ensures other + // optimizations do not need to special case for such situations. + DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt); + instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1); + RecordSimplification(); + return; } } diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index 7d1f146587..c39e5f4d3b 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -247,6 +247,7 @@ bool TryExtractArrayAccessAddress(HInstruction* access, access->GetType() == Primitive::kPrimNot) { // For object arrays, the read barrier instrumentation requires // the original array pointer. + // TODO: This can be relaxed for Baker CC. 
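// Editorial sketch (not part of the patch, and separate from the read-barrier bail-out just
// above): the VisitShift normalization added earlier in this section relies on Java shift
// semantics, where only the low bits of the distance count (5 for int, 6 for long):
inline int32_t ShlIntSketch(int32_t x, int32_t distance) {
  // distance & 31 mirrors the kMaxIntShiftDistance mask, so (x << 33) == (x << 1).
  return static_cast<int32_t>(static_cast<uint32_t>(x) << (distance & 31));
}
// Rewriting cst to (cst & implicit_mask) up front means later optimizations never see an
// over-wide constant distance. TryExtractArrayAccessAddress continues below: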
return false; } diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 750f9cc213..69cf9a126f 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1010,17 +1010,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* code if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } @@ -1648,6 +1645,8 @@ void IntrinsicLocationsBuilderARM::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 4d360158a2..65a82229e9 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1154,17 +1154,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co Register temp = WRegisterFrom(locations->GetTemp(0)); // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - /* scale_factor */ 0u, + /* field_offset */ offset_loc, temp, /* needs_null_check */ false, - /* use_load_acquire */ false, - /* always_update_field */ true); + /* use_load_acquire */ false); } } diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index fd8a37ae05..356d5bcb0c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -1347,17 +1347,14 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Need to make sure the reference stored in the field is a to-space // one before attempting the CAS or the CAS could fail incorrectly. - codegen->GenerateReferenceLoadWithBakerReadBarrier( + codegen->UpdateReferenceFieldWithBakerReadBarrier( invoke, out_loc, // Unused, used only as a "temporary" within the read barrier. base, - /* offset */ 0u, - /* index */ offset_loc, - ScaleFactor::TIMES_1, + /* field_offset */ offset_loc, tmp_ptr_loc, /* needs_null_check */ false, - /* always_update_field */ true, - &tmp); + tmp); } } @@ -2026,6 +2023,8 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // is clobbered by ReadBarrierMarkRegX entry points). Get an extra // temporary register from the register allocator. 
locations->AddTemp(Location::RequiresRegister()); + CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_); + arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations); } } diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 41df56b514..abf5b122c8 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -32,7 +32,7 @@ namespace art { namespace mips { IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen) - : arena_(codegen->GetGraph()->GetArena()) { + : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) { } MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() { @@ -1525,6 +1525,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -3133,6 +3136,89 @@ void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { __ Bind(slow_path->GetExitLabel()); } +// java.lang.Integer java.lang.Integer.valueOf(int) +void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + calling_convention.GetReturnLocation(Primitive::kPrimNot), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + MipsAssembler* assembler = GetAssembler(); + InstructionCodeGeneratorMIPS* icodegen = + down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor()); + + Register out = locations->Out().AsRegister<Register>(); + InvokeRuntimeCallingConvention calling_convention; + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ LoadConst32(out, address); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst32(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one.
+ icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + Register in = locations->InAt(0).AsRegister<Register>(); + MipsLabel allocate, done; + int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; + + // Is (info.low <= in) && (in <= info.high)? + __ Addiu32(out, in, -info.low); + // As unsigned quantities, is out < (info.high - info.low + 1)? + if (IsInt<16>(count)) { + __ Sltiu(AT, out, count); + } else { + __ LoadConst32(AT, count); + __ Sltu(AT, out, AT); + } + // Branch if out >= (info.high - info.low + 1). + // This means that "in" is outside of the range [info.low, info.high]. + __ Beqz(AT, &allocate); + + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ LoadConst32(TMP, data_offset + address); + __ ShiftAndAdd(out, out, TMP, TIMES_4); + __ Lw(out, out, 0); + __ MaybeUnpoisonHeapReference(out); + __ B(&done); + + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer. + address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst32(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreToOffset(kStoreWord, in, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + // Unimplemented intrinsics. UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) @@ -3162,8 +3248,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf) - UNREACHABLE_INTRINSICS(MIPS) #undef __ diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index e134cb882e..eaadad2515 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -49,6 +49,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: + CodeGeneratorMIPS* codegen_; ArenaAllocator* arena_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index b57b41f686..9dce59b2af 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -32,7 +32,7 @@ namespace art { namespace mips64 { IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) - : arena_(codegen->GetGraph()->GetArena()) { + : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) { } Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { @@ -1168,6 +1168,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall), kIntrinsified); + if (can_call && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
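// Editorial sketch (not part of the patch): the single-compare bounds check used by the
// Integer.valueOf fast path above. Biasing by the lower bound and comparing unsigned folds
// (info.low <= in && in <= info.high) into one test:
inline bool InIntegerCacheRangeSketch(int32_t in, int32_t low, int32_t high) {
  uint32_t biased = static_cast<uint32_t>(in) - static_cast<uint32_t>(low);
  uint32_t count = static_cast<uint32_t>(high) - static_cast<uint32_t>(low) + 1u;
  return biased < count;  // One unsigned compare (Sltu above, Bgeuc on MIPS64) replaces two.
}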
locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2564,6 +2567,84 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickTanh); } +// java.lang.Integer java.lang.Integer.valueOf(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { + InvokeRuntimeCallingConvention calling_convention; + IntrinsicVisitor::ComputeIntegerValueOfLocations( + invoke, + codegen_, + calling_convention.GetReturnLocation(Primitive::kPrimNot), + Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { + IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo(); + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + InstructionCodeGeneratorMIPS64* icodegen = + down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor()); + + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + InvokeRuntimeCallingConvention calling_convention; + if (invoke->InputAt(0)->IsConstant()) { + int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue(); + if (value >= info.low && value <= info.high) { + // Just embed the j.l.Integer in the code. + ScopedObjectAccess soa(Thread::Current()); + mirror::Object* boxed = info.cache->Get(value + (-info.low)); + DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed)); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed)); + __ LoadConst64(out, address); + } else { + // Allocate and initialize a new j.l.Integer. + // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the + // JIT object table. + uint32_t address = + dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst64(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + } + } else { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + Mips64Label allocate, done; + int32_t count = static_cast<uint32_t>(info.high) - info.low + 1; + + // Is (info.low <= in) && (in <= info.high)? + __ Addiu32(out, in, -info.low); + // As unsigned quantities, is out < (info.high - info.low + 1)? + __ LoadConst32(AT, count); + // Branch if out >= (info.high - info.low + 1). + // This means that "in" is outside of the range [info.low, info.high]. + __ Bgeuc(out, AT, &allocate); + + // If the value is within the bounds, load the j.l.Integer directly from the array. + uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); + uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); + __ LoadConst64(TMP, data_offset + address); + __ Dlsa(out, out, TMP, TIMES_4); + __ Lwu(out, out, 0); + __ MaybeUnpoisonHeapReference(out); + __ Bc(&done); + + __ Bind(&allocate); + // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer)); + __ LoadConst64(calling_convention.GetRegisterAt(0), address); + codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); + __ StoreToOffset(kStoreWord, in, out, info.value_offset); + // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation + // one. + icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); + __ Bind(&done); + } +} + UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) @@ -2583,8 +2664,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf) - UNREACHABLE_INTRINSICS(MIPS64) #undef __ diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 5b95c26a21..179627ab20 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ -49,6 +49,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: + CodeGeneratorMIPS64* codegen_; ArenaAllocator* arena_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 48699b33ae..8d8cc93b9b 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -566,14 +566,22 @@ class LSEVisitor : public HGraphVisitor { store->GetBlock()->RemoveInstruction(store); } - // Eliminate allocations that are not used. + // Eliminate singleton-classified instructions: + // * - Constructor fences (they never escape this thread). + // * - Allocations (if they are unused). for (HInstruction* new_instance : singleton_new_instances_) { + HConstructorFence::RemoveConstructorFences(new_instance); + if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); new_instance->GetBlock()->RemoveInstruction(new_instance); } } for (HInstruction* new_array : singleton_new_arrays_) { + // TODO: Delete constructor fences for new-array + // In the future HNewArray instructions will have HConstructorFence's for them. + // HConstructorFence::RemoveConstructorFences(new_array); + if (!new_array->HasNonEnvironmentUses()) { new_array->RemoveEnvironmentUsers(); new_array->GetBlock()->RemoveInstruction(new_array); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 5a95abdb50..881802d714 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -71,7 +71,7 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, // extension when represented in the *width* of the given narrower data type // (the fact that char normally zero extends does not matter here). int64_t value = 0; - if (IsInt64AndGet(instruction, &value)) { + if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: if (std::numeric_limits<int8_t>::min() <= value && @@ -119,7 +119,7 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, // extension when represented in the *width* of the given narrower data type // (the fact that byte/short normally sign extend does not matter here). 
int64_t value = 0; - if (IsInt64AndGet(instruction, &value)) { + if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: if (std::numeric_limits<uint8_t>::min() <= value && @@ -733,12 +733,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } else if (instruction->IsArrayGet()) { - // Strings are different, with a different offset to the actual data - // and some compressed to save memory. For now, all cases are rejected - // to avoid the complexity. - if (instruction->AsArrayGet()->IsStringCharAt()) { - return false; - } // Accept a right-hand-side array base[index] for // (1) exact matching vector type, // (2) loop-invariant base, @@ -839,17 +833,17 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, // TODO: accept symbolic, albeit loop invariant shift factors. HInstruction* opa = instruction->InputAt(0); HInstruction* opb = instruction->InputAt(1); - if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) { - if (generate_code) { - // Make sure shift factor only looks at lower bits, as defined for sequential shifts. - // Note that even the narrower SIMD shifts do the right thing after that. - int32_t mask = (instruction->GetType() == Primitive::kPrimLong) - ? kMaxLongShiftDistance - : kMaxIntShiftDistance; - HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask); - GenerateVecOp(instruction, vector_map_->Get(opa), s, type); + int64_t distance = 0; + if (VectorizeUse(node, opa, generate_code, type, restrictions) && + IsInt64AndGet(opb, /*out*/ &distance)) { + // Restrict shift distance to packed data type width. + int64_t max_distance = Primitive::ComponentSize(type) * 8; + if (0 <= distance && distance < max_distance) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), opb, type); + } + return true; } - return true; } } else if (instruction->IsInvokeStaticOrDirect()) { // Accept particular intrinsics. @@ -1001,8 +995,9 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, vector = new (global_allocator_) HVecStore( global_allocator_, org->InputAt(0), opa, opb, type, vector_length_); } else { + bool is_string_char_at = org->AsArrayGet()->IsStringCharAt(); vector = new (global_allocator_) HVecLoad( - global_allocator_, org->InputAt(0), opa, type, vector_length_); + global_allocator_, org->InputAt(0), opa, type, vector_length_, is_string_char_at); } } else { // Scalar store or load. @@ -1010,7 +1005,9 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, if (opb != nullptr) { vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc); } else { - vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc); + bool is_string_char_at = org->AsArrayGet()->IsStringCharAt(); + vector = new (global_allocator_) HArrayGet( + org->InputAt(0), opa, type, kNoDexPc, is_string_char_at); } } vector_map_->Put(org, vector); @@ -1175,14 +1172,14 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, int64_t value = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64AndGet(instruction->InputAt(1), &value) && value == 1) { + IsInt64AndGet(instruction->InputAt(1), /*out*/ &value) && value == 1) { // // TODO: make following code less sensitive to associativity and commutativity differences. // HInstruction* x = instruction->InputAt(0); // Test for an optional rounding part (x + 1) >> 1. 
bool is_rounded = false; - if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), &value) && value == 1) { + if (x->IsAdd() && IsInt64AndGet(x->InputAt(1), /*out*/ &value) && value == 1) { x = x->InputAt(0); is_rounded = true; } diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ca953a1a7e..a8bfe610de 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -528,6 +528,15 @@ HCurrentMethod* HGraph::GetCurrentMethod() { return cached_current_method_; } +const char* HGraph::GetMethodName() const { + const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_); + return dex_file_.GetMethodName(method_id); +} + +std::string HGraph::PrettyMethod(bool with_signature) const { + return dex_file_.PrettyMethod(method_idx_, with_signature); +} + HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) { switch (type) { case Primitive::Type::kPrimBoolean: @@ -1150,6 +1159,81 @@ void HVariableInputSizeInstruction::RemoveInputAt(size_t index) { } } +void HVariableInputSizeInstruction::RemoveAllInputs() { + RemoveAsUserOfAllInputs(); + DCHECK(!HasNonEnvironmentUses()); + + inputs_.clear(); + DCHECK_EQ(0u, InputCount()); +} + +void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { + DCHECK(instruction->GetBlock() != nullptr); + // Removing constructor fences only makes sense for instructions with an object return type. + DCHECK_EQ(Primitive::kPrimNot, instruction->GetType()); + + // Efficient implementation that simultaneously (in one pass): + // * Scans the uses list for all constructor fences. + // * Deletes that constructor fence from the uses list of `instruction`. + // * Deletes `instruction` from the constructor fence's inputs. + // * Deletes the constructor fence if it now has 0 inputs. + + const HUseList<HInstruction*>& uses = instruction->GetUses(); + // Warning: Although this is "const", we might mutate the list when calling RemoveInputAt. + for (auto it = uses.begin(), end = uses.end(); it != end; ) { + const HUseListNode<HInstruction*>& use_node = *it; + HInstruction* const use_instruction = use_node.GetUser(); + + // Advance the iterator immediately once we fetch the use_node. + // Warning: If the input is removed, the current iterator becomes invalid. + ++it; + + if (use_instruction->IsConstructorFence()) { + HConstructorFence* ctor_fence = use_instruction->AsConstructorFence(); + size_t input_index = use_node.GetIndex(); + + // Process the candidate instruction for removal + // from the graph. + + // Constructor fence instructions are never + // used by other instructions. + // + // If we wanted to make this more generic, it + // could be a runtime if statement. + DCHECK(!ctor_fence->HasUses()); + + // A constructor fence's return type is "kPrimVoid" + // and therefore it can't have any environment uses. + DCHECK(!ctor_fence->HasEnvironmentUses()); + + // Remove the inputs first, otherwise removing the instruction + // will try to remove its uses while we are already removing uses + // and this operation will fail. + DCHECK_EQ(instruction, ctor_fence->InputAt(input_index)); + + // Removing the input will also remove the `use_node`. + // (Do not look at `use_node` after this, it will be a dangling reference). + ctor_fence->RemoveInputAt(input_index); + + // Once all inputs are removed, the fence is considered dead and + // is removed. 
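// Editorial sketch (not part of the patch): the advance-before-mutate iteration pattern used
// by the loop above, in miniature with a std::list; erasing through `current` is safe because
// `it` has already moved past it, exactly like the `++it` before RemoveInputAt above:
//
//   for (auto it = values.begin(); it != values.end(); ) {
//     auto current = it++;
//     if (ShouldErase(*current)) {
//       values.erase(current);
//     }
//   }
//
// The final-removal check described above follows: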
+ if (ctor_fence->InputCount() == 0u) { + ctor_fence->GetBlock()->RemoveInstruction(ctor_fence); + } + } + } + + if (kIsDebugBuild) { + // Post-condition checks: + // * None of the uses of `instruction` are a constructor fence. + // * The `instruction` itself did not get removed from a block. + for (const HUseListNode<HInstruction*>& use_node : instruction->GetUses()) { + CHECK(!use_node.GetUser()->IsConstructorFence()); + } + CHECK(instruction->GetBlock() != nullptr); + } +} + #define DEFINE_ACCEPT(name, super) \ void H##name::Accept(HGraphVisitor* visitor) { \ visitor->Visit##name(this); \ @@ -1338,18 +1422,6 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) { } } -std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) { - switch (rhs) { - case HDeoptimize::Kind::kBCE: - return os << "bce"; - case HDeoptimize::Kind::kInline: - return os << "inline"; - default: - LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs); - UNREACHABLE(); - } -} - bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { return this == instruction->GetPreviousDisregardingMoves(); } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 8368026e92..b4da20b558 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -30,6 +30,7 @@ #include "base/transform_array_ref.h" #include "dex_file.h" #include "dex_file_types.h" +#include "deoptimization_kind.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" @@ -46,6 +47,7 @@ namespace art { class GraphChecker; class HBasicBlock; +class HConstructorFence; class HCurrentMethod; class HDoubleConstant; class HEnvironment; @@ -57,6 +59,7 @@ class HIntConstant; class HInvoke; class HLongConstant; class HNullConstant; +class HParameterValue; class HPhi; class HSuspendCheck; class HTryBoundary; @@ -537,6 +540,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return method_idx_; } + // Get the method name (without the signature), e.g. "<init>" + const char* GetMethodName() const; + + // Get the pretty method name (class + name + optionally signature). + std::string PrettyMethod(bool with_signature = true) const; + InvokeType GetInvokeType() const { return invoke_type_; } @@ -1297,6 +1306,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(ClearException, Instruction) \ M(ClinitCheck, Instruction) \ M(Compare, BinaryOperation) \ + M(ConstructorFence, Instruction) \ M(CurrentMethod, Instruction) \ M(ShouldDeoptimizeFlag, Instruction) \ M(Deoptimize, Instruction) \ @@ -1476,8 +1486,11 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) template <typename T> class HUseListNode : public ArenaObject<kArenaAllocUseListNode> { public: + // Get the instruction which has this use as one of the inputs. T GetUser() const { return user_; } + // Get the position of the input record that this use corresponds to. size_t GetIndex() const { return index_; } + // Set the position of the input record that this use corresponds to. void SetIndex(size_t index) { index_ = index; } // Hook for the IntrusiveForwardList<>. @@ -2037,7 +2050,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { !IsNativeDebugInfo() && !IsParameterValue() && // If we added an explicit barrier then we should keep it. 
- !IsMemoryBarrier(); + !IsMemoryBarrier() && + !IsConstructorFence(); } bool IsDeadAndRemovable() const { @@ -2431,6 +2445,11 @@ class HVariableInputSizeInstruction : public HInstruction { void InsertInputAt(size_t index, HInstruction* input); void RemoveInputAt(size_t index); + // Removes all the inputs. + // Also removes this instruction from each input's use list + // (for non-environment uses only). + void RemoveAllInputs(); + protected: HVariableInputSizeInstruction(SideEffects side_effects, uint32_t dex_pc, @@ -2973,15 +2992,9 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { // Deoptimize to interpreter, upon checking a condition. class HDeoptimize FINAL : public HVariableInputSizeInstruction { public: - enum class Kind { - kBCE, - kInline, - kLast = kInline - }; - // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move // across. - HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc) + HDeoptimize(ArenaAllocator* arena, HInstruction* cond, DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( SideEffects::All(), dex_pc, @@ -3001,7 +3014,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { HDeoptimize(ArenaAllocator* arena, HInstruction* cond, HInstruction* guard, - Kind kind, + DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( SideEffects::CanTriggerGC(), @@ -3025,7 +3038,7 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { bool CanThrow() const OVERRIDE { return true; } - Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); } + DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); } Primitive::Type GetType() const OVERRIDE { return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid; @@ -3050,18 +3063,17 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits; static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1; static constexpr size_t kFieldDeoptimizeKindSize = - MinimumBitsToStore(static_cast<size_t>(Kind::kLast)); + MinimumBitsToStore(static_cast<size_t>(DeoptimizationKind::kLast)); static constexpr size_t kNumberOfDeoptimizePackedBits = kFieldDeoptimizeKind + kFieldDeoptimizeKindSize; static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; + using DeoptimizeKindField = + BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; -std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs); - // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. // The compiled code checks this flag value in a guard before devirtualized call and // if it's true, starts to do deoptimization.
@@ -5069,7 +5081,7 @@ class HParameterValue FINAL : public HExpression<0> { const DexFile& GetDexFile() const { return dex_file_; } dex::TypeIndex GetTypeIndex() const { return type_index_; } uint8_t GetIndex() const { return index_; } - bool IsThis() const ATTRIBUTE_UNUSED { return GetPackedFlag<kFlagIsThis>(); } + bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); } bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); } void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); } @@ -5377,10 +5389,16 @@ class HArrayGet FINAL : public HExpression<2> { } bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE { // TODO: We can be smarter here. - // Currently, the array access is always preceded by an ArrayLength or a NullCheck - // which generates the implicit null check. There are cases when these can be removed - // to produce better code. If we ever add optimizations to do so we should allow an - // implicit check here (as long as the address falls in the first page). + // Currently, unless the array is the result of NewArray, the array access is always + // preceded by some form of null check necessary for the bounds check, usually an + // implicit null check on the ArrayLength input to BoundsCheck or Deoptimize for + // dynamic BCE. There are cases when these could be removed to produce better code. + // If we ever add optimizations to do so we should allow an implicit check here + // (as long as the address falls in the first page). + // + // As an example of such fancy optimization, we could eliminate BoundsCheck for + // a = cond ? new int[1] : null; + // a[0]; // The Phi does not need bounds check for either input. return false; } @@ -6501,6 +6519,137 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier); }; +// A constructor fence orders all prior stores to fields that could be accessed via a final field of +// the specified object(s), with respect to any subsequent store that might "publish" +// (i.e. make visible) the specified object to another thread. +// +// JLS 17.5.1 "Semantics of final fields" states that a freeze action happens +// for all final fields (that were set) at the end of the invoked constructor. +// +// The constructor fence models the freeze actions for the final fields of an object +// being constructed (semantically at the end of the constructor). Constructor fences +// have a per-object affinity; two separate objects being constructed get two separate +// constructor fences. +// +// (Note that if calling a super-constructor or forwarding to another constructor, +// the freezes would happen at the end of *that* constructor being invoked). +// +// The memory model guarantees that when the object being constructed is "published" after +// constructor completion (i.e. escapes the current thread via a store), then any final field +// writes must be observable on other threads (once they observe that publication). +// +// Further, anything written before the freeze, and read by dereferencing through the final field, +// must also be visible (so a final object field could itself reference an object with non-final +// fields; yet the freeze must also extend to them).
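+//
+// Illustrative sketch (an editorial addition, not part of the original comment): the freeze
+// on a final array field also covers element writes made before the constructor finished:
+//
+//   class Holder {                  // hypothetical example class
+//     final int[] data;
+//     Holder() {
+//       int[] a = new int[1];
+//       a[0] = 42;                  // write before the freeze
+//       data = a;                   // final field store; the freeze extends to a[0]
+//     }
+//   }
+//
+// A thread that observes a published Holder and reads data[0] through the final field
+// must therefore observe the value 42.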
+// +// Constructor example: +// +// class HasFinal { +// final int field; Optimizing IR for <init>()V: +// HasFinal() { +// field = 123; HInstanceFieldSet(this, HasFinal.field, 123) +// // freeze(this.field); HConstructorFence(this) +// } HReturn +// } +// +// HConstructorFence can serve double duty as a fence for new-instance/new-array allocations of +// already-initialized classes; in that case the allocation must act as a "default-initializer" +// of the object, which effectively writes the class pointer "final field". +// +// For example, we can model default-initialization as roughly the equivalent of the following: +// +// class Object { +// private final Class header; +// } +// +// Java code: Optimizing IR: +// +// T new_instance<T>() { +// Object obj = allocate_memory(T.class.size); obj = HInvoke(art_quick_alloc_object, T) +// obj.header = T.class; // header write is done by above call. +// // freeze(obj.header) HConstructorFence(obj) +// return (T)obj; +// } +// +// See also: +// * CompilerDriver::RequiresConstructorBarrier +// * QuasiAtomic::ThreadFenceForConstructor +// +class HConstructorFence FINAL : public HVariableInputSizeInstruction { + // A fence has variable inputs because the inputs can be removed + // after the prepare_for_register_allocation phase. + // (TODO: In the future a fence could freeze multiple objects + // after merging two fences together.) + public: + // `fence_object` is the reference that needs to be protected for correct publication. + // + // It makes sense in the following situations: + // * For <init> constructors, it's the "this" parameter (i.e. HParameterValue, s.t. IsThis() == true). + // * For new-instance-like instructions, it's the return value (i.e. HNewInstance). + // + // After construction the `fence_object` becomes the 0th input. + // This is not an input in a real sense, but just a convenient place to stash the information + // about the associated object. + HConstructorFence(HInstruction* fence_object, + uint32_t dex_pc, + ArenaAllocator* arena) + // We strongly suspect there is not a more accurate way to describe the fine-grained reordering + // constraints described in the class header. We claim that these SideEffects constraints + // enforce a superset of the real constraints. + // + // The ordering described above is conservatively modeled with SideEffects as follows: + // + // * To prevent reordering of the publication stores: + // ----> "Reads of objects" is the initial SideEffect. + // * For every primitive final field store in the constructor: + // ----> Union that field's type as a read (e.g. "Read of T") into the SideEffect. + // * If there are any stores to reference final fields in the constructor: + // ----> Use a more conservative "AllReads" SideEffect because any stores to any references + // that are reachable from `fence_object` also need to be prevented for reordering + // (and we do not want to do alias analysis to figure out what those stores are). + // + // In the implementation, this initially starts out as an "all reads" side effect; this is an + // even more conservative approach than the one described above, and prevents all of the + // above reordering without analyzing any of the instructions in the constructor. + // + // If in a later phase we discover that there are no writes to reference final fields, + // we can refine the side effect to a smaller set of type reads (see above constraints).
+ : HVariableInputSizeInstruction(SideEffects::AllReads(), + dex_pc, + arena, + /* number_of_inputs */ 1, + kArenaAllocConstructorFenceInputs) { + DCHECK(fence_object != nullptr); + SetRawInputAt(0, fence_object); + } + + // The object associated with this constructor fence. + // + // (Note: This will be null after the prepare_for_register_allocation phase, + // as all constructor fence inputs are removed there). + HInstruction* GetFenceObject() const { + return InputAt(0); + } + + // Find all the HConstructorFence uses (`fence_use`) of `instruction` and: + // - Delete `fence_use` from `instruction`'s use list. + // - Delete `instruction` from `fence_use`'s inputs list. + // - If the `fence_use` is dead, remove it from the graph. + // + // A fence is considered dead once it no longer has any uses + // and all of the inputs are dead. + // + // This must *not* be called during/after prepare_for_register_allocation, + // because that removes all the inputs to the fences but the fence is actually + // still considered live. + static void RemoveConstructorFences(HInstruction* instruction); + + DECLARE_INSTRUCTION(ConstructorFence); + + private: + DISALLOW_COPY_AND_ASSIGN(HConstructorFence); +}; + class HMonitorOperation FINAL : public HTemplateInstruction<1> { public: enum class OperationKind { diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index fb9dfb7afa..52c247b52f 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -98,7 +98,7 @@ class HVecOperation : public HVariableInputSizeInstruction { DECLARE_ABSTRACT_INSTRUCTION(VecOperation); - private: + protected: // Additional packed bits. static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeSize = @@ -107,6 +107,7 @@ class HVecOperation : public HVariableInputSizeInstruction { static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>; + private: const size_t vector_length_; DISALLOW_COPY_AND_ASSIGN(HVecOperation); @@ -191,6 +192,24 @@ class HVecMemoryOperation : public HVecOperation { DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation); }; +// Packed type consistency checker (same vector length integral types may mix freely). +inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) { + DCHECK(input->IsVecOperation()); + Primitive::Type input_type = input->AsVecOperation()->GetPackedType(); + switch (input_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + return type == Primitive::kPrimBoolean || + type == Primitive::kPrimByte; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + return type == Primitive::kPrimChar || + type == Primitive::kPrimShort; + default: + return type == input_type; + } +} + // // Definitions of concrete unary vector operations in HIR.
// @@ -221,8 +240,7 @@ class HVecSumReduce FINAL : public HVecUnaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { - DCHECK(input->IsVecOperation()); - DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(input, packed_type)); } // TODO: probably integral promotion @@ -244,7 +262,7 @@ class HVecCnv FINAL : public HVecUnaryOperation { uint32_t dex_pc = kNoDexPc) : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); - DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type); // actual convert + DCHECK_NE(GetInputType(), GetResultType()); // actual convert } Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); } @@ -266,8 +284,7 @@ class HVecNeg FINAL : public HVecUnaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { - DCHECK(input->IsVecOperation()); - DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(input, packed_type)); } DECLARE_INSTRUCTION(VecNeg); private: @@ -284,8 +301,7 @@ class HVecAbs FINAL : public HVecUnaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { - DCHECK(input->IsVecOperation()); - DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(input, packed_type)); } DECLARE_INSTRUCTION(VecAbs); private: @@ -325,9 +341,8 @@ class HVecAdd FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); } DECLARE_INSTRUCTION(VecAdd); private: @@ -348,22 +363,24 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { bool is_unsigned, bool is_rounded, uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc), - is_unsigned_(is_unsigned), - is_rounded_(is_rounded) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); + SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned); + SetPackedFlag<kFieldHAddIsRounded>(is_rounded); } - bool IsUnsigned() const { return is_unsigned_; } - bool IsRounded() const { return is_rounded_; } + bool IsUnsigned() const { return GetPackedFlag<kFieldHAddIsUnsigned>(); } + bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); } DECLARE_INSTRUCTION(VecHalvingAdd); private: - bool is_unsigned_; - bool is_rounded_; + // Additional packed bits. 
+ static constexpr size_t kFieldHAddIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kFieldHAddIsRounded = kFieldHAddIsUnsigned + 1; + static constexpr size_t kNumberOfHAddPackedBits = kFieldHAddIsRounded + 1; + static_assert(kNumberOfHAddPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd); }; @@ -379,9 +396,8 @@ class HVecSub FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); } DECLARE_INSTRUCTION(VecSub); private: @@ -399,9 +415,8 @@ class HVecMul FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); } DECLARE_INSTRUCTION(VecMul); private: @@ -419,9 +434,8 @@ class HVecDiv FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); } DECLARE_INSTRUCTION(VecDiv); private: @@ -439,9 +453,8 @@ class HVecMin FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); } DECLARE_INSTRUCTION(VecMin); private: @@ -459,9 +472,8 @@ class HVecMax FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation() && right->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); + DCHECK(HasConsistentPackedTypes(right, packed_type)); } DECLARE_INSTRUCTION(VecMax); private: @@ -551,8 +563,7 @@ class HVecShl FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); } DECLARE_INSTRUCTION(VecShl); private: @@ -570,8 +581,7 @@ class HVecShr 
FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); } DECLARE_INSTRUCTION(VecShr); private: @@ -589,8 +599,7 @@ class HVecUShr FINAL : public HVecBinaryOperation { size_t vector_length, uint32_t dex_pc = kNoDexPc) : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { - DCHECK(left->IsVecOperation()); - DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(left, packed_type)); } DECLARE_INSTRUCTION(VecUShr); private: @@ -646,12 +655,9 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { dex_pc), op_kind_(op) { DCHECK(op == InstructionKind::kAdd || op == InstructionKind::kSub); - DCHECK(accumulator->IsVecOperation()); - DCHECK(mul_left->IsVecOperation() && mul_right->IsVecOperation()); - DCHECK_EQ(accumulator->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(mul_left->AsVecOperation()->GetPackedType(), packed_type); - DCHECK_EQ(mul_right->AsVecOperation()->GetPackedType(), packed_type); - + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); + DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); SetRawInputAt(kInputAccumulatorIndex, accumulator); SetRawInputAt(kInputMulLeftIndex, mul_left); SetRawInputAt(kInputMulRightIndex, mul_right); @@ -687,6 +693,7 @@ class HVecLoad FINAL : public HVecMemoryOperation { HInstruction* index, Primitive::Type packed_type, size_t vector_length, + bool is_string_char_at, uint32_t dex_pc = kNoDexPc) : HVecMemoryOperation(arena, packed_type, @@ -696,9 +703,18 @@ class HVecLoad FINAL : public HVecMemoryOperation { dex_pc) { SetRawInputAt(0, base); SetRawInputAt(1, index); + SetPackedFlag<kFieldIsStringCharAt>(is_string_char_at); } DECLARE_INSTRUCTION(VecLoad); + + bool IsStringCharAt() const { return GetPackedFlag<kFieldIsStringCharAt>(); } + private: + // Additional packed bits. + static constexpr size_t kFieldIsStringCharAt = HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfVecLoadPackedBits = kFieldIsStringCharAt + 1; + static_assert(kNumberOfVecLoadPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + DISALLOW_COPY_AND_ASSIGN(HVecLoad); }; @@ -719,8 +735,7 @@ class HVecStore FINAL : public HVecMemoryOperation { /* number_of_inputs */ 3, vector_length, dex_pc) { - DCHECK(value->IsVecOperation()); - DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type); + DCHECK(HasConsistentPackedTypes(value, packed_type)); SetRawInputAt(0, base); SetRawInputAt(1, index); SetRawInputAt(2, value); diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index 66bfea9860..c3c141bff7 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -167,6 +167,13 @@ void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) { } } +void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* constructor_fence) { + // Delete all the inputs to the constructor fence; + // they aren't used by the InstructionCodeGenerator and this lets us avoid creating a + // LocationSummary in the LocationsBuilder. 
+ constructor_fence->RemoveAllInputs(); +} + void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStaticWithExplicitClinitCheck()) { HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass(); diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 7ffbe44ef6..395d4ba2ee 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -43,6 +43,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitArraySet(HArraySet* instruction) OVERRIDE; void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; + void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index a1016d1d47..029eb4ba61 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -190,7 +190,7 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); block->AddInstruction(ae); HInstruction* deoptimize = - new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u); + new(&allocator_) HDeoptimize(&allocator_, ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, /* number_of_vregs */ 5, diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h index 0ed8a35338..0f24e81be2 100644 --- a/compiler/utils/arm/assembler_arm.h +++ b/compiler/utils/arm/assembler_arm.h @@ -652,6 +652,9 @@ class ArmAssembler : public Assembler { virtual void blx(Register rm, Condition cond = AL) = 0; virtual void bx(Register rm, Condition cond = AL) = 0; + // ADR instruction loading register for branching to the label. + virtual void AdrCode(Register rt, Label* label) = 0; + // Memory barriers. virtual void dmb(DmbOptions flavor) = 0; diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc index 1e71d06b49..d7096b3c87 100644 --- a/compiler/utils/arm/assembler_thumb2.cc +++ b/compiler/utils/arm/assembler_thumb2.cc @@ -214,14 +214,14 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { DCHECK_GE(dest_end, src_end); for (auto i = fixups_.rbegin(), end = fixups_.rend(); i != end; ++i) { Fixup* fixup = &*i; + size_t old_fixup_location = fixup->GetLocation(); if (fixup->GetOriginalSize() == fixup->GetSize()) { // The size of this Fixup didn't change. To avoid moving the data // in small chunks, emit the code to its original position. - fixup->Emit(&buffer_, adjusted_code_size); fixup->Finalize(dest_end - src_end); + fixup->Emit(old_fixup_location, &buffer_, adjusted_code_size); } else { // Move the data between the end of the fixup and src_end to its final location. 
- size_t old_fixup_location = fixup->GetLocation(); size_t src_begin = old_fixup_location + fixup->GetOriginalSizeInBytes(); size_t data_size = src_end - src_begin; size_t dest_begin = dest_end - data_size; @@ -230,7 +230,7 @@ void Thumb2Assembler::EmitFixups(uint32_t adjusted_code_size) { dest_end = dest_begin - fixup->GetSizeInBytes(); // Finalize the Fixup and emit the data to the new location. fixup->Finalize(dest_end - src_end); - fixup->Emit(&buffer_, adjusted_code_size); + fixup->Emit(fixup->GetLocation(), &buffer_, adjusted_code_size); } } CHECK_EQ(src_end, dest_end); @@ -1895,6 +1895,9 @@ inline size_t Thumb2Assembler::Fixup::SizeInBytes(Size size) { case kCbxz48Bit: return 6u; + case kCodeAddr4KiB: + return 4u; + case kLiteral1KiB: return 2u; case kLiteral4KiB: @@ -1973,6 +1976,15 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con diff -= 2; // Extra CMP Rn, #0, 16-bit. break; + case kCodeAddr4KiB: + // The ADR instruction rounds down the PC+4 to a multiple of 4, so if the PC + // isn't a multiple of 2, we need to adjust. + DCHECK_ALIGNED(diff, 2); + diff += location_ & 2; + // Add the Thumb mode bit. + diff += 1; + break; + case kLiteral1KiB: case kLiteral4KiB: case kLongOrFPLiteral1KiB: @@ -1987,8 +1999,8 @@ inline int32_t Thumb2Assembler::Fixup::GetOffset(uint32_t current_code_size) con diff = diff + (diff & 2); DCHECK_GE(diff, 0); break; - case kLiteral1MiB: case kLiteral64KiB: + case kLiteral1MiB: case kLongOrFPLiteral64KiB: case kLiteralAddr64KiB: DCHECK_GE(diff, 4); // The target must be at least 4 bytes after the ADD rX, PC. @@ -2041,6 +2053,10 @@ bool Thumb2Assembler::Fixup::IsCandidateForEmitEarly() const { // We don't support conditional branches beyond +-1MiB. return true; + case kCodeAddr4KiB: + // ADR uses the aligned PC and as such the offset cannot be calculated early. + return false; + case kLiteral1KiB: case kLiteral4KiB: case kLiteral64KiB: @@ -2087,6 +2103,10 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) // We don't support conditional branches beyond +-1MiB. break; + case kCodeAddr4KiB: + // We don't support Code address ADR beyond +4KiB. 
+ break; + case kLiteral1KiB: DCHECK(!IsHighRegister(rn_)); if (IsUint<10>(GetOffset(current_code_size))) { @@ -2159,13 +2179,15 @@ uint32_t Thumb2Assembler::Fixup::AdjustSizeIfNeeded(uint32_t current_code_size) return current_code_size - old_code_size; } -void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) const { +void Thumb2Assembler::Fixup::Emit(uint32_t emit_location, + AssemblerBuffer* buffer, + uint32_t code_size) const { switch (GetSize()) { case kBranch16Bit: { DCHECK(type_ == kUnconditional || type_ == kConditional); DCHECK_EQ(type_ == kConditional, cond_ != AL); int16_t encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kBranch32Bit: { @@ -2180,15 +2202,15 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK_NE(encoding & B12, 0); encoding ^= B14 | B12; } - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kCbxz16Bit: { DCHECK(type_ == kCompareAndBranchXZero); int16_t encoding = CbxzEncoding16(rn_, GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kCbxz32Bit: { @@ -2196,8 +2218,8 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int16_t b_encoding = BEncoding16(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2, b_encoding); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2, b_encoding); break; } case kCbxz48Bit: { @@ -2205,24 +2227,32 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c DCHECK(cond_ == EQ || cond_ == NE); int16_t cmp_encoding = CmpRnImm8Encoding16(rn_, 0); int32_t b_encoding = BEncoding32(GetOffset(code_size), cond_); - buffer->Store<int16_t>(location_, cmp_encoding); - buffer->Store<int16_t>(location_ + 2u, b_encoding >> 16); - buffer->Store<int16_t>(location_ + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, cmp_encoding); + buffer->Store<int16_t>(emit_location + 2u, b_encoding >> 16); + buffer->Store<int16_t>(emit_location + 4u, static_cast<int16_t>(b_encoding & 0xffff)); + break; + } + + case kCodeAddr4KiB: { + DCHECK(type_ == kLoadCodeAddr); + int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral1KiB: { DCHECK(type_ == kLoadLiteralNarrow); int16_t encoding = LdrLitEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteral4KiB: { DCHECK(type_ == kLoadLiteralNarrow); // GetOffset() uses PC+4 but load literal uses AlignDown(PC+4, 4). Adjust offset accordingly. 
int32_t encoding = LdrLitEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteral64KiB: { @@ -2242,11 +2272,11 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t mov_encoding = MovModImmEncoding32(rn_, offset & ~0xfff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, offset & 0xfff); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralFar: { @@ -2256,36 +2286,36 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); int32_t ldr_encoding = LdrRtRnImm12Encoding(rn_, rn_, 0); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLiteralAddr1KiB: { DCHECK(type_ == kLoadLiteralAddr); int16_t encoding = AdrEncoding16(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding); + buffer->Store<int16_t>(emit_location, encoding); break; } case kLiteralAddr4KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t encoding = AdrEncoding32(rn_, GetOffset(code_size)); - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLiteralAddr64KiB: { DCHECK(type_ == kLoadLiteralAddr); int32_t mov_encoding = MovwEncoding32(rn_, GetOffset(code_size)); int16_t add_pc_encoding = 
AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); break; } case kLiteralAddrFar: { @@ -2294,29 +2324,29 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movw_encoding = MovwEncoding32(rn_, offset & 0xffff); int32_t movt_encoding = MovtEncoding32(rn_, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(rn_, PC); - buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); break; } case kLongOrFPLiteral1KiB: { int32_t encoding = LoadWideOrFpEncoding(PC, GetOffset(code_size)); // DCHECKs type_. - buffer->Store<int16_t>(location_, encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(encoding & 0xffff)); break; } case kLongOrFPLiteral64KiB: { int32_t mov_encoding = MovwEncoding32(IP, GetOffset(code_size)); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0u); // DCHECKs type_. - buffer->Store<int16_t>(location_, mov_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 6u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, mov_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(mov_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 6u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 8u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } case kLongOrFPLiteralFar: { @@ -2325,13 +2355,13 @@ void Thumb2Assembler::Fixup::Emit(AssemblerBuffer* buffer, uint32_t code_size) c int32_t movt_encoding = MovtEncoding32(IP, offset & ~0xffff); int16_t add_pc_encoding = AddRdnRmEncoding16(IP, PC); int32_t ldr_encoding = LoadWideOrFpEncoding(IP, 0); // DCHECKs type_. 
- buffer->Store<int16_t>(location_, movw_encoding >> 16); - buffer->Store<int16_t>(location_ + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 4u, movt_encoding >> 16); - buffer->Store<int16_t>(location_ + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); - buffer->Store<int16_t>(location_ + 8u, add_pc_encoding); - buffer->Store<int16_t>(location_ + 10u, ldr_encoding >> 16); - buffer->Store<int16_t>(location_ + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location, movw_encoding >> 16); + buffer->Store<int16_t>(emit_location + 2u, static_cast<int16_t>(movw_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 4u, movt_encoding >> 16); + buffer->Store<int16_t>(emit_location + 6u, static_cast<int16_t>(movt_encoding & 0xffff)); + buffer->Store<int16_t>(emit_location + 8u, add_pc_encoding); + buffer->Store<int16_t>(emit_location + 10u, ldr_encoding >> 16); + buffer->Store<int16_t>(emit_location + 12u, static_cast<int16_t>(ldr_encoding & 0xffff)); break; } } @@ -3331,6 +3361,19 @@ void Thumb2Assembler::bx(Register rm, Condition cond) { } +void Thumb2Assembler::AdrCode(Register rt, Label* label) { + uint32_t pc = buffer_.Size(); + FixupId branch_id = AddFixup(Fixup::LoadCodeAddress(pc, rt)); + CHECK(!label->IsBound()); + // ADR target must be an unbound label. Add it to a singly-linked list maintained within + // the code with the label serving as the head. + Emit16(static_cast<uint16_t>(label->position_)); + label->LinkTo(branch_id); + Emit16(0); + DCHECK_EQ(buffer_.Size() - pc, GetFixup(branch_id)->GetSizeInBytes()); +} + + void Thumb2Assembler::Push(Register rd, Condition cond) { str(rd, Address(SP, -kRegisterSize, Address::PreIndex), cond); } @@ -3405,7 +3448,7 @@ void Thumb2Assembler::Bind(Label* label) { break; } } - last_fixup.Emit(&buffer_, buffer_.Size()); + last_fixup.Emit(last_fixup.GetLocation(), &buffer_, buffer_.Size()); fixups_.pop_back(); } } diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h index 1c495aa7a7..5c36110cf6 100644 --- a/compiler/utils/arm/assembler_thumb2.h +++ b/compiler/utils/arm/assembler_thumb2.h @@ -268,6 +268,9 @@ class Thumb2Assembler FINAL : public ArmAssembler { void blx(Register rm, Condition cond = AL) OVERRIDE; void bx(Register rm, Condition cond = AL) OVERRIDE; + // ADR instruction loading register for branching to the label, including the Thumb mode bit. + void AdrCode(Register rt, Label* label) OVERRIDE; + virtual void Lsl(Register rd, Register rm, uint32_t shift_imm, Condition cond = AL, SetCc set_cc = kCcDontCare) OVERRIDE; virtual void Lsr(Register rd, Register rm, uint32_t shift_imm, @@ -377,6 +380,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { force_32bit_ = true; } + void Allow16Bit() { + force_32bit_ = false; + } + // Emit an ADR (or a sequence of instructions) to load the jump table address into base_reg. This // will generate a fixup. JumpTable* CreateJumpTable(std::vector<Label*>&& labels, Register base_reg) OVERRIDE; @@ -422,6 +429,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { kUnconditionalLink, // BL. kUnconditionalLinkX, // BLX. kCompareAndBranchXZero, // cbz/cbnz. + kLoadCodeAddr, // Get address of a code label, used for Baker read barriers. kLoadLiteralNarrow, // Load narrow integer literal. kLoadLiteralWide, // Load wide integer literal. kLoadLiteralAddr, // Load address of literal (used for jump table).
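For readers following the new kCodeAddr4KiB machinery, the offset arithmetic is the subtle part. The sketch below is an editorial illustration, not ART code (the helper name and parameters are invented); it mirrors what Fixup::GetOffset() does for this fixup type: the ADR base is AlignDown(PC, 4), where PC reads as the instruction address plus 4, and the resulting address carries the Thumb mode bit.

    #include <cassert>
    #include <cstdint>

    // Offset a 32-bit Thumb2 ADR must encode so that the destination register
    // receives the address of `target` (a code label after the ADR), tagged
    // with the Thumb mode bit.
    int32_t AdrCodeOffset(uint32_t location, uint32_t target) {
      int32_t diff = static_cast<int32_t>(target) - static_cast<int32_t>(location) - 4;
      assert(diff % 2 == 0);                 // Thumb2 locations are 2-byte aligned.
      diff += location & 2;                  // AlignDown(PC, 4) loses 2 when PC is not 4-aligned.
      diff += 1;                             // Tag the result with the Thumb mode bit.
      assert(0 < diff && diff < (1 << 12));  // kCodeAddr4KiB: no multi-instruction expansion.
      return diff;
    }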
@@ -442,6 +450,10 @@ class Thumb2Assembler FINAL : public ArmAssembler { kCbxz32Bit, // CMP rX, #0 + Bcc label; X < 8; 16-bit Bcc; +-8-bit offset. kCbxz48Bit, // CMP rX, #0 + Bcc label; X < 8; 32-bit Bcc; up to +-1MiB offset. + // ADR variants. + kCodeAddr4KiB, // ADR rX, <label>; label must be after the ADR but within 4KiB range. + // Multi-instruction expansion is not supported. + // Load integer literal variants. // LDR rX, label; X < 8; 16-bit variant up to 1KiB offset; 2 bytes. kLiteral1KiB, @@ -492,6 +504,12 @@ class Thumb2Assembler FINAL : public ArmAssembler { cond, kCompareAndBranchXZero, kCbxz16Bit, location); } + // Code address. + static Fixup LoadCodeAddress(uint32_t location, Register rt) { + return Fixup(rt, kNoRegister, kNoSRegister, kNoDRegister, + AL, kLoadCodeAddr, kCodeAddr4KiB, location); + } + // Load narrow literal. static Fixup LoadNarrowLiteral(uint32_t location, Register rt, Size size) { DCHECK(size == kLiteral1KiB || size == kLiteral4KiB || size == kLiteral64KiB || @@ -550,6 +568,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { switch (GetOriginalSize()) { case kBranch32Bit: case kCbxz48Bit: + case kCodeAddr4KiB: case kLiteralFar: case kLiteralAddrFar: case kLongOrFPLiteralFar: @@ -623,7 +642,7 @@ class Thumb2Assembler FINAL : public ArmAssembler { // Emit the branch instruction into the assembler buffer. This does the // encoding into the thumb instruction. - void Emit(AssemblerBuffer* buffer, uint32_t code_size) const; + void Emit(uint32_t emit_location, AssemblerBuffer* buffer, uint32_t code_size) const; private: Fixup(Register rn, Register rt2, SRegister sd, DRegister dd, @@ -903,6 +922,24 @@ class Thumb2Assembler FINAL : public ArmAssembler { FixupId last_fixup_id_; }; +class ScopedForce32Bit { + public: + explicit ScopedForce32Bit(Thumb2Assembler* assembler) + : assembler_(assembler), old_force_32bit_(assembler->IsForced32Bit()) { + assembler->Force32Bit(); + } + + ~ScopedForce32Bit() { + if (!old_force_32bit_) { + assembler_->Allow16Bit(); + } + } + + private: + Thumb2Assembler* const assembler_; + const bool old_force_32bit_; +}; + } // namespace arm } // namespace art diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 57223b52a3..f4afb33034 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -1356,6 +1356,106 @@ void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegiste EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12); } +void Mips64Assembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + 
CHECK(HasMsa()); + EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10); +} + +void Mips64Assembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) { + CHECK(HasMsa()); + EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10); +} + void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) { CHECK(HasMsa()); EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index 666c6935a1..6ac336178b 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -682,6 +682,26 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void 
Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt); + void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt); void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index f2e3b1610c..084ce6fa08 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -2668,6 +2668,106 @@ TEST_F(AssemblerMIPS64Test, Mod_uD) { "mod_u.d"); } +TEST_F(AssemblerMIPS64Test, Add_aB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"), + "add_a.b"); +} + +TEST_F(AssemblerMIPS64Test, Add_aH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"), + "add_a.h"); +} + +TEST_F(AssemblerMIPS64Test, Add_aW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"), + "add_a.w"); +} + +TEST_F(AssemblerMIPS64Test, Add_aD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"), + "add_a.d"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"), + "ave_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"), + "ave_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"), + "ave_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Ave_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"), + "ave_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"), + "ave_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"), + "ave_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"), + "ave_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Ave_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"), + "ave_u.d"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"), + "aver_s.b"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"), + "aver_s.h"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"), + "aver_s.w"); +} + +TEST_F(AssemblerMIPS64Test, Aver_sD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"), + 
"aver_s.d"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uB) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"), + "aver_u.b"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uH) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"), + "aver_u.h"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uW) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"), + "aver_u.w"); +} + +TEST_F(AssemblerMIPS64Test, Aver_uD) { + DriverStr(RepeatVVV(&mips64::Mips64Assembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"), + "aver_u.d"); +} + TEST_F(AssemblerMIPS64Test, FaddW) { DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), "fadd.w"); diff --git a/compiler/utils/swap_space.h b/compiler/utils/swap_space.h index c286b820fe..0ff9fc69ed 100644 --- a/compiler/utils/swap_space.h +++ b/compiler/utils/swap_space.h @@ -78,7 +78,7 @@ class SwapSpace { mutable FreeByStartSet::const_iterator free_by_start_entry; }; struct FreeBySizeComparator { - bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) { + bool operator()(const FreeBySizeEntry& lhs, const FreeBySizeEntry& rhs) const { if (lhs.size != rhs.size) { return lhs.size < rhs.size; } else { diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 5090c1192a..660409f6f9 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -268,20 +268,17 @@ NO_RETURN static void Usage(const char* fmt, ...) { UsageError(" Default: Optimizing"); UsageError(""); UsageError(" --compiler-filter=" - "(verify-none" - "|verify-at-runtime" - "|verify-profile" - "|interpret-only" - "|time" + "(assume-verified" + "|extract" + "|verify" + "|quicken" "|space-profile" "|space" - "|balanced" "|speed-profile" "|speed" "|everything-profile" "|everything):"); UsageError(" select compiler filter."); - UsageError(" verify-profile requires a --profile(-fd) to also be passed in."); UsageError(" Example: --compiler-filter=everything"); UsageError(" Default: speed"); UsageError(""); @@ -352,23 +349,23 @@ NO_RETURN static void Usage(const char* fmt, ...) 
{ UsageError(" --profile-file-fd=<number>: same as --profile-file but accepts a file descriptor."); UsageError(" Cannot be used together with --profile-file."); UsageError(""); - UsageError(" --swap-file=<file-name>: specifies a file to use for swap."); + UsageError(" --swap-file=<file-name>: specifies a file to use for swap."); UsageError(" Example: --swap-file=/data/tmp/swap.001"); UsageError(""); - UsageError(" --swap-fd=<file-descriptor>: specifies a file to use for swap (by descriptor)."); + UsageError(" --swap-fd=<file-descriptor>: specifies a file to use for swap (by descriptor)."); UsageError(" Example: --swap-fd=10"); UsageError(""); - UsageError(" --swap-dex-size-threshold=<size>: specifies the minimum total dex file size in"); + UsageError(" --swap-dex-size-threshold=<size>: specifies the minimum total dex file size in"); UsageError(" bytes to allow the use of swap."); UsageError(" Example: --swap-dex-size-threshold=1000000"); UsageError(" Default: %zu", kDefaultMinDexFileCumulativeSizeForSwap); UsageError(""); - UsageError(" --swap-dex-count-threshold=<count>: specifies the minimum number of dex files to"); + UsageError(" --swap-dex-count-threshold=<count>: specifies the minimum number of dex files to"); UsageError(" allow the use of swap."); UsageError(" Example: --swap-dex-count-threshold=10"); UsageError(" Default: %zu", kDefaultMinDexFilesForSwap); UsageError(""); - UsageError(" --very-large-app-threshold=<size>: specifies the minimum total dex file size in"); + UsageError(" --very-large-app-threshold=<size>: specifies the minimum total dex file size in"); UsageError(" bytes to consider the input \"very large\" and punt on the compilation."); UsageError(" Example: --very-large-app-threshold=100000000"); UsageError(""); @@ -383,6 +380,14 @@ NO_RETURN static void Usage(const char* fmt, ...) { UsageError(""); UsageError(" --force-determinism: force the compiler to emit a deterministic output."); UsageError(""); + UsageError(" --dump-cfg=<cfg-file>: dump control-flow graphs (CFGs) to specified file."); + UsageError(" Example: --dump-cfg=output.cfg"); + UsageError(""); + UsageError(" --dump-cfg-append: when dumping CFGs to an existing file, append new CFG data to"); + UsageError(" existing data (instead of overwriting existing data with new data, which is"); + UsageError(" the default behavior). 
This option is only meaningful when used with"); + UsageError(" --dump-cfg."); + UsageError(""); UsageError(" --classpath-dir=<directory-path>: directory used to resolve relative class paths."); UsageError(""); std::cerr << "See log for usage error information\n"; @@ -721,6 +726,10 @@ class Dex2Oat FINAL { Usage("Can't have both --input-vdex-fd and --input-vdex"); } + if (output_vdex_fd_ != -1 && !output_vdex_.empty()) { + Usage("Can't have both --output-vdex-fd and --output-vdex"); + } + if (!oat_filenames_.empty() && oat_fd_ != -1) { Usage("--oat-file should not be used with --oat-fd"); } @@ -1125,6 +1134,8 @@ class Dex2Oat FINAL { ParseInputVdexFd(option); } else if (option.starts_with("--input-vdex=")) { input_vdex_ = option.substr(strlen("--input-vdex=")).data(); + } else if (option.starts_with("--output-vdex=")) { + output_vdex_ = option.substr(strlen("--output-vdex=")).data(); } else if (option.starts_with("--output-vdex-fd=")) { ParseOutputVdexFd(option); } else if (option.starts_with("--oat-file=")) { @@ -1260,6 +1271,7 @@ class Dex2Oat FINAL { } // OAT and VDEX file handling + bool eagerly_unquicken_vdex = DoDexLayoutOptimizations(); if (oat_fd_ == -1) { DCHECK(!oat_filenames_.empty()); @@ -1281,12 +1293,15 @@ class Dex2Oat FINAL { input_vdex_file_ = VdexFile::Open(input_vdex_, /* writable */ false, /* low_4gb */ false, + eagerly_unquicken_vdex, &error_msg); } DCHECK_EQ(output_vdex_fd_, -1); - std::string vdex_filename = ReplaceFileExtension(oat_filename, "vdex"); - if (vdex_filename == input_vdex_) { + std::string vdex_filename = output_vdex_.empty() + ? ReplaceFileExtension(oat_filename, "vdex") + : output_vdex_; + if (vdex_filename == input_vdex_ && output_vdex_.empty()) { update_input_vdex_ = true; std::unique_ptr<File> vdex_file(OS::OpenFileReadWrite(vdex_filename.c_str())); vdex_files_.push_back(std::move(vdex_file)); @@ -1328,6 +1343,7 @@ class Dex2Oat FINAL { "vdex", /* writable */ false, /* low_4gb */ false, + eagerly_unquicken_vdex, &error_msg); // If there's any problem with the passed vdex, just warn and proceed // without it. @@ -1358,6 +1374,26 @@ class Dex2Oat FINAL { oat_filenames_.push_back(oat_location_.c_str()); } + // If we're updating a vdex file in place, be defensive and write an invalid vdex magic in case + // dex2oat gets killed. + // Note: we're only invalidating the magic data in the file, as dex2oat needs the rest of + // the information to remain valid. + if (update_input_vdex_) { + std::unique_ptr<BufferedOutputStream> vdex_out(MakeUnique<BufferedOutputStream>( + MakeUnique<FileOutputStream>(vdex_files_.back().get()))); + if (!vdex_out->WriteFully(&VdexFile::Header::kVdexInvalidMagic, + arraysize(VdexFile::Header::kVdexInvalidMagic))) { + PLOG(ERROR) << "Failed to invalidate vdex header. File: " << vdex_out->GetLocation(); + return false; + } + + if (!vdex_out->Flush()) { + PLOG(ERROR) << "Failed to flush stream after invalidating header of vdex file." + << " File: " << vdex_out->GetLocation(); + return false; + } + } + // Swap file handling // // If the swap fd is not -1, we assume this is the file descriptor of an open but unlinked file @@ -1570,14 +1606,14 @@ class Dex2Oat FINAL { // If we need to downgrade the compiler-filter for size reasons, do that check now.
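// For example, per the dex2oat_test expectations later in this change: quicken and speed are downgraded to extract for very large apps, while assume-verified and extract are already no stronger than extract and pass through unchanged.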
if (!IsBootImage() && IsVeryLarge(dex_files_)) { - if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime, + if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kExtract, compiler_options_->GetCompilerFilter())) { - LOG(INFO) << "Very large app, downgrading to verify-at-runtime."; + LOG(INFO) << "Very large app, downgrading to extract."; // Note: this change won't be reflected in the key-value store, as that had to be // finalized before loading the dex files. This setup is currently required // to get the size from the DexFile objects. // TODO: refactor. b/29790079 - compiler_options_->SetCompilerFilter(CompilerFilter::kVerifyAtRuntime); + compiler_options_->SetCompilerFilter(CompilerFilter::kExtract); } } @@ -2085,17 +2121,13 @@ class Dex2Oat FINAL { } bool DoProfileGuidedOptimizations() const { - return UseProfile() && compiler_options_->GetCompilerFilter() != CompilerFilter::kVerifyProfile; + return UseProfile(); } bool DoDexLayoutOptimizations() const { return DoProfileGuidedOptimizations(); } - bool HasInputVdexFile() const { - return input_vdex_file_ != nullptr || input_vdex_fd_ != -1 || !input_vdex_.empty(); - } - bool LoadProfile() { DCHECK(UseProfile()); @@ -2151,16 +2183,6 @@ class Dex2Oat FINAL { return dex_files_size >= very_large_threshold_; } - template <typename T> - static std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) { - std::vector<T*> result; - result.reserve(src.size()); - for (const std::unique_ptr<T>& t : src) { - result.push_back(t.get()); - } - return result; - } - std::vector<std::string> GetClassPathLocations(const std::string& class_path) { // This function is used only for apps, and for an app we have exactly one oat file. DCHECK(!IsBootImage()); @@ -2412,6 +2434,8 @@ class Dex2Oat FINAL { if (!IsBootImage()) { raw_options.push_back(std::make_pair("-Xno-dex-file-fallback", nullptr)); } + // Never allow implicit image compilation. + raw_options.push_back(std::make_pair("-Xnoimage-dex2oat", nullptr)); // Disable libsigchain. We don't need it during compilation and it prevents us // from getting a statically linked version of dex2oat (because of dlsym and RTLD_NEXT). raw_options.push_back(std::make_pair("-Xno-sig-chain", nullptr)); @@ -2429,8 +2453,8 @@ class Dex2Oat FINAL { // which uses an unstarted runtime. raw_options.push_back(std::make_pair("-Xgc:nonconcurrent", nullptr)); - // Also force the free-list implementation for large objects. - raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=freelist", nullptr)); + // The default LOS implementation (map) is not deterministic, so disable it. + raw_options.push_back(std::make_pair("-XX:LargeObjectSpace=disabled", nullptr)); // We also need to turn off the nonmoving space. For that, we need to disable HSpace // compaction (done above) and ensure that neither foreground nor background collectors @@ -2693,6 +2717,7 @@ class Dex2Oat FINAL { int input_vdex_fd_; int output_vdex_fd_; std::string input_vdex_; + std::string output_vdex_; std::unique_ptr<VdexFile> input_vdex_file_; std::vector<const char*> dex_filenames_; std::vector<const char*> dex_locations_; @@ -2899,13 +2924,6 @@ static dex2oat::ReturnCode Dex2oat(int argc, char** argv) { } } - if (dex2oat->DoDexLayoutOptimizations()) { - if (dex2oat->HasInputVdexFile()) { - LOG(ERROR) << "Dexlayout is incompatible with an input VDEX"; - return dex2oat::ReturnCode::kOther; - } - } - art::MemMap::Init(); // For ZipEntry::ExtractToMemMap, and vdex.
// Check early that the result of compilation can be written diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index 8c14b50094..6420aa8759 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -161,7 +161,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { runtime->AddCurrentRuntimeFeaturesAsDex2OatArguments(&argv); if (!runtime->IsVerificationEnabled()) { - argv.push_back("--compiler-filter=verify-none"); + argv.push_back("--compiler-filter=assume-verified"); } if (runtime->MustRelocateIfPossible()) { @@ -430,6 +430,9 @@ class Dex2oatSwapUseTest : public Dex2oatSwapTest { }; TEST_F(Dex2oatSwapUseTest, CheckSwapUsage) { + // Native memory usage isn't correctly tracked under sanitization. + TEST_DISABLED_FOR_MEMORY_TOOL_ASAN(); + // The `native_alloc_2_ >= native_alloc_1_` assertion below may not // hold true on some x86 systems; disable this test while we // investigate (b/29259363). @@ -514,7 +517,7 @@ class Dex2oatVeryLargeTest : public Dex2oatTest { } // If the input filter was "below," it should have been used. - if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kVerifyAtRuntime, filter)) { + if (!CompilerFilter::IsAsGoodAs(CompilerFilter::kExtract, filter)) { EXPECT_EQ(odex_file->GetCompilerFilter(), filter); } } else { @@ -536,11 +539,11 @@ class Dex2oatVeryLargeTest : public Dex2oatTest { void CheckHostResult(bool expect_large) { if (!kIsTargetBuild) { if (expect_large) { - EXPECT_NE(output_.find("Very large app, downgrading to verify-at-runtime."), + EXPECT_NE(output_.find("Very large app, downgrading to extract."), std::string::npos) << output_; } else { - EXPECT_EQ(output_.find("Very large app, downgrading to verify-at-runtime."), + EXPECT_EQ(output_.find("Very large app, downgrading to extract."), std::string::npos) << output_; } @@ -567,21 +570,21 @@ class Dex2oatVeryLargeTest : public Dex2oatTest { }; TEST_F(Dex2oatVeryLargeTest, DontUseVeryLarge) { - RunTest(CompilerFilter::kVerifyNone, false); - RunTest(CompilerFilter::kVerifyAtRuntime, false); - RunTest(CompilerFilter::kInterpretOnly, false); + RunTest(CompilerFilter::kAssumeVerified, false); + RunTest(CompilerFilter::kExtract, false); + RunTest(CompilerFilter::kQuicken, false); RunTest(CompilerFilter::kSpeed, false); - RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=1000000" }); - RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=1000000" }); - RunTest(CompilerFilter::kInterpretOnly, false, { "--very-large-app-threshold=1000000" }); + RunTest(CompilerFilter::kAssumeVerified, false, { "--very-large-app-threshold=1000000" }); + RunTest(CompilerFilter::kExtract, false, { "--very-large-app-threshold=1000000" }); + RunTest(CompilerFilter::kQuicken, false, { "--very-large-app-threshold=1000000" }); RunTest(CompilerFilter::kSpeed, false, { "--very-large-app-threshold=1000000" }); } TEST_F(Dex2oatVeryLargeTest, UseVeryLarge) { - RunTest(CompilerFilter::kVerifyNone, false, { "--very-large-app-threshold=100" }); - RunTest(CompilerFilter::kVerifyAtRuntime, false, { "--very-large-app-threshold=100" }); - RunTest(CompilerFilter::kInterpretOnly, true, { "--very-large-app-threshold=100" }); + RunTest(CompilerFilter::kAssumeVerified, false, { "--very-large-app-threshold=100" }); + RunTest(CompilerFilter::kExtract, false, { "--very-large-app-threshold=100" }); + RunTest(CompilerFilter::kQuicken, true, { "--very-large-app-threshold=100" }); RunTest(CompilerFilter::kSpeed, true, { "--very-large-app-threshold=100" }); } @@ -736,12 +739,12 @@ class 
Dex2oatLayoutTest : public Dex2oatTest { /* use_fd */ true, /* num_profile_classes */ 1, { input_vdex, output_vdex }, - /* expect_success */ false); - EXPECT_EQ(vdex_file2.GetFile()->GetLength(), 0u); + /* expect_success */ true); + EXPECT_GT(vdex_file2.GetFile()->GetLength(), 0u); } ASSERT_EQ(vdex_file1->FlushCloseOrErase(), 0) << "Could not flush and close vdex file"; CheckValidity(); - ASSERT_FALSE(success_); + ASSERT_TRUE(success_); } void CheckResult(const std::string& dex_location, @@ -884,6 +887,7 @@ class Dex2oatReturnCodeTest : public Dex2oatTest { }; TEST_F(Dex2oatReturnCodeTest, TestCreateRuntime) { + TEST_DISABLED_FOR_MEMORY_TOOL(); // b/19100793 int status = RunTest({ "--boot-image=/this/does/not/exist/yolo.oat" }); EXPECT_EQ(static_cast<int>(dex2oat::ReturnCode::kCreateRuntime), WEXITSTATUS(status)) << output_; } diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp index 4b65c5299a..588a3ae3ca 100644 --- a/dexlayout/Android.bp +++ b/dexlayout/Android.bp @@ -20,7 +20,7 @@ art_cc_defaults { "dexlayout.cc", "dex_ir.cc", "dex_ir_builder.cc", - "dex_verify.cc", + "dex_verify.cc", "dex_visualize.cc", "dex_writer.cc", ], @@ -43,12 +43,14 @@ art_cc_library { art_cc_binary { name: "dexlayout", + defaults: ["art_defaults"], host_supported: true, srcs: ["dexlayout_main.cc"], cflags: ["-Wall"], shared_libs: [ "libart", "libart-dexlayout", + "libbase", ], } @@ -60,13 +62,28 @@ art_cc_test { art_cc_binary { name: "dexdiag", - host_supported: false, + defaults: ["art_defaults"], + host_supported: true, srcs: ["dexdiag.cc"], cflags: ["-Wall"], shared_libs: [ "libart", "libart-dexlayout", - "libpagemap", ], + target: { + android: { + shared_libs: [ + "libpagemap", + ] + }, + } } +art_cc_test { + name: "art_dexdiag_tests", + host_supported: true, + defaults: [ + "art_gtest_defaults", + ], + srcs: ["dexdiag_test.cc"], +} diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc index 6bd9da8194..cf453b9a16 100644 --- a/dexlayout/dex_ir.cc +++ b/dexlayout/dex_ir.cc @@ -281,6 +281,16 @@ void Collections::ReadEncodedValue( item->SetDouble(conv.d); break; } + case DexFile::kDexAnnotationMethodType: { + const uint32_t proto_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); + item->SetProtoId(GetProtoId(proto_index)); + break; + } + case DexFile::kDexAnnotationMethodHandle: { + const uint32_t method_handle_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); + item->SetMethodHandle(GetMethodHandle(method_handle_index)); + break; + } case DexFile::kDexAnnotationString: { const uint32_t string_index = static_cast<uint32_t>(ReadVarWidth(data, length, false)); item->SetStringId(GetStringId(string_index)); @@ -451,8 +461,8 @@ AnnotationItem* Collections::CreateAnnotationItem(const DexFile::AnnotationItem* } uint8_t visibility = annotation->visibility_; const uint8_t* annotation_data = annotation->annotation_; - EncodedValue* encoded_value = - ReadEncodedValue(&annotation_data, DexFile::kDexAnnotationAnnotation, 0); + std::unique_ptr<EncodedValue> encoded_value( + ReadEncodedValue(&annotation_data, DexFile::kDexAnnotationAnnotation, 0)); // TODO: Calculate the size of the annotation. 
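// Note: the unique_ptr owns only the temporary EncodedValue wrapper; the encoded annotation itself is handed off to the AnnotationItem below via ReleaseEncodedAnnotation().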
AnnotationItem* annotation_item = new AnnotationItem(visibility, encoded_value->ReleaseEncodedAnnotation()); @@ -708,10 +718,12 @@ MethodItem* Collections::GenerateMethodItem(const DexFile& dex_file, ClassDataIt MethodId* method_item = GetMethodId(cdii.GetMemberIndex()); uint32_t access_flags = cdii.GetRawMemberAccessFlags(); const DexFile::CodeItem* disk_code_item = cdii.GetMethodCodeItem(); - CodeItem* code_item = nullptr; + CodeItem* code_item = code_items_.GetExistingObject(cdii.GetMethodCodeItemOffset()); DebugInfoItem* debug_info = nullptr; if (disk_code_item != nullptr) { - code_item = CreateCodeItem(dex_file, *disk_code_item, cdii.GetMethodCodeItemOffset()); + if (code_item == nullptr) { + code_item = CreateCodeItem(dex_file, *disk_code_item, cdii.GetMethodCodeItemOffset()); + } debug_info = code_item->DebugInfo(); } if (debug_info != nullptr) { @@ -764,6 +776,64 @@ ClassData* Collections::CreateClassData( return class_data; } +void Collections::CreateCallSitesAndMethodHandles(const DexFile& dex_file) { + // Iterate through the map list and set the offset of the CallSiteIds and MethodHandleItems. + const DexFile::MapList* map = + reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + MapListOffset()); + for (uint32_t i = 0; i < map->size_; ++i) { + const DexFile::MapItem* item = map->list_ + i; + switch (item->type_) { + case DexFile::kDexTypeCallSiteIdItem: + SetCallSiteIdsOffset(item->offset_); + break; + case DexFile::kDexTypeMethodHandleItem: + SetMethodHandleItemsOffset(item->offset_); + break; + default: + break; + } + } + // Populate MethodHandleItems first (CallSiteIds may depend on them). + for (uint32_t i = 0; i < dex_file.NumMethodHandles(); i++) { + CreateMethodHandleItem(dex_file, i); + } + // Populate CallSiteIds. + for (uint32_t i = 0; i < dex_file.NumCallSiteIds(); i++) { + CreateCallSiteId(dex_file, i); + } +} + +void Collections::CreateCallSiteId(const DexFile& dex_file, uint32_t i) { + const DexFile::CallSiteIdItem& disk_call_site_id = dex_file.GetCallSiteId(i); + const uint8_t* disk_call_item_ptr = dex_file.Begin() + disk_call_site_id.data_off_; + EncodedArrayItem* call_site_item = + CreateEncodedArrayItem(disk_call_item_ptr, disk_call_site_id.data_off_); + + CallSiteId* call_site_id = new CallSiteId(call_site_item); + call_site_ids_.AddIndexedItem(call_site_id, CallSiteIdsOffset() + i * CallSiteId::ItemSize(), i); +} + +void Collections::CreateMethodHandleItem(const DexFile& dex_file, uint32_t i) { + const DexFile::MethodHandleItem& disk_method_handle = dex_file.GetMethodHandle(i); + uint16_t index = disk_method_handle.field_or_method_idx_; + DexFile::MethodHandleType type = + static_cast<DexFile::MethodHandleType>(disk_method_handle.method_handle_type_); + bool is_invoke = type == DexFile::MethodHandleType::kInvokeStatic || + type == DexFile::MethodHandleType::kInvokeInstance || + type == DexFile::MethodHandleType::kInvokeConstructor; + static_assert(DexFile::MethodHandleType::kLast == DexFile::MethodHandleType::kInvokeConstructor, + "Unexpected method handle types."); + IndexedItem* field_or_method_id; + if (is_invoke) { + field_or_method_id = GetMethodId(index); + } else { + field_or_method_id = GetFieldId(index); + } + MethodHandleItem* method_handle = new MethodHandleItem(type, field_or_method_id); + method_handle_items_.AddIndexedItem( + method_handle, MethodHandleItemsOffset() + i * MethodHandleItem::ItemSize(), i); +} + static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) { return 0; } @@ -821,6 +891,16 @@ static
const FileSectionDescriptor kFileSectionDescriptors[] = { &dex_ir::Collections::ClassDefsSize, &dex_ir::Collections::ClassDefsOffset }, { + "CallSiteId", + DexFile::kDexTypeCallSiteIdItem, + &dex_ir::Collections::CallSiteIdsSize, + &dex_ir::Collections::CallSiteIdsOffset + }, { + "MethodHandle", + DexFile::kDexTypeMethodHandleItem, + &dex_ir::Collections::MethodHandleItemsSize, + &dex_ir::Collections::MethodHandleItemsOffset + }, { "StringData", DexFile::kDexTypeStringDataItem, &dex_ir::Collections::StringDatasSize, diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h index cad039550a..5692eb2b39 100644 --- a/dexlayout/dex_ir.h +++ b/dexlayout/dex_ir.h @@ -35,6 +35,7 @@ class AnnotationItem; class AnnotationsDirectoryItem; class AnnotationSetItem; class AnnotationSetRefList; +class CallSiteId; class ClassData; class ClassDef; class CodeItem; @@ -47,6 +48,7 @@ class FieldItem; class Header; class MapList; class MapItem; +class MethodHandleItem; class MethodId; class MethodItem; class ParameterAnnotation; @@ -65,6 +67,8 @@ static constexpr size_t kProtoIdItemSize = 12; static constexpr size_t kFieldIdItemSize = 8; static constexpr size_t kMethodIdItemSize = 8; static constexpr size_t kClassDefItemSize = 32; +static constexpr size_t kCallSiteIdItemSize = 4; +static constexpr size_t kMethodHandleItemSize = 8; // Visitor support class AbstractDispatcher { @@ -79,6 +83,8 @@ class AbstractDispatcher { virtual void Dispatch(const ProtoId* proto_id) = 0; virtual void Dispatch(const FieldId* field_id) = 0; virtual void Dispatch(const MethodId* method_id) = 0; + virtual void Dispatch(const CallSiteId* call_site_id) = 0; + virtual void Dispatch(const MethodHandleItem* method_handle_item) = 0; virtual void Dispatch(ClassData* class_data) = 0; virtual void Dispatch(ClassDef* class_def) = 0; virtual void Dispatch(FieldItem* field_item) = 0; @@ -165,6 +171,9 @@ class Collections { std::vector<std::unique_ptr<FieldId>>& FieldIds() { return field_ids_.Collection(); } std::vector<std::unique_ptr<MethodId>>& MethodIds() { return method_ids_.Collection(); } std::vector<std::unique_ptr<ClassDef>>& ClassDefs() { return class_defs_.Collection(); } + std::vector<std::unique_ptr<CallSiteId>>& CallSiteIds() { return call_site_ids_.Collection(); } + std::vector<std::unique_ptr<MethodHandleItem>>& MethodHandleItems() + { return method_handle_items_.Collection(); } std::map<uint32_t, std::unique_ptr<StringData>>& StringDatas() { return string_datas_.Collection(); } std::map<uint32_t, std::unique_ptr<TypeList>>& TypeLists() { return type_lists_.Collection(); } @@ -189,6 +198,10 @@ class Collections { void CreateFieldId(const DexFile& dex_file, uint32_t i); void CreateMethodId(const DexFile& dex_file, uint32_t i); void CreateClassDef(const DexFile& dex_file, uint32_t i); + void CreateCallSiteId(const DexFile& dex_file, uint32_t i); + void CreateMethodHandleItem(const DexFile& dex_file, uint32_t i); + + void CreateCallSitesAndMethodHandles(const DexFile& dex_file); TypeList* CreateTypeList(const DexFile::TypeList* type_list, uint32_t offset); EncodedArrayItem* CreateEncodedArrayItem(const uint8_t* static_data, uint32_t offset); @@ -207,6 +220,8 @@ class Collections { FieldId* GetFieldId(uint32_t index) { return FieldIds()[index].get(); } MethodId* GetMethodId(uint32_t index) { return MethodIds()[index].get(); } ClassDef* GetClassDef(uint32_t index) { return ClassDefs()[index].get(); } + CallSiteId* GetCallSiteId(uint32_t index) { return CallSiteIds()[index].get(); } + MethodHandleItem* GetMethodHandle(uint32_t index) { 
return MethodHandleItems()[index].get(); } StringId* GetStringIdOrNullPtr(uint32_t index) { return index == DexFile::kDexNoIndex ? nullptr : GetStringId(index); @@ -221,6 +236,8 @@ class Collections { uint32_t FieldIdsOffset() const { return field_ids_.GetOffset(); } uint32_t MethodIdsOffset() const { return method_ids_.GetOffset(); } uint32_t ClassDefsOffset() const { return class_defs_.GetOffset(); } + uint32_t CallSiteIdsOffset() const { return call_site_ids_.GetOffset(); } + uint32_t MethodHandleItemsOffset() const { return method_handle_items_.GetOffset(); } uint32_t StringDatasOffset() const { return string_datas_.GetOffset(); } uint32_t TypeListsOffset() const { return type_lists_.GetOffset(); } uint32_t EncodedArrayItemsOffset() const { return encoded_array_items_.GetOffset(); } @@ -240,6 +257,9 @@ class Collections { void SetFieldIdsOffset(uint32_t new_offset) { field_ids_.SetOffset(new_offset); } void SetMethodIdsOffset(uint32_t new_offset) { method_ids_.SetOffset(new_offset); } void SetClassDefsOffset(uint32_t new_offset) { class_defs_.SetOffset(new_offset); } + void SetCallSiteIdsOffset(uint32_t new_offset) { call_site_ids_.SetOffset(new_offset); } + void SetMethodHandleItemsOffset(uint32_t new_offset) + { method_handle_items_.SetOffset(new_offset); } void SetStringDatasOffset(uint32_t new_offset) { string_datas_.SetOffset(new_offset); } void SetTypeListsOffset(uint32_t new_offset) { type_lists_.SetOffset(new_offset); } void SetEncodedArrayItemsOffset(uint32_t new_offset) @@ -262,6 +282,8 @@ class Collections { uint32_t FieldIdsSize() const { return field_ids_.Size(); } uint32_t MethodIdsSize() const { return method_ids_.Size(); } uint32_t ClassDefsSize() const { return class_defs_.Size(); } + uint32_t CallSiteIdsSize() const { return call_site_ids_.Size(); } + uint32_t MethodHandleItemsSize() const { return method_handle_items_.Size(); } uint32_t StringDatasSize() const { return string_datas_.Size(); } uint32_t TypeListsSize() const { return type_lists_.Size(); } uint32_t EncodedArrayItemsSize() const { return encoded_array_items_.Size(); } @@ -288,6 +310,8 @@ class Collections { CollectionVector<FieldId> field_ids_; CollectionVector<MethodId> method_ids_; CollectionVector<ClassDef> class_defs_; + CollectionVector<CallSiteId> call_site_ids_; + CollectionVector<MethodHandleItem> method_handle_items_; CollectionMap<StringData> string_datas_; CollectionMap<TypeList> type_lists_; @@ -603,8 +627,10 @@ class EncodedValue { void SetDouble(double d) { u_.double_val_ = d; } void SetStringId(StringId* string_id) { u_.string_val_ = string_id; } void SetTypeId(TypeId* type_id) { u_.type_val_ = type_id; } + void SetProtoId(ProtoId* proto_id) { u_.proto_val_ = proto_id; } void SetFieldId(FieldId* field_id) { u_.field_val_ = field_id; } void SetMethodId(MethodId* method_id) { u_.method_val_ = method_id; } + void SetMethodHandle(MethodHandleItem* method_handle) { u_.method_handle_val_ = method_handle; } void SetEncodedArray(EncodedArrayItem* encoded_array) { encoded_array_.reset(encoded_array); } void SetEncodedAnnotation(EncodedAnnotation* encoded_annotation) { encoded_annotation_.reset(encoded_annotation); } @@ -619,8 +645,10 @@ class EncodedValue { double GetDouble() const { return u_.double_val_; } StringId* GetStringId() const { return u_.string_val_; } TypeId* GetTypeId() const { return u_.type_val_; } + ProtoId* GetProtoId() const { return u_.proto_val_; } FieldId* GetFieldId() const { return u_.field_val_; } MethodId* GetMethodId() const { return u_.method_val_; } + MethodHandleItem* 
GetMethodHandle() const { return u_.method_handle_val_; } EncodedArrayItem* GetEncodedArray() const { return encoded_array_.get(); } EncodedAnnotation* GetEncodedAnnotation() const { return encoded_annotation_.get(); } @@ -639,8 +667,10 @@ class EncodedValue { double double_val_; StringId* string_val_; TypeId* type_val_; + ProtoId* proto_val_; FieldId* field_val_; MethodId* method_val_; + MethodHandleItem* method_handle_val_; } u_; std::unique_ptr<EncodedArrayItem> encoded_array_; std::unique_ptr<EncodedAnnotation> encoded_annotation_; @@ -1087,6 +1117,48 @@ class AnnotationsDirectoryItem : public Item { DISALLOW_COPY_AND_ASSIGN(AnnotationsDirectoryItem); }; +class CallSiteId : public IndexedItem { + public: + explicit CallSiteId(EncodedArrayItem* call_site_item) : call_site_item_(call_site_item) { + size_ = kCallSiteIdItemSize; + } + ~CallSiteId() OVERRIDE { } + + static size_t ItemSize() { return kCallSiteIdItemSize; } + + EncodedArrayItem* CallSiteItem() const { return call_site_item_; } + + void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); } + + private: + EncodedArrayItem* call_site_item_; + + DISALLOW_COPY_AND_ASSIGN(CallSiteId); +}; + +class MethodHandleItem : public IndexedItem { + public: + MethodHandleItem(DexFile::MethodHandleType method_handle_type, IndexedItem* field_or_method_id) + : method_handle_type_(method_handle_type), + field_or_method_id_(field_or_method_id) { + size_ = kMethodHandleItemSize; + } + ~MethodHandleItem() OVERRIDE { } + + static size_t ItemSize() { return kMethodHandleItemSize; } + + DexFile::MethodHandleType GetMethodHandleType() const { return method_handle_type_; } + IndexedItem* GetFieldOrMethodId() const { return field_or_method_id_; } + + void Accept(AbstractDispatcher* dispatch) const { dispatch->Dispatch(this); } + + private: + DexFile::MethodHandleType method_handle_type_; + IndexedItem* field_or_method_id_; + + DISALLOW_COPY_AND_ASSIGN(MethodHandleItem); +}; + // TODO(sehr): implement MapList. class MapList : public Item { public: diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc index d0c5bf964e..8eb726a64a 100644 --- a/dexlayout/dex_ir_builder.cc +++ b/dexlayout/dex_ir_builder.cc @@ -72,6 +72,8 @@ Header* DexIrBuilder(const DexFile& dex_file) { } // MapItem. collections.SetMapListOffset(disk_header.map_off_); + // CallSiteIds and MethodHandleItems. 
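+  // Call site ids and method handles have no entry in the dex header, so their offsets are recovered by walking the map list (see Collections::CreateCallSitesAndMethodHandles in dex_ir.cc above).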
+ collections.CreateCallSitesAndMethodHandles(dex_file); CheckAndSetRemainingOffsets(dex_file, &collections); @@ -115,6 +117,14 @@ static void CheckAndSetRemainingOffsets(const DexFile& dex_file, Collections* co CHECK_EQ(item->size_, collections->ClassDefsSize()); CHECK_EQ(item->offset_, collections->ClassDefsOffset()); break; + case DexFile::kDexTypeCallSiteIdItem: + CHECK_EQ(item->size_, collections->CallSiteIdsSize()); + CHECK_EQ(item->offset_, collections->CallSiteIdsOffset()); + break; + case DexFile::kDexTypeMethodHandleItem: + CHECK_EQ(item->size_, collections->MethodHandleItemsSize()); + CHECK_EQ(item->offset_, collections->MethodHandleItemsOffset()); + break; case DexFile::kDexTypeMapList: CHECK_EQ(item->size_, 1u); CHECK_EQ(item->offset_, disk_header.map_off_); diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc index 7ffa38bfd4..e1b828ca52 100644 --- a/dexlayout/dex_writer.cc +++ b/dexlayout/dex_writer.cc @@ -151,6 +151,12 @@ size_t DexWriter::WriteEncodedValue(dex_ir::EncodedValue* encoded_value, size_t length = EncodeDoubleValue(encoded_value->GetDouble(), buffer); start = 8 - length; break; + case DexFile::kDexAnnotationMethodType: + length = EncodeUIntValue(encoded_value->GetProtoId()->GetIndex(), buffer); + break; + case DexFile::kDexAnnotationMethodHandle: + length = EncodeUIntValue(encoded_value->GetMethodHandle()->GetIndex(), buffer); + break; case DexFile::kDexAnnotationString: length = EncodeUIntValue(encoded_value->GetStringId()->GetIndex(), buffer); break; @@ -485,6 +491,27 @@ void DexWriter::WriteClasses() { } } +void DexWriter::WriteCallSites() { + uint32_t call_site_off[1]; + for (std::unique_ptr<dex_ir::CallSiteId>& call_site_id : + header_->GetCollections().CallSiteIds()) { + call_site_off[0] = call_site_id->CallSiteItem()->GetOffset(); + Write(call_site_off, call_site_id->GetSize(), call_site_id->GetOffset()); + } +} + +void DexWriter::WriteMethodHandles() { + uint16_t method_handle_buff[4]; + for (std::unique_ptr<dex_ir::MethodHandleItem>& method_handle : + header_->GetCollections().MethodHandleItems()) { + method_handle_buff[0] = static_cast<uint16_t>(method_handle->GetMethodHandleType()); + method_handle_buff[1] = 0; // unused. + method_handle_buff[2] = method_handle->GetFieldOrMethodId()->GetIndex(); + method_handle_buff[3] = 0; // unused. + Write(method_handle_buff, method_handle->GetSize(), method_handle->GetOffset()); + } +} + struct MapItemContainer { MapItemContainer(uint32_t type, uint32_t size, uint32_t offset) : type_(type), size_(size), offset_(offset) { } @@ -528,6 +555,14 @@ void DexWriter::WriteMapItem() { queue.push(MapItemContainer(DexFile::kDexTypeClassDefItem, collection.ClassDefsSize(), collection.ClassDefsOffset())); } + if (collection.CallSiteIdsSize() != 0) { + queue.push(MapItemContainer(DexFile::kDexTypeCallSiteIdItem, collection.CallSiteIdsSize(), + collection.CallSiteIdsOffset())); + } + if (collection.MethodHandleItemsSize() != 0) { + queue.push(MapItemContainer(DexFile::kDexTypeMethodHandleItem, + collection.MethodHandleItemsSize(), collection.MethodHandleItemsOffset())); + } // Data section. 
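+  // Note: the dex map list must end up sorted by ascending section offset, which is why the entries are staged in the queue above rather than written out as they are encountered.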
queue.push(MapItemContainer(DexFile::kDexTypeMapList, 1, collection.MapListOffset())); @@ -618,10 +653,8 @@ void DexWriter::WriteHeader() { uint32_t class_defs_off = collections.ClassDefsOffset(); buffer[16] = class_defs_size; buffer[17] = class_defs_off; - uint32_t data_off = class_defs_off + class_defs_size * dex_ir::ClassDef::ItemSize(); - uint32_t data_size = file_size - data_off; - buffer[18] = data_size; - buffer[19] = data_off; + buffer[18] = header_->DataSize(); + buffer[19] = header_->DataOffset(); Write(buffer, 20 * sizeof(uint32_t), offset); } @@ -640,6 +673,8 @@ void DexWriter::WriteMemMap() { WriteDebugInfoItems(); WriteCodeItems(); WriteClasses(); + WriteCallSites(); + WriteMethodHandles(); WriteMapItem(); WriteHeader(); } diff --git a/dexlayout/dex_writer.h b/dexlayout/dex_writer.h index fb76e5ccfc..b396adf126 100644 --- a/dexlayout/dex_writer.h +++ b/dexlayout/dex_writer.h @@ -59,6 +59,8 @@ class DexWriter { void WriteDebugInfoItems(); void WriteCodeItems(); void WriteClasses(); + void WriteCallSites(); + void WriteMethodHandles(); void WriteMapItem(); void WriteHeader(); diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc index 688201b6b8..c577b6e105 100644 --- a/dexlayout/dexdiag.cc +++ b/dexlayout/dexdiag.cc @@ -15,6 +15,7 @@ */ #include <errno.h> +#include <inttypes.h> #include <stdint.h> #include <stdlib.h> #include <string.h> @@ -30,7 +31,9 @@ #include "dex_file.h" #include "dex_ir.h" #include "dex_ir_builder.h" +#ifdef ART_TARGET_ANDROID #include "pagemap/pagemap.h" +#endif #include "runtime.h" #include "vdex_file.h" @@ -38,8 +41,6 @@ namespace art { using android::base::StringPrintf; -static constexpr size_t kLineLength = 32; - static bool g_verbose = false; // The width needed to print a file page offset (32-bit). @@ -164,6 +165,7 @@ static void PrintLetterKey() { std::cout << ". (Mapped page not resident)" << std::endl; } +#ifdef ART_TARGET_ANDROID static char PageTypeChar(uint16_t type) { if (kDexSectionInfoMap.find(type) == kDexSectionInfoMap.end()) { return '-'; @@ -194,6 +196,7 @@ static void ProcessPageMap(uint64_t* pagemap, size_t end, const std::vector<dex_ir::DexFileSection>& sections, PageCount* page_counts) { + static constexpr size_t kLineLength = 32; for (size_t page = start; page < end; ++page) { char type_char = '.'; if (PM_PAGEMAP_PRESENT(pagemap[page])) { @@ -268,7 +271,7 @@ static void ProcessOneDexMapping(uint64_t* pagemap, std::cerr << "Dex file start offset for " << dex_file->GetLocation().c_str() << " is incorrect: map start " - << StringPrintf("%zx > dex start %zx\n", map_start, dex_file_start) + << StringPrintf("%" PRIx64 " > dex start %" PRIx64 "\n", map_start, dex_file_start) << std::endl; return; } @@ -277,7 +280,7 @@ static void ProcessOneDexMapping(uint64_t* pagemap, uint64_t end_page = RoundUp(start_address + dex_file_size, kPageSize) / kPageSize; std::cout << "DEX " << dex_file->GetLocation().c_str() - << StringPrintf(": %zx-%zx", + << StringPrintf(": %" PRIx64 "-%" PRIx64, map_start + start_page * kPageSize, map_start + end_page * kPageSize) << std::endl; @@ -293,26 +296,26 @@ static void ProcessOneDexMapping(uint64_t* pagemap, DisplayDexStatistics(start_page, end_page, section_resident_pages, sections, printer); } -static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { +static bool IsVdexFileMapping(const std::string& mapped_name) { // Confirm that the map is from a vdex file. 
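  // Note that only a true suffix match counts: the name must end in ".vdex", so the old strstr() substring check's false positives (e.g. a hypothetical "app.vdex.bak") are no longer reported.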
static const char* suffixes[] = { ".vdex" }; - std::string vdex_name; - bool found = false; - for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) { - if (strstr(pm_map_name(map), suffixes[j]) != nullptr) { - vdex_name = pm_map_name(map); - found = true; - break; + for (const char* suffix : suffixes) { + size_t match_loc = mapped_name.find(suffix); + if (match_loc != std::string::npos && mapped_name.length() == match_loc + strlen(suffix)) { + return true; } } - if (!found) { - return true; - } + return false; +} + +static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { + std::string vdex_name = pm_map_name(map); // Extract all the dex files from the vdex file. std::string error_msg; std::unique_ptr<VdexFile> vdex(VdexFile::Open(vdex_name, false /*writable*/, false /*low_4gb*/, + false /*unquicken*/, &error_msg /*out*/)); if (vdex == nullptr) { std::cerr << "Could not open vdex file " @@ -330,6 +333,7 @@ static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { << ": error " << error_msg << std::endl; + return false; } // Open the page mapping (one uint64_t per page) for the entire vdex mapping. uint64_t* pagemap; @@ -341,7 +345,7 @@ static bool DisplayMappingIfFromVdexFile(pm_map_t* map, Printer* printer) { // Process the dex files. std::cout << "DEX " hmm
std::cout << "MAPPING " << pm_map_name(map) - << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map)) + << StringPrintf(": %" PRIx64 "-%" PRIx64, pm_map_start(map), pm_map_end(map)) << std::endl; ProcessOneOatMapping(pagemap, len, printer); free(pagemap); @@ -425,9 +428,10 @@ static bool FilterByNameContains(const std::string& mapped_file_name, } return false; } +#endif static void Usage(const char* cmd) { - std::cerr << "Usage: " << cmd << " [options] pid" << std::endl + std::cout << "Usage: " << cmd << " [options] pid" << std::endl << " --contains=<string>: Display sections containing string." << std::endl << " --help: Shows this message." << std::endl << " --verbose: Makes displays verbose." << std::endl; @@ -462,6 +466,7 @@ static int DexDiagMain(int argc, char* argv[]) { InitLogging(argv, Runtime::Aborter); MemMap::Init(); +#ifdef ART_TARGET_ANDROID pid_t pid; char* endptr; pid = (pid_t)strtol(argv[argc - 1], &endptr, 10); @@ -495,7 +500,8 @@ static int DexDiagMain(int argc, char* argv[]) { return EXIT_FAILURE; } - // Process the mappings that are due to DEX files. + bool match_found = false; + // Process the mappings that are due to vdex or oat files. Printer printer; for (size_t i = 0; i < num_maps; ++i) { std::string mapped_file_name = pm_map_name(maps[i]); @@ -503,12 +509,23 @@ static int DexDiagMain(int argc, char* argv[]) { if (!FilterByNameContains(mapped_file_name, name_filters)) { continue; } - if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) { - return EXIT_FAILURE; - } else if (!DisplayMappingIfFromOatFile(maps[i], &printer)) { - return EXIT_FAILURE; + if (IsVdexFileMapping(mapped_file_name)) { + if (!DisplayMappingIfFromVdexFile(maps[i], &printer)) { + return EXIT_FAILURE; + } + match_found = true; + } else if (IsOatFileMapping(mapped_file_name)) { + if (!DisplayMappingIfFromOatFile(maps[i], &printer)) { + return EXIT_FAILURE; + } + match_found = true; } } + if (!match_found) { + std::cerr << "No relevant memory maps were found." << std::endl; + return EXIT_FAILURE; + } +#endif return EXIT_SUCCESS; } diff --git a/dexlayout/dexdiag_test.cc b/dexlayout/dexdiag_test.cc new file mode 100644 index 0000000000..a0b3f32756 --- /dev/null +++ b/dexlayout/dexdiag_test.cc @@ -0,0 +1,152 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <string> +#include <vector> + +#include "common_runtime_test.h" + +#include "runtime/exec_utils.h" +#include "runtime/oat_file.h" +#include "runtime/os.h" + +namespace art { + +static const char* kDexDiagContains = "--contains=core.vdex"; +static const char* kDexDiagContainsFails = "--contains=anything_other_than_core.vdex"; +static const char* kDexDiagHelp = "--help"; +static const char* kDexDiagVerbose = "--verbose"; +static const char* kDexDiagBinaryName = "dexdiag"; + +class DexDiagTest : public CommonRuntimeTest { + protected: + virtual void SetUp() { + CommonRuntimeTest::SetUp(); + } + + // Path to the dexdiag(d?)[32|64] binary. 
+ std::string GetDexDiagFilePath() { + std::string root = GetTestAndroidRoot(); + + root += "/bin/"; + root += kDexDiagBinaryName; + + std::string root32 = root + "32"; + // If we have both a 32-bit and a 64-bit build, the 32-bit file will have a 32 suffix. + if (OS::FileExists(root32.c_str()) && !Is64BitInstructionSet(kRuntimeISA)) { + return root32; + } else { + // This is a 64-bit build or only a single build exists. + return root; + } + } + + std::unique_ptr<OatFile> OpenOatAndVdexFiles() { + // Open the core.oat file. + // This is a little convoluted because we have to + // get the location of the default core image (.../framework/core.oat) and + // find it in the right architecture subdirectory (.../framework/arm/core.oat). + // Then, opening the oat file has the side-effect of opening the corresponding + // vdex file (.../framework/arm/core.vdex). + const std::string default_location = GetCoreOatLocation(); + EXPECT_TRUE(!default_location.empty()); + std::string oat_location = GetSystemImageFilename(default_location.c_str(), kRuntimeISA); + EXPECT_TRUE(!oat_location.empty()); + std::cout << "==" << oat_location << std::endl; + std::string error_msg; + std::unique_ptr<OatFile> oat(OatFile::Open(oat_location.c_str(), + oat_location.c_str(), + nullptr, + nullptr, + false, + /*low_4gb*/false, + nullptr, + &error_msg)); + EXPECT_TRUE(oat != nullptr) << error_msg; + return oat; + } + + // Run dexdiag against the given process, passing along any extra arguments. + bool Exec(pid_t this_pid, const std::vector<std::string>& args, std::string* error_msg) { + // Invoke 'dexdiag' against the current process. + // This should succeed because we have a runtime and so it should + // be able to map in the boot.art and do a diff for it. + std::vector<std::string> exec_argv; + + // Build the command line "dexdiag <args> this_pid". + std::string executable_path = GetDexDiagFilePath(); + EXPECT_TRUE(OS::FileExists(executable_path.c_str())) << executable_path + << " should be a valid file path"; + exec_argv.push_back(executable_path); + for (const auto& arg : args) { + exec_argv.push_back(arg); + } + exec_argv.push_back(std::to_string(this_pid)); + + return ::art::Exec(exec_argv, error_msg); + } +}; + +// We can't run these tests on the host, as they will fail when trying to open +// /proc/pid/pagemap. +// On the target, we invoke 'dexdiag' against the current process. +// This should succeed because we have a runtime and so dexdiag should +// be able to find the map for, e.g., boot.vdex and friends. +TEST_F(DexDiagTest, DexDiagHelpTest) { + // TODO: test the resulting output. + std::string error_msg; + ASSERT_TRUE(Exec(getpid(), { kDexDiagHelp }, &error_msg)) << "Failed to execute -- because: " + << error_msg; +} + +#if defined (ART_TARGET) +TEST_F(DexDiagTest, DexDiagContainsTest) { +#else +TEST_F(DexDiagTest, DISABLED_DexDiagContainsTest) { +#endif + std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles(); + // TODO: test the resulting output. + std::string error_msg; + ASSERT_TRUE(Exec(getpid(), { kDexDiagContains }, &error_msg)) << "Failed to execute -- because: " + << error_msg; +} + +#if defined (ART_TARGET) +TEST_F(DexDiagTest, DexDiagContainsFailsTest) { +#else +TEST_F(DexDiagTest, DISABLED_DexDiagContainsFailsTest) { +#endif + std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles(); + // TODO: test the resulting output.
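+  // This filter matches nothing, so dexdiag should print "No relevant memory maps were found." and exit with failure; hence the ASSERT_FALSE below.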
+ std::string error_msg; + ASSERT_FALSE(Exec(getpid(), { kDexDiagContainsFails }, &error_msg)) + << "Failed to execute -- because: " + << error_msg; +} + +#if defined (ART_TARGET) +TEST_F(DexDiagTest, DexDiagVerboseTest) { +#else +TEST_F(DexDiagTest, DISABLED_DexDiagVerboseTest) { +#endif + // TODO: test the resulting output. + std::unique_ptr<OatFile> oat = OpenOatAndVdexFiles(); + std::string error_msg; + ASSERT_TRUE(Exec(getpid(), { kDexDiagVerbose }, &error_msg)) << "Failed to execute -- because: " + << error_msg; +} + +} // namespace art diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc index 38faf9688b..3c627ea6f0 100644 --- a/dexlayout/dexlayout_main.cc +++ b/dexlayout/dexlayout_main.cc @@ -170,14 +170,14 @@ int DexlayoutDriver(int argc, char** argv) { } // Open profile file. - ProfileCompilationInfo* profile_info = nullptr; + std::unique_ptr<ProfileCompilationInfo> profile_info; if (options.profile_file_name_) { int profile_fd = open(options.profile_file_name_, O_RDONLY); if (profile_fd < 0) { fprintf(stderr, "Can't open %s\n", options.profile_file_name_); return 1; } - profile_info = new ProfileCompilationInfo(); + profile_info.reset(new ProfileCompilationInfo()); if (!profile_info->Load(profile_fd)) { fprintf(stderr, "Can't read profile info from %s\n", options.profile_file_name_); return 1; @@ -185,13 +185,19 @@ int DexlayoutDriver(int argc, char** argv) { } // Create DexLayout instance. - DexLayout dex_layout(options, profile_info, out_file); + DexLayout dex_layout(options, profile_info.get(), out_file); // Process all files supplied on command line. int result = 0; while (optind < argc) { result |= dex_layout.ProcessFile(argv[optind++]); } // while + + if (options.output_file_name_) { + CHECK(out_file != nullptr && out_file != stdout); + fclose(out_file); + } + return result != 0; } diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc index e988aac86f..877ea923fc 100644 --- a/dexlayout/dexlayout_test.cc +++ b/dexlayout/dexlayout_test.cc @@ -205,6 +205,19 @@ static const char kUnknownTypeDebugInfoInputDex[] = "AAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAAuAAAAAYAAAABAAAA0AAAAAEgAAACAAAA" "8AAAAAIgAAAIAAAAHAEAAAMgAAACAAAAVAEAAAAgAAABAAAAYwEAAAAQAAABAAAAdAEAAA=="; +// Dex file with multiple class data items pointing to the same code item. +// Constructed by hex editing. +static const char kDuplicateCodeItemInputDex[] = + "ZGV4CjAzNQCwKtVglQOmLWuHwldN5jkBOInC7mTMhJMAAgAAcAAAAHhWNBIAAAAAAAAAAHgBAAAH" + "AAAAcAAAAAMAAACMAAAAAQAAAJgAAAAAAAAAAAAAAAQAAACkAAAAAQAAAMQAAAAcAQAA5AAAACQB" + "AAAsAQAANAEAADkBAABNAQAAUAEAAFMBAAACAAAAAwAAAAQAAAAEAAAAAgAAAAAAAAAAAAAAAAAA" + "AAAAAAAFAAAAAAAAAAYAAAABAAAAAAAAAAAAAAABAAAAAQAAAAAAAAABAAAAAAAAAGUBAAAAAAAA" + "AQABAAEAAABWAQAABAAAAHAQAwAAAA4AAQABAAAAAABbAQAAAQAAAA4AAAABAAEAAAAAAGABAAAB" + "AAAADgAAAAY8aW5pdD4ABkEuamF2YQADTEE7ABJMamF2YS9sYW5nL09iamVjdDsAAVYAAWEAAWIA" + "AQAHDgADAAcOAAUABw4AAAABAgCBgATkAQEA/AEBAPwBAAsAAAAAAAAAAQAAAAAAAAABAAAABwAA" + "AHAAAAACAAAAAwAAAIwAAAADAAAAAQAAAJgAAAAFAAAABAAAAKQAAAAGAAAAAQAAAMQAAAABIAAA" + "AwAAAOQAAAACIAAABwAAACQBAAADIAAAAwAAAFYBAAAAIAAAAQAAAGUBAAAAEAAAAQAAAHgBAAA="; + static void WriteBase64ToFile(const char* base64, File* file) { // Decode base64. 
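  // (The base64-encoded dex fixtures above, such as kDuplicateCodeItemInputDex, are presumably decoded through this helper into scratch files for the dexlayout runs.)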
CHECK(base64 != nullptr); @@ -519,4 +532,17 @@ TEST_F(DexLayoutTest, UnknownTypeDebugInfo) { dexlayout_exec_argv)); } +TEST_F(DexLayoutTest, DuplicateCodeItem) { + ScratchFile temp_dex; + std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout"; + EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path"; + std::vector<std::string> dexlayout_exec_argv = + { dexlayout, "-o", "/dev/null", temp_dex.GetFilename() }; + ASSERT_TRUE(DexLayoutExec(&temp_dex, + kDuplicateCodeItemInputDex, + nullptr /* profile_file */, + nullptr /* profile_filename */, + dexlayout_exec_argv)); +} + } // namespace art diff --git a/dexoptanalyzer/dexoptanalyzer.cc b/dexoptanalyzer/dexoptanalyzer.cc index 965e4073ea..9a2eb7f8dd 100644 --- a/dexoptanalyzer/dexoptanalyzer.cc +++ b/dexoptanalyzer/dexoptanalyzer.cc @@ -216,6 +216,8 @@ class DexoptAnalyzer FINAL { if (!CreateRuntime()) { return kErrorCannotCreateRuntime; } + std::unique_ptr<Runtime> runtime(Runtime::Current()); + OatFileAssistant oat_file_assistant(dex_file_.c_str(), isa_, /*load_executable*/ false); // Always treat elements of the bootclasspath as up-to-date. // TODO(calin): this check should be in OatFileAssistant. diff --git a/dexoptanalyzer/dexoptanalyzer_test.cc b/dexoptanalyzer/dexoptanalyzer_test.cc index 57d3f1f68b..1703ff4cbc 100644 --- a/dexoptanalyzer/dexoptanalyzer_test.cc +++ b/dexoptanalyzer/dexoptanalyzer_test.cc @@ -89,8 +89,8 @@ TEST_F(DexoptAnalyzerTest, DexNoOat) { Copy(GetDexSrc1(), dex_location); Verify(dex_location, CompilerFilter::kSpeed); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); - Verify(dex_location, CompilerFilter::kInterpretOnly); + Verify(dex_location, CompilerFilter::kExtract); + Verify(dex_location, CompilerFilter::kQuicken); Verify(dex_location, CompilerFilter::kSpeedProfile); } @@ -101,8 +101,8 @@ TEST_F(DexoptAnalyzerTest, OatUpToDate) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); Verify(dex_location, CompilerFilter::kSpeed); - Verify(dex_location, CompilerFilter::kInterpretOnly); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); + Verify(dex_location, CompilerFilter::kQuicken); + Verify(dex_location, CompilerFilter::kExtract); Verify(dex_location, CompilerFilter::kEverything); } @@ -113,9 +113,9 @@ TEST_F(DexoptAnalyzerTest, ProfileOatUpToDate) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile); Verify(dex_location, CompilerFilter::kSpeedProfile, false); - Verify(dex_location, CompilerFilter::kInterpretOnly, false); + Verify(dex_location, CompilerFilter::kQuicken, false); Verify(dex_location, CompilerFilter::kSpeedProfile, true); - Verify(dex_location, CompilerFilter::kInterpretOnly, true); + Verify(dex_location, CompilerFilter::kQuicken, true); } // Case: We have a MultiDEX file and up-to-date OAT file for it. 
@@ -154,7 +154,7 @@ TEST_F(DexoptAnalyzerTest, OatDexOutOfDate) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); Copy(GetDexSrc2(), dex_location); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); + Verify(dex_location, CompilerFilter::kExtract); Verify(dex_location, CompilerFilter::kSpeed); } @@ -170,8 +170,8 @@ TEST_F(DexoptAnalyzerTest, OatImageOutOfDate) { /*pic*/false, /*with_alternate_image*/true); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); - Verify(dex_location, CompilerFilter::kInterpretOnly); + Verify(dex_location, CompilerFilter::kExtract); + Verify(dex_location, CompilerFilter::kQuicken); Verify(dex_location, CompilerFilter::kSpeed); } @@ -184,13 +184,13 @@ TEST_F(DexoptAnalyzerTest, OatVerifyAtRuntimeImageOutOfDate) { Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), - CompilerFilter::kVerifyAtRuntime, + CompilerFilter::kExtract, /*relocate*/true, /*pic*/false, /*with_alternate_image*/true); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); - Verify(dex_location, CompilerFilter::kInterpretOnly); + Verify(dex_location, CompilerFilter::kExtract); + Verify(dex_location, CompilerFilter::kQuicken); } // Case: We have a DEX file and an ODEX file, but no OAT file. @@ -201,7 +201,7 @@ TEST_F(DexoptAnalyzerTest, DexOdexNoOat) { Copy(GetDexSrc1(), dex_location); GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); + Verify(dex_location, CompilerFilter::kExtract); Verify(dex_location, CompilerFilter::kSpeed); } @@ -235,7 +235,7 @@ TEST_F(DexoptAnalyzerTest, StrippedDexOdexOat) { // Strip the dex file. Copy(GetStrippedDexSrc1(), dex_location); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); + Verify(dex_location, CompilerFilter::kExtract); Verify(dex_location, CompilerFilter::kSpeed); Verify(dex_location, CompilerFilter::kEverything); } @@ -248,8 +248,8 @@ TEST_F(DexoptAnalyzerTest, ResourceOnlyDex) { Copy(GetStrippedDexSrc1(), dex_location); Verify(dex_location, CompilerFilter::kSpeed); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); - Verify(dex_location, CompilerFilter::kInterpretOnly); + Verify(dex_location, CompilerFilter::kExtract); + Verify(dex_location, CompilerFilter::kQuicken); } // Case: We have a DEX file, an ODEX file and an OAT file, where the ODEX and @@ -287,9 +287,9 @@ TEST_F(DexoptAnalyzerTest, DexVerifyAtRuntimeOdexNoOat) { std::string odex_location = GetOdexDir() + "/DexVerifyAtRuntimeOdexNoOat.odex"; Copy(GetDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kExtract); - Verify(dex_location, CompilerFilter::kVerifyAtRuntime); + Verify(dex_location, CompilerFilter::kExtract); Verify(dex_location, CompilerFilter::kSpeed); } diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc index eb57d339af..3c60bf4be5 100644 --- a/disassembler/disassembler_mips.cc +++ b/disassembler/disassembler_mips.cc @@ -433,6 +433,11 @@ static const MipsInstruction gMipsInstructions[] = { { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x12, "div_u", "Vkmn" }, { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x12, "mod_s", "Vkmn" }, { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x12, "mod_u", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x10, "add_a", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x4 << 23) | 0x10, "ave_s", "Vkmn" }, + { kMsaMask | (0x7 << 
23), kMsa | (0x5 << 23) | 0x10, "ave_u", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x10, "aver_s", "Vkmn" }, + { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x10, "aver_u", "Vkmn" }, { kMsaMask | (0xf << 22), kMsa | (0x0 << 22) | 0x1b, "fadd", "Ukmn" }, { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x1b, "fsub", "Ukmn" }, { kMsaMask | (0xf << 22), kMsa | (0x2 << 22) | 0x1b, "fmul", "Ukmn" }, diff --git a/oatdump/Android.bp b/oatdump/Android.bp index f1fcf3dad8..1cd97c2b53 100644 --- a/oatdump/Android.bp +++ b/oatdump/Android.bp @@ -114,5 +114,8 @@ art_cc_test { defaults: [ "art_gtest_defaults", ], - srcs: ["oatdump_test.cc"], + srcs: [ + "oatdump_test.cc", + "oatdump_image_test.cc", + ], } diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc index 878d0f2cfe..f07e0f9941 100644 --- a/oatdump/oatdump.cc +++ b/oatdump/oatdump.cc @@ -125,9 +125,12 @@ class OatSymbolizer FINAL { std::unique_ptr<const InstructionSetFeatures> features = InstructionSetFeatures::FromBitmap( isa, oat_file_->GetOatHeader().GetInstructionSetFeaturesBitmap()); - File* elf_file = OS::CreateEmptyFile(output_name_.c_str()); + std::unique_ptr<File> elf_file(OS::CreateEmptyFile(output_name_.c_str())); + if (elf_file == nullptr) { + return false; + } std::unique_ptr<BufferedOutputStream> output_stream( - MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))); + MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file.get()))); builder_.reset(new ElfBuilder<ElfTypes>(isa, features.get(), output_stream.get())); builder_->Start(); @@ -182,7 +185,17 @@ class OatSymbolizer FINAL { builder_->End(); - return builder_->Good(); + bool ret_value = builder_->Good(); + + builder_.reset(); + output_stream.reset(); + + if (elf_file->FlushCloseOrErase() != 0) { + return false; + } + elf_file.reset(); + + return ret_value; } void Walk() { @@ -2842,14 +2855,14 @@ static int DumpOat(Runtime* runtime, const char* oat_filename, OatDumperOptions* static int SymbolizeOat(const char* oat_filename, std::string& output_name, bool no_bits) { std::string error_msg; - OatFile* oat_file = OatFile::Open(oat_filename, - oat_filename, - nullptr, - nullptr, - false, - /*low_4gb*/false, - nullptr, - &error_msg); + std::unique_ptr<OatFile> oat_file(OatFile::Open(oat_filename, + oat_filename, + nullptr, + nullptr, + false, + /*low_4gb*/false, + nullptr, + &error_msg)); if (oat_file == nullptr) { fprintf(stderr, "Failed to open oat file from '%s': %s\n", oat_filename, error_msg.c_str()); return EXIT_FAILURE; @@ -2859,10 +2872,10 @@ static int SymbolizeOat(const char* oat_filename, std::string& output_name, bool // Try to produce an ELF file of the same type. This is finicky, as we have used 32-bit ELF // files for 64-bit code in the past. 
if (Is64BitInstructionSet(oat_file->GetOatHeader().GetInstructionSet())) { - OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file, output_name, no_bits); + OatSymbolizer<ElfTypes64> oat_symbolizer(oat_file.get(), output_name, no_bits); result = oat_symbolizer.Symbolize(); } else { - OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file, output_name, no_bits); + OatSymbolizer<ElfTypes32> oat_symbolizer(oat_file.get(), output_name, no_bits); result = oat_symbolizer.Symbolize(); } if (!result) { diff --git a/oatdump/oatdump_image_test.cc b/oatdump/oatdump_image_test.cc new file mode 100644 index 0000000000..e9cc922d9b --- /dev/null +++ b/oatdump/oatdump_image_test.cc @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "oatdump_test.h" + +namespace art { + +// Disable tests on arm and mips as they are taking too long to run. b/27824283. +#if !defined(__arm__) && !defined(__mips__) +TEST_F(OatDumpTest, TestImage) { + std::string error_msg; + ASSERT_TRUE(Exec(kDynamic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg; +} +TEST_F(OatDumpTest, TestImageStatic) { + TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS(); + std::string error_msg; + ASSERT_TRUE(Exec(kStatic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg; +} + +TEST_F(OatDumpTest, TestOatImage) { + std::string error_msg; + ASSERT_TRUE(Exec(kDynamic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg; +} +TEST_F(OatDumpTest, TestOatImageStatic) { + TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS(); + std::string error_msg; + ASSERT_TRUE(Exec(kStatic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg; +} +#endif +} // namespace art diff --git a/oatdump/oatdump_test.cc b/oatdump/oatdump_test.cc index c7c3ddd7cc..7260d7477b 100644 --- a/oatdump/oatdump_test.cc +++ b/oatdump/oatdump_test.cc @@ -14,235 +14,12 @@ * limitations under the License. */ -#include <sstream> -#include <string> -#include <vector> - -#include "android-base/strings.h" - -#include "common_runtime_test.h" - -#include "base/unix_file/fd_file.h" -#include "runtime/arch/instruction_set.h" -#include "runtime/exec_utils.h" -#include "runtime/gc/heap.h" -#include "runtime/gc/space/image_space.h" -#include "runtime/os.h" -#include "runtime/utils.h" -#include "utils.h" - -#include <sys/types.h> -#include <unistd.h> +#include "oatdump_test.h" namespace art { -class OatDumpTest : public CommonRuntimeTest { - protected: - virtual void SetUp() { - CommonRuntimeTest::SetUp(); - core_art_location_ = GetCoreArtLocation(); - core_oat_location_ = GetSystemImageFilename(GetCoreOatLocation().c_str(), kRuntimeISA); - } - - // Linking flavor. - enum Flavor { - kDynamic, // oatdump(d) - kStatic, // oatdump(d)s - }; - - // Returns path to the oatdump binary. 
- std::string GetOatDumpFilePath(Flavor flavor) { - std::string root = GetTestAndroidRoot(); - root += "/bin/oatdump"; - if (kIsDebugBuild) { - root += "d"; - } - if (flavor == kStatic) { - root += "s"; - } - return root; - } - - enum Mode { - kModeOat, - kModeArt, - kModeSymbolize, - }; - - // Display style. - enum Display { - kListOnly, - kListAndCode - }; - - // Run the test with custom arguments. - bool Exec(Flavor flavor, - Mode mode, - const std::vector<std::string>& args, - Display display, - std::string* error_msg) { - std::string file_path = GetOatDumpFilePath(flavor); - - EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path"; - - // ScratchFile scratch; - std::vector<std::string> exec_argv = { file_path }; - std::vector<std::string> expected_prefixes; - if (mode == kModeSymbolize) { - exec_argv.push_back("--symbolize=" + core_oat_location_); - exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize"); - } else { - expected_prefixes.push_back("Dex file data for"); - expected_prefixes.push_back("Num string ids:"); - expected_prefixes.push_back("Num field ids:"); - expected_prefixes.push_back("Num method ids:"); - expected_prefixes.push_back("LOCATION:"); - expected_prefixes.push_back("MAGIC:"); - expected_prefixes.push_back("DEX FILE COUNT:"); - if (display == kListAndCode) { - // Code and dex code do not show up if list only. - expected_prefixes.push_back("DEX CODE:"); - expected_prefixes.push_back("CODE:"); - expected_prefixes.push_back("CodeInfoEncoding"); - expected_prefixes.push_back("CodeInfoInlineInfo"); - } - if (mode == kModeArt) { - exec_argv.push_back("--image=" + core_art_location_); - exec_argv.push_back("--instruction-set=" + std::string( - GetInstructionSetString(kRuntimeISA))); - expected_prefixes.push_back("IMAGE LOCATION:"); - expected_prefixes.push_back("IMAGE BEGIN:"); - expected_prefixes.push_back("kDexCaches:"); - } else { - CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat)); - exec_argv.push_back("--oat-file=" + core_oat_location_); - } - } - exec_argv.insert(exec_argv.end(), args.begin(), args.end()); - - bool result = true; - // We must set --android-root. - int link[2]; - if (pipe(link) == -1) { - *error_msg = strerror(errno); - return false; - } - - const pid_t pid = fork(); - if (pid == -1) { - *error_msg = strerror(errno); - return false; - } - - if (pid == 0) { - dup2(link[1], STDOUT_FILENO); - close(link[0]); - close(link[1]); - // change process groups, so we don't get reaped by ProcessManager - setpgid(0, 0); - // Use execv here rather than art::Exec to avoid blocking on waitpid here. - std::vector<char*> argv; - for (size_t i = 0; i < exec_argv.size(); ++i) { - argv.push_back(const_cast<char*>(exec_argv[i].c_str())); - } - argv.push_back(nullptr); - UNUSED(execv(argv[0], &argv[0])); - const std::string command_line(android::base::Join(exec_argv, ' ')); - PLOG(ERROR) << "Failed to execv(" << command_line << ")"; - // _exit to avoid atexit handlers in child. - _exit(1); - } else { - close(link[1]); - static const size_t kLineMax = 256; - char line[kLineMax] = {}; - size_t line_len = 0; - size_t total = 0; - std::vector<bool> found(expected_prefixes.size(), false); - while (true) { - while (true) { - size_t spaces = 0; - // Trim spaces at the start of the line. 
- for (; spaces < line_len && isspace(line[spaces]); ++spaces) {} - if (spaces > 0) { - line_len -= spaces; - memmove(&line[0], &line[spaces], line_len); - } - ssize_t bytes_read = - TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len)); - if (bytes_read <= 0) { - break; - } - line_len += bytes_read; - total += bytes_read; - } - if (line_len == 0) { - break; - } - // Check contents. - for (size_t i = 0; i < expected_prefixes.size(); ++i) { - const std::string& expected = expected_prefixes[i]; - if (!found[i] && - line_len >= expected.length() && - memcmp(line, expected.c_str(), expected.length()) == 0) { - found[i] = true; - } - } - // Skip to next line. - size_t next_line = 0; - for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {} - line_len -= next_line + 1; - memmove(&line[0], &line[next_line + 1], line_len); - } - if (mode == kModeSymbolize) { - EXPECT_EQ(total, 0u); - } else { - EXPECT_GT(total, 0u); - } - LOG(INFO) << "Processed bytes " << total; - close(link[0]); - int status = 0; - if (waitpid(pid, &status, 0) != -1) { - result = (status == 0); - } - - for (size_t i = 0; i < expected_prefixes.size(); ++i) { - if (!found[i]) { - LOG(ERROR) << "Did not find prefix " << expected_prefixes[i]; - result = false; - } - } - } - - return result; - } - - private: - std::string core_art_location_; - std::string core_oat_location_; -}; - // Disable tests on arm and mips as they are taking too long to run. b/27824283. #if !defined(__arm__) && !defined(__mips__) -TEST_F(OatDumpTest, TestImage) { - std::string error_msg; - ASSERT_TRUE(Exec(kDynamic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg; -} -TEST_F(OatDumpTest, TestImageStatic) { - TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS(); - std::string error_msg; - ASSERT_TRUE(Exec(kStatic, kModeArt, {}, kListAndCode, &error_msg)) << error_msg; -} - -TEST_F(OatDumpTest, TestOatImage) { - std::string error_msg; - ASSERT_TRUE(Exec(kDynamic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg; -} -TEST_F(OatDumpTest, TestOatImageStatic) { - TEST_DISABLED_FOR_NON_STATIC_HOST_BUILDS(); - std::string error_msg; - ASSERT_TRUE(Exec(kStatic, kModeOat, {}, kListAndCode, &error_msg)) << error_msg; -} - TEST_F(OatDumpTest, TestNoDumpVmap) { std::string error_msg; ASSERT_TRUE(Exec(kDynamic, kModeArt, {"--no-dump:vmap"}, kListAndCode, &error_msg)) << error_msg; diff --git a/oatdump/oatdump_test.h b/oatdump/oatdump_test.h new file mode 100644 index 0000000000..48e9eb5964 --- /dev/null +++ b/oatdump/oatdump_test.h @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_OATDUMP_OATDUMP_TEST_H_ +#define ART_OATDUMP_OATDUMP_TEST_H_ + +#include <sstream> +#include <string> +#include <vector> + +#include "android-base/strings.h" + +#include "common_runtime_test.h" + +#include "base/unix_file/fd_file.h" +#include "runtime/arch/instruction_set.h" +#include "runtime/exec_utils.h" +#include "runtime/gc/heap.h" +#include "runtime/gc/space/image_space.h" +#include "runtime/os.h" +#include "runtime/utils.h" +#include "utils.h" + +#include <sys/types.h> +#include <unistd.h> + +namespace art { + +class OatDumpTest : public CommonRuntimeTest { + protected: + virtual void SetUp() { + CommonRuntimeTest::SetUp(); + core_art_location_ = GetCoreArtLocation(); + core_oat_location_ = GetSystemImageFilename(GetCoreOatLocation().c_str(), kRuntimeISA); + } + + // Linking flavor. + enum Flavor { + kDynamic, // oatdump(d) + kStatic, // oatdump(d)s + }; + + // Returns path to the oatdump binary. + std::string GetOatDumpFilePath(Flavor flavor) { + std::string root = GetTestAndroidRoot(); + root += "/bin/oatdump"; + if (kIsDebugBuild) { + root += "d"; + } + if (flavor == kStatic) { + root += "s"; + } + return root; + } + + enum Mode { + kModeOat, + kModeArt, + kModeSymbolize, + }; + + // Display style. + enum Display { + kListOnly, + kListAndCode + }; + + // Run the test with custom arguments. + bool Exec(Flavor flavor, + Mode mode, + const std::vector<std::string>& args, + Display display, + std::string* error_msg) { + std::string file_path = GetOatDumpFilePath(flavor); + + EXPECT_TRUE(OS::FileExists(file_path.c_str())) << file_path << " should be a valid file path"; + + // ScratchFile scratch; + std::vector<std::string> exec_argv = { file_path }; + std::vector<std::string> expected_prefixes; + if (mode == kModeSymbolize) { + exec_argv.push_back("--symbolize=" + core_oat_location_); + exec_argv.push_back("--output=" + core_oat_location_ + ".symbolize"); + } else { + expected_prefixes.push_back("Dex file data for"); + expected_prefixes.push_back("Num string ids:"); + expected_prefixes.push_back("Num field ids:"); + expected_prefixes.push_back("Num method ids:"); + expected_prefixes.push_back("LOCATION:"); + expected_prefixes.push_back("MAGIC:"); + expected_prefixes.push_back("DEX FILE COUNT:"); + if (display == kListAndCode) { + // Code and dex code do not show up if list only. + expected_prefixes.push_back("DEX CODE:"); + expected_prefixes.push_back("CODE:"); + expected_prefixes.push_back("CodeInfoEncoding"); + expected_prefixes.push_back("CodeInfoInlineInfo"); + } + if (mode == kModeArt) { + exec_argv.push_back("--image=" + core_art_location_); + exec_argv.push_back("--instruction-set=" + std::string( + GetInstructionSetString(kRuntimeISA))); + expected_prefixes.push_back("IMAGE LOCATION:"); + expected_prefixes.push_back("IMAGE BEGIN:"); + expected_prefixes.push_back("kDexCaches:"); + } else { + CHECK_EQ(static_cast<size_t>(mode), static_cast<size_t>(kModeOat)); + exec_argv.push_back("--oat-file=" + core_oat_location_); + } + } + exec_argv.insert(exec_argv.end(), args.begin(), args.end()); + + bool result = true; + // We must set --android-root. 
+ int link[2]; + if (pipe(link) == -1) { + *error_msg = strerror(errno); + return false; + } + + const pid_t pid = fork(); + if (pid == -1) { + *error_msg = strerror(errno); + return false; + } + + if (pid == 0) { + dup2(link[1], STDOUT_FILENO); + close(link[0]); + close(link[1]); + // change process groups, so we don't get reaped by ProcessManager + setpgid(0, 0); + // Use execv here rather than art::Exec to avoid blocking on waitpid here. + std::vector<char*> argv; + for (size_t i = 0; i < exec_argv.size(); ++i) { + argv.push_back(const_cast<char*>(exec_argv[i].c_str())); + } + argv.push_back(nullptr); + UNUSED(execv(argv[0], &argv[0])); + const std::string command_line(android::base::Join(exec_argv, ' ')); + PLOG(ERROR) << "Failed to execv(" << command_line << ")"; + // _exit to avoid atexit handlers in child. + _exit(1); + } else { + close(link[1]); + static const size_t kLineMax = 256; + char line[kLineMax] = {}; + size_t line_len = 0; + size_t total = 0; + std::vector<bool> found(expected_prefixes.size(), false); + while (true) { + while (true) { + size_t spaces = 0; + // Trim spaces at the start of the line. + for (; spaces < line_len && isspace(line[spaces]); ++spaces) {} + if (spaces > 0) { + line_len -= spaces; + memmove(&line[0], &line[spaces], line_len); + } + ssize_t bytes_read = + TEMP_FAILURE_RETRY(read(link[0], &line[line_len], kLineMax - line_len)); + if (bytes_read <= 0) { + break; + } + line_len += bytes_read; + total += bytes_read; + } + if (line_len == 0) { + break; + } + // Check contents. + for (size_t i = 0; i < expected_prefixes.size(); ++i) { + const std::string& expected = expected_prefixes[i]; + if (!found[i] && + line_len >= expected.length() && + memcmp(line, expected.c_str(), expected.length()) == 0) { + found[i] = true; + } + } + // Skip to next line. + size_t next_line = 0; + for (; next_line + 1 < line_len && line[next_line] != '\n'; ++next_line) {} + line_len -= next_line + 1; + memmove(&line[0], &line[next_line + 1], line_len); + } + if (mode == kModeSymbolize) { + EXPECT_EQ(total, 0u); + } else { + EXPECT_GT(total, 0u); + } + LOG(INFO) << "Processed bytes " << total; + close(link[0]); + int status = 0; + if (waitpid(pid, &status, 0) != -1) { + result = (status == 0); + } + + for (size_t i = 0; i < expected_prefixes.size(); ++i) { + if (!found[i]) { + LOG(ERROR) << "Did not find prefix " << expected_prefixes[i]; + result = false; + } + } + } + + return result; + } + + private: + std::string core_art_location_; + std::string core_oat_location_; +}; + +} // namespace art + +#endif // ART_OATDUMP_OATDUMP_TEST_H_ diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc index fbb0978d53..e750ede8fa 100644 --- a/patchoat/patchoat.cc +++ b/patchoat/patchoat.cc @@ -30,6 +30,7 @@ #include "art_field-inl.h" #include "art_method-inl.h" #include "base/dumpable.h" +#include "base/memory_tool.h" #include "base/scoped_flock.h" #include "base/stringpiece.h" #include "base/unix_file/fd_file.h" @@ -142,6 +143,8 @@ bool PatchOat::Patch(const std::string& image_location, LOG(ERROR) << "Unable to initialize runtime"; return false; } + std::unique_ptr<Runtime> runtime(Runtime::Current()); + // Runtime::Create acquired the mutator_lock_ that is normally given away when we Runtime::Start, // give it away now and then switch to a more manageable ScopedObjectAccess. 
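The patchoat hunk above adopts Runtime::Current() into a std::unique_ptr so that every early error return tears the runtime down; a later hunk in the same file deliberately release()s it on the success path of non-debug builds so process exit skips the orderly shutdown, while debug and memory-tool builds keep the destructor so leak checkers stay quiet. A sketch of that adopt-then-conditionally-release idiom, with placeholder stand-ins for art's build flag and memory-tool predicate:

    #include <memory>

    struct Runtime {
      ~Runtime() { /* orderly shutdown would happen here */ }
    };

    constexpr bool kIsDebugBuild = false;          // Stand-in for art's build flag.
    // Models RUNNING_ON_MEMORY_TOOL && kMemoryToolDetectsLeaks.
    bool RunningOnLeakChecker() { return false; }

    bool Patch(Runtime* current) {
      std::unique_ptr<Runtime> runtime(current);   // Adopted: early `return false` paths destroy it.
      // ... patching work with many early-exit error paths ...
      if (!kIsDebugBuild && !RunningOnLeakChecker()) {
        runtime.release();                         // Success on release builds: let process exit reclaim it.
      }
      return true;                                 // Debug/leak-checking builds run the destructor.
    }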
Thread::Current()->TransitionFromRunnableToSuspended(kNative); @@ -286,6 +289,13 @@ bool PatchOat::Patch(const std::string& image_location, return false; } } + + if (!kIsDebugBuild && !(RUNNING_ON_MEMORY_TOOL && kMemoryToolDetectsLeaks)) { + // We want to just exit on non-debug builds, not bringing the runtime down + // in an orderly fashion. So release the following fields. + runtime.release(); + } + return true; } diff --git a/runtime/Android.bp b/runtime/Android.bp index cff2cbccdb..186996894e 100644 --- a/runtime/Android.bp +++ b/runtime/Android.bp @@ -57,6 +57,7 @@ cc_defaults { "dex_file_annotations.cc", "dex_file_verifier.cc", "dex_instruction.cc", + "dex_to_dex_decompiler.cc", "elf_file.cc", "exec_utils.cc", "fault_handler.cc", diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc index a857976021..1a5e39f0f7 100644 --- a/runtime/arch/arch_test.cc +++ b/runtime/arch/arch_test.cc @@ -71,6 +71,11 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET } // namespace arm namespace arm64 { @@ -83,6 +88,11 @@ static constexpr size_t kFrameSizeSaveRefsAndArgs = FRAME_SIZE_SAVE_REFS_AND_ARG #undef FRAME_SIZE_SAVE_REFS_AND_ARGS static constexpr size_t kFrameSizeSaveEverything = FRAME_SIZE_SAVE_EVERYTHING; #undef FRAME_SIZE_SAVE_EVERYTHING +#undef BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET +#undef BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#undef BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET } // namespace arm64 namespace mips { diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h index c03bcae526..f1f1766ad4 100644 --- a/runtime/arch/arm/asm_support_arm.h +++ b/runtime/arch/arm/asm_support_arm.h @@ -24,6 +24,28 @@ #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 192 +// The offset from art_quick_read_barrier_mark_introspection to the array switch cases, +// i.e. art_quick_read_barrier_mark_introspection_arrays. +#define BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET 0x100 +// The offset from art_quick_read_barrier_mark_introspection to the GC root entrypoint, +// i.e. art_quick_read_barrier_mark_introspection_gc_roots. +#define BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET 0xc0 + +// The offset of the reference load LDR from the return address in LR for field loads. +#ifdef USE_HEAP_POISONING +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -8 +#else +#define BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET -4 +#endif +// The offset of the reference load LDR from the return address in LR for array loads. +#ifdef USE_HEAP_POISONING +#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -8 +#else +#define BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET -4 +#endif +// The offset of the reference load LDR from the return address in LR for GC root loads. 
+#define BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET -8 + // Flag for enabling R4 optimization in arm runtime // #define ARM_R4_SUSPEND_FLAG diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index de72d3a18f..6b7247773a 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -17,6 +17,7 @@ #include <math.h> #include <string.h> +#include "arch/arm/asm_support_arm.h" #include "entrypoints/jni/jni_entrypoints.h" #include "entrypoints/quick/quick_alloc_entrypoints.h" #include "entrypoints/quick/quick_default_externs.h" @@ -51,6 +52,10 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); +extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); + // Used by soft float. // Single-precision FP arithmetics. extern "C" float fmodf(float a, float b); // REM_FLOAT[_2ADDR] @@ -67,19 +72,35 @@ extern "C" int __aeabi_idivmod(int32_t, int32_t); // [DIV|REM]_INT[_2ADDR|_LIT8 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR] extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? 
art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + + // Check that array switch cases are at appropriate offsets from the introspection entrypoint. + // For the alignment check, strip the Thumb mode bit. + DCHECK_ALIGNED(reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection) - 1u, 256u); + intptr_t array_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_arrays) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_ARRAY_SWITCH_OFFSET, array_diff); + // Check that the GC root entrypoint is at appropriate offset from the introspection entrypoint. + intptr_t gc_roots_diff = + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection_gc_roots) - + reinterpret_cast<intptr_t>(art_quick_read_barrier_mark_introspection); + DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff); + // The register 12, i.e. IP, is reserved, so there is no art_quick_read_barrier_mark_reg12. + // We're using the entry to hold a pointer to the introspection entrypoint instead. + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -138,7 +159,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierMarkReg12 = nullptr; // Cannot use register 12 (IP) to pass arguments. qpoints->pReadBarrierMarkReg13 = nullptr; // Cannot use register 13 (SP) to pass arguments. qpoints->pReadBarrierMarkReg14 = nullptr; // Cannot use register 14 (LR) to pass arguments. diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 029de4680c..6be7537d61 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -872,7 +872,7 @@ ENTRY art_quick_aput_obj POISON_HEAP_REF r2 str r2, [r3, r1, lsl #2] ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET] - lsr r0, r0, #7 + lsr r0, r0, #CARD_TABLE_CARD_SHIFT strb r3, [r3, r0] blx lr .Ldo_aput_null: @@ -900,7 +900,7 @@ ENTRY art_quick_aput_obj POISON_HEAP_REF r2 str r2, [r3, r1, lsl #2] ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET] - lsr r0, r0, #7 + lsr r0, r0, #CARD_TABLE_CARD_SHIFT strb r3, [r3, r0] blx lr .Lthrow_array_store_exception: @@ -1681,8 +1681,8 @@ END art_quick_instrumentation_entry .extern artDeoptimize ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0 - mov r0, r9 @ Set up args. - blx artDeoptimize @ artDeoptimize(Thread*) + mov r0, r9 @ pass Thread::Current + blx artDeoptimize @ (Thread*) END art_quick_deoptimize /* @@ -1691,9 +1691,9 @@ END art_quick_deoptimize */ .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code - SETUP_SAVE_EVERYTHING_FRAME r0 - mov r0, r9 @ Set up args. 
- blx artDeoptimizeFromCompiledCode @ artDeoptimizeFromCompiledCode(Thread*) + SETUP_SAVE_EVERYTHING_FRAME r1 + mov r1, r9 @ pass Thread::Current + blx artDeoptimizeFromCompiledCode @ (DeoptimizationKind, Thread*) END art_quick_deoptimize_from_compiled_code /* @@ -2146,6 +2146,216 @@ READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11 +// Helper macros for Baker CC read barrier mark introspection (BRBMI). +.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register + \macro_for_register r0 + \macro_for_register r1 + \macro_for_register r2 + \macro_for_register r3 + \macro_for_reserved_register // R4 is reserved for the entrypoint address. + \macro_for_register r5 + \macro_for_register r6 + \macro_for_register r7 + \macro_for_register r8 + \macro_for_register r9 + \macro_for_register r10 + \macro_for_register r11 +.endm + +.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register + BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register + \macro_for_reserved_register // IP is reserved. + \macro_for_reserved_register // SP is reserved. + \macro_for_reserved_register // LR is reserved. + \macro_for_reserved_register // PC is reserved. +.endm + +.macro BRBMI_RETURN_SWITCH_CASE reg +.Lmark_introspection_return_switch_case_\reg: + mov \reg, ip + bx lr +.endm + +.macro BRBMI_BAD_RETURN_SWITCH_CASE +.Lmark_introspection_return_switch_case_bad: + BRBMI_BKPT_FILL_4B +.endm + +.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg + .byte (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2 +.endm + +.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET + .byte (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2 +.endm + +#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET +#error "Array and field introspection code sharing requires same LDR offset." +#endif +.macro BRBMI_ARRAY_LOAD index_reg + ldr ip, [ip, \index_reg, lsl #2] // 4 bytes. + b art_quick_read_barrier_mark_introspection // Should be 2 bytes, encoding T2. + .balign 8 // Add padding to 8 bytes. +.endm + +.macro BRBMI_BKPT_FILL_4B + bkpt 0 + bkpt 0 +.endm + +.macro BRBMI_BKPT_FILL_8B + BRBMI_BKPT_FILL_4B + BRBMI_BKPT_FILL_4B +.endm + +.macro BRBMI_SLOW_PATH ldr_offset + push {r0-r3, r7, lr} // Save return address and caller-save registers. + .cfi_adjust_cfa_offset 24 + .cfi_rel_offset r0, 0 + .cfi_rel_offset r1, 4 + .cfi_rel_offset r2, 8 + .cfi_rel_offset r3, 12 + .cfi_rel_offset r7, 16 + .cfi_rel_offset lr, 20 + + mov r0, ip // Pass the reference. + vpush {s0-s15} // save floating-point caller-save registers + .cfi_adjust_cfa_offset 64 + bl artReadBarrierMark // r0 <- artReadBarrierMark(obj) + vpop {s0-s15} // restore floating-point registers + .cfi_adjust_cfa_offset -64 + mov ip, r0 // Move reference to ip in preparation for return switch. + + pop {r0-r3, r7, lr} // Restore registers. + .cfi_adjust_cfa_offset -24 + .cfi_restore r0 + .cfi_restore r1 + .cfi_restore r2 + .cfi_restore r3 + .cfi_restore r7 + .cfi_restore lr + + // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. + ldrh r4, [lr, #(-1 + \ldr_offset + 2)] + lsr r4, r4, #12 // Extract `ref_reg`. 
+ b .Lmark_introspection_return_switch +.endm + + /* + * Use introspection to load a reference from the same address as the LDR + * instruction in generated code would load (unless loaded by the thunk, + * see below), call ReadBarrier::Mark() with that reference if needed + * and return it in the same register as the LDR instruction would load. + * + * The entrypoint is called through a thunk that differs across load kinds. + * For field and array loads the LDR instruction in generated code follows + * the branch to the thunk, i.e. the LDR is at [LR, #(-4 - 1)] where the -1 + * is an adjustment for the Thumb mode bit in LR, and the thunk knows the + * holder and performs the gray bit check, returning to the LDR instruction + * if the object is not gray, so this entrypoint no longer needs to know + * anything about the holder. For GC root loads, the LDR instruction in + * generated code precedes the branch to the thunk, i.e. the LDR is at + * [LR, #(-8 - 1)] where the -1 is again the Thumb mode bit adjustment, and + * the thunk does not do the gray bit check. + * + * For field accesses and array loads with a constant index the thunk loads + * the reference into IP using introspection and calls the main entrypoint, + * art_quick_read_barrier_mark_introspection. With heap poisoning enabled, + * the passed reference is poisoned. + * + * For array accesses with non-constant index, the thunk inserts the bits + * 0-5 of the LDR instruction to the entrypoint address, effectively + * calculating a switch case label based on the index register (bits 0-3) + * and adding an extra offset (bits 4-5 hold the shift which is always 2 + * for reference loads) to differentiate from the main entrypoint, then + * moves the base register to IP and jumps to the switch case. Therefore + * we need to align the main entrypoint to 512 bytes, accounting for + * a 256-byte offset followed by 16 array entrypoints starting at + * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR + * (register) and a branch to the main entrypoint. + * + * For GC root accesses we cannot use the main entrypoint because of the + * different offset where the LDR instruction in generated code is located. + * (And even with heap poisoning enabled, GC roots are not poisoned.) + * To re-use the same entrypoint pointer in generated code, we make sure + * that the gc root entrypoint (a copy of the entrypoint with a different + * offset for introspection loads) is located at a known offset (128 bytes, + * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main + * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves + * the root register to IP and jumps to the customized entrypoint, + * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also + * performs all the fast-path checks, so we need just the slow path. + * + * The code structure is + * art_quick_read_barrier_mark_introspection: + * Over 128 bytes for the main entrypoint code. + * Padding to 192 bytes if needed. + * art_quick_read_barrier_mark_introspection_gc_roots: + * GC root entrypoint code. + * Padding to 256 bytes if needed. + * art_quick_read_barrier_mark_introspection_arrays: + * Exactly 128 bytes for array load switch cases (16x2 instructions). + */ + .balign 512 +ENTRY art_quick_read_barrier_mark_introspection + // At this point, IP contains the reference, R4 can be freely used. + // (R4 is reserved for the entrypoint address.) + // For heap poisoning, the reference is poisoned, so unpoison it first. 
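Before the entry code continuing below, it may help to model its lock-word fast path in C++: return the reference untouched if the mark bit is set, decode a forwarding address if the top two state bits are both one, otherwise fall through to the slow path. The state layout (state in the top two bits, forwarding state all-ones) is pinned by the #error guard in the assembly; the mark-bit position and the forwarding shift below are placeholders for LOCK_WORD_MARK_BIT_MASK_SHIFTED and LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT, whose values are not shown in this diff.

    #include <cstdint>

    constexpr uint32_t kStateShift = 30;                // Pinned by the asm's #if guard.
    constexpr uint32_t kForwardingAddressState = 3;     // Both state bits set.
    constexpr uint32_t kMarkBitMaskShifted = 1u << 29;  // Placeholder bit position.
    constexpr uint32_t kForwardingAddressShift = 3;     // Placeholder shift amount.

    enum class Action { kReturnAsIs, kReturnForwarded, kSlowPath };

    Action ClassifyLockWord(uint32_t lock_word, uintptr_t* forwarded_out) {
      if ((lock_word & kMarkBitMaskShifted) != 0) {
        return Action::kReturnAsIs;       // Mark bit set: reference is already marked.
      }
      if ((lock_word >> kStateShift) == kForwardingAddressState) {
        // Shift in 32 bits so the two state bits fall off the top, rebuilding the
        // address: the same trick as `lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT`.
        *forwarded_out = static_cast<uintptr_t>(lock_word << kForwardingAddressShift);
        return Action::kReturnForwarded;
      }
      return Action::kSlowPath;           // Unmarked object: call artReadBarrierMark.
    }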
+ UNPOISON_HEAP_REF ip + // If reference is null, just return it in the right register. + cmp ip, #0 + beq .Lmark_introspection_return + // Use R4 as temp and check the mark bit of the reference. + ldr r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET] + tst r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED + beq .Lmark_introspection_unmarked +.Lmark_introspection_return: + // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR. + ldrh r4, [lr, #(-1 + BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + 2)] + lsr r4, r4, #12 // Extract `ref_reg`. +.Lmark_introspection_return_switch: + tbb [pc, r4] // Jump to the switch case. +.Lmark_introspection_return_table: + BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET + .balign 16 + BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE + + .balign 16 +.Lmark_introspection_unmarked: + // Check if the top two bits are one, if this is the case it is a forwarding address. +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in + // the highest bits and the "forwarding address" state to have all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + cmp r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT) + bhs .Lmark_introspection_forwarding_address + BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET + + .balign 8 +.Lmark_introspection_forwarding_address: + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + lsl ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + b .Lmark_introspection_return + + .balign 64 + .thumb_func + .type art_quick_read_barrier_mark_introspection_gc_roots, #function + .hidden art_quick_read_barrier_mark_introspection_gc_roots + .global art_quick_read_barrier_mark_introspection_gc_roots +art_quick_read_barrier_mark_introspection_gc_roots: + BRBMI_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET + + .balign 256 + .thumb_func + .type art_quick_read_barrier_mark_introspection_arrays, #function + .hidden art_quick_read_barrier_mark_introspection_arrays + .global art_quick_read_barrier_mark_introspection_arrays +art_quick_read_barrier_mark_introspection_arrays: + BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B +END art_quick_read_barrier_mark_introspection + .extern artInvokePolymorphic ENTRY art_quick_invoke_polymorphic SETUP_SAVE_REFS_AND_ARGS_FRAME r2 diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index bc7bcb1739..610cdee683 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -75,7 +75,7 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_introspection(mirror::Obj extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_arrays(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_mark_introspection_gc_roots(mirror::Object*); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { // ARM64 is the architecture with the largest number of core // registers (32) that supports the read barrier configuration. 
// Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass @@ -85,35 +85,35 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { // have less core registers (resp. 16, 8 and 16). (We may have to // revise that design choice if read barrier support is added for // MIPS and/or MIPS64.) - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; - qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr; - qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr; - qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr; - qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr; - qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr; - qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr; - qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr; - qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr; - qpoints->pReadBarrierMarkReg23 = is_marking ? art_quick_read_barrier_mark_reg23 : nullptr; - qpoints->pReadBarrierMarkReg24 = is_marking ? art_quick_read_barrier_mark_reg24 : nullptr; - qpoints->pReadBarrierMarkReg25 = is_marking ? art_quick_read_barrier_mark_reg25 : nullptr; - qpoints->pReadBarrierMarkReg26 = is_marking ? art_quick_read_barrier_mark_reg26 : nullptr; - qpoints->pReadBarrierMarkReg27 = is_marking ? art_quick_read_barrier_mark_reg27 : nullptr; - qpoints->pReadBarrierMarkReg28 = is_marking ? art_quick_read_barrier_mark_reg28 : nullptr; - qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr; + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? 
art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr; + qpoints->pReadBarrierMarkReg15 = is_active ? art_quick_read_barrier_mark_reg15 : nullptr; + qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr; + qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr; + qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr; + qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr; + qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr; + qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr; + qpoints->pReadBarrierMarkReg23 = is_active ? art_quick_read_barrier_mark_reg23 : nullptr; + qpoints->pReadBarrierMarkReg24 = is_active ? art_quick_read_barrier_mark_reg24 : nullptr; + qpoints->pReadBarrierMarkReg25 = is_active ? art_quick_read_barrier_mark_reg25 : nullptr; + qpoints->pReadBarrierMarkReg26 = is_active ? art_quick_read_barrier_mark_reg26 : nullptr; + qpoints->pReadBarrierMarkReg27 = is_active ? art_quick_read_barrier_mark_reg27 : nullptr; + qpoints->pReadBarrierMarkReg28 = is_active ? art_quick_read_barrier_mark_reg28 : nullptr; + qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr; // Check that array switch cases are at appropriate offsets from the introspection entrypoint. DCHECK_ALIGNED(art_quick_read_barrier_mark_introspection, 512u); @@ -128,7 +128,7 @@ void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { DCHECK_EQ(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET, gc_roots_diff); // The register 16, i.e. IP0, is reserved, so there is no art_quick_read_barrier_mark_reg16. // We're using the entry to hold a pointer to the introspection entrypoint instead. - qpoints->pReadBarrierMarkReg16 = is_marking ? art_quick_read_barrier_mark_introspection : nullptr; + qpoints->pReadBarrierMarkReg16 = is_active ? art_quick_read_barrier_mark_introspection : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -188,7 +188,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; qpoints->pReadBarrierMarkReg16 = nullptr; // IP0 is used as a temp by the asm stub. 
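The DCHECK block in the hunk above enforces the layout contract the thunks rely on: the array switch cases and the GC-root variant must sit at fixed, compile-time-known offsets from the 512-byte-aligned main introspection entrypoint, so generated code can derive their addresses arithmetically from one pointer. A hedged sketch of such a startup check; 0x100 and 0xc0 are the arm values from asm_support_arm.h earlier in this diff, used here only as illustrative placeholders since the arm64 macros' values are not shown.

    #include <cassert>
    #include <cstdint>

    constexpr intptr_t kArraySwitchOffset = 0x100;      // Placeholder for *_ARRAY_SWITCH_OFFSET.
    constexpr intptr_t kGcRootEntrypointOffset = 0xc0;  // Placeholder for *_GC_ROOT_ENTRYPOINT_OFFSET.

    void CheckIntrospectionLayout(const void* main_entry,
                                  const void* arrays_entry,
                                  const void* gc_roots_entry) {
      const intptr_t base = reinterpret_cast<intptr_t>(main_entry);
      assert(base % 512 == 0);  // The main entrypoint must be 512-byte aligned.
      assert(reinterpret_cast<intptr_t>(arrays_entry) - base == kArraySwitchOffset);
      assert(reinterpret_cast<intptr_t>(gc_roots_entry) - base == kGcRootEntrypointOffset);
    }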
- UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierSlow = artReadBarrierSlow; qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; }; diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index d043962b96..18015b572e 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -1416,7 +1416,7 @@ ENTRY art_quick_aput_obj POISON_HEAP_REF w2 str w2, [x3, x1, lsl #2] // Heap reference = 32b ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET] - lsr x0, x0, #7 + lsr x0, x0, #CARD_TABLE_CARD_SHIFT strb w3, [x3, x0] ret .Ldo_aput_null: @@ -1447,7 +1447,7 @@ ENTRY art_quick_aput_obj POISON_HEAP_REF w2 str w2, [x3, x1, lsl #2] // Heap reference = 32b ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET] - lsr x0, x0, #7 + lsr x0, x0, #CARD_TABLE_CARD_SHIFT strb w3, [x3, x0] ret .cfi_restore_state // Reset unwind info so following code unwinds. @@ -2219,7 +2219,7 @@ END art_quick_instrumentation_exit ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME mov x0, xSELF // Pass thread. - bl artDeoptimize // artDeoptimize(Thread*) + bl artDeoptimize // (Thread*) brk 0 END art_quick_deoptimize @@ -2230,8 +2230,8 @@ END art_quick_deoptimize .extern artDeoptimizeFromCompiledCode ENTRY art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME - mov x0, xSELF // Pass thread. - bl artDeoptimizeFromCompiledCode // artDeoptimizeFromCompiledCode(Thread*) + mov x1, xSELF // Pass thread. + bl artDeoptimizeFromCompiledCode // (DeoptimizationKind, Thread*) brk 0 END art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/context-inl.h b/runtime/arch/context-inl.h new file mode 100644 index 0000000000..ddcbbb18e5 --- /dev/null +++ b/runtime/arch/context-inl.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This file is special-purpose for cases where you want a stack context. Most users should use +// Context::Create(). 
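Stepping back to the art_quick_aput_obj hunks above (arm and arm64 so far, mips below): each replaces the literal shift #7 with the symbolic CARD_TABLE_CARD_SHIFT, leaving the card-marking sequence itself unchanged — load the thread's card table base, shift the object address down, and store a byte at table[addr >> shift]. A C++ model of that sequence, assuming the 128-byte cards implied by the literal #7 being replaced:

    #include <cstdint>

    constexpr uintptr_t kCardShift = 7;  // CARD_TABLE_CARD_SHIFT stood in for the literal #7.

    inline void MarkCard(uint8_t* biased_card_table, uintptr_t obj) {
      // The assembly stores the low byte of the table pointer itself as the
      // "dirty" value (strb r3, [r3, r0]); any non-clean byte would do, and this
      // choice avoids loading a separate constant into a register.
      biased_card_table[obj >> kCardShift] =
          static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_card_table));
    }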
+ +#include "context.h" + +#ifndef ART_RUNTIME_ARCH_CONTEXT_INL_H_ +#define ART_RUNTIME_ARCH_CONTEXT_INL_H_ + +#if defined(__arm__) +#include "arm/context_arm.h" +#define RUNTIME_CONTEXT_TYPE arm::ArmContext +#elif defined(__aarch64__) +#include "arm64/context_arm64.h" +#define RUNTIME_CONTEXT_TYPE arm64::Arm64Context +#elif defined(__mips__) && !defined(__LP64__) +#include "mips/context_mips.h" +#define RUNTIME_CONTEXT_TYPE mips::MipsContext +#elif defined(__mips__) && defined(__LP64__) +#include "mips64/context_mips64.h" +#define RUNTIME_CONTEXT_TYPE mips64::Mips64Context +#elif defined(__i386__) +#include "x86/context_x86.h" +#define RUNTIME_CONTEXT_TYPE x86::X86Context +#elif defined(__x86_64__) +#include "x86_64/context_x86_64.h" +#define RUNTIME_CONTEXT_TYPE x86_64::X86_64Context +#else +#error unimplemented +#endif + +namespace art { + +using RuntimeContextType = RUNTIME_CONTEXT_TYPE; + +} // namespace art + +#undef RUNTIME_CONTEXT_TYPE + +#endif // ART_RUNTIME_ARCH_CONTEXT_INL_H_ diff --git a/runtime/arch/context.cc b/runtime/arch/context.cc index bf40a3f8ce..82d8b6ca00 100644 --- a/runtime/arch/context.cc +++ b/runtime/arch/context.cc @@ -14,43 +14,12 @@ * limitations under the License. */ -#include "context.h" - -#if defined(__arm__) -#include "arm/context_arm.h" -#elif defined(__aarch64__) -#include "arm64/context_arm64.h" -#elif defined(__mips__) && !defined(__LP64__) -#include "mips/context_mips.h" -#elif defined(__mips__) && defined(__LP64__) -#include "mips64/context_mips64.h" -#elif defined(__i386__) -#include "x86/context_x86.h" -#elif defined(__x86_64__) -#include "x86_64/context_x86_64.h" -#else -#include "base/logging.h" -#endif +#include "context-inl.h" namespace art { Context* Context::Create() { -#if defined(__arm__) - return new arm::ArmContext(); -#elif defined(__aarch64__) - return new arm64::Arm64Context(); -#elif defined(__mips__) && !defined(__LP64__) - return new mips::MipsContext(); -#elif defined(__mips__) && defined(__LP64__) - return new mips64::Mips64Context(); -#elif defined(__i386__) - return new x86::X86Context(); -#elif defined(__x86_64__) - return new x86_64::X86_64Context(); -#else - UNIMPLEMENTED(FATAL); - return nullptr; -#endif + return new RuntimeContextType; } } // namespace art diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 434e33c42a..9978da5f74 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -86,68 +86,68 @@ extern "C" double fmod(double a, double b); // REM_DOUBLE[_2ADDR] extern "C" int64_t __divdi3(int64_t, int64_t); extern "C" int64_t __moddi3(int64_t, int64_t); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? 
art_quick_read_barrier_mark_reg03 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr; + qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr; + qpoints->pReadBarrierMarkReg17 = is_active ? 
art_quick_read_barrier_mark_reg17 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr; + qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr; + qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr; + qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr; + qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr; + qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr; + qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29), "Non-direct C stub marked direct."); } @@ -160,7 +160,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub; // Alloc - ResetQuickAllocEntryPoints(qpoints, /*is_marking*/ false); + ResetQuickAllocEntryPoints(qpoints, /*is_active*/ false); // Cast qpoints->pInstanceofNonTrivial = artInstanceOfFromCode; @@ -412,7 +412,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct."); - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); // Cannot use the following registers to pass arguments: // 0(ZERO), 1(AT), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA). // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8). 
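Across arm, arm64, mips, and mips64 above, the is_marking parameter of UpdateReadBarrierEntrypoints becomes is_active, but the mechanism is the same: each per-register mark slot either points at its stub or is nulled while the barrier is inactive, and the slots are only consulted while it is active. A minimal model of that toggle; the types and stub bodies are illustrative stand-ins, not art's definitions.

    struct Obj {};
    using MarkFn = Obj* (*)(Obj*);

    // Stand-ins: the real stubs jump into the GC's mark routine.
    Obj* MarkReg01(Obj* ref) { return ref; }
    Obj* MarkReg02(Obj* ref) { return ref; }

    struct QuickEntryPoints {
      MarkFn pReadBarrierMarkReg01;
      MarkFn pReadBarrierMarkReg02;
    };

    void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) {
      // Inactive barrier: the slots are nulled and must not be called.
      qpoints->pReadBarrierMarkReg01 = is_active ? MarkReg01 : nullptr;
      qpoints->pReadBarrierMarkReg02 = is_active ? MarkReg02 : nullptr;
    }

The rename is apt: the entrypoints track whether the read barrier is active, which is what compiled code actually needs, rather than the collector's marking phase per se.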
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 722a67908f..e628a9f40d 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -421,7 +421,7 @@ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP .endm -.macro RESTORE_SAVE_EVERYTHING_FRAME +.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1 addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack .cfi_adjust_cfa_offset -ARG_SLOT_SIZE @@ -490,8 +490,10 @@ .cfi_restore 6 lw $a1, 160($sp) .cfi_restore 5 + .if \restore_a0 lw $a0, 156($sp) .cfi_restore 4 + .endif lw $v1, 152($sp) .cfi_restore 3 lw $v0, 148($sp) @@ -507,16 +509,26 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. + * Requires $gp properly set up. */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY la $t9, artDeliverPendingExceptionFromCode jalr $zero, $t9 # artDeliverPendingExceptionFromCode(Thread*) move $a0, rSELF # pass Thread::Current .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. + * Requires $gp properly set up. + */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro RETURN_IF_NO_EXCEPTION lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ RESTORE_SAVE_REFS_ONLY_FRAME @@ -1406,7 +1418,7 @@ ENTRY art_quick_aput_obj POISON_HEAP_REF $a2 sw $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0) lw $t0, THREAD_CARD_TABLE_OFFSET(rSELF) - srl $t1, $a0, 7 + srl $t1, $a0, CARD_TABLE_CARD_SHIFT add $t1, $t1, $t0 sb $t0, ($t1) jalr $zero, $ra @@ -1660,30 +1672,51 @@ ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, art GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// Macro for string and type resolution and initialization. +// $a0 is both input and output. +.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint + .extern \entrypoint +ENTRY_NO_GP \name + SETUP_SAVE_EVERYTHING_FRAME # Save everything in case of GC. + move $s2, $gp # Preserve $gp across the call for exception delivery. + la $t9, \entrypoint + jalr $t9 # (uint32_t index, Thread*) + move $a1, rSELF # Pass Thread::Current (in delay slot). + beqz $v0, 1f # Success? + move $a0, $v0 # Move result to $a0 (in delay slot). + RESTORE_SAVE_EVERYTHING_FRAME 0 # Restore everything except $a0. + jalr $zero, $ra # Return on success. + nop +1: + move $gp, $s2 + DELIVER_PENDING_EXCEPTION_FRAME_READY +END \name +.endm + /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an * exception on error. On success the String is returned. A0 holds the string index. The fast * path check for hit in strings cache has already been performed. 
*/ -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode /* * Entry from managed code when uninitialized static storage, this stub will run the class * initializer and deliver the exception on error. On success the static storage base is * returned. */ -ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode /* * Entry from managed code when dex cache misses for a type_idx. */ -ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode /* * Entry from managed code when type_idx needs to be checked for access and dex cache may also * miss. */ -ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode /* * Called by managed code when the value in rSUSPEND has been decremented to 0. @@ -1854,7 +1887,8 @@ ENTRY art_quick_generic_jni_trampoline nop 2: - lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) + lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) + move $gp, $s3 # restore $gp from $s3 # This will create a new save-all frame, required by the runtime. DELIVER_PENDING_EXCEPTION END art_quick_generic_jni_trampoline @@ -1944,8 +1978,7 @@ END art_quick_instrumentation_exit ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME la $t9, artDeoptimize - jalr $t9 # artDeoptimize(Thread*) - # Returns caller method's frame size. + jalr $t9 # (Thread*) move $a0, rSELF # pass Thread::current END art_quick_deoptimize @@ -1957,9 +1990,8 @@ END art_quick_deoptimize ENTRY art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME la $t9, artDeoptimizeFromCompiledCode - jalr $t9 # artDeoptimizeFromCompiledCode(Thread*) - # Returns caller method's frame size. - move $a0, rSELF # pass Thread::current + jalr $t9 # (DeoptimizationKind, Thread*) + move $a1, rSELF # pass Thread::current END art_quick_deoptimize_from_compiled_code /* @@ -2213,8 +2245,32 @@ END art_quick_string_compareto */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name - /* TODO: optimizations: mark bit, forwarding. */ - addiu $sp, $sp, -160 # includes 16 bytes of space for argument registers a0-a3 + // Null check so that we can load the lock word. + bnez \reg, .Lnot_null_\name + nop +.Lret_rb_\name: + jalr $zero, $ra + nop +.Lnot_null_\name: + // Check lock word for mark bit, if marked return. + lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg) + .set push + .set noat + sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit. + bltz $at, .Lret_rb_\name +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // The below code depends on the lock word state being in the highest bits + // and the "forwarding address" state having all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + // Test that both the forwarding state bits are 1. + sll $at, $t9, 1 + and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1. 
+ bltz $at, .Lret_forwarding_address\name + nop + .set pop + + addiu $sp, $sp, -160 # Includes 16 bytes of space for argument registers a0-a3. .cfi_adjust_cfa_offset 160 sw $ra, 156($sp) @@ -2319,6 +2375,12 @@ ENTRY \name jalr $zero, $ra addiu $sp, $sp, 160 .cfi_adjust_cfa_offset -160 + +.Lret_forwarding_address\name: + jalr $zero, $ra + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + sll \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT END \name .endm diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index f8242ae1b5..763d93eb47 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -86,27 +86,27 @@ extern "C" int64_t __divdi3(int64_t, int64_t); extern "C" int64_t __moddi3(int64_t, int64_t); // No read barrier entrypoints for marking registers. -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; - qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr; - qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr; - qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr; - qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr; - qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr; - qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr; - qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg04 = is_active ? art_quick_read_barrier_mark_reg04 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? 
art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg17 = is_active ? art_quick_read_barrier_mark_reg17 : nullptr; + qpoints->pReadBarrierMarkReg18 = is_active ? art_quick_read_barrier_mark_reg18 : nullptr; + qpoints->pReadBarrierMarkReg19 = is_active ? art_quick_read_barrier_mark_reg19 : nullptr; + qpoints->pReadBarrierMarkReg20 = is_active ? art_quick_read_barrier_mark_reg20 : nullptr; + qpoints->pReadBarrierMarkReg21 = is_active ? art_quick_read_barrier_mark_reg21 : nullptr; + qpoints->pReadBarrierMarkReg22 = is_active ? art_quick_read_barrier_mark_reg22 : nullptr; + qpoints->pReadBarrierMarkReg29 = is_active ? art_quick_read_barrier_mark_reg29 : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -168,7 +168,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); // Cannot use the following registers to pass arguments: // 0(ZERO), 1(AT), 15(T3), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA). // Note that there are 30 entry points only: 00 for register 1(AT), ..., 29 for register 30(S8). diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S index 9402232996..40bad16291 100644 --- a/runtime/arch/mips64/quick_entrypoints_mips64.S +++ b/runtime/arch/mips64/quick_entrypoints_mips64.S @@ -447,7 +447,7 @@ SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP .endm -.macro RESTORE_SAVE_EVERYTHING_FRAME +.macro RESTORE_SAVE_EVERYTHING_FRAME restore_a0=1 // Restore FP registers. l.d $f31, 264($sp) l.d $f30, 256($sp) @@ -530,8 +530,10 @@ .cfi_restore 6 ld $a1, 304($sp) .cfi_restore 5 + .if \restore_a0 ld $a0, 296($sp) .cfi_restore 4 + .endif ld $v1, 288($sp) .cfi_restore 3 ld $v0, 280($sp) @@ -547,18 +549,26 @@ .endm /* - * Macro that set calls through to artDeliverPendingExceptionFromCode, - * where the pending - * exception is Thread::Current()->exception_ + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_ when the runtime method frame is ready. + * Requires $gp properly set up. */ -.macro DELIVER_PENDING_EXCEPTION - SETUP_GP - SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw +.macro DELIVER_PENDING_EXCEPTION_FRAME_READY dla $t9, artDeliverPendingExceptionFromCode jalr $zero, $t9 # artDeliverPendingExceptionFromCode(Thread*) move $a0, rSELF # pass Thread::Current .endm + /* + * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending + * exception is Thread::Current()->exception_. 
+ */ +.macro DELIVER_PENDING_EXCEPTION + SETUP_GP + SETUP_SAVE_ALL_CALLEE_SAVES_FRAME # save callee saves for throw + DELIVER_PENDING_EXCEPTION_FRAME_READY +.endm + .macro RETURN_IF_NO_EXCEPTION ld $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ RESTORE_SAVE_REFS_ONLY_FRAME @@ -1374,7 +1384,7 @@ ENTRY art_quick_aput_obj POISON_HEAP_REF $a2 sw $a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET($t0) ld $t0, THREAD_CARD_TABLE_OFFSET(rSELF) - dsrl $t1, $a0, 7 + dsrl $t1, $a0, CARD_TABLE_CARD_SHIFT daddu $t1, $t1, $t0 sb $t0, ($t1) jalr $zero, $ra @@ -1615,30 +1625,48 @@ ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, art GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) +// Macro for string and type resolution and initialization. +// $a0 is both input and output. +.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint + .extern \entrypoint +ENTRY_NO_GP \name + SETUP_SAVE_EVERYTHING_FRAME # Save everything in case of GC. + dla $t9, \entrypoint + jalr $t9 # (uint32_t index, Thread*) + move $a1, rSELF # Pass Thread::Current (in delay slot). + beqz $v0, 1f # Success? + move $a0, $v0 # Move result to $a0 (in delay slot). + RESTORE_SAVE_EVERYTHING_FRAME 0 # Restore everything except $a0. + jic $ra, 0 # Return on success. +1: + DELIVER_PENDING_EXCEPTION_FRAME_READY +END \name +.endm + /* * Entry from managed code to resolve a string, this stub will allocate a String and deliver an * exception on error. On success the String is returned. A0 holds the string index. The fast * path check for hit in strings cache has already been performed. */ -ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode /* * Entry from managed code when uninitialized static storage, this stub will run the class * initializer and deliver the exception on error. On success the static storage base is * returned. */ -ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode /* * Entry from managed code when dex cache misses for a type_idx. */ -ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode /* * Entry from managed code when type_idx needs to be checked for access and dex cache may also * miss. */ -ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER +ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode /* * Called by managed code when the value in rSUSPEND has been decremented to 0. @@ -1885,8 +1913,7 @@ END art_quick_instrumentation_exit .extern artEnterInterpreterFromDeoptimize ENTRY art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME - jal artDeoptimize # artDeoptimize(Thread*, SP) - # Returns caller method's frame size. 
+ jal artDeoptimize # artDeoptimize(Thread*) move $a0, rSELF # pass Thread::current END art_quick_deoptimize @@ -1897,9 +1924,8 @@ END art_quick_deoptimize .extern artDeoptimizeFromCompiledCode ENTRY_NO_GP art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME - jal artDeoptimizeFromCompiledCode # artDeoptimizeFromCompiledCode(Thread*, SP) - # Returns caller method's frame size. - move $a0, rSELF # pass Thread::current + jal artDeoptimizeFromCompiledCode # (DeoptimizationKind, Thread*) + move $a1, rSELF # pass Thread::current END art_quick_deoptimize_from_compiled_code .set push @@ -2067,7 +2093,29 @@ END art_quick_indexof */ .macro READ_BARRIER_MARK_REG name, reg ENTRY \name - /* TODO: optimizations: mark bit, forwarding. */ + // Null check so that we can load the lock word. + bnezc \reg, .Lnot_null_\name + nop +.Lret_rb_\name: + jic $ra, 0 +.Lnot_null_\name: + // Check lock word for mark bit, if marked return. + lw $t9, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg) + .set push + .set noat + sll $at, $t9, 31 - LOCK_WORD_MARK_BIT_SHIFT # Move mark bit to sign bit. + bltzc $at, .Lret_rb_\name +#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3) + // The below code depends on the lock word state being in the highest bits + // and the "forwarding address" state having all bits set. +#error "Unexpected lock word state shift or forwarding address state value." +#endif + // Test that both the forwarding state bits are 1. + sll $at, $t9, 1 + and $at, $at, $t9 # Sign bit = 1 IFF both bits are 1. + bltzc $at, .Lret_forwarding_address\name + .set pop + daddiu $sp, $sp, -320 .cfi_adjust_cfa_offset 320 @@ -2202,6 +2250,13 @@ ENTRY \name jalr $zero, $ra daddiu $sp, $sp, 320 .cfi_adjust_cfa_offset -320 + +.Lret_forwarding_address\name: + // Shift left by the forwarding address shift. This clears out the state bits since they are + // in the top 2 bits of the lock word. + sll \reg, $t9, LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + jalr $zero, $ra + dext \reg, \reg, 0, 32 # Make sure the address is zero-extended. END \name .endm diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 9cd4a3ee3b..102faf19d4 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -44,14 +44,14 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t); extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? 
art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -97,7 +97,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierMarkReg04 = nullptr; // Cannot use register 4 (ESP) to pass arguments. // x86 has only 8 core registers. qpoints->pReadBarrierMarkReg08 = nullptr; diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 6c0bcc9d88..2222f5cc3b 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1526,7 +1526,7 @@ DEFINE_FUNCTION art_quick_aput_obj POISON_HEAP_REF edx movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4) movl %fs:THREAD_CARD_TABLE_OFFSET, %edx - shrl LITERAL(7), %eax + shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax movb %dl, (%edx, %eax) ret .Ldo_aput_null: @@ -1567,7 +1567,7 @@ DEFINE_FUNCTION art_quick_aput_obj POISON_HEAP_REF edx movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%eax, %ecx, 4) // do the aput movl %fs:THREAD_CARD_TABLE_OFFSET, %edx - shrl LITERAL(7), %eax + shrl LITERAL(CARD_TABLE_CARD_SHIFT), %eax movb %dl, (%edx, %eax) ret CFI_ADJUST_CFA_OFFSET(12) // 3 POP after the jz for unwinding. @@ -2028,7 +2028,7 @@ DEFINE_FUNCTION art_quick_deoptimize CFI_ADJUST_CFA_OFFSET(12) pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). CFI_ADJUST_CFA_OFFSET(4) - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + call SYMBOL(artDeoptimize) // (Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize @@ -2038,11 +2038,12 @@ END_FUNCTION art_quick_deoptimize */ DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME ebx, ebx - subl LITERAL(12), %esp // Align stack. - CFI_ADJUST_CFA_OFFSET(12) + subl LITERAL(8), %esp // Align stack. + CFI_ADJUST_CFA_OFFSET(8) pushl %fs:THREAD_SELF_OFFSET // Pass Thread::Current(). CFI_ADJUST_CFA_OFFSET(4) - call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*) + PUSH eax + call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize_from_compiled_code diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index a326b4eebc..1e56e8a087 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -55,22 +55,22 @@ extern "C" mirror::Object* art_quick_read_barrier_mark_reg15(mirror::Object*); extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t); extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*); -void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) { - qpoints->pReadBarrierMarkReg00 = is_marking ? 
art_quick_read_barrier_mark_reg00 : nullptr; - qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr; - qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr; - qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr; - qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr; - qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr; - qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr; - qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr; - qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr; - qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr; - qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr; - qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr; - qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr; - qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr; - qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr; +void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active) { + qpoints->pReadBarrierMarkReg00 = is_active ? art_quick_read_barrier_mark_reg00 : nullptr; + qpoints->pReadBarrierMarkReg01 = is_active ? art_quick_read_barrier_mark_reg01 : nullptr; + qpoints->pReadBarrierMarkReg02 = is_active ? art_quick_read_barrier_mark_reg02 : nullptr; + qpoints->pReadBarrierMarkReg03 = is_active ? art_quick_read_barrier_mark_reg03 : nullptr; + qpoints->pReadBarrierMarkReg05 = is_active ? art_quick_read_barrier_mark_reg05 : nullptr; + qpoints->pReadBarrierMarkReg06 = is_active ? art_quick_read_barrier_mark_reg06 : nullptr; + qpoints->pReadBarrierMarkReg07 = is_active ? art_quick_read_barrier_mark_reg07 : nullptr; + qpoints->pReadBarrierMarkReg08 = is_active ? art_quick_read_barrier_mark_reg08 : nullptr; + qpoints->pReadBarrierMarkReg09 = is_active ? art_quick_read_barrier_mark_reg09 : nullptr; + qpoints->pReadBarrierMarkReg10 = is_active ? art_quick_read_barrier_mark_reg10 : nullptr; + qpoints->pReadBarrierMarkReg11 = is_active ? art_quick_read_barrier_mark_reg11 : nullptr; + qpoints->pReadBarrierMarkReg12 = is_active ? art_quick_read_barrier_mark_reg12 : nullptr; + qpoints->pReadBarrierMarkReg13 = is_active ? art_quick_read_barrier_mark_reg13 : nullptr; + qpoints->pReadBarrierMarkReg14 = is_active ? art_quick_read_barrier_mark_reg14 : nullptr; + qpoints->pReadBarrierMarkReg15 = is_active ? art_quick_read_barrier_mark_reg15 : nullptr; } void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { @@ -119,7 +119,7 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; - UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false); + UpdateReadBarrierEntrypoints(qpoints, /*is_active*/ false); qpoints->pReadBarrierMarkReg04 = nullptr; // Cannot use register 4 (RSP) to pass arguments. // x86-64 has only 16 core registers. 
qpoints->pReadBarrierMarkReg16 = nullptr; diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 8e2acab3eb..41651d8f1a 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1504,8 +1504,8 @@ DEFINE_FUNCTION art_quick_aput_obj movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4) // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4) movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx - shrl LITERAL(7), %edi -// shrl LITERAL(7), %rdi + shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi +// shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi movb %dl, (%rdx, %rdi) // Note: this assumes that top 32b of %rdi are zero ret .Ldo_aput_null: @@ -1545,8 +1545,8 @@ DEFINE_FUNCTION art_quick_aput_obj movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4) // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4) movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx - shrl LITERAL(7), %edi -// shrl LITERAL(7), %rdi + shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi +// shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi movb %dl, (%rdx, %rdi) // Note: this assumes that top 32b of %rdi are zero // movb %dl, (%rdx, %rdi) ret @@ -1983,7 +1983,7 @@ DEFINE_FUNCTION art_quick_deoptimize SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // Stack should be aligned now. movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimize) // artDeoptimize(Thread*) + call SYMBOL(artDeoptimize) // (Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize @@ -1994,8 +1994,8 @@ END_FUNCTION art_quick_deoptimize DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code SETUP_SAVE_EVERYTHING_FRAME // Stack should be aligned now. - movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread. - call SYMBOL(artDeoptimizeFromCompiledCode) // artDeoptimizeFromCompiledCode(Thread*) + movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread. + call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*) UNREACHABLE END_FUNCTION art_quick_deoptimize_from_compiled_code diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc index 935fd81115..136ed12362 100644 --- a/runtime/base/arena_allocator.cc +++ b/runtime/base/arena_allocator.cc @@ -33,6 +33,7 @@ constexpr size_t Arena::kDefaultSize; template <bool kCount> const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { + // Every name should have the same width and end with a space. Abbreviate if necessary: "Misc ", "SwitchTbl ", "SlowPaths ", @@ -49,6 +50,7 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "Successors ", "Dominated ", "Instruction ", + "CtorFenceIns ", "InvokeInputs ", "PhiInputs ", "LoopInfo ", diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h index c39429ce06..60b6ea8d7a 100644 --- a/runtime/base/arena_allocator.h +++ b/runtime/base/arena_allocator.h @@ -59,6 +59,7 @@ enum ArenaAllocKind { kArenaAllocSuccessors, kArenaAllocDominated, kArenaAllocInstruction, + kArenaAllocConstructorFenceInputs, kArenaAllocInvokeInputs, kArenaAllocPhiInputs, kArenaAllocLoopInfo, diff --git a/runtime/base/casts.h b/runtime/base/casts.h index 6b67864b08..c5b0af665b 100644 --- a/runtime/base/casts.h +++ b/runtime/base/casts.h @@ -98,7 +98,9 @@ inline Dest dchecked_integral_cast(const Source source) { // Check that the value is within the upper limit of Dest. 
(static_cast<uintmax_t>(std::numeric_limits<Dest>::max()) >= static_cast<uintmax_t>(std::numeric_limits<Source>::max()) || - source <= static_cast<Source>(std::numeric_limits<Dest>::max()))); + source <= static_cast<Source>(std::numeric_limits<Dest>::max()))) + << "dchecked_integral_cast failed for " << source + << " (would be " << static_cast<Dest>(source) << ")"; return static_cast<Dest>(source); } diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h index 2414b5f937..03ae63a068 100644 --- a/runtime/base/mutex.h +++ b/runtime/base/mutex.h @@ -373,19 +373,19 @@ class SHARED_LOCKABLE ReaderWriterMutex : public BaseMutex { bool IsSharedHeld(const Thread* self) const; // Assert the current thread has shared access to the ReaderWriterMutex. - void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { + ALWAYS_INLINE void AssertSharedHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { if (kDebugLocking && (gAborting == 0)) { // TODO: we can only assert this well when self != null. CHECK(IsSharedHeld(self) || self == nullptr) << *this; } } - void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { + ALWAYS_INLINE void AssertReaderHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(this) { AssertSharedHeld(self); } // Assert the current thread doesn't hold this ReaderWriterMutex either in shared or exclusive // mode. - void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) { + ALWAYS_INLINE void AssertNotHeld(const Thread* self) ASSERT_SHARED_CAPABILITY(!this) { if (kDebugLocking && (gAborting == 0)) { CHECK(!IsSharedHeld(self)) << *this; } diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc index 5394e53fa3..862f0d0b00 100644 --- a/runtime/base/scoped_flock.cc +++ b/runtime/base/scoped_flock.cc @@ -33,11 +33,22 @@ bool ScopedFlock::Init(const char* filename, std::string* error_msg) { } bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* error_msg) { + return Init(filename, flags, block, true, error_msg); +} + +bool ScopedFlock::Init(const char* filename, + int flags, + bool block, + bool flush_on_close, + std::string* error_msg) { + flush_on_close_ = flush_on_close; while (true) { if (file_.get() != nullptr) { UNUSED(file_->FlushCloseOrErase()); // Ignore result. } - file_.reset(OS::OpenFileWithFlags(filename, flags)); + + bool check_usage = flush_on_close; // Check usage only if we need to flush on close. 
+ file_.reset(OS::OpenFileWithFlags(filename, flags, check_usage)); if (file_.get() == nullptr) { *error_msg = StringPrintf("Failed to open file '%s': %s", filename, strerror(errno)); return false; @@ -86,6 +97,7 @@ bool ScopedFlock::Init(const char* filename, int flags, bool block, std::string* } bool ScopedFlock::Init(File* file, std::string* error_msg) { + flush_on_close_ = true; file_.reset(new File(dup(file->Fd()), file->GetPath(), file->CheckUsage(), file->ReadOnlyMode())); if (file_->Fd() == -1) { file_.reset(); @@ -111,17 +123,21 @@ bool ScopedFlock::HasFile() { return file_.get() != nullptr; } -ScopedFlock::ScopedFlock() { } +ScopedFlock::ScopedFlock() : flush_on_close_(true) { } ScopedFlock::~ScopedFlock() { if (file_.get() != nullptr) { int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN)); if (flock_result != 0) { - PLOG(FATAL) << "Unable to unlock file " << file_->GetPath(); - UNREACHABLE(); + // Only printing a warning is okay since this is only used with either: + // 1) a non-blocking Init call, or + // 2) as a part of a separate binary (e.g. dex2oat) which has its own timeout logic to prevent + // deadlocks. + // This means we can be sure that the warning won't cause a deadlock. + PLOG(WARNING) << "Unable to unlock file " << file_->GetPath(); } int close_result = -1; - if (file_->ReadOnlyMode()) { + if (file_->ReadOnlyMode() || !flush_on_close_) { close_result = file_->Close(); } else { close_result = file_->FlushCloseOrErase(); diff --git a/runtime/base/scoped_flock.h b/runtime/base/scoped_flock.h index cc22056443..a3a320f4cc 100644 --- a/runtime/base/scoped_flock.h +++ b/runtime/base/scoped_flock.h @@ -25,6 +25,15 @@ namespace art { +// A scoped file-lock implemented using flock. The file is locked by calling the Init function and +// is released during destruction. Note that failing to unlock the file only causes a warning to be +// printed. Users should take care that this does not cause potential deadlocks. +// +// Only printing a warning on unlock failure is okay since this is only used with either: +// 1) a non-blocking Init call, or +// 2) as a part of a separate binary (e.g. dex2oat) which has its own timeout logic to prevent +// deadlocks. +// This means we can be sure that the warning won't cause a deadlock. class ScopedFlock { public: ScopedFlock(); @@ -38,7 +47,16 @@ class ScopedFlock { // locking will be retried if the file changed. In non-blocking mode, false // is returned and no attempt is made to re-acquire the lock. // + // The argument `flush_on_close` controls whether or not the file + // will be explicitly flushed before close. + // // The file is opened with the provided flags.
+ bool Init(const char* filename, + int flags, + bool block, + bool flush_on_close, + std::string* error_msg); + // Calls Init(filename, flags, block, true, error_msg); bool Init(const char* filename, int flags, bool block, std::string* error_msg); // Calls Init(filename, O_CREAT | O_RDWR, true, errror_msg) bool Init(const char* filename, std::string* error_msg); @@ -57,6 +75,7 @@ class ScopedFlock { private: std::unique_ptr<File> file_; + bool flush_on_close_; DISALLOW_COPY_AND_ASSIGN(ScopedFlock); }; diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h index d5f375a5d9..cfe27f3811 100644 --- a/runtime/base/stl_util.h +++ b/runtime/base/stl_util.h @@ -194,6 +194,17 @@ static inline void MergeSets(std::set<T>& to_update, const std::set<T>& other) { to_update.insert(other.begin(), other.end()); } +// Returns a copy of the passed vector that doesn't memory-own its entries. +template <typename T> +static inline std::vector<T*> MakeNonOwningPointerVector(const std::vector<std::unique_ptr<T>>& src) { + std::vector<T*> result; + result.reserve(src.size()); + for (const std::unique_ptr<T>& t : src) { + result.push_back(t.get()); + } + return result; +} + } // namespace art #endif // ART_RUNTIME_BASE_STL_UTIL_H_ diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h index 56e8aa3685..a29cc6cd38 100644 --- a/runtime/common_runtime_test.h +++ b/runtime/common_runtime_test.h @@ -249,6 +249,12 @@ class CheckJniAbortCatcher { return; \ } +#define TEST_DISABLED_FOR_MEMORY_TOOL_ASAN() \ + if (RUNNING_ON_MEMORY_TOOL > 0 && !kMemoryToolIsValgrind) { \ + printf("WARNING: TEST DISABLED FOR MEMORY TOOL ASAN\n"); \ + return; \ + } + } // namespace art namespace std { diff --git a/runtime/compiler_filter.cc b/runtime/compiler_filter.cc index dc55ab8931..4847f38489 100644 --- a/runtime/compiler_filter.cc +++ b/runtime/compiler_filter.cc @@ -20,17 +20,15 @@ namespace art { -bool CompilerFilter::IsBytecodeCompilationEnabled(Filter filter) { +bool CompilerFilter::IsAotCompilationEnabled(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: - case CompilerFilter::kVerifyAtRuntime: - case CompilerFilter::kVerifyProfile: - case CompilerFilter::kInterpretOnly: return false; + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: return false; case CompilerFilter::kSpaceProfile: case CompilerFilter::kSpace: - case CompilerFilter::kBalanced: - case CompilerFilter::kTime: case CompilerFilter::kSpeedProfile: case CompilerFilter::kSpeed: case CompilerFilter::kEverythingProfile: @@ -41,15 +39,13 @@ bool CompilerFilter::IsBytecodeCompilationEnabled(Filter filter) { bool CompilerFilter::IsJniCompilationEnabled(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: - case CompilerFilter::kVerifyAtRuntime: return false; + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: return false; - case CompilerFilter::kVerifyProfile: - case CompilerFilter::kInterpretOnly: + case CompilerFilter::kQuicken: case CompilerFilter::kSpaceProfile: case CompilerFilter::kSpace: - case CompilerFilter::kBalanced: - case CompilerFilter::kTime: case CompilerFilter::kSpeedProfile: case CompilerFilter::kSpeed: case CompilerFilter::kEverythingProfile: @@ -58,17 +54,15 @@ bool CompilerFilter::IsJniCompilationEnabled(Filter filter) { UNREACHABLE(); } -bool CompilerFilter::IsAnyMethodCompilationEnabled(Filter filter) { +bool 
CompilerFilter::IsQuickeningCompilationEnabled(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: - case CompilerFilter::kVerifyAtRuntime: - case CompilerFilter::kVerifyProfile: return false; + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: return false; - case CompilerFilter::kInterpretOnly: + case CompilerFilter::kQuicken: case CompilerFilter::kSpaceProfile: case CompilerFilter::kSpace: - case CompilerFilter::kBalanced: - case CompilerFilter::kTime: case CompilerFilter::kSpeedProfile: case CompilerFilter::kSpeed: case CompilerFilter::kEverythingProfile: @@ -77,17 +71,21 @@ bool CompilerFilter::IsAnyMethodCompilationEnabled(Filter filter) { UNREACHABLE(); } +bool CompilerFilter::IsAnyCompilationEnabled(Filter filter) { + return IsJniCompilationEnabled(filter) || + IsQuickeningCompilationEnabled(filter) || + IsAotCompilationEnabled(filter); +} + bool CompilerFilter::IsVerificationEnabled(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: - case CompilerFilter::kVerifyAtRuntime: return false; + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: return false; - case CompilerFilter::kVerifyProfile: - case CompilerFilter::kInterpretOnly: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: case CompilerFilter::kSpaceProfile: case CompilerFilter::kSpace: - case CompilerFilter::kBalanced: - case CompilerFilter::kTime: case CompilerFilter::kSpeedProfile: case CompilerFilter::kSpeed: case CompilerFilter::kEverythingProfile: @@ -104,19 +102,14 @@ bool CompilerFilter::DependsOnImageChecksum(Filter filter) { bool CompilerFilter::DependsOnProfile(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: - case CompilerFilter::kVerifyAtRuntime: - case CompilerFilter::kInterpretOnly: + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: case CompilerFilter::kSpace: - case CompilerFilter::kBalanced: - case CompilerFilter::kTime: case CompilerFilter::kSpeed: case CompilerFilter::kEverything: return false; - // verify-profile doesn't look at profiles anymore. - // TODO(ngeoffray): this will be cleaned up with b/34715556. - case CompilerFilter::kVerifyProfile: return false; - case CompilerFilter::kSpaceProfile: case CompilerFilter::kSpeedProfile: case CompilerFilter::kEverythingProfile: return true; @@ -126,21 +119,15 @@ bool CompilerFilter::DependsOnProfile(Filter filter) { CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: - case CompilerFilter::kVerifyAtRuntime: - case CompilerFilter::kInterpretOnly: + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: case CompilerFilter::kSpace: - case CompilerFilter::kBalanced: - case CompilerFilter::kTime: case CompilerFilter::kSpeed: case CompilerFilter::kEverything: return filter; - case CompilerFilter::kVerifyProfile: - // verify-profile doesn't look at profiles anymore. - // TODO(ngeoffray): this will be cleaned up with b/34715556. 
- return filter; - case CompilerFilter::kSpaceProfile: return CompilerFilter::kSpace; @@ -153,6 +140,26 @@ CompilerFilter::Filter CompilerFilter::GetNonProfileDependentFilterFrom(Filter f UNREACHABLE(); } +CompilerFilter::Filter CompilerFilter::GetSafeModeFilterFrom(Filter filter) { + // For safe mode, we should not return a filter that generates AOT compiled + // code. + switch (filter) { + case CompilerFilter::kAssumeVerified: + case CompilerFilter::kExtract: + case CompilerFilter::kVerify: + case CompilerFilter::kQuicken: + return filter; + + case CompilerFilter::kSpace: + case CompilerFilter::kSpeed: + case CompilerFilter::kEverything: + case CompilerFilter::kSpaceProfile: + case CompilerFilter::kSpeedProfile: + case CompilerFilter::kEverythingProfile: + return CompilerFilter::kQuicken; + } + UNREACHABLE(); +} bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) { return current >= target; @@ -160,14 +167,12 @@ bool CompilerFilter::IsAsGoodAs(Filter current, Filter target) { std::string CompilerFilter::NameOfFilter(Filter filter) { switch (filter) { - case CompilerFilter::kVerifyNone: return "verify-none"; - case CompilerFilter::kVerifyAtRuntime: return "verify-at-runtime"; - case CompilerFilter::kVerifyProfile: return "verify-profile"; - case CompilerFilter::kInterpretOnly: return "interpret-only"; + case CompilerFilter::kAssumeVerified: return "assume-verified"; + case CompilerFilter::kExtract: return "extract"; + case CompilerFilter::kVerify: return "verify"; + case CompilerFilter::kQuicken: return "quicken"; case CompilerFilter::kSpaceProfile: return "space-profile"; case CompilerFilter::kSpace: return "space"; - case CompilerFilter::kBalanced: return "balanced"; - case CompilerFilter::kTime: return "time"; case CompilerFilter::kSpeedProfile: return "speed-profile"; case CompilerFilter::kSpeed: return "speed"; case CompilerFilter::kEverythingProfile: return "everything-profile"; @@ -180,19 +185,41 @@ bool CompilerFilter::ParseCompilerFilter(const char* option, Filter* filter) { CHECK(filter != nullptr); if (strcmp(option, "verify-none") == 0) { - *filter = kVerifyNone; + LOG(WARNING) << "'verify-none' is an obsolete compiler filter name that will be " + << "removed in future releases, please use 'assume-verified' instead."; + *filter = kAssumeVerified; } else if (strcmp(option, "interpret-only") == 0) { - *filter = kInterpretOnly; + LOG(WARNING) << "'interpret-only' is an obsolete compiler filter name that will be " + << "removed in future releases, please use 'quicken' instead."; + *filter = kQuicken; } else if (strcmp(option, "verify-profile") == 0) { - *filter = kVerifyProfile; + LOG(WARNING) << "'verify-profile' is an obsolete compiler filter name that will be " + << "removed in future releases, please use 'verify' instead."; + *filter = kVerify; } else if (strcmp(option, "verify-at-runtime") == 0) { - *filter = kVerifyAtRuntime; + LOG(WARNING) << "'verify-at-runtime' is an obsolete compiler filter name that will be " + << "removed in future releases, please use 'extract' instead."; + *filter = kExtract; + } else if (strcmp(option, "balanced") == 0) { + LOG(WARNING) << "'balanced' is an obsolete compiler filter name that will be " + << "removed in future releases, please use 'speed' instead."; + *filter = kSpeed; + } else if (strcmp(option, "time") == 0) { + LOG(WARNING) << "'time' is an obsolete compiler filter name that will be " + << "removed in future releases, please use 'space' instead."; + *filter = kSpace; + } else if (strcmp(option, "assume-verified") == 0) { 
+ *filter = kAssumeVerified; + } else if (strcmp(option, "extract") == 0) { + *filter = kExtract; + } else if (strcmp(option, "verify") == 0) { + *filter = kVerify; + } else if (strcmp(option, "quicken") == 0) { + *filter = kQuicken; } else if (strcmp(option, "space") == 0) { *filter = kSpace; } else if (strcmp(option, "space-profile") == 0) { *filter = kSpaceProfile; - } else if (strcmp(option, "balanced") == 0) { - *filter = kBalanced; } else if (strcmp(option, "speed") == 0) { *filter = kSpeed; } else if (strcmp(option, "speed-profile") == 0) { @@ -201,8 +228,6 @@ bool CompilerFilter::ParseCompilerFilter(const char* option, Filter* filter) { *filter = kEverything; } else if (strcmp(option, "everything-profile") == 0) { *filter = kEverythingProfile; - } else if (strcmp(option, "time") == 0) { - *filter = kTime; } else { return false; } diff --git a/runtime/compiler_filter.h b/runtime/compiler_filter.h index 796f4aad0c..f802439053 100644 --- a/runtime/compiler_filter.h +++ b/runtime/compiler_filter.h @@ -30,14 +30,12 @@ class CompilerFilter FINAL { // Note: Order here matters. Later filter choices are considered "as good // as" earlier filter choices. enum Filter { - kVerifyNone, // Skip verification but mark all classes as verified anyway. - kVerifyAtRuntime, // Delay verication to runtime, do not compile anything. - kVerifyProfile, // Verify only the classes in the profile, compile only JNI stubs. - kInterpretOnly, // Verify everything, compile only JNI stubs. - kTime, // Compile methods, but minimize compilation time. + kAssumeVerified, // Skip verification but mark all classes as verified anyway. + kExtract, // Delay verification to runtime, do not compile anything. + kVerify, // Only verify classes. + kQuicken, // Verify, quicken, and compile JNI stubs. kSpaceProfile, // Maximize space savings based on profile. kSpace, // Maximize space savings. - kBalanced, // Good performance return on compilation investment. kSpeedProfile, // Maximize runtime performance based on profile. kSpeed, // Maximize runtime performance. kEverythingProfile, // Compile everything capable of being compiled based on profile. @@ -48,17 +46,21 @@ class CompilerFilter FINAL { // Returns true if an oat file with this compiler filter contains // compiled executable code for bytecode. - static bool IsBytecodeCompilationEnabled(Filter filter); + static bool IsAotCompilationEnabled(Filter filter); // Returns true if an oat file with this compiler filter contains // compiled executable code for bytecode, JNI methods, or quickened dex // bytecode. - static bool IsAnyMethodCompilationEnabled(Filter filter); + static bool IsAnyCompilationEnabled(Filter filter); // Returns true if an oat file with this compiler filter contains // compiled executable code for JNI methods. static bool IsJniCompilationEnabled(Filter filter); + // Returns true if an oat file with this compiler filter contains + // quickened dex bytecode. + static bool IsQuickeningCompilationEnabled(Filter filter); + // Returns true if this compiler filter requires running verification. static bool IsVerificationEnabled(Filter filter); @@ -73,6 +75,9 @@ class CompilerFilter FINAL { // Returns a non-profile-guided version of the given filter. static Filter GetNonProfileDependentFilterFrom(Filter filter); + // Returns a filter suitable for safe mode. + static Filter GetSafeModeFilterFrom(Filter filter); + // Returns true if the 'current' compiler filter is considered at least as // good as the 'target' compilation type.
// For example: kSpeed is as good as kInterpretOnly, but kInterpretOnly is diff --git a/runtime/compiler_filter_test.cc b/runtime/compiler_filter_test.cc index c603be6e52..383f4e3666 100644 --- a/runtime/compiler_filter_test.cc +++ b/runtime/compiler_filter_test.cc @@ -28,19 +28,24 @@ static void TestCompilerFilterName(CompilerFilter::Filter filter, std::string na EXPECT_EQ(name, CompilerFilter::NameOfFilter(filter)); } +static void TestSafeModeFilter(CompilerFilter::Filter expected, std::string name) { + CompilerFilter::Filter parsed; + EXPECT_TRUE(CompilerFilter::ParseCompilerFilter(name.c_str(), &parsed)); + EXPECT_EQ(expected, CompilerFilter::GetSafeModeFilterFrom(parsed)); +} + + // Verify the dexopt status values from dalvik.system.DexFile // match the OatFileAssistant::DexOptStatus values. TEST(CompilerFilterTest, ParseCompilerFilter) { CompilerFilter::Filter filter; - TestCompilerFilterName(CompilerFilter::kVerifyNone, "verify-none"); - TestCompilerFilterName(CompilerFilter::kVerifyAtRuntime, "verify-at-runtime"); - TestCompilerFilterName(CompilerFilter::kVerifyProfile, "verify-profile"); - TestCompilerFilterName(CompilerFilter::kInterpretOnly, "interpret-only"); - TestCompilerFilterName(CompilerFilter::kTime, "time"); + TestCompilerFilterName(CompilerFilter::kAssumeVerified, "assume-verified"); + TestCompilerFilterName(CompilerFilter::kExtract, "extract"); + TestCompilerFilterName(CompilerFilter::kVerify, "verify"); + TestCompilerFilterName(CompilerFilter::kQuicken, "quicken"); TestCompilerFilterName(CompilerFilter::kSpaceProfile, "space-profile"); TestCompilerFilterName(CompilerFilter::kSpace, "space"); - TestCompilerFilterName(CompilerFilter::kBalanced, "balanced"); TestCompilerFilterName(CompilerFilter::kSpeedProfile, "speed-profile"); TestCompilerFilterName(CompilerFilter::kSpeed, "speed"); TestCompilerFilterName(CompilerFilter::kEverythingProfile, "everything-profile"); @@ -49,4 +54,17 @@ TEST(CompilerFilterTest, ParseCompilerFilter) { EXPECT_FALSE(CompilerFilter::ParseCompilerFilter("super-awesome-filter", &filter)); } +TEST(CompilerFilterTest, SafeModeFilter) { + TestSafeModeFilter(CompilerFilter::kAssumeVerified, "assume-verified"); + TestSafeModeFilter(CompilerFilter::kExtract, "extract"); + TestSafeModeFilter(CompilerFilter::kVerify, "verify"); + TestSafeModeFilter(CompilerFilter::kQuicken, "quicken"); + TestSafeModeFilter(CompilerFilter::kQuicken, "space-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "space"); + TestSafeModeFilter(CompilerFilter::kQuicken, "speed-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "speed"); + TestSafeModeFilter(CompilerFilter::kQuicken, "everything-profile"); + TestSafeModeFilter(CompilerFilter::kQuicken, "everything"); +} + } // namespace art diff --git a/runtime/debugger.cc b/runtime/debugger.cc index 63794bff6f..d0b50fe820 100644 --- a/runtime/debugger.cc +++ b/runtime/debugger.cc @@ -1092,6 +1092,23 @@ JDWP::JdwpError Dbg::GetSignature(JDWP::RefTypeId class_id, std::string* signatu return JDWP::ERR_NONE; } +JDWP::JdwpError Dbg::GetSourceDebugExtension(JDWP::RefTypeId class_id, + std::string* extension_data) { + JDWP::JdwpError error; + mirror::Class* c = DecodeClass(class_id, &error); + if (c == nullptr) { + return error; + } + StackHandleScope<1> hs(Thread::Current()); + Handle<mirror::Class> klass(hs.NewHandle(c)); + const char* data = annotations::GetSourceDebugExtension(klass); + if (data == nullptr) { + return JDWP::ERR_ABSENT_INFORMATION; + } + *extension_data = data; + return JDWP::ERR_NONE; +} + 
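The new Dbg::GetSourceDebugExtension above surfaces the Ldalvik/annotation/SourceDebugExtension; system annotation to the debugger and reports ERR_ABSENT_INFORMATION when a class carries none. A sketch of how a caller might consume it follows; the handler name and the std::string reply are hypothetical simplifications (the real JDWP plumbing writes into a reply buffer), and the includes are assumed.

    #include <string>

    #include "debugger.h"   // Assumed: declares Dbg, as in the hunk above.
    #include "jdwp/jdwp.h"  // Assumed: declares JDWP::JdwpError and RefTypeId.

    namespace art {

    // Hypothetical handler, not the actual JDWP request handler. Note that
    // the real caller must hold the mutator lock, per the REQUIRES_SHARED
    // annotation on the declaration in debugger.h.
    JDWP::JdwpError HandleSourceDebugExtensionRequest(JDWP::RefTypeId class_id,
                                                      std::string* reply) {
      std::string extension_data;
      JDWP::JdwpError error = Dbg::GetSourceDebugExtension(class_id, &extension_data);
      if (error != JDWP::ERR_NONE) {
        // ERR_ABSENT_INFORMATION if the annotation is missing, or a decode
        // error if class_id does not name a class.
        return error;
      }
      *reply = extension_data;  // A real handler would append this to the reply buffer.
      return JDWP::ERR_NONE;
    }

    }  // namespace art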
JDWP::JdwpError Dbg::GetSourceFile(JDWP::RefTypeId class_id, std::string* result) { JDWP::JdwpError error; mirror::Class* c = DecodeClass(class_id, &error); diff --git a/runtime/debugger.h b/runtime/debugger.h index 27124e19fb..4f3ff40e86 100644 --- a/runtime/debugger.h +++ b/runtime/debugger.h @@ -288,6 +288,9 @@ class Dbg { REQUIRES_SHARED(Locks::mutator_lock_); static JDWP::JdwpError GetSignature(JDWP::RefTypeId ref_type_id, std::string* signature) REQUIRES_SHARED(Locks::mutator_lock_); + static JDWP::JdwpError GetSourceDebugExtension(JDWP::RefTypeId ref_type_id, + std::string* extension_data) + REQUIRES_SHARED(Locks::mutator_lock_); static JDWP::JdwpError GetSourceFile(JDWP::RefTypeId ref_type_id, std::string* source_file) REQUIRES_SHARED(Locks::mutator_lock_); static JDWP::JdwpError GetObjectTag(JDWP::ObjectId object_id, uint8_t* tag) diff --git a/runtime/deoptimization_kind.h b/runtime/deoptimization_kind.h new file mode 100644 index 0000000000..14e189c5d1 --- /dev/null +++ b/runtime/deoptimization_kind.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_RUNTIME_DEOPTIMIZATION_KIND_H_ +#define ART_RUNTIME_DEOPTIMIZATION_KIND_H_ + +namespace art { + +enum class DeoptimizationKind { + kAotInlineCache = 0, + kJitInlineCache, + kJitSameTarget, + kLoopBoundsBCE, + kLoopNullBCE, + kBlockBCE, + kCHA, + kFullFrame, + kLast = kFullFrame +}; + +inline const char* GetDeoptimizationKindName(DeoptimizationKind kind) { + switch (kind) { + case DeoptimizationKind::kAotInlineCache: return "AOT inline cache"; + case DeoptimizationKind::kJitInlineCache: return "JIT inline cache"; + case DeoptimizationKind::kJitSameTarget: return "JIT same target"; + case DeoptimizationKind::kLoopBoundsBCE: return "loop bounds check elimination"; + case DeoptimizationKind::kLoopNullBCE: return "loop bounds check elimination on null"; + case DeoptimizationKind::kBlockBCE: return "block bounds check elimination"; + case DeoptimizationKind::kCHA: return "class hierarchy analysis"; + case DeoptimizationKind::kFullFrame: return "full frame"; + } + LOG(FATAL) << "Unexpected kind " << static_cast<size_t>(kind); + UNREACHABLE(); +} + +std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind); + +} // namespace art + +#endif // ART_RUNTIME_DEOPTIMIZATION_KIND_H_ diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h index e58c6f541e..6765407949 100644 --- a/runtime/dex2oat_environment_test.h +++ b/runtime/dex2oat_environment_test.h @@ -42,7 +42,16 @@ class Dex2oatEnvironmentTest : public CommonRuntimeTest { CommonRuntimeTest::SetUp(); // Create a scratch directory to work from. - scratch_dir_ = android_data_ + "/Dex2oatEnvironmentTest"; + + // Get the realpath of the android data. The oat dir should always point to the real location + // when generating oat files in dalvik-cache. This avoids complicating the unit tests + // when matching the expected paths.
+ UniqueCPtr<const char[]> android_data_real(realpath(android_data_.c_str(), nullptr)); + ASSERT_TRUE(android_data_real != nullptr) + << "Could not get the realpath of the android data " << android_data_ << ": " << strerror(errno); + + scratch_dir_.assign(android_data_real.get()); + scratch_dir_ += "/Dex2oatEnvironmentTest"; ASSERT_EQ(0, mkdir(scratch_dir_.c_str(), 0700)); // Create a subdirectory in scratch for odex files. diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc index 625794e2cd..eaf144ba97 100644 --- a/runtime/dex_file.cc +++ b/runtime/dex_file.cc @@ -1044,7 +1044,7 @@ bool DexFile::DecodeDebugLocalInfo(const CodeItem* code_item, bool is_static, ui } uint32_t name_idx = DecodeUnsignedLeb128P1(&stream); - uint32_t descriptor_idx = DecodeUnsignedLeb128P1(&stream); + uint16_t descriptor_idx = DecodeUnsignedLeb128P1(&stream); uint32_t signature_idx = kDexNoIndex; if (opcode == DBG_START_LOCAL_EXTENDED) { signature_idx = DecodeUnsignedLeb128P1(&stream); } diff --git a/runtime/dex_file.h b/runtime/dex_file.h index 1b18d21cb1..36c734197a 100644 --- a/runtime/dex_file.h +++ b/runtime/dex_file.h @@ -92,8 +92,8 @@ class DexFile { uint32_t method_ids_off_; // file offset of MethodIds array uint32_t class_defs_size_; // number of ClassDefs uint32_t class_defs_off_; // file offset of ClassDef array - uint32_t data_size_; // unused - uint32_t data_off_; // unused + uint32_t data_size_; // size of data section + uint32_t data_off_; // file offset of data section // Decode the dex magic version uint32_t GetVersion() const; diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc index 6b9654dc49..13979160bd 100644 --- a/runtime/dex_file_annotations.cc +++ b/runtime/dex_file_annotations.cc @@ -1135,7 +1135,7 @@ mirror::Object* GetAnnotationForMethodParameter(ArtMethod* method, bool GetParametersMetadataForMethod(ArtMethod* method, MutableHandle<mirror::ObjectArray<mirror::String>>* names, MutableHandle<mirror::IntArray>* access_flags) { - const DexFile::AnnotationSetItem::AnnotationSetItem* annotation_set = + const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method); if (annotation_set == nullptr) { return false; @@ -1420,6 +1420,40 @@ mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirro return GetSignatureValue(data, annotation_set); } +const char* GetSourceDebugExtension(Handle<mirror::Class> klass) { + ClassData data(klass); + const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data); + if (annotation_set == nullptr) { + return nullptr; + } + const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet( + data.GetDexFile(), + annotation_set, + "Ldalvik/annotation/SourceDebugExtension;", + DexFile::kDexVisibilitySystem); + if (annotation_item == nullptr) { + return nullptr; + } + const uint8_t* annotation = + SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value"); + if (annotation == nullptr) { + return nullptr; + } + DexFile::AnnotationValue annotation_value; + if (!ProcessAnnotationValue<false>(data, + &annotation, + &annotation_value, + ScopedNullHandle<mirror::Class>(), + DexFile::kAllRaw)) { + return nullptr; + } + if (annotation_value.type_ != DexFile::kDexAnnotationString) { + return nullptr; + } + dex::StringIndex index(static_cast<uint32_t>(annotation_value.value_.GetI())); + return data.GetDexFile().StringDataByIdx(index); +} + bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class) { ClassData data(klass);
const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data); diff --git a/runtime/dex_file_annotations.h b/runtime/dex_file_annotations.h index c66c5bdb8b..651c9844eb 100644 --- a/runtime/dex_file_annotations.h +++ b/runtime/dex_file_annotations.h @@ -89,6 +89,8 @@ bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) REQUIRES_SHARED(Locks::mutator_lock_); mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_); +const char* GetSourceDebugExtension(Handle<mirror::Class> klass) + REQUIRES_SHARED(Locks::mutator_lock_); bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc index f811287a9c..6627550574 100644 --- a/runtime/dex_file_test.cc +++ b/runtime/dex_file_test.cc @@ -171,6 +171,17 @@ static const char kRawDexBadMapOffset[] = "AAACAAAAnAAAAAYAAAABAAAArAAAAAEgAAABAAAAzAAAAAIgAAAFAAAA5AAAAAMgAAABAAAAEAEA" "AAAgAAABAAAAFQEAAAAQAAABAAAAIAEAAA=="; +static const char kRawDexDebugInfoLocalNullType[] = + "ZGV4CjAzNQA+Kwj2g6OZMH88OvK9Ey6ycdIsFCt18ED8AQAAcAAAAHhWNBIAAAAAAAAAAHQBAAAI" + "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAMAQAA8AAAABwB" + "AAAkAQAALAEAAC8BAAA0AQAASAEAAEsBAABOAQAAAgAAAAMAAAAEAAAABQAAAAIAAAAAAAAAAAAA" + "AAUAAAADAAAAAAAAAAEAAQAAAAAAAQAAAAYAAAACAAEAAAAAAAEAAAABAAAAAgAAAAAAAAABAAAA" + "AAAAAGMBAAAAAAAAAQABAAEAAABUAQAABAAAAHAQAgAAAA4AAgABAAAAAABZAQAAAgAAABIQDwAG" + "PGluaXQ+AAZBLmphdmEAAUkAA0xBOwASTGphdmEvbGFuZy9PYmplY3Q7AAFWAAFhAAR0aGlzAAEA" + "Bw4AAwAHDh4DAAcAAAAAAQEAgYAE8AEBAIgCAAAACwAAAAAAAAABAAAAAAAAAAEAAAAIAAAAcAAA" + "AAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAAuAAAAAYAAAABAAAA0AAAAAEgAAACAAAA" + "8AAAAAIgAAAIAAAAHAEAAAMgAAACAAAAVAEAAAAgAAABAAAAYwEAAAAQAAABAAAAdAEAAA=="; + static void DecodeAndWriteDexFile(const char* base64, const char* location) { // decode base64 CHECK(base64 != nullptr); @@ -598,4 +609,17 @@ TEST_F(DexFileTest, GetStringWithNoIndex) { EXPECT_EQ(raw->StringByTypeIdx(idx), nullptr); } +static void Callback(void* context ATTRIBUTE_UNUSED, + const DexFile::LocalInfo& entry ATTRIBUTE_UNUSED) { +} + +TEST_F(DexFileTest, OpenDexDebugInfoLocalNullType) { + ScratchFile tmp; + std::unique_ptr<const DexFile> raw = OpenDexFileInMemoryBase64( + kRawDexDebugInfoLocalNullType, tmp.GetFilename().c_str(), 0xf25f2b38U, true); + const DexFile::ClassDef& class_def = raw->GetClassDef(0); + const DexFile::CodeItem* code_item = raw->GetCodeItem(raw->FindCodeItemOffset(class_def, 1)); + ASSERT_TRUE(raw->DecodeDebugLocalInfo(code_item, true, 1, Callback, nullptr)); +} + } // namespace art diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc index 11b3cd025a..c18ab47739 100644 --- a/runtime/dex_file_verifier.cc +++ b/runtime/dex_file_verifier.cc @@ -922,12 +922,12 @@ bool DexFileVerifier::CheckEncodedAnnotation() { return true; } -bool DexFileVerifier::FindClassFlags(uint32_t index, - bool is_field, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { +bool DexFileVerifier::FindClassIndexAndDef(uint32_t index, + bool is_field, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** output_class_def) { DCHECK(class_type_index != nullptr); - DCHECK(class_access_flags != nullptr); + DCHECK(output_class_def != nullptr); // First check if the index is valid. if (index >= (is_field ? 
header_->field_ids_size_ : header_->method_ids_size_)) { @@ -957,7 +957,7 @@ bool DexFileVerifier::FindClassFlags(uint32_t index, for (size_t i = 0; i < header_->class_defs_size_; ++i) { const DexFile::ClassDef* class_def = class_def_begin + i; if (class_def->class_idx_ == *class_type_index) { - *class_access_flags = class_def->access_flags_; + *output_class_def = class_def; return true; } } @@ -966,13 +966,13 @@ bool DexFileVerifier::FindClassFlags(uint32_t index, return false; } -bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field, - const char* type_descr, - uint32_t curr_index, - uint32_t prev_index, - bool* have_class, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { +bool DexFileVerifier::CheckOrderAndGetClassDef(bool is_field, + const char* type_descr, + uint32_t curr_index, + uint32_t prev_index, + bool* have_class, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** class_def) { if (curr_index < prev_index) { ErrorStringPrintf("out-of-order %s indexes %" PRIu32 " and %" PRIu32, type_descr, @@ -982,7 +982,7 @@ bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field, } if (!*have_class) { - *have_class = FindClassFlags(curr_index, is_field, class_type_index, class_access_flags); + *have_class = FindClassIndexAndDef(curr_index, is_field, class_type_index, class_def); if (!*have_class) { // Should have really found one. ErrorStringPrintf("could not find declaring class for %s index %" PRIu32, @@ -994,34 +994,130 @@ bool DexFileVerifier::CheckOrderAndGetClassFlags(bool is_field, return true; } +bool DexFileVerifier::CheckStaticFieldTypes(const DexFile::ClassDef* class_def) { + if (class_def == nullptr) { + return true; + } + + ClassDataItemIterator field_it(*dex_file_, ptr_); + EncodedStaticFieldValueIterator array_it(*dex_file_, *class_def); + + for (; field_it.HasNextStaticField() && array_it.HasNext(); field_it.Next(), array_it.Next()) { + uint32_t index = field_it.GetMemberIndex(); + const DexFile::TypeId& type_id = dex_file_->GetTypeId(dex_file_->GetFieldId(index).type_idx_); + const char* field_type_name = + dex_file_->GetStringData(dex_file_->GetStringId(type_id.descriptor_idx_)); + Primitive::Type field_type = Primitive::GetType(field_type_name[0]); + EncodedArrayValueIterator::ValueType array_type = array_it.GetValueType(); + // Ensure this matches RuntimeEncodedStaticFieldValueIterator. 
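+ // Each case below checks that the tag recorded in the encoded static-values array matches
+ // the first character of the declared field's type descriptor (via Primitive::GetType).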
+ switch (array_type) { + case EncodedArrayValueIterator::ValueType::kBoolean: + if (field_type != Primitive::kPrimBoolean) { + ErrorStringPrintf("unexpected static field initial value type: 'Z' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kByte: + if (field_type != Primitive::kPrimByte) { + ErrorStringPrintf("unexpected static field initial value type: 'B' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kShort: + if (field_type != Primitive::kPrimShort) { + ErrorStringPrintf("unexpected static field initial value type: 'S' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kChar: + if (field_type != Primitive::kPrimChar) { + ErrorStringPrintf("unexpected static field initial value type: 'C' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kInt: + if (field_type != Primitive::kPrimInt) { + ErrorStringPrintf("unexpected static field initial value type: 'I' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kLong: + if (field_type != Primitive::kPrimLong) { + ErrorStringPrintf("unexpected static field initial value type: 'J' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kFloat: + if (field_type != Primitive::kPrimFloat) { + ErrorStringPrintf("unexpected static field initial value type: 'F' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kDouble: + if (field_type != Primitive::kPrimDouble) { + ErrorStringPrintf("unexpected static field initial value type: 'D' vs '%c'", + field_type_name[0]); + return false; + } + break; + case EncodedArrayValueIterator::ValueType::kNull: + case EncodedArrayValueIterator::ValueType::kString: + case EncodedArrayValueIterator::ValueType::kType: + if (field_type != Primitive::kPrimNot) { + ErrorStringPrintf("unexpected static field initial value type: 'L' vs '%c'", + field_type_name[0]); + return false; + } + break; + default: + ErrorStringPrintf("unexpected static field initial value type: %x", array_type); + return false; + } + } + + if (array_it.HasNext()) { + ErrorStringPrintf("too many static field initial values"); + return false; + } + return true; +} + template <bool kStatic> bool DexFileVerifier::CheckIntraClassDataItemFields(ClassDataItemIterator* it, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { + const DexFile::ClassDef** class_def) { DCHECK(it != nullptr); // These calls use the raw access flags to check whether the whole dex field is valid. uint32_t prev_index = 0; for (; kStatic ? it->HasNextStaticField() : it->HasNextInstanceField(); it->Next()) { uint32_t curr_index = it->GetMemberIndex(); - if (!CheckOrderAndGetClassFlags(true, - kStatic ? "static field" : "instance field", - curr_index, - prev_index, - have_class, - class_type_index, - class_access_flags)) { + if (!CheckOrderAndGetClassDef(true, + kStatic ? 
"static field" : "instance field", + curr_index, + prev_index, + have_class, + class_type_index, + class_def)) { return false; } - prev_index = curr_index; - + DCHECK(class_def != nullptr); if (!CheckClassDataItemField(curr_index, it->GetRawMemberAccessFlags(), - *class_access_flags, + (*class_def)->access_flags_, *class_type_index, kStatic)) { return false; } + + prev_index = curr_index; } return true; @@ -1033,30 +1129,31 @@ bool DexFileVerifier::CheckIntraClassDataItemMethods( std::unordered_set<uint32_t>* direct_method_indexes, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags) { + const DexFile::ClassDef** class_def) { uint32_t prev_index = 0; for (; kDirect ? it->HasNextDirectMethod() : it->HasNextVirtualMethod(); it->Next()) { uint32_t curr_index = it->GetMemberIndex(); - if (!CheckOrderAndGetClassFlags(false, - kDirect ? "direct method" : "virtual method", - curr_index, - prev_index, - have_class, - class_type_index, - class_access_flags)) { + if (!CheckOrderAndGetClassDef(false, + kDirect ? "direct method" : "virtual method", + curr_index, + prev_index, + have_class, + class_type_index, + class_def)) { return false; } - prev_index = curr_index; - + DCHECK(class_def != nullptr); if (!CheckClassDataItemMethod(curr_index, it->GetRawMemberAccessFlags(), - *class_access_flags, + (*class_def)->access_flags_, *class_type_index, it->GetMethodCodeItemOffset(), direct_method_indexes, kDirect)) { return false; } + + prev_index = curr_index; } return true; @@ -1071,19 +1168,19 @@ bool DexFileVerifier::CheckIntraClassDataItem() { // as the lookup is expensive, cache the result. bool have_class = false; dex::TypeIndex class_type_index; - uint32_t class_access_flags; + const DexFile::ClassDef* class_def = nullptr; // Check fields. if (!CheckIntraClassDataItemFields<true>(&it, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } if (!CheckIntraClassDataItemFields<false>(&it, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } @@ -1092,18 +1189,25 @@ bool DexFileVerifier::CheckIntraClassDataItem() { &direct_method_indexes, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } if (!CheckIntraClassDataItemMethods<false>(&it, &direct_method_indexes, &have_class, &class_type_index, - &class_access_flags)) { + &class_def)) { return false; } - ptr_ = it.EndDataPointer(); + const uint8_t* end_ptr = it.EndDataPointer(); + + // Check static field types against initial static values in encoded array. 
+ if (!CheckStaticFieldTypes(class_def)) { + return false; + } + + ptr_ = end_ptr; return true; } diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h index 71b316c403..d1043c6841 100644 --- a/runtime/dex_file_verifier.h +++ b/runtime/dex_file_verifier.h @@ -86,13 +86,14 @@ class DexFileVerifier { uint32_t code_offset, std::unordered_set<uint32_t>* direct_method_indexes, bool expect_direct); - bool CheckOrderAndGetClassFlags(bool is_field, - const char* type_descr, - uint32_t curr_index, - uint32_t prev_index, - bool* have_class, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + bool CheckOrderAndGetClassDef(bool is_field, + const char* type_descr, + uint32_t curr_index, + uint32_t prev_index, + bool* have_class, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** class_def); + bool CheckStaticFieldTypes(const DexFile::ClassDef* class_def); bool CheckPadding(size_t offset, uint32_t aligned_offset); bool CheckEncodedValue(); @@ -106,7 +107,7 @@ class DexFileVerifier { bool CheckIntraClassDataItemFields(ClassDataItemIterator* it, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + const DexFile::ClassDef** class_def); // Check all methods of the given type from the given iterator. Load the class data from the first // method, if necessary (and return it), or use the given values. template <bool kDirect> @@ -114,7 +115,7 @@ class DexFileVerifier { std::unordered_set<uint32_t>* direct_method_indexes, bool* have_class, dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + const DexFile::ClassDef** class_def); bool CheckIntraCodeItem(); bool CheckIntraStringDataItem(); @@ -165,16 +166,15 @@ class DexFileVerifier { __attribute__((__format__(__printf__, 2, 3))) COLD_ATTR; bool FailureReasonIsSet() const { return failure_reason_.size() != 0; } - // Retrieve class index and class access flag from the given member. index is the member index, - // which is taken as either a field or a method index (as designated by is_field). The result, - // if the member and declaring class could be found, is stored in class_type_index and - // class_access_flags. - // This is an expensive lookup, as we have to find the class-def by type index, which is a + // Retrieve class index and class def from the given member. index is the member index, which is + // taken as either a field or a method index (as designated by is_field). The result, if the + // member and declaring class could be found, is stored in class_type_index and class_def. + // This is an expensive lookup, as we have to find the class def by type index, which is a // linear search. The output values should thus be cached by the caller. - bool FindClassFlags(uint32_t index, - bool is_field, - dex::TypeIndex* class_type_index, - uint32_t* class_access_flags); + bool FindClassIndexAndDef(uint32_t index, + bool is_field, + dex::TypeIndex* class_type_index, + const DexFile::ClassDef** output_class_def); // Check validity of the given access flags, interpreted for a field in the context of a class // with the given second access flags. 
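The switch in CheckStaticFieldTypes above boils down to a small tag-to-type table. For reference, a minimal standalone sketch of the same rule (the enum mirrors EncodedArrayValueIterator::ValueType, but this table-driven form is only an illustration, not the code the change adds):

namespace {

// Mirrors the value tags handled by the switch above.
enum class ValueType { kByte, kShort, kChar, kInt, kLong, kFloat, kDouble, kBoolean, kNull, kString, kType };

// Coarse primitive categories, as Primitive::GetType derives them from a descriptor character.
enum class Prim { kBoolean, kByte, kShort, kChar, kInt, kLong, kFloat, kDouble, kRef };

constexpr Prim PrimFromDescriptorChar(char c) {
  return c == 'Z' ? Prim::kBoolean
       : c == 'B' ? Prim::kByte
       : c == 'S' ? Prim::kShort
       : c == 'C' ? Prim::kChar
       : c == 'I' ? Prim::kInt
       : c == 'J' ? Prim::kLong
       : c == 'F' ? Prim::kFloat
       : c == 'D' ? Prim::kDouble
       : Prim::kRef;  // 'L' and '[' are both reference types.
}

constexpr Prim ExpectedPrim(ValueType tag) {
  return tag == ValueType::kBoolean ? Prim::kBoolean
       : tag == ValueType::kByte    ? Prim::kByte
       : tag == ValueType::kShort   ? Prim::kShort
       : tag == ValueType::kChar    ? Prim::kChar
       : tag == ValueType::kInt     ? Prim::kInt
       : tag == ValueType::kLong    ? Prim::kLong
       : tag == ValueType::kFloat   ? Prim::kFloat
       : tag == ValueType::kDouble  ? Prim::kDouble
       : Prim::kRef;  // kNull, kString and kType must initialize reference fields.
}

// The verifier's rule in one expression.
constexpr bool MatchesFieldDescriptor(ValueType tag, const char* descriptor) {
  return ExpectedPrim(tag) == PrimFromDescriptorChar(descriptor[0]);
}

// The BadStaticFieldInitialValuesArray test below hex-edits a string tag onto an int field:
static_assert(!MatchesFieldDescriptor(ValueType::kString, "I"), "string value cannot init an int field");
static_assert(MatchesFieldDescriptor(ValueType::kInt, "I"), "int value can init an int field");

}  // namespace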
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc index 7736f3d615..068e1223e5 100644 --- a/runtime/dex_file_verifier_test.cc +++ b/runtime/dex_file_verifier_test.cc @@ -123,7 +123,7 @@ static std::unique_ptr<const DexFile> OpenDexFileBase64(const char* base64, // To generate a base64 encoded Dex file (such as kGoodTestDex, below) // from Smali files, use: // -// smali -o classes.dex class1.smali [class2.smali ...] +// smali assemble -o classes.dex class1.smali [class2.smali ...] // base64 classes.dex >classes.dex.base64 // For reference. @@ -1461,7 +1461,7 @@ TEST_F(DexFileVerifierTest, ProtoOrdering) { // To generate a base64 encoded Dex file version 037 from Smali files, use: // -// smali --api-level 24 -o classes.dex class1.smali [class2.smali ...] +// smali assemble --api 24 -o classes.dex class1.smali [class2.smali ...] // base64 classes.dex >classes.dex.base64 // Dex file version 037 generated from: @@ -2090,4 +2090,105 @@ TEST_F(DexFileVerifierTest, InvokeCustomDexSamples) { } } +TEST_F(DexFileVerifierTest, BadStaticFieldInitialValuesArray) { + // Generated DEX file version (037) from: + // + // .class public LBadStaticFieldInitialValuesArray; + // .super Ljava/lang/Object; + // + // # static fields + // .field static final c:C = 'c' + // .field static final i:I = 0x1 + // .field static final s:Ljava/lang/String; = "s" + // + // # direct methods + // .method public constructor <init>()V + // .registers 1 + // invoke-direct {p0}, Ljava/lang/Object;-><init>()V + // return-void + // .end method + // + // Output file was hex edited so that static field "i" has string typing in initial values array. + static const char kDexBase64[] = + "ZGV4CjAzNQBrMi4cCPcMvvXNRw0uI6RRubwMPwgEYXIsAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAL" + "AAAAcAAAAAYAAACcAAAAAQAAALQAAAADAAAAwAAAAAIAAADYAAAAAQAAAOgAAAAkAQAACAEAACAB" + "AAAoAQAAMAEAADMBAAA2AQAAOwEAAE8BAABjAQAAZgEAAGkBAABsAQAAAgAAAAMAAAAEAAAABQAA" + "AAYAAAAHAAAABwAAAAUAAAAAAAAAAgAAAAgAAAACAAEACQAAAAIABAAKAAAAAgAAAAAAAAADAAAA" + "AAAAAAIAAAABAAAAAwAAAAAAAAABAAAAAAAAAHsBAAB0AQAAAQABAAEAAABvAQAABAAAAHAQAQAA" + "AA4ABjxpbml0PgAGQS5qYXZhAAFDAAFJAANMQTsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEv" + "bGFuZy9TdHJpbmc7AAFWAAFjAAFpAAFzAAEABw4AAwNjFwoXCgMAAQAAGAEYARgAgYAEiAIADQAA" + "AAAAAAABAAAAAAAAAAEAAAALAAAAcAAAAAIAAAAGAAAAnAAAAAMAAAABAAAAtAAAAAQAAAADAAAA" + "wAAAAAUAAAACAAAA2AAAAAYAAAABAAAA6AAAAAEgAAABAAAACAEAAAIgAAALAAAAIAEAAAMgAAAB" + "AAAAbwEAAAUgAAABAAAAdAEAAAAgAAABAAAAewEAAAAQAAABAAAAjAEAAA=="; + + size_t length; + std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length)); + CHECK(dex_bytes != nullptr); + // Note: `dex_file` will be destroyed before `dex_bytes`. 
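+ // (The DexFile wraps the decoded buffer rather than copying it, so `dex_bytes` must outlive
+ // `dex_file`; declaring `dex_bytes` first gives it the longer lifetime, since locals are
+ // destroyed in reverse declaration order.)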
+ std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length)); + std::string error_msg; + EXPECT_FALSE(DexFileVerifier::Verify(dex_file.get(), + dex_file->Begin(), + dex_file->Size(), + "bad static field initial values array", + /*verify_checksum*/ true, + &error_msg)); +} + +TEST_F(DexFileVerifierTest, GoodStaticFieldInitialValuesArray) { + // Generated DEX file version (037) from: + // + // .class public LGoodStaticFieldInitialValuesArray; + // .super Ljava/lang/Object; + // + // # static fields + // .field static final b:B = 0x1t + // .field static final c:C = 'c' + // .field static final d:D = 0.6 + // .field static final f:F = 0.5f + // .field static final i:I = 0x3 + // .field static final j:J = 0x4L + // .field static final l1:Ljava/lang/String; + // .field static final l2:Ljava/lang/String; = "s" + // .field static final l3:Ljava/lang/Class; = Ljava/lang/String; + // .field static final s:S = 0x2s + // .field static final z:Z = true + // + // # direct methods + // .method public constructor <init>()V + // .registers 1 + // invoke-direct {p0}, Ljava/lang/Object;-><init>()V + // return-void + // .end method + static const char kDexBase64[] = + "ZGV4CjAzNQAwWxLbdhFa1NGiFWjsy5fhUCHxe5QHtPY8AwAAcAAAAHhWNBIAAAAAAAAAAJwCAAAZ" + "AAAAcAAAAA0AAADUAAAAAQAAAAgBAAALAAAAFAEAAAIAAABsAQAAAQAAAHwBAACgAQAAnAEAAJwB" + "AACkAQAApwEAAKoBAACtAQAAsAEAALMBAAC2AQAA2wEAAO4BAAACAgAAFgIAABkCAAAcAgAAHwIA" + "ACICAAAlAgAAKAIAACsCAAAuAgAAMQIAADUCAAA5AgAAPQIAAEACAAABAAAAAgAAAAMAAAAEAAAA" + "BQAAAAYAAAAHAAAACAAAAAkAAAAKAAAACwAAAAwAAAANAAAADAAAAAsAAAAAAAAABgAAAA4AAAAG" + "AAEADwAAAAYAAgAQAAAABgADABEAAAAGAAQAEgAAAAYABQATAAAABgAJABQAAAAGAAkAFQAAAAYA" + "BwAWAAAABgAKABcAAAAGAAwAGAAAAAYAAAAAAAAACAAAAAAAAAAGAAAAAQAAAAgAAAAAAAAA////" + "/wAAAAB8AgAARAIAAAY8aW5pdD4AAUIAAUMAAUQAAUYAAUkAAUoAI0xHb29kU3RhdGljRmllbGRJ" + "bml0aWFsVmFsdWVzQXJyYXk7ABFMamF2YS9sYW5nL0NsYXNzOwASTGphdmEvbGFuZy9PYmplY3Q7" + "ABJMamF2YS9sYW5nL1N0cmluZzsAAVMAAVYAAVoAAWIAAWMAAWQAAWYAAWkAAWoAAmwxAAJsMgAC" + "bDMAAXMAAXoAAAsAAQNj8TMzMzMzM+M/ED8EAwYEHhcXGAkCAj8AAAAAAQABAAEAAAAAAAAABAAA" + "AHAQAQAAAA4ACwABAAAYARgBGAEYARgBGAEYARgBGAEYARgAgYAE5AQNAAAAAAAAAAEAAAAAAAAA" + "AQAAABkAAABwAAAAAgAAAA0AAADUAAAAAwAAAAEAAAAIAQAABAAAAAsAAAAUAQAABQAAAAIAAABs" + "AQAABgAAAAEAAAB8AQAAAiAAABkAAACcAQAABSAAAAEAAABEAgAAAxAAAAEAAABgAgAAASAAAAEA" + "AABkAgAAACAAAAEAAAB8AgAAABAAAAEAAACcAgAA"; + + size_t length; + std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(kDexBase64, &length)); + CHECK(dex_bytes != nullptr); + // Note: `dex_file` will be destroyed before `dex_bytes`. + std::unique_ptr<DexFile> dex_file(GetDexFile(dex_bytes.get(), length)); + std::string error_msg; + EXPECT_TRUE(DexFileVerifier::Verify(dex_file.get(), + dex_file->Begin(), + dex_file->Size(), + "good static field initial values array", + /*verify_checksum*/ true, + &error_msg)); +} + } // namespace art diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc index 091085a85c..9f34c12d9a 100644 --- a/runtime/dex_instruction.cc +++ b/runtime/dex_instruction.cc @@ -515,6 +515,30 @@ std::string Instruction::DumpString(const DexFile* file) const { return os.str(); } +// Add some checks that ensure the flags make sense. We need a subclass to be in the context of +// Instruction. Otherwise the flags from the instruction list don't work. 
+struct InstructionStaticAsserts : private Instruction { + #define IMPLIES(a, b) (!(a) || (b)) + + #define VAR_ARGS_CHECK(o, c, pname, f, i, a, v) \ + static_assert(IMPLIES((f) == k35c || (f) == k45cc, \ + ((v) & (kVerifyVarArg | kVerifyVarArgNonZero)) != 0), \ + "Missing var-arg verification"); + #include "dex_instruction_list.h" + DEX_INSTRUCTION_LIST(VAR_ARGS_CHECK) + #undef DEX_INSTRUCTION_LIST + #undef VAR_ARGS_CHECK + + #define VAR_ARGS_RANGE_CHECK(o, c, pname, f, i, a, v) \ + static_assert(IMPLIES((f) == k3rc || (f) == k4rcc, \ + ((v) & (kVerifyVarArgRange | kVerifyVarArgRangeNonZero)) != 0), \ + "Missing var-arg verification"); + #include "dex_instruction_list.h" + DEX_INSTRUCTION_LIST(VAR_ARGS_RANGE_CHECK) + #undef DEX_INSTRUCTION_LIST + #undef VAR_ARGS_RANGE_CHECK +}; + std::ostream& operator<<(std::ostream& os, const Instruction::Code& code) { return os << Instruction::Name(code); } diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h index a5ce3c2f8a..11dc7e2c9f 100644 --- a/runtime/dex_instruction_list.h +++ b/runtime/dex_instruction_list.h @@ -271,8 +271,8 @@ V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \ V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kVerifyRegHPrototype) \ V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kVerifyRegHPrototype) \ - V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite) \ - V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite) \ + V(0xFC, INVOKE_CUSTOM, "invoke-custom", k35c, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite | kVerifyVarArg) \ + V(0xFD, INVOKE_CUSTOM_RANGE, "invoke-custom/range", k3rc, kIndexCallSiteRef, kContinue | kThrow, kVerifyRegBCallSite | kVerifyVarArgRange) \ V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \ V(0xFF, UNUSED_FF, "unused-ff", k10x, kIndexUnknown, 0, kVerifyError) diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/runtime/dex_to_dex_decompiler.cc index 85d5784c7a..c15c9ec448 100644 --- a/compiler/dex/dex_to_dex_decompiler.cc +++ b/runtime/dex_to_dex_decompiler.cc @@ -32,6 +32,7 @@ class DexDecompiler { bool decompile_return_instruction) : code_item_(code_item), quickened_info_ptr_(quickened_info.data()), + quickened_info_start_(quickened_info.data()), quickened_info_end_(quickened_info.data() + quickened_info.size()), decompile_return_instruction_(decompile_return_instruction) {} @@ -89,6 +90,7 @@ class DexDecompiler { const DexFile::CodeItem& code_item_; const uint8_t* quickened_info_ptr_; + const uint8_t* const quickened_info_start_; const uint8_t* const quickened_info_end_; const bool decompile_return_instruction_; @@ -185,10 +187,15 @@ bool DexDecompiler::Decompile() { } if (quickened_info_ptr_ != quickened_info_end_) { - LOG(FATAL) << "Failed to use all values in quickening info." - << " Actual: " << std::hex << quickened_info_ptr_ - << " Expected: " << quickened_info_end_; - return false; + if (quickened_info_start_ == quickened_info_ptr_) { + LOG(WARNING) << "Failed to use any value in quickening info," + << " potentially due to duplicate methods."; + } else { + LOG(FATAL) << "Failed to use all values in quickening info." 
+ << " Actual: " << std::hex << reinterpret_cast<uintptr_t>(quickened_info_ptr_) + << " Expected: " << reinterpret_cast<uintptr_t>(quickened_info_end_); + return false; + } } return true; diff --git a/compiler/dex/dex_to_dex_decompiler.h b/runtime/dex_to_dex_decompiler.h index b5d5b91915..d7cb1641e1 100644 --- a/compiler/dex/dex_to_dex_decompiler.h +++ b/runtime/dex_to_dex_decompiler.h @@ -14,8 +14,8 @@ * limitations under the License. */ -#ifndef ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_ -#define ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_ +#ifndef ART_RUNTIME_DEX_TO_DEX_DECOMPILER_H_ +#define ART_RUNTIME_DEX_TO_DEX_DECOMPILER_H_ #include "base/array_ref.h" #include "dex_file.h" @@ -36,4 +36,4 @@ bool ArtDecompileDEX(const DexFile::CodeItem& code_item, } // namespace optimizer } // namespace art -#endif // ART_COMPILER_DEX_DEX_TO_DEX_DECOMPILER_H_ +#endif // ART_RUNTIME_DEX_TO_DEX_DECOMPILER_H_ diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc index db65e40da5..3c8243a6c5 100644 --- a/runtime/dexopt_test.cc +++ b/runtime/dexopt_test.cc @@ -45,18 +45,23 @@ void DexoptTest::PostRuntimeCreate() { } void DexoptTest::GenerateOatForTest(const std::string& dex_location, - const std::string& oat_location, - CompilerFilter::Filter filter, - bool relocate, - bool pic, - bool with_alternate_image) { + const std::string& oat_location_in, + CompilerFilter::Filter filter, + bool relocate, + bool pic, + bool with_alternate_image) { std::string dalvik_cache = GetDalvikCache(GetInstructionSetString(kRuntimeISA)); std::string dalvik_cache_tmp = dalvik_cache + ".redirected"; - + std::string oat_location = oat_location_in; if (!relocate) { // Temporarily redirect the dalvik cache so dex2oat doesn't find the // relocated image file. ASSERT_EQ(0, rename(dalvik_cache.c_str(), dalvik_cache_tmp.c_str())) << strerror(errno); + // If the oat location is in dalvik cache, replace the cache path with the temporary one. + size_t pos = oat_location.find(dalvik_cache); + if (pos != std::string::npos) { + oat_location = oat_location.replace(pos, dalvik_cache.length(), dalvik_cache_tmp); + } } std::vector<std::string> args; @@ -90,6 +95,7 @@ void DexoptTest::GenerateOatForTest(const std::string& dex_location, if (!relocate) { // Restore the dalvik cache if needed. ASSERT_EQ(0, rename(dalvik_cache_tmp.c_str(), dalvik_cache.c_str())) << strerror(errno); + oat_location = oat_location_in; } // Verify the odex file was generated as expected. @@ -122,7 +128,7 @@ void DexoptTest::GenerateOatForTest(const std::string& dex_location, } if (!with_alternate_image) { - if (CompilerFilter::IsBytecodeCompilationEnabled(filter)) { + if (CompilerFilter::IsAotCompilationEnabled(filter)) { if (relocate) { EXPECT_EQ(reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin()), oat_header.GetImageFileLocationOatDataBegin()); diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc index 3820d854f9..5762e4f00a 100644 --- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc @@ -16,6 +16,7 @@ #include "base/logging.h" #include "base/mutex.h" +#include "base/systrace.h" #include "callee_save_frame.h" #include "interpreter/interpreter.h" #include "obj_ptr-inl.h" // TODO: Find the other include that isn't complete, and clean this up. 
@@ -24,8 +25,9 @@ namespace art { -NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame) +NO_RETURN static void artDeoptimizeImpl(Thread* self, DeoptimizationKind kind, bool single_frame) REQUIRES_SHARED(Locks::mutator_lock_) { + Runtime::Current()->IncrementDeoptimizationCount(kind); if (VLOG_IS_ON(deopt)) { if (single_frame) { // Deopt logging will be in DeoptimizeSingleFrame. It is there to take advantage of the @@ -38,10 +40,13 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame) self->AssertHasDeoptimizationContext(); QuickExceptionHandler exception_handler(self, true); - if (single_frame) { - exception_handler.DeoptimizeSingleFrame(); - } else { - exception_handler.DeoptimizeStack(); + { + ScopedTrace trace(std::string("Deoptimization ") + GetDeoptimizationKindName(kind)); + if (single_frame) { + exception_handler.DeoptimizeSingleFrame(kind); + } else { + exception_handler.DeoptimizeStack(); + } } uintptr_t return_pc = exception_handler.UpdateInstrumentationStack(); if (exception_handler.IsFullFragmentDone()) { @@ -57,18 +62,18 @@ NO_RETURN static void artDeoptimizeImpl(Thread* self, bool single_frame) extern "C" NO_RETURN void artDeoptimize(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - artDeoptimizeImpl(self, false); + artDeoptimizeImpl(self, DeoptimizationKind::kFullFrame, false); } -// This is called directly from compiled code by an HDepptimize. -extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(Thread* self) +// This is called directly from compiled code by an HDeoptimize. +extern "C" NO_RETURN void artDeoptimizeFromCompiledCode(DeoptimizationKind kind, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); // Before deoptimizing to interpreter, we must push the deoptimization context. JValue return_value; return_value.SetJ(0); // we never deoptimize from compiled code with an invoke result. self->PushDeoptimizationContext(return_value, false, /* from_code */ true, self->GetException()); - artDeoptimizeImpl(self, true); + artDeoptimizeImpl(self, kind, true); } } // namespace art diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc index 355d7b3e2f..6b965678c3 100644 --- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc @@ -58,18 +58,13 @@ static inline void BssWriteBarrier(ArtMethod* outer_method) REQUIRES_SHARED(Lock } } -constexpr Runtime::CalleeSaveType kInitEntrypointSaveType = - // TODO: Change allocation entrypoints on MIPS and MIPS64 to kSaveEverything. - (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly - : Runtime::kSaveEverything; - extern "C" mirror::Class* artInitializeStaticStorageFromCode(uint32_t type_idx, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { // Called to ensure static storage base is initialized for direct static field reads and writes. // A class may be accessing another class' fields when it doesn't have access, as access has been // given by inheritance. 
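// (kSaveEverything frames preserve all registers, so compiled code can call these entrypoints
// without spilling live values the way an ordinary runtime call site would require.)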
ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false); @@ -83,7 +78,7 @@ extern "C" mirror::Class* artInitializeTypeFromCode(uint32_t type_idx, Thread* s REQUIRES_SHARED(Locks::mutator_lock_) { // Called when method->dex_cache_resolved_types_[] misses. ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false); @@ -98,7 +93,7 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type // Called when caller isn't guaranteed to have access to a type and the dex cache may be // unpopulated. ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::Class* result = ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true); @@ -111,7 +106,7 @@ extern "C" mirror::Class* artInitializeTypeAndVerifyAccessFromCode(uint32_t type extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) { ScopedQuickEntrypointChecks sqec(self); - auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, kInitEntrypointSaveType); + auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything); ArtMethod* caller = caller_and_outer.caller; mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx)); if (LIKELY(result != nullptr)) { diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h index 915f18ed71..6cd9dc1d71 100644 --- a/runtime/entrypoints/quick/quick_entrypoints.h +++ b/runtime/entrypoints/quick/quick_entrypoints.h @@ -21,6 +21,7 @@ #include "base/macros.h" #include "base/mutex.h" +#include "deoptimization_kind.h" #include "offsets.h" #define QUICK_ENTRYPOINT_OFFSET(ptr_size, x) \ diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index e0a2e3cf8f..e2d45acb34 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -140,7 +140,7 @@ V(ThrowNullPointer, void, void) \ V(ThrowStackOverflow, void, void*) \ V(ThrowStringBounds, void, int32_t, int32_t) \ - V(Deoptimize, void, void) \ + V(Deoptimize, void, DeoptimizationKind) \ \ V(A64Load, int64_t, volatile const int64_t *) \ V(A64Store, void, volatile int64_t *, int64_t) \ diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h index 4ca52de2a2..fa287cb0ad 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -17,6 +17,8 @@ #ifndef ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_ #define 
ART_RUNTIME_ENTRYPOINTS_RUNTIME_ASM_ENTRYPOINTS_H_ +#include "deoptimization_kind.h" + namespace art { #ifndef BUILDING_LIBART @@ -77,7 +79,7 @@ static inline const void* GetQuickInstrumentationEntryPoint() { } // Stub to deoptimize from compiled code. -extern "C" void art_quick_deoptimize_from_compiled_code(); +extern "C" void art_quick_deoptimize_from_compiled_code(DeoptimizationKind); // The return_pc of instrumentation exit stub. extern "C" void art_quick_instrumentation_exit(); diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h index 68ef15d0cf..c3dd21f113 100644 --- a/runtime/gc/accounting/card_table.h +++ b/runtime/gc/accounting/card_table.h @@ -47,10 +47,11 @@ template<size_t kAlignment> class SpaceBitmap; // WriteBarrier, and from there to here. class CardTable { public: - static constexpr size_t kCardShift = 7; + static constexpr size_t kCardShift = 10; static constexpr size_t kCardSize = 1 << kCardShift; static constexpr uint8_t kCardClean = 0x0; static constexpr uint8_t kCardDirty = 0x70; + static constexpr uint8_t kCardAged = kCardDirty - 1; static CardTable* Create(const uint8_t* heap_begin, size_t heap_capacity); ~CardTable(); diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index 34e30c177f..c416b9cc3d 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -391,7 +391,7 @@ void ModUnionTableReferenceCache::VisitObjects(ObjectCallback* callback, void* a uintptr_t end = start + CardTable::kCardSize; live_bitmap->VisitMarkedRange(start, end, - [this, callback, arg](mirror::Object* obj) { + [callback, arg](mirror::Object* obj) { callback(obj, arg); }); } @@ -402,7 +402,7 @@ void ModUnionTableReferenceCache::VisitObjects(ObjectCallback* callback, void* a uintptr_t end = start + CardTable::kCardSize; live_bitmap->VisitMarkedRange(start, end, - [this, callback, arg](mirror::Object* obj) { + [callback, arg](mirror::Object* obj) { callback(obj, arg); }); } @@ -560,7 +560,7 @@ void ModUnionTableCardCache::VisitObjects(ObjectCallback* callback, void* arg) { << start << " " << *space_; space_->GetLiveBitmap()->VisitMarkedRange(start, start + CardTable::kCardSize, - [this, callback, arg](mirror::Object* obj) { + [callback, arg](mirror::Object* obj) { callback(obj, arg); }); }); diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h index dd449f991b..3503973321 100644 --- a/runtime/gc/collector/concurrent_copying-inl.h +++ b/runtime/gc/collector/concurrent_copying-inl.h @@ -130,7 +130,7 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref, mirror::Object* to_ref = GetFwdPtr(from_ref); if (to_ref == nullptr) { // It isn't marked yet. Mark it by copying it to the to-space. - to_ref = Copy(from_ref); + to_ref = Copy(from_ref, holder, offset); } DCHECK(region_space_->IsInToSpace(to_ref) || heap_->non_moving_space_->HasAddress(to_ref)) << "from_ref=" << from_ref << " to_ref=" << to_ref; @@ -152,7 +152,8 @@ inline mirror::Object* ConcurrentCopying::Mark(mirror::Object* from_ref, inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) { mirror::Object* ret; - if (from_ref == nullptr) { + // We can get here before marking starts since we gray immune objects before the marking phase. + if (from_ref == nullptr || !Thread::Current()->GetIsGcMarking()) { return from_ref; } // TODO: Consider removing this check when we are done investigating slow paths. 
b/30162165 diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 4192f34879..a450a751b8 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -77,6 +77,7 @@ ConcurrentCopying::ConcurrentCopying(Heap* heap, mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock), thread_running_gc_(nullptr), is_marking_(false), + is_using_read_barrier_entrypoints_(false), is_active_(false), is_asserting_to_space_invariant_(false), region_space_bitmap_(nullptr), @@ -163,6 +164,15 @@ void ConcurrentCopying::RunPhases() { ReaderMutexLock mu(self, *Locks::mutator_lock_); InitializePhase(); } + if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { + // Switch to read barrier mark entrypoints before we gray the objects. This is required in case + // a mutator sees a gray bit and dispatches on the entrypoint (b/37876887). + ActivateReadBarrierEntrypoints(); + // Gray dirty immune objects concurrently to reduce GC pause times. We re-process gray cards in + // the pause. + ReaderMutexLock mu(self, *Locks::mutator_lock_); + GrayAllDirtyImmuneObjects(); + } FlipThreadRoots(); { ReaderMutexLock mu(self, *Locks::mutator_lock_); @@ -192,6 +202,59 @@ void ConcurrentCopying::RunPhases() { thread_running_gc_ = nullptr; } +class ConcurrentCopying::ActivateReadBarrierEntrypointsCheckpoint : public Closure { + public: + explicit ActivateReadBarrierEntrypointsCheckpoint(ConcurrentCopying* concurrent_copying) + : concurrent_copying_(concurrent_copying) {} + + void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS { + // Note: self is not necessarily equal to thread since thread may be suspended. + Thread* self = Thread::Current(); + DCHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc) + << thread->GetState() << " thread " << thread << " self " << self; + // Switch to the read barrier entrypoints. + thread->SetReadBarrierEntrypoints(); + // If thread is a running mutator, then act on behalf of the garbage collector. + // See the code in ThreadList::RunCheckpoint. + concurrent_copying_->GetBarrier().Pass(self); + } + + private: + ConcurrentCopying* const concurrent_copying_; +}; + +class ConcurrentCopying::ActivateReadBarrierEntrypointsCallback : public Closure { + public: + explicit ActivateReadBarrierEntrypointsCallback(ConcurrentCopying* concurrent_copying) + : concurrent_copying_(concurrent_copying) {} + + void Run(Thread* self ATTRIBUTE_UNUSED) OVERRIDE REQUIRES(Locks::thread_list_lock_) { + // This needs to run under the thread_list_lock_ critical section in ThreadList::RunCheckpoint() + // to avoid a race with ThreadList::Register(). + CHECK(!concurrent_copying_->is_using_read_barrier_entrypoints_); + concurrent_copying_->is_using_read_barrier_entrypoints_ = true; + } + + private: + ConcurrentCopying* const concurrent_copying_; +}; + +void ConcurrentCopying::ActivateReadBarrierEntrypoints() { + Thread* const self = Thread::Current(); + ActivateReadBarrierEntrypointsCheckpoint checkpoint(this); + ThreadList* thread_list = Runtime::Current()->GetThreadList(); + gc_barrier_->Init(self, 0); + ActivateReadBarrierEntrypointsCallback callback(this); + const size_t barrier_count = thread_list->RunCheckpoint(&checkpoint, &callback); + // If there are no threads to wait for, which implies that all the checkpoint functions have + // finished, then there is no need to release the mutator lock and wait on the barrier.
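+ // Barrier bookkeeping: each checkpointed thread calls Pass(), which decrements the count,
+ // and Increment(self, barrier_count) below adds back the outstanding total and blocks until
+ // the count returns to zero.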
+ if (barrier_count == 0) { + return; + } + ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun); + gc_barrier_->Increment(self, barrier_count); +} + void ConcurrentCopying::BindBitmaps() { Thread* self = Thread::Current(); WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); @@ -352,9 +415,12 @@ class ConcurrentCopying::FlipCallback : public Closure { if (kVerifyNoMissingCardMarks) { cc->VerifyNoMissingCardMarks(); } - CHECK(thread == self); + CHECK_EQ(thread, self); Locks::mutator_lock_->AssertExclusiveHeld(self); - cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_); + { + TimingLogger::ScopedTiming split2("(Paused)SetFromSpace", cc->GetTimings()); + cc->region_space_->SetFromSpace(cc->rb_table_, cc->force_evacuate_all_); + } cc->SwapStacks(); if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) { cc->RecordLiveStackFreezeSize(self); @@ -368,11 +434,11 @@ class ConcurrentCopying::FlipCallback : public Closure { } if (UNLIKELY(Runtime::Current()->IsActiveTransaction())) { CHECK(Runtime::Current()->IsAotCompiler()); - TimingLogger::ScopedTiming split2("(Paused)VisitTransactionRoots", cc->GetTimings()); + TimingLogger::ScopedTiming split3("(Paused)VisitTransactionRoots", cc->GetTimings()); Runtime::Current()->VisitTransactionRoots(cc); } if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { - cc->GrayAllDirtyImmuneObjects(); + cc->GrayAllNewlyDirtyImmuneObjects(); if (kIsDebugBuild) { // Check that all non-gray immune objects only reference immune objects. cc->VerifyGrayImmuneObjects(); @@ -519,8 +585,8 @@ class ConcurrentCopying::VerifyNoMissingCardMarkVisitor { void ConcurrentCopying::VerifyNoMissingCardMarkCallback(mirror::Object* obj, void* arg) { auto* collector = reinterpret_cast<ConcurrentCopying*>(arg); - // Objects not on dirty cards should never have references to newly allocated regions. - if (!collector->heap_->GetCardTable()->IsDirty(obj)) { + // Objects not on dirty or aged cards should never have references to newly allocated regions. + if (collector->heap_->GetCardTable()->GetCard(obj) == gc::accounting::CardTable::kCardClean) { VerifyNoMissingCardMarkVisitor visitor(collector, /*holder*/ obj); obj->VisitReferences</*kVisitNativeRoots*/true, kVerifyNone, kWithoutReadBarrier>( visitor, @@ -583,53 +649,100 @@ void ConcurrentCopying::FlipThreadRoots() { } } +template <bool kConcurrent> class ConcurrentCopying::GrayImmuneObjectVisitor { public: - explicit GrayImmuneObjectVisitor() {} + explicit GrayImmuneObjectVisitor(Thread* self) : self_(self) {} ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) { - if (kUseBakerReadBarrier) { - if (kIsDebugBuild) { - Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current()); + if (kUseBakerReadBarrier && obj->GetReadBarrierState() == ReadBarrier::WhiteState()) { + if (kConcurrent) { + Locks::mutator_lock_->AssertSharedHeld(self_); + obj->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState()); + // Mod union table VisitObjects may visit the same object multiple times so we can't check + // the result of the atomic set.
+ } else { + Locks::mutator_lock_->AssertExclusiveHeld(self_); + obj->SetReadBarrierState(ReadBarrier::GrayState()); } - obj->SetReadBarrierState(ReadBarrier::GrayState()); } } static void Callback(mirror::Object* obj, void* arg) REQUIRES_SHARED(Locks::mutator_lock_) { - reinterpret_cast<GrayImmuneObjectVisitor*>(arg)->operator()(obj); + reinterpret_cast<GrayImmuneObjectVisitor<kConcurrent>*>(arg)->operator()(obj); } + + private: + Thread* const self_; }; void ConcurrentCopying::GrayAllDirtyImmuneObjects() { - TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings()); - gc::Heap* const heap = Runtime::Current()->GetHeap(); - accounting::CardTable* const card_table = heap->GetCardTable(); - WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + TimingLogger::ScopedTiming split("GrayAllDirtyImmuneObjects", GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + Thread* const self = Thread::Current(); + using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ true>; + VisitorType visitor(self); + WriterMutexLock mu(self, *Locks::heap_bitmap_lock_); for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) { DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); - GrayImmuneObjectVisitor visitor; - accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); // Mark all the objects on dirty cards since these may point to objects in other space. // Once these are marked, the GC will eventually clear them later. // Table is non null for boot image and zygote spaces. It is only null for application image // spaces. if (table != nullptr) { - // TODO: Consider adding precleaning outside the pause. table->ProcessCards(); - table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor); - // Since the cards are recorded in the mod-union table and this is paused, we can clear - // the cards for the space (to madvise). + table->VisitObjects(&VisitorType::Callback, &visitor); + // Don't clear cards here since we need to rescan in the pause. If we cleared the cards here, + // there would be races with the mutator marking new cards. + } else { + // Keep cards aged if we don't have a mod-union table since we may need to scan them in future + // GCs. This case is for app images. + card_table->ModifyCardsAtomic( + space->Begin(), + space->End(), + [](uint8_t card) { + return (card != gc::accounting::CardTable::kCardClean) + ? gc::accounting::CardTable::kCardAged + : card; + }, + /* card modified visitor */ VoidFunctor()); + card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + gc::accounting::CardTable::kCardAged); + } + } +} + +void ConcurrentCopying::GrayAllNewlyDirtyImmuneObjects() { + TimingLogger::ScopedTiming split("(Paused)GrayAllNewlyDirtyImmuneObjects", GetTimings()); + accounting::CardTable* const card_table = heap_->GetCardTable(); + using VisitorType = GrayImmuneObjectVisitor</* kIsConcurrent */ false>; + Thread* const self = Thread::Current(); + VisitorType visitor(self); + WriterMutexLock mu(Thread::Current(), *Locks::heap_bitmap_lock_); + for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) { + DCHECK(space->IsImageSpace() || space->IsZygoteSpace()); + accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space); + + // Don't need to scan aged cards since we did these before the pause. Note that scanning cards + // also handles the mod-union table cards. 
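+ // This works because cards handled concurrently were either recorded in the mod-union
+ // table or downgraded to kCardAged (kCardDirty - 1), so only cards that mutators
+ // re-dirtied after that pass still read kCardDirty here.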
+ card_table->Scan</* kClearCard */ false>(space->GetMarkBitmap(), + space->Begin(), + space->End(), + visitor, + gc::accounting::CardTable::kCardDirty); + if (table != nullptr) { + // Add the cards to the mod-union table so that we can clear cards to save RAM. + table->ProcessCards(); TimingLogger::ScopedTiming split2("(Paused)ClearCards", GetTimings()); card_table->ClearCardRange(space->Begin(), AlignDown(space->End(), accounting::CardTable::kCardSize)); - } else { - // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the - // pause because app image spaces are all dirty pages anyways. - card_table->Scan<false>(space->GetMarkBitmap(), space->Begin(), space->End(), visitor); } } - // Since all of the objects that may point to other spaces are marked, we can avoid all the read + // Since all of the objects that may point to other spaces are gray, we can avoid all the read // barriers in the immune spaces. updated_all_immune_objects_.StoreRelaxed(true); } @@ -658,6 +771,7 @@ class ConcurrentCopying::ImmuneSpaceScanObjVisitor { ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) { if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { + // Only need to scan gray objects. if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) { collector_->ScanImmuneObject(obj); // Done scanning the object, go back to white. @@ -707,6 +821,7 @@ void ConcurrentCopying::MarkingPhase() { if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects && table != nullptr) { table->VisitObjects(ImmuneSpaceScanObjVisitor::Callback, &visitor); } else { + // TODO: Scan only the aged cards. live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()), reinterpret_cast<uintptr_t>(space->Limit()), visitor); @@ -876,6 +991,12 @@ class ConcurrentCopying::DisableMarkingCallback : public Closure { // to avoid a race with ThreadList::Register(). CHECK(concurrent_copying_->is_marking_); concurrent_copying_->is_marking_ = false; + if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) { + CHECK(concurrent_copying_->is_using_read_barrier_entrypoints_); + concurrent_copying_->is_using_read_barrier_entrypoints_ = false; + } else { + CHECK(!concurrent_copying_->is_using_read_barrier_entrypoints_); + } } private: @@ -1621,25 +1742,29 @@ void ConcurrentCopying::MarkZygoteLargeObjects() { Thread* const self = Thread::Current(); WriterMutexLock rmu(self, *Locks::heap_bitmap_lock_); space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace(); - // Pick the current live bitmap (mark bitmap if swapped). - accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap(); - accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap(); - // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept. - std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic(); - live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first), - reinterpret_cast<uintptr_t>(range.second), - [mark_bitmap, los, self](mirror::Object* obj) - REQUIRES(Locks::heap_bitmap_lock_) - REQUIRES_SHARED(Locks::mutator_lock_) { - if (los->IsZygoteLargeObject(self, obj)) { - mark_bitmap->Set(obj); - } - }); + if (los != nullptr) { + // Pick the current live bitmap (mark bitmap if swapped). 
+ accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap(); + accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap(); + // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept. + std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic(); + live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first), + reinterpret_cast<uintptr_t>(range.second), + [mark_bitmap, los, self](mirror::Object* obj) + REQUIRES(Locks::heap_bitmap_lock_) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (los->IsZygoteLargeObject(self, obj)) { + mark_bitmap->Set(obj); + } + }); + } } void ConcurrentCopying::SweepLargeObjects(bool swap_bitmaps) { TimingLogger::ScopedTiming split("SweepLargeObjects", GetTimings()); - RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); + if (heap_->GetLargeObjectsSpace() != nullptr) { + RecordFreeLOS(heap_->GetLargeObjectsSpace()->Sweep(swap_bitmaps)); + } } void ConcurrentCopying::ReclaimPhase() { @@ -1888,7 +2013,6 @@ void ConcurrentCopying::AssertToSpaceInvariantInNonMovingSpace(mirror::Object* o heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); accounting::LargeObjectBitmap* los_bitmap = heap_mark_bitmap_->GetLargeObjectBitmap(ref); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; bool is_los = mark_bitmap == nullptr; if ((!is_los && mark_bitmap->Test(ref)) || (is_los && los_bitmap->Test(ref))) { @@ -2160,8 +2284,16 @@ mirror::Object* ConcurrentCopying::AllocateInSkippedBlock(size_t alloc_size) { return reinterpret_cast<mirror::Object*>(addr); } -mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { +mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref, + mirror::Object* holder, + MemberOffset offset) { DCHECK(region_space_->IsInFromSpace(from_ref)); + // If the class pointer is null, the object is invalid. This could occur for a dangling pointer + // from a previous GC that is either inside or outside the allocated region. + mirror::Class* klass = from_ref->GetClass<kVerifyNone, kWithoutReadBarrier>(); + if (UNLIKELY(klass == nullptr)) { + heap_->GetVerification()->LogHeapCorruption(holder, offset, from_ref, /* fatal */ true); + } // There must not be a read barrier to avoid nested RB that might violate the to-space invariant. // Note that from_ref is a from space ref so the SizeOf() call will access the from-space meta // objects, but it's ok and necessary. @@ -2216,7 +2348,7 @@ mirror::Object* ConcurrentCopying::Copy(mirror::Object* from_ref) { DCHECK(to_ref != nullptr); // Copy the object excluding the lock word since that is handled in the loop. - to_ref->SetClass(from_ref->GetClass<kVerifyNone, kWithoutReadBarrier>()); + to_ref->SetClass(klass); const size_t kObjectHeaderSize = sizeof(mirror::Object); DCHECK_GE(obj_size, kObjectHeaderSize); static_assert(kObjectHeaderSize == sizeof(mirror::HeapReference<mirror::Class>) + @@ -2384,7 +2516,6 @@ mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref, heap_mark_bitmap_->GetContinuousSpaceBitmap(ref); accounting::LargeObjectBitmap* los_bitmap = heap_mark_bitmap_->GetLargeObjectBitmap(ref); - CHECK(los_bitmap != nullptr) << "LOS bitmap covers the entire address range"; bool is_los = mark_bitmap == nullptr; if (!is_los && mark_bitmap->Test(ref)) { // Already marked. 
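Earlier in this change, CardTable::kCardShift grows from 7 to 10, so one card now covers 1 KiB of heap instead of 128 bytes; the generated asm_support_gen.h later in the diff checks the same constant (CARD_TABLE_CARD_SHIFT 0xa). A quick standalone check of the geometry, using the constants from card_table.h (the biased-base details of the real write barrier are omitted here):

#include <cstddef>
#include <cstdint>

// Constants copied from runtime/gc/accounting/card_table.h after this change.
constexpr size_t kCardShift = 10;
constexpr size_t kCardSize = 1 << kCardShift;  // 1024-byte cards.
constexpr uint8_t kCardDirty = 0x70;
constexpr uint8_t kCardAged = kCardDirty - 1;  // 0x6f, distinct from clean (0) and dirty.

// Index of the card covering an address, relative to the heap base: the same
// shift the write barrier performs.
constexpr size_t CardIndex(uintptr_t heap_begin, uintptr_t addr) {
  return (addr - heap_begin) >> kCardShift;
}

static_assert(kCardSize == 1024, "a card covers 1 KiB after this change");
static_assert(CardIndex(0x1000, 0x1000 + 3 * kCardSize + 5) == 3, "an offset maps into its card");
static_assert(kCardAged != 0 && kCardAged != kCardDirty, "aged must be distinguishable");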
@@ -2424,7 +2555,7 @@ mirror::Object* ConcurrentCopying::MarkNonMoving(mirror::Object* ref, if (is_los && !IsAligned<kPageSize>(ref)) { // Ref is a large object that is not aligned, it must be heap corruption. Dump data before // AtomicSetReadBarrierState since it will fault if the address is not valid. - heap_->GetVerification()->LogHeapCorruption(ref, offset, holder, /* fatal */ true); + heap_->GetVerification()->LogHeapCorruption(holder, offset, ref, /* fatal */ true); } // Not marked or on the allocation stack. Try to mark it. // This may or may not succeed, which is ok. diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h index f8773145f0..c09e0eb109 100644 --- a/runtime/gc/collector/concurrent_copying.h +++ b/runtime/gc/collector/concurrent_copying.h @@ -118,6 +118,11 @@ class ConcurrentCopying : public GarbageCollector { bool IsMarking() const { return is_marking_; } + // We may want to use read barrier entrypoints before is_marking_ is true since concurrent graying + // creates a small window where we might dispatch on these entrypoints. + bool IsUsingReadBarrierEntrypoints() const { + return is_using_read_barrier_entrypoints_; + } bool IsActive() const { return is_active_; } @@ -133,7 +138,10 @@ class ConcurrentCopying : public GarbageCollector { private: void PushOntoMarkStack(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); - mirror::Object* Copy(mirror::Object* from_ref) REQUIRES_SHARED(Locks::mutator_lock_) + mirror::Object* Copy(mirror::Object* from_ref, + mirror::Object* holder, + MemberOffset offset) + REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void Scan(mirror::Object* to_ref) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); @@ -162,6 +170,9 @@ class ConcurrentCopying : public GarbageCollector { void GrayAllDirtyImmuneObjects() REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); + void GrayAllNewlyDirtyImmuneObjects() + REQUIRES(Locks::mutator_lock_) + REQUIRES(!mark_stack_lock_); void VerifyGrayImmuneObjects() REQUIRES(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_); @@ -249,6 +260,8 @@ class ConcurrentCopying : public GarbageCollector { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!mark_stack_lock_, !skipped_blocks_lock_, !immune_gray_stack_lock_); void DumpPerformanceInfo(std::ostream& os) OVERRIDE REQUIRES(!rb_slow_path_histogram_lock_); + // Set the read barrier mark entrypoints to non-null. + void ActivateReadBarrierEntrypoints(); space::RegionSpace* region_space_; // The underlying region space. std::unique_ptr<Barrier> gc_barrier_; @@ -265,6 +278,8 @@ class ConcurrentCopying : public GarbageCollector { GUARDED_BY(mark_stack_lock_); Thread* thread_running_gc_; bool is_marking_; // True while marking is ongoing. + // True while we might dispatch on the read barrier entrypoints. + bool is_using_read_barrier_entrypoints_; bool is_active_; // True while the collection is ongoing. bool is_asserting_to_space_invariant_; // True while asserting the to-space invariant. ImmuneSpaces immune_spaces_; @@ -327,6 +342,8 @@ class ConcurrentCopying : public GarbageCollector { // ObjPtr since the GC may transition to suspended and runnable between phases. 
mirror::Class* java_lang_Object_; + class ActivateReadBarrierEntrypointsCallback; + class ActivateReadBarrierEntrypointsCheckpoint; class AssertToSpaceInvariantFieldVisitor; class AssertToSpaceInvariantObjectVisitor; class AssertToSpaceInvariantRefsVisitor; @@ -336,7 +353,7 @@ class ConcurrentCopying : public GarbageCollector { class DisableMarkingCheckpoint; class DisableWeakRefAccessCallback; class FlipCallback; - class GrayImmuneObjectVisitor; + template <bool kConcurrent> class GrayImmuneObjectVisitor; class ImmuneSpaceScanObjVisitor; class LostCopyVisitor; class RefFieldsVisitor; diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index cab293f23c..9d3d950a0f 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -140,7 +140,7 @@ inline mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) { } } else { DCHECK(!space_->HasAddress(obj)); - auto slow_path = [this](const mirror::Object* ref) + auto slow_path = [](const mirror::Object* ref) REQUIRES_SHARED(Locks::mutator_lock_) { // Marking a large object, make sure it's aligned as a sanity check. if (!IsAligned<kPageSize>(ref)) { diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 7da722160e..298336ae4d 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -150,8 +150,13 @@ static constexpr bool kUsePartialTlabs = true; static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(300 * MB - Heap::kDefaultNonMovingSpaceCapacity); #else -// For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000. +#ifdef __ANDROID__ +// For 32-bit Android, use 0x20000000 because asan reserves 0x04000000 - 0x20000000. static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x20000000); +#else +// For 32-bit host, use 0x40000000 because asan uses most of the space below this. +static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x40000000); +#endif #endif static inline bool CareAboutPauseTimes() { @@ -3517,7 +3522,13 @@ collector::GcType Heap::WaitForGcToCompleteLocked(GcCause cause, Thread* self) { // is not the heap task daemon thread, it's considered as a // blocking GC (i.e., blocking itself). running_collection_is_blocking_ = true; - VLOG(gc) << "Starting a blocking GC " << cause; + // Don't log fake "GC" types that are only used for debugger or hidden APIs. If we log these, + // it results in log spam. kGcCauseExplicit is already logged in LogGC, so avoid it here too. + if (cause == kGcCauseForAlloc || + cause == kGcCauseForNativeAlloc || + cause == kGcCauseDisableMovingGc) { + VLOG(gc) << "Starting a blocking GC " << cause; + } } return last_gc_type; } diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc index e9f0758b85..748d378e76 100644 --- a/runtime/gc/space/image_space.cc +++ b/runtime/gc/space/image_space.cc @@ -651,7 +651,8 @@ class ImageSpaceLoader { bitmap_name, image_bitmap_map.release(), reinterpret_cast<uint8_t*>(map->Begin()), - image_objects.End())); + // Make sure the bitmap is aligned to card size instead of just bitmap word size.
+ RoundUp(image_objects.End(), gc::accounting::CardTable::kCardSize))); if (bitmap == nullptr) { *error_msg = StringPrintf("Could not create bitmap '%s'", bitmap_name.c_str()); return nullptr; @@ -1695,6 +1696,29 @@ bool ImageSpace::LoadBootImage(const std::string& image_file_name, return true; } +ImageSpace::~ImageSpace() { + Runtime* runtime = Runtime::Current(); + if (runtime == nullptr) { + return; + } + + if (GetImageHeader().IsAppImage()) { + // App image spaces do not set the resolution method or other runtime methods in Init, so there is nothing to undo. + return; + } + + if (!runtime->HasResolutionMethod()) { + // Another image space has already unloaded the below methods. + return; + } + + runtime->ClearInstructionSet(); + runtime->ClearResolutionMethod(); + runtime->ClearImtConflictMethod(); + runtime->ClearImtUnimplementedMethod(); + runtime->ClearCalleeSaveMethods(); +} + std::unique_ptr<ImageSpace> ImageSpace::CreateFromAppImage(const char* image, const OatFile* oat_file, std::string* error_msg) { diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h index 199bbdd00a..aa3dd42416 100644 --- a/runtime/gc/space/image_space.h +++ b/runtime/gc/space/image_space.h @@ -159,6 +159,9 @@ class ImageSpace : public MemMapSpace { void DumpSections(std::ostream& os) const; + // De-initialize the image space by undoing the effects of Init(). + virtual ~ImageSpace(); + protected: // Tries to initialize an ImageSpace from the given image path, returning null on error. // diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h index af57397f96..06638e712c 100644 --- a/runtime/generated/asm_support_gen.h +++ b/runtime/generated/asm_support_gen.h @@ -78,6 +78,8 @@ DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_SIZE_MINUS_ONE), (static_c DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_HASH_BITS), (static_cast<int32_t>(art::LeastSignificantBit(art::mirror::DexCache::kDexCacheStringCacheSize)))) #define STRING_DEX_CACHE_ELEMENT_SIZE 8 DEFINE_CHECK_EQ(static_cast<int32_t>(STRING_DEX_CACHE_ELEMENT_SIZE), (static_cast<int32_t>(sizeof(art::mirror::StringDexCachePair)))) +#define CARD_TABLE_CARD_SHIFT 0xa +DEFINE_CHECK_EQ(static_cast<size_t>(CARD_TABLE_CARD_SHIFT), (static_cast<size_t>(art::gc::accounting::CardTable::kCardShift))) #define MIN_LARGE_OBJECT_THRESHOLD 0x3000 DEFINE_CHECK_EQ(static_cast<size_t>(MIN_LARGE_OBJECT_THRESHOLD), (static_cast<size_t>(art::gc::Heap::kMinLargeObjectThreshold))) #define LOCK_WORD_STATE_SHIFT 30 diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc index 4f390fd30a..8bdf6b1f50 100644 --- a/runtime/hprof/hprof.cc +++ b/runtime/hprof/hprof.cc @@ -34,7 +34,6 @@ #include <sys/time.h> #include <time.h> #include <unistd.h> - #include <set> #include "android-base/stringprintf.h" @@ -502,9 +501,16 @@ class Hprof : public SingleRootVisitor { void DumpHeapArray(mirror::Array* obj, mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_); - void DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) + void DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements) + REQUIRES_SHARED(Locks::mutator_lock_); + + void DumpHeapInstanceObject(mirror::Object* obj, + mirror::Class* klass, + const std::set<mirror::Object*>& fake_roots) REQUIRES_SHARED(Locks::mutator_lock_); + bool AddRuntimeInternalObjectsField(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_); + void ProcessHeap(bool header_first) REQUIRES(Locks::mutator_lock_) { // Reset current heap and object count.
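The fake "runtimeInternalObjects" field declared above lets the dump attach natively-held objects (what a DexCache or a ClassLoader keeps alive through ArtField/ArtMethod/class-table roots) to their holder instead of reporting them as opaque VM-internal roots. The synthetic array backing that field needs an hprof object id that can never collide with a real object; a sketch of the id trick (stand-in Object type; the 8-byte alignment is an assumption here, the real constant is ART's kObjectAlignment):

#include <cstdint>

struct Object {};  // stand-in for mirror::Object
constexpr std::uintptr_t kObjectAlignment = 8;  // assumed object alignment

// Real object ids are kObjectAlignment-aligned addresses, so an address offset
// by half the alignment is guaranteed to be unused by any real object.
std::uintptr_t FakeObjectArrayId(const Object* holder) {
  return reinterpret_cast<std::uintptr_t>(holder) + kObjectAlignment / 2;
}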
@@ -1062,37 +1068,17 @@ void Hprof::MarkRootObject(const mirror::Object* obj, jobject jni_obj, HprofHeap ++objects_in_segment_; } -// Use for visiting the GcRoots held live by ArtFields, ArtMethods, and ClassLoaders. -class GcRootVisitor { - public: - explicit GcRootVisitor(Hprof* hprof) : hprof_(hprof) {} - - void operator()(mirror::Object* obj ATTRIBUTE_UNUSED, - MemberOffset offset ATTRIBUTE_UNUSED, - bool is_static ATTRIBUTE_UNUSED) const {} - - // Note that these don't have read barriers. Its OK however since the GC is guaranteed to not be - // running during the hprof dumping process. - void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const - REQUIRES_SHARED(Locks::mutator_lock_) { - if (!root->IsNull()) { - VisitRoot(root); - } +bool Hprof::AddRuntimeInternalObjectsField(mirror::Class* klass) { + if (klass->IsDexCacheClass()) { + return true; } - - void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const - REQUIRES_SHARED(Locks::mutator_lock_) { - mirror::Object* obj = root->AsMirrorPtr(); - // The two cases are either classes or dex cache arrays. If it is a dex cache array, then use - // VM internal. Otherwise the object is a declaring class of an ArtField or ArtMethod or a - // class from a ClassLoader. - hprof_->VisitRoot(obj, RootInfo(obj->IsClass() ? kRootStickyClass : kRootVMInternal)); + // IsClassLoaderClass is true for subclasses of ClassLoader but we only want to add the fake + // field to the java.lang.ClassLoader class. + if (klass->IsClassLoaderClass() && klass->GetSuperClass()->IsObjectClass()) { + return true; } - - - private: - Hprof* const hprof_; -}; + return false; +} void Hprof::DumpHeapObject(mirror::Object* obj) { // Ignore classes that are retired. @@ -1103,8 +1089,41 @@ void Hprof::DumpHeapObject(mirror::Object* obj) { ++total_objects_; - GcRootVisitor visitor(this); - obj->VisitReferences(visitor, VoidFunctor()); + class RootCollector { + public: + explicit RootCollector() {} + + void operator()(mirror::Object*, MemberOffset, bool) const {} + + // Note that these don't have read barriers. It's OK, however, since the GC is guaranteed not to + // be running during the hprof dumping process. + void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!root->IsNull()) { + VisitRoot(root); + } + } + + void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const + REQUIRES_SHARED(Locks::mutator_lock_) { + roots_.insert(root->AsMirrorPtr()); + } + + const std::set<mirror::Object*>& GetRoots() const { + return roots_; + } + + private: + // These roots are actually held live by the object. Avoid marking them as roots in hprof to + // make it easier to debug class unloading. + mutable std::set<mirror::Object*> roots_; + }; + + RootCollector visitor; + // Collect all native roots. + if (!obj->IsClass()) { + obj->VisitReferences(visitor, VoidFunctor()); + } gc::Heap* const heap = Runtime::Current()->GetHeap(); const gc::space::ContinuousSpace* const space = heap->FindContinuousSpaceFromObject(obj, true); @@ -1112,15 +1131,18 @@ void Hprof::DumpHeapObject(mirror::Object* obj) { if (space != nullptr) { if (space->IsZygoteSpace()) { heap_type = HPROF_HEAP_ZYGOTE; + VisitRoot(obj, RootInfo(kRootVMInternal)); } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) { // Only count objects in the boot image as HPROF_HEAP_IMAGE; this leaves app image objects as // HPROF_HEAP_APP.
b/35762934 heap_type = HPROF_HEAP_IMAGE; + VisitRoot(obj, RootInfo(kRootVMInternal)); } } else { const auto* los = heap->GetLargeObjectsSpace(); if (los->Contains(obj) && los->IsZygoteLargeObject(Thread::Current(), obj)) { heap_type = HPROF_HEAP_ZYGOTE; + VisitRoot(obj, RootInfo(kRootVMInternal)); } } CheckHeapSegmentConstraints(); @@ -1164,7 +1186,7 @@ void Hprof::DumpHeapObject(mirror::Object* obj) { } else if (c->IsArrayClass()) { DumpHeapArray(obj->AsArray(), c); } else { - DumpHeapInstanceObject(obj, c); + DumpHeapInstanceObject(obj, c, visitor.GetRoots()); } } @@ -1269,7 +1291,10 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { // Instance fields for this class (no superclass fields) int iFieldCount = klass->NumInstanceFields(); - if (klass->IsStringClass()) { + // add_internal_runtime_objects is only for classes that may retain objects live through means + // other than fields. That is never the case for strings. + const bool add_internal_runtime_objects = AddRuntimeInternalObjectsField(klass); + if (klass->IsStringClass() || add_internal_runtime_objects) { __ AddU2((uint16_t)iFieldCount + 1); } else { __ AddU2((uint16_t)iFieldCount); @@ -1284,6 +1309,21 @@ void Hprof::DumpHeapClass(mirror::Class* klass) { if (klass->IsStringClass()) { __ AddStringId(LookupStringId("value")); __ AddU1(hprof_basic_object); + } else if (add_internal_runtime_objects) { + __ AddStringId(LookupStringId("runtimeInternalObjects")); + __ AddU1(hprof_basic_object); + } +} + +void Hprof::DumpFakeObjectArray(mirror::Object* obj, const std::set<mirror::Object*>& elements) { + __ AddU1(HPROF_OBJECT_ARRAY_DUMP); + __ AddObjectId(obj); + __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(obj)); + __ AddU4(elements.size()); + __ AddClassId(LookupClassId( + Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kObjectArrayClass))); + for (mirror::Object* e : elements) { + __ AddObjectId(e); } } @@ -1327,7 +1367,9 @@ void Hprof::DumpHeapArray(mirror::Array* obj, mirror::Class* klass) { } } -void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { +void Hprof::DumpHeapInstanceObject(mirror::Object* obj, + mirror::Class* klass, + const std::set<mirror::Object*>& fake_roots) { // obj is an instance object. __ AddU1(HPROF_INSTANCE_DUMP); __ AddObjectId(obj); @@ -1341,6 +1383,7 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { // What we will use for the string value if the object is a string. mirror::Object* string_value = nullptr; + mirror::Object* fake_object_array = nullptr; // Write the instance data; fields for this class, followed by super class fields, and so on. do { @@ -1396,8 +1439,12 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { } } __ AddObjectId(string_value); + } else if (AddRuntimeInternalObjectsField(klass)) { + // We need an id that is guaranteed not to be used; use half of the object alignment.
+ fake_object_array = reinterpret_cast<mirror::Object*>( + reinterpret_cast<uintptr_t>(obj) + kObjectAlignment / 2); + __ AddObjectId(fake_object_array); } - klass = klass->GetSuperClass(); } while (klass != nullptr); @@ -1419,6 +1466,8 @@ void Hprof::DumpHeapInstanceObject(mirror::Object* obj, mirror::Class* klass) { __ AddU1(hprof_basic_char); __ AddU2List(s->GetValue(), s->GetLength()); } + } else if (fake_object_array != nullptr) { + DumpFakeObjectArray(fake_object_array, fake_roots); } } diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index 70be30c22c..96934bc0ca 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -568,7 +568,7 @@ static void GetResourceAsStream(Thread* self, // Copy in content. memcpy(h_array->GetData(), mem_map->Begin(), map_size); // Be proactive releasing memory. - mem_map.release(); + mem_map.reset(); // Create a ByteArrayInputStream. Handle<mirror::Class> h_class(hs.NewHandle( diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc index 971d03958c..e8a9904dc6 100644 --- a/runtime/jdwp/jdwp_handler.cc +++ b/runtime/jdwp/jdwp_handler.cc @@ -335,7 +335,7 @@ static JdwpError VM_CapabilitiesNew(JdwpState*, Request* request, ExpandBuf* rep expandBufAdd1(reply, false); // canUnrestrictedlyRedefineClasses expandBufAdd1(reply, false); // canPopFrames expandBufAdd1(reply, true); // canUseInstanceFilters - expandBufAdd1(reply, false); // canGetSourceDebugExtension + expandBufAdd1(reply, true); // canGetSourceDebugExtension expandBufAdd1(reply, false); // canRequestVMDeathEvent expandBufAdd1(reply, false); // canSetDefaultStratum expandBufAdd1(reply, true); // 1.6: canGetInstanceInfo @@ -499,13 +499,18 @@ static JdwpError RT_ClassObject(JdwpState*, Request* request, ExpandBuf* pReply) /* * Returns the value of the SourceDebugExtension attribute. - * - * JDB seems interested, but DEX files don't currently support this. 
*/ -static JdwpError RT_SourceDebugExtension(JdwpState*, Request*, ExpandBuf*) +static JdwpError RT_SourceDebugExtension(JdwpState*, Request* request, ExpandBuf* pReply) REQUIRES_SHARED(Locks::mutator_lock_) { /* referenceTypeId in, string out */ - return ERR_ABSENT_INFORMATION; + RefTypeId refTypeId = request->ReadRefTypeId(); + std::string extension_data; + JdwpError status = Dbg::GetSourceDebugExtension(refTypeId, &extension_data); + if (status != ERR_NONE) { + return status; + } + expandBufAddUtf8String(pReply, extension_data); + return ERR_NONE; } static JdwpError RT_Signature(JdwpState*, Request* request, ExpandBuf* pReply, bool with_generic) diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index b32b272a31..1dfb0f6e9a 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -353,6 +353,7 @@ Jit::~Jit() { DCHECK(!profile_saver_options_.IsEnabled() || !ProfileSaver::IsStarted()); if (dump_info_on_shutdown_) { DumpInfo(LOG_STREAM(INFO)); + Runtime::Current()->DumpDeoptimizations(LOG_STREAM(INFO)); } DeleteThreadPool(); if (jit_compiler_handle_ != nullptr) { diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index e9a5ae5fa9..81b87f15fd 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -149,7 +149,6 @@ JitCodeCache::JitCodeCache(MemMap* code_map, used_memory_for_code_(0), number_of_compilations_(0), number_of_osr_compilations_(0), - number_of_deoptimizations_(0), number_of_collections_(0), histogram_stack_map_memory_use_("Memory used for stack maps", 16), histogram_code_memory_use_("Memory used for compiled code", 16), @@ -1416,8 +1415,6 @@ void JitCodeCache::InvalidateCompiledCodeFor(ArtMethod* method, osr_code_map_.erase(it); } } - MutexLock mu(Thread::Current(), lock_); - number_of_deoptimizations_++; } uint8_t* JitCodeCache::AllocateCode(size_t code_size) { @@ -1456,7 +1453,6 @@ void JitCodeCache::Dump(std::ostream& os) { << "Total number of JIT compilations: " << number_of_compilations_ << "\n" << "Total number of JIT compilations for on stack replacement: " << number_of_osr_compilations_ << "\n" - << "Total number of deoptimizations: " << number_of_deoptimizations_ << "\n" << "Total number of JIT code cache collections: " << number_of_collections_ << std::endl; histogram_stack_map_memory_use_.PrintMemoryUse(os); histogram_code_memory_use_.PrintMemoryUse(os); diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index db214e7983..612d06ba1c 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -384,9 +384,6 @@ class JitCodeCache { // Number of compilations for on-stack-replacement done throughout the lifetime of the JIT. size_t number_of_osr_compilations_ GUARDED_BY(lock_); - // Number of deoptimizations done throughout the lifetime of the JIT. - size_t number_of_deoptimizations_ GUARDED_BY(lock_); - // Number of code cache collections done throughout the lifetime of the JIT. 
size_t number_of_collections_ GUARDED_BY(lock_); diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc index 52649c7075..0acce1e421 100644 --- a/runtime/jit/profile_compilation_info.cc +++ b/runtime/jit/profile_compilation_info.cc @@ -115,7 +115,11 @@ bool ProfileCompilationInfo::MergeAndSave(const std::string& filename, ScopedTrace trace(__PRETTY_FUNCTION__); ScopedFlock flock; std::string error; - if (!flock.Init(filename.c_str(), O_RDWR | O_NOFOLLOW | O_CLOEXEC, /* block */ false, &error)) { + int flags = O_RDWR | O_NOFOLLOW | O_CLOEXEC; + // There's no need to fsync profile data right away. We get many chances + // to write it again in case something goes wrong. We can rely on a simple + // close(), no sync, and let the kernel decide when to write to disk. + if (!flock.Init(filename.c_str(), flags, /*block*/false, /*flush_on_close*/false, &error)) { LOG(WARNING) << "Couldn't lock the profile file " << filename << ": " << error; return false; } diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc index d190bdfd46..1441987ef0 100644 --- a/runtime/jit/profile_saver.cc +++ b/runtime/jit/profile_saver.cc @@ -212,6 +212,10 @@ class GetMethodsVisitor : public ClassVisitor { void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() { ScopedTrace trace(__PRETTY_FUNCTION__); + + // Resolve any newly registered locations. + ResolveTrackedLocations(); + ClassLinker* const class_linker = Runtime::Current()->GetClassLinker(); std::set<DexCacheResolvedClasses> resolved_classes = class_linker->GetResolvedClasses(/*ignore boot classes*/ true); @@ -260,6 +264,10 @@ void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() { bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) { ScopedTrace trace(__PRETTY_FUNCTION__); + + // Resolve any newly registered locations. + ResolveTrackedLocations(); + SafeMap<std::string, std::set<std::string>> tracked_locations; { // Make a copy so that we don't hold the lock while doing I/O. @@ -497,17 +505,34 @@ bool ProfileSaver::IsStarted() { return instance_ != nullptr; } -void ProfileSaver::AddTrackedLocations(const std::string& output_filename, - const std::vector<std::string>& code_paths) { - auto it = tracked_dex_base_locations_.find(output_filename); - if (it == tracked_dex_base_locations_.end()) { - tracked_dex_base_locations_.Put(output_filename, - std::set<std::string>(code_paths.begin(), code_paths.end())); +static void AddTrackedLocationsToMap(const std::string& output_filename, + const std::vector<std::string>& code_paths, + SafeMap<std::string, std::set<std::string>>* map) { + auto it = map->find(output_filename); + if (it == map->end()) { + map->Put(output_filename, std::set<std::string>(code_paths.begin(), code_paths.end())); } else { it->second.insert(code_paths.begin(), code_paths.end()); } } +void ProfileSaver::AddTrackedLocations(const std::string& output_filename, + const std::vector<std::string>& code_paths) { + // Add the code paths to the list of tracked locations. + AddTrackedLocationsToMap(output_filename, code_paths, &tracked_dex_base_locations_); + // The code paths may contain symlinks which could fool the profiler. + // If the dex file is compiled with an absolute location but loaded through a symlink, + // the profiler could skip the dex due to the location mismatch. + // To avoid this, we add the code paths to the temporary cache of 'to_be_resolved' + // locations.
When the profiler thread executes, we will resolve the paths to their + // real paths. + // Note that we delay taking the realpath to avoid spending more time than needed + // when registering a location (as it is done during app launch). + AddTrackedLocationsToMap(output_filename, + code_paths, + &tracked_dex_base_locations_to_be_resolved_); +} + void ProfileSaver::DumpInstanceInfo(std::ostream& os) { MutexLock mu(Thread::Current(), *Locks::profiler_lock_); if (instance_ != nullptr) { @@ -556,4 +581,38 @@ bool ProfileSaver::HasSeenMethod(const std::string& profile, return false; } +void ProfileSaver::ResolveTrackedLocations() { + SafeMap<std::string, std::set<std::string>> locations_to_be_resolved; + { + // Make a copy so that we don't hold the lock while doing I/O. + MutexLock mu(Thread::Current(), *Locks::profiler_lock_); + locations_to_be_resolved = tracked_dex_base_locations_to_be_resolved_; + tracked_dex_base_locations_to_be_resolved_.clear(); + } + + // Resolve the locations. + SafeMap<std::string, std::vector<std::string>> resolved_locations_map; + for (const auto& it : locations_to_be_resolved) { + const std::string& filename = it.first; + const std::set<std::string>& locations = it.second; + // Pre-size the vector's capacity rather than its length: constructing it with + // locations.size() elements would prepend that many empty strings to the result. + auto resolved_locations_it = resolved_locations_map.Put( + filename, + std::vector<std::string>()); + resolved_locations_it->second.reserve(locations.size()); + + for (const auto& location : locations) { + UniqueCPtr<const char[]> location_real(realpath(location.c_str(), nullptr)); + // Note that it's ok if we cannot get the real path. + if (location_real != nullptr) { + resolved_locations_it->second.emplace_back(location_real.get()); + } + } + } + + // Add the resolved locations to the tracked collection. + MutexLock mu(Thread::Current(), *Locks::profiler_lock_); + for (const auto& it : resolved_locations_map) { + AddTrackedLocationsToMap(it.first, it.second, &tracked_dex_base_locations_); + } +} + } // namespace art diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h index be2bffc647..bd539a41d0 100644 --- a/runtime/jit/profile_saver.h +++ b/runtime/jit/profile_saver.h @@ -112,6 +112,10 @@ class ProfileSaver { void DumpInfo(std::ostream& os); + // Resolve the realpath of the locations stored in tracked_dex_base_locations_to_be_resolved_ + // and put the result in tracked_dex_base_locations_. + void ResolveTrackedLocations() REQUIRES(!Locks::profiler_lock_); + // The only instance of the saver. static ProfileSaver* instance_ GUARDED_BY(Locks::profiler_lock_); // Profile saver thread. @@ -119,11 +123,17 @@ class ProfileSaver { jit::JitCodeCache* jit_code_cache_; - // Collection of code paths that the profiles tracks. + // Collection of code paths that the profiler tracks. // It maps profile locations to code paths (dex base locations). SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_ GUARDED_BY(Locks::profiler_lock_); + // Collection of code paths that the profiler tracks but may not have been resolved + // to their realpath. The resolution is done asynchronously to minimize the time it takes for + // someone to register a path.
+ SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_to_be_resolved_ + GUARDED_BY(Locks::profiler_lock_); + bool shutting_down_ GUARDED_BY(Locks::profiler_lock_); uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_); uint32_t jit_activity_notifications_; diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h index c8d256fec0..07aeb66eb6 100644 --- a/runtime/jit/profile_saver_options.h +++ b/runtime/jit/profile_saver_options.h @@ -20,7 +20,7 @@ namespace art { struct ProfileSaverOptions { public: - static constexpr uint32_t kMinSavePeriodMs = 20 * 1000; // 20 seconds + static constexpr uint32_t kMinSavePeriodMs = 40 * 1000; // 40 seconds static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000; // 5 seconds // Minimum number of JIT samples during launch to include a method into the profile. static constexpr uint32_t kStartupMethodSamples = 1; diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc index 0617dae1ae..77554e8b30 100644 --- a/runtime/native/dalvik_system_DexFile.cc +++ b/runtime/native/dalvik_system_DexFile.cc @@ -622,6 +622,31 @@ static jstring DexFile_getNonProfileGuidedCompilerFilter(JNIEnv* env, return env->NewStringUTF(new_filter_str.c_str()); } +static jstring DexFile_getSafeModeCompilerFilter(JNIEnv* env, + jclass javaDexFileClass ATTRIBUTE_UNUSED, + jstring javaCompilerFilter) { + ScopedUtfChars compiler_filter(env, javaCompilerFilter); + if (env->ExceptionCheck()) { + return nullptr; + } + + CompilerFilter::Filter filter; + if (!CompilerFilter::ParseCompilerFilter(compiler_filter.c_str(), &filter)) { + return javaCompilerFilter; + } + + CompilerFilter::Filter new_filter = CompilerFilter::GetSafeModeFilterFrom(filter); + + // The filter stayed the same; return the input. + if (filter == new_filter) { + return javaCompilerFilter; + } + + // Create a new string object and return. + std::string new_filter_str = CompilerFilter::NameOfFilter(new_filter); + return env->NewStringUTF(new_filter_str.c_str()); +} + static jboolean DexFile_isBackedByOatFile(JNIEnv* env, jclass, jobject cookie) { const OatFile* oat_file = nullptr; std::vector<const DexFile*> dex_files; @@ -695,6 +720,9 @@ static JNINativeMethod gMethods[] = { NATIVE_METHOD(DexFile, getNonProfileGuidedCompilerFilter, "(Ljava/lang/String;)Ljava/lang/String;"), + NATIVE_METHOD(DexFile, + getSafeModeCompilerFilter, + "(Ljava/lang/String;)Ljava/lang/String;"), NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"), NATIVE_METHOD(DexFile, getDexFileStatus, "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"), diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc index 2eaa8c71b1..0515ec6339 100644 --- a/runtime/native/dalvik_system_ZygoteHooks.cc +++ b/runtime/native/dalvik_system_ZygoteHooks.cc @@ -192,8 +192,8 @@ static void EnableDebugFeatures(uint32_t debug_flags) { const bool safe_mode = (debug_flags & DEBUG_ENABLE_SAFEMODE) != 0; if (safe_mode) { - // Ensure that any (secondary) oat files will be interpreted. - runtime->AddCompilerOption("--compiler-filter=interpret-only"); + // Only quicken oat files.
+ runtime->AddCompilerOption("--compiler-filter=quicken"); runtime->SetSafeMode(true); debug_flags &= ~DEBUG_ENABLE_SAFEMODE; } diff --git a/runtime/oat.h b/runtime/oat.h index 05706252fa..a38eebc188 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '1', '1', '9', '\0' }; // Add thread_local_limit. + static constexpr uint8_t kOatVersion[] = { '1', '2', '4', '\0' }; // New compiler filter names. static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc index 4a85d4795e..9affeb0fcc 100644 --- a/runtime/oat_file.cc +++ b/runtime/oat_file.cc @@ -193,7 +193,7 @@ bool OatFileBase::LoadVdex(const std::string& vdex_filename, bool writable, bool low_4gb, std::string* error_msg) { - vdex_ = VdexFile::Open(vdex_filename, writable, low_4gb, error_msg); + vdex_ = VdexFile::Open(vdex_filename, writable, low_4gb, /* unquicken*/ false, error_msg); if (vdex_.get() == nullptr) { *error_msg = StringPrintf("Failed to load vdex file '%s' %s", vdex_filename.c_str(), diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc index a7be73a849..603bbbf8bd 100644 --- a/runtime/oat_file_assistant.cc +++ b/runtime/oat_file_assistant.cc @@ -68,19 +68,34 @@ std::ostream& operator << (std::ostream& stream, const OatFileAssistant::OatStat OatFileAssistant::OatFileAssistant(const char* dex_location, const InstructionSet isa, bool load_executable) - : OatFileAssistant(dex_location, nullptr, isa, load_executable) -{ } - -OatFileAssistant::OatFileAssistant(const char* dex_location, - const char* oat_location, - const InstructionSet isa, - bool load_executable) : isa_(isa), load_executable_(load_executable), odex_(this, /*is_oat_location*/ false), oat_(this, /*is_oat_location*/ true) { CHECK(dex_location != nullptr) << "OatFileAssistant: null dex location"; - dex_location_.assign(dex_location); + + // Try to get the realpath for the dex location. + // + // This is OK with respect to dalvik cache naming scheme because we never + // generate oat files starting from symlinks which go into dalvik cache. + // (recall that the oat files in dalvik cache are encoded by replacing '/' + // with '@' in the path). + // The boot image oat files (which are symlinked in dalvik-cache) are not + // loaded via the oat file assistant. + // + // The only case when the dex location may resolve to a different path + // is for secondary dex files (e.g. /data/user/0 symlinks to /data/data and + // the app is free to create its own internal layout). Related to this it is + // worthwhile to mention that installd resolves the secondary dex location + // before calling dex2oat. + UniqueCPtr<const char[]> dex_location_real(realpath(dex_location, nullptr)); + if (dex_location_real != nullptr) { + dex_location_.assign(dex_location_real.get()); + } else { + // If we can't get the realpath of the location there's not much point in trying to move on. + PLOG(ERROR) << "Could not get the realpath of dex_location " << dex_location; + return; + } if (load_executable_ && isa != kRuntimeISA) { LOG(WARNING) << "OatFileAssistant: Load executable specified, " @@ -98,15 +113,27 @@ OatFileAssistant::OatFileAssistant(const char* dex_location, } // Get the oat filename. 
- if (oat_location != nullptr) { - oat_.Reset(oat_location); + std::string oat_file_name; + if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) { + oat_.Reset(oat_file_name); } else { - std::string oat_file_name; - if (DexLocationToOatFilename(dex_location_, isa_, &oat_file_name, &error_msg)) { - oat_.Reset(oat_file_name); - } else { - LOG(WARNING) << "Failed to determine oat file name for dex location " + LOG(WARNING) << "Failed to determine oat file name for dex location " << dex_location_ << ": " << error_msg; + } + + // Check if the dex directory is writable. + // This will be needed in most uses of OatFileAssistant and so it's OK to + // compute it eagerly. (The only use that does not need it is + // OatFileAssistant::GetStatusDump().) + size_t pos = dex_location_.rfind('/'); + if (pos == std::string::npos) { + LOG(WARNING) << "Failed to determine dex file parent directory: " << dex_location_; + } else { + std::string parent = dex_location_.substr(0, pos); + if (access(parent.c_str(), W_OK) == 0) { + dex_parent_writable_ = true; + } else { + VLOG(oat) << "Dex parent of " << dex_location_ << " is not writable: " << strerror(errno); } } } @@ -139,12 +166,17 @@ bool OatFileAssistant::Lock(std::string* error_msg) { CHECK(error_msg != nullptr); CHECK(!flock_.HasFile()) << "OatFileAssistant::Lock already acquired"; - const std::string* oat_file_name = oat_.Filename(); - if (oat_file_name == nullptr) { - *error_msg = "Failed to determine lock file"; - return false; - } - std::string lock_file_name = *oat_file_name + ".flock"; + // Note the lock will only succeed for secondary dex files and in test + // environments. + // + // The lock *will fail* for all primary apks in a production environment. + // The app does not have permissions to create locks next to its dex location + // (be it system, data or vendor partition). We also cannot use the odex or + // oat location for the same reason. + // + // This is best effort, and if it fails it's unlikely that we will be able + // to generate oat files anyway. + std::string lock_file_name = dex_location_ + "."
+ GetInstructionSetString(isa_) + ".flock"; if (!flock_.Init(lock_file_name.c_str(), error_msg)) { unlink(lock_file_name.c_str()); @@ -170,7 +202,7 @@ static bool GetRuntimeCompilerFilterOption(CompilerFilter::Filter* filter, CHECK(filter != nullptr); CHECK(error_msg != nullptr); - *filter = CompilerFilter::kDefaultCompilerFilter; + *filter = OatFileAssistant::kDefaultCompilerFilterForDexLoading; for (StringPiece option : Runtime::Current()->GetCompilerOptions()) { if (option.starts_with("--compiler-filter=")) { const char* compiler_filter_string = option.substr(strlen("--compiler-filter=")).data(); @@ -207,7 +239,7 @@ OatFileAssistant::MakeUpToDate(bool profile_changed, std::string* error_msg) { case kDex2OatForBootImage: case kDex2OatForRelocation: case kDex2OatForFilter: - return GenerateOatFile(error_msg); + return GenerateOatFileNoChecks(info, error_msg); } UNREACHABLE(); } @@ -439,7 +471,7 @@ OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& VLOG(oat) << "Image checksum test skipped for compiler filter " << current_compiler_filter; } - if (CompilerFilter::IsBytecodeCompilationEnabled(current_compiler_filter)) { + if (CompilerFilter::IsAotCompilationEnabled(current_compiler_filter)) { if (!file.IsPic()) { const ImageInfo* image_info = GetImageInfo(); if (image_info == nullptr) { @@ -479,8 +511,110 @@ OatFileAssistant::OatStatus OatFileAssistant::GivenOatFileStatus(const OatFile& return kOatUpToDate; } -OatFileAssistant::ResultOfAttemptToUpdate -OatFileAssistant::GenerateOatFile(std::string* error_msg) { +static bool DexLocationToOdexNames(const std::string& location, + InstructionSet isa, + std::string* odex_filename, + std::string* oat_dir, + std::string* isa_dir, + std::string* error_msg) { + CHECK(odex_filename != nullptr); + CHECK(error_msg != nullptr); + + // The odex file name is formed by replacing the dex_location extension with + // .odex and inserting an oat/<isa> directory. For example: + // location = /foo/bar/baz.jar + // odex_location = /foo/bar/oat/<isa>/baz.odex + + // Find the directory portion of the dex location and add the oat/<isa> + // directory. + size_t pos = location.rfind('/'); + if (pos == std::string::npos) { + *error_msg = "Dex location " + location + " has no directory."; + return false; + } + std::string dir = location.substr(0, pos+1); + // Add the oat directory. + dir += "oat"; + if (oat_dir != nullptr) { + *oat_dir = dir; + } + // Add the isa directory + dir += "/" + std::string(GetInstructionSetString(isa)); + if (isa_dir != nullptr) { + *isa_dir = dir; + } + + // Get the base part of the file without the extension. + std::string file = location.substr(pos+1); + pos = file.rfind('.'); + if (pos == std::string::npos) { + *error_msg = "Dex location " + location + " has no extension."; + return false; + } + std::string base = file.substr(0, pos); + + *odex_filename = dir + "/" + base + ".odex"; + return true; +} + +// Prepare a subcomponent of the odex directory. +// (i.e. create and set the expected permissions on the path `dir`). +static bool PrepareDirectory(const std::string& dir, std::string* error_msg) { + struct stat dir_stat; + if (TEMP_FAILURE_RETRY(stat(dir.c_str(), &dir_stat)) == 0) { + // The directory exists. Check if it is indeed a directory. + if (!S_ISDIR(dir_stat.st_mode)) { + *error_msg = dir + " is not a dir"; + return false; + } else { + // The dir is already on disk. + return true; + } + } + + // Failed to stat. We need to create the directory. 
+ if (errno != ENOENT) { + *error_msg = "Could not stat dir " + dir + ":" + strerror(errno); + return false; + } + + mode_t mode = S_IRWXU | S_IXGRP | S_IXOTH; + if (mkdir(dir.c_str(), mode) != 0) { + *error_msg = "Could not create dir " + dir + ":" + strerror(errno); + return false; + } + if (chmod(dir.c_str(), mode) != 0) { + *error_msg = "Could not set permissions on dir " + dir + ":" + strerror(errno); + return false; + } + return true; +} + +// Prepares the odex directory for the given dex location. +static bool PrepareOdexDirectories(const std::string& dex_location, + const std::string& expected_odex_location, + InstructionSet isa, + std::string* error_msg) { + std::string actual_odex_location; + std::string oat_dir; + std::string isa_dir; + if (!DexLocationToOdexNames( + dex_location, isa, &actual_odex_location, &oat_dir, &isa_dir, error_msg)) { + return false; + } + DCHECK_EQ(expected_odex_location, actual_odex_location); + + if (!PrepareDirectory(oat_dir, error_msg)) { + return false; + } + if (!PrepareDirectory(isa_dir, error_msg)) { + return false; + } + return true; +} + +OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks( + OatFileAssistant::OatFileInfo& info, std::string* error_msg) { CHECK(error_msg != nullptr); Runtime* runtime = Runtime::Current(); @@ -490,22 +624,37 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (oat_.Filename() == nullptr) { + if (info.Filename() == nullptr) { *error_msg = "Generation of oat file for dex location " + dex_location_ + " not attempted because the oat file name could not be determined."; return kUpdateNotAttempted; } - const std::string& oat_file_name = *oat_.Filename(); + const std::string& oat_file_name = *info.Filename(); const std::string& vdex_file_name = ReplaceFileExtension(oat_file_name, "vdex"); // dex2oat ignores missing dex files and doesn't report an error. // Check explicitly here so we can detect the error properly. // TODO: Why does dex2oat behave that way? - if (!OS::FileExists(dex_location_.c_str())) { - *error_msg = "Dex location " + dex_location_ + " does not exists."; + struct stat dex_path_stat; + if (TEMP_FAILURE_RETRY(stat(dex_location_.c_str(), &dex_path_stat)) != 0) { + *error_msg = "Could not access dex location " + dex_location_ + ":" + strerror(errno); return kUpdateNotAttempted; } + // If this is the odex location, we need to create the odex file layout (../oat/isa/..) + if (!info.IsOatLocation()) { + if (!PrepareOdexDirectories(dex_location_, oat_file_name, isa_, error_msg)) { + return kUpdateNotAttempted; + } + } + + // Set the permissions for the oat and the vdex files. + // The user always gets read and write while the group and others inherit + // the read access of the original dex file.
+ mode_t file_mode = S_IRUSR | S_IWUSR | + (dex_path_stat.st_mode & S_IRGRP) | + (dex_path_stat.st_mode & S_IROTH); + std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_file_name.c_str())); if (vdex_file.get() == nullptr) { *error_msg = "Generation of oat file " + oat_file_name @@ -514,7 +663,7 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (fchmod(vdex_file->Fd(), 0644) != 0) { + if (fchmod(vdex_file->Fd(), file_mode) != 0) { *error_msg = "Generation of oat file " + oat_file_name + " not attempted because the vdex file " + vdex_file_name + " could not be made world readable."; @@ -528,7 +677,7 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateNotAttempted; } - if (fchmod(oat_file->Fd(), 0644) != 0) { + if (fchmod(oat_file->Fd(), file_mode) != 0) { *error_msg = "Generation of oat file " + oat_file_name + " not attempted because the oat file could not be made world readable."; oat_file->Erase(); @@ -563,8 +712,8 @@ OatFileAssistant::GenerateOatFile(std::string* error_msg) { return kUpdateFailed; } - // Mark that the oat file has changed and we should try to reload. - oat_.Reset(); + // Mark that the oat/odex file has changed and we should try to reload. + info.Reset(); return kUpdateSucceeded; } @@ -623,35 +772,7 @@ bool OatFileAssistant::DexLocationToOdexFilename(const std::string& location, InstructionSet isa, std::string* odex_filename, std::string* error_msg) { - CHECK(odex_filename != nullptr); - CHECK(error_msg != nullptr); - - // The odex file name is formed by replacing the dex_location extension with - // .odex and inserting an oat/<isa> directory. For example: - // location = /foo/bar/baz.jar - // odex_location = /foo/bar/oat/<isa>/baz.odex - - // Find the directory portion of the dex location and add the oat/<isa> - // directory. - size_t pos = location.rfind('/'); - if (pos == std::string::npos) { - *error_msg = "Dex location " + location + " has no directory."; - return false; - } - std::string dir = location.substr(0, pos+1); - dir += "oat/" + std::string(GetInstructionSetString(isa)); - - // Get the base part of the file without the extension. - std::string file = location.substr(pos+1); - pos = file.rfind('.'); - if (pos == std::string::npos) { - *error_msg = "Dex location " + location + " has no extension."; - return false; - } - std::string base = file.substr(0, pos); - - *odex_filename = dir + "/" + base + ".odex"; - return true; + return DexLocationToOdexNames(location, isa, odex_filename, nullptr, nullptr, error_msg); } bool OatFileAssistant::DexLocationToOatFilename(const std::string& location, @@ -752,8 +873,45 @@ const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() { } OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() { - bool use_oat = oat_.IsUseable() || odex_.Status() == kOatCannotOpen; - return use_oat ? oat_ : odex_; + // TODO(calin): Document the side effects of class loading when + // running the dalvikvm command line. + if (dex_parent_writable_) { + // If the parent of the dex file is writable it means that we can + // create the odex file. In this case we unconditionally pick the odex + // as the best oat file. This corresponds to the regular use case when + // apps get installed or when they load private, secondary dex files. + // For apps on the system partition the odex location will not be + // writable and thus the oat location might be more up to date. + return odex_; + } + + // We cannot write to the odex location. This must be a system app.
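// An illustrative restatement (a sketch, not part of the change) of the ladder
// GetBestInfo() implements, with plain flags standing in for the real
// OatFileInfo queries:
//
//   if (dex_parent_writable_)            pick odex;  // we can always (re)generate it
//   else if (oat is useable)             pick oat;   // system app with a good oat file
//   else if (odex is up to date)         pick odex;  // up-to-date prebuilt, no relocation
//   else if (original dex still present) pick oat;   // the oat can be regenerated
//   else                                 pick odex if it opens at all, else oat.  // stripped app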
+ + // If the oat location is usable, take it. + if (oat_.IsUseable()) { + return oat_; + } + + // The oat file is not usable but the odex file might be up to date. + // This is an indication that we are dealing with an up to date prebuilt + // (that doesn't need relocation). + if (odex_.Status() == kOatUpToDate) { + return odex_; + } + + // The oat file is not usable and the odex file is not up to date. + // However, we have access to the original dex file, which means we can make + // the oat location up to date. + if (HasOriginalDexFiles()) { + return oat_; + } + + // We are in the worst situation here: + // - the oat location is not usable + // - the prebuilt odex location is not up to date + // - and we don't have the original dex file anymore (stripped). + // Pick the odex if it exists, or the oat if not. + return (odex_.Status() == kOatCannotOpen) ? oat_ : odex_; } std::unique_ptr<gc::space::ImageSpace> OatFileAssistant::OpenImageSpace(const OatFile* oat_file) { @@ -808,6 +966,7 @@ OatFileAssistant::OatStatus OatFileAssistant::OatFileInfo::Status() { std::unique_ptr<VdexFile> vdex = VdexFile::Open(vdex_filename, /*writeable*/false, /*low_4gb*/false, + /*unquicken*/false, &error_msg); if (vdex == nullptr) { status_ = kOatCannotOpen; @@ -834,7 +993,7 @@ OatFileAssistant::OatStatus OatFileAssistant::OatFileInfo::Status() { OatFileAssistant::DexOptNeeded OatFileAssistant::OatFileInfo::GetDexOptNeeded( CompilerFilter::Filter target, bool profile_changed) { - bool compilation_desired = CompilerFilter::IsBytecodeCompilationEnabled(target); + bool compilation_desired = CompilerFilter::IsAotCompilationEnabled(target); bool filter_okay = CompilerFilterIsOkay(target, profile_changed); if (filter_okay && Status() == kOatUpToDate) { @@ -848,24 +1007,24 @@ OatFileAssistant::DexOptNeeded OatFileAssistant::OatFileInfo::GetDexOptNeeded( return kNoDexOptNeeded; } - if (oat_file_assistant_->HasOriginalDexFiles()) { - if (filter_okay && Status() == kOatRelocationOutOfDate) { - return kDex2OatForRelocation; - } + if (filter_okay && Status() == kOatRelocationOutOfDate) { + return kDex2OatForRelocation; + } - if (IsUseable()) { - return kDex2OatForFilter; - } + if (IsUseable()) { + return kDex2OatForFilter; + } - if (Status() == kOatBootImageOutOfDate) { - return kDex2OatForBootImage; - } + if (Status() == kOatBootImageOutOfDate) { + return kDex2OatForBootImage; + } + if (oat_file_assistant_->HasOriginalDexFiles()) { return kDex2OatFromScratch; + } else { + // Otherwise there is nothing we can do, even if we want to. + return kNoDexOptNeeded; } - - // Otherwise there is nothing we can do, even if we want to. - return kNoDexOptNeeded; } const OatFile* OatFileAssistant::OatFileInfo::GetFile() { diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h index b84e711daa..7e2385ec6c 100644 --- a/runtime/oat_file_assistant.h +++ b/runtime/oat_file_assistant.h @@ -47,6 +47,11 @@ class ImageSpace; // dex location is in the boot class path. class OatFileAssistant { public: + // The default compiler filter to use when optimizing dex files at load time if they + // are out of date. + static const CompilerFilter::Filter kDefaultCompilerFilterForDexLoading = + CompilerFilter::kQuicken; + enum DexOptNeeded { // No dexopt should (or can) be done to update the apk/jar.
// Matches Java: dalvik.system.DexFile.NO_DEXOPT_NEEDED = 0 @@ -117,13 +122,6 @@ class OatFileAssistant { const InstructionSet isa, bool load_executable); - // Constructs an OatFileAssistant, providing an explicit target oat_location - // to use instead of the standard oat location. - OatFileAssistant(const char* dex_location, - const char* oat_location, - const InstructionSet isa, - bool load_executable); - ~OatFileAssistant(); // Returns true if the dex location refers to an element of the boot class @@ -232,16 +230,6 @@ class OatFileAssistant { // Returns the status of the oat file for the dex location. OatStatus OatFileStatus(); - // Generate the oat file from the dex file using the current runtime - // compiler options. - // This does not check the current status before attempting to generate the - // oat file. - // - // If the result is not kUpdateSucceeded, the value of error_msg will be set - // to a string describing why there was a failure or the update was not - // attempted. error_msg must not be null. - ResultOfAttemptToUpdate GenerateOatFile(std::string* error_msg); - // Executes dex2oat using the current runtime configuration overridden with // the given arguments. This does not check to see if dex2oat is enabled in // the runtime configuration. @@ -377,6 +365,16 @@ class OatFileAssistant { bool file_released_ = false; }; + // Generate the oat file for the given info from the dex file using the + // current runtime compiler options. + // This does not check the current status before attempting to generate the + // oat file. + // + // If the result is not kUpdateSucceeded, the value of error_msg will be set + // to a string describing why there was a failure or the update was not + // attempted. error_msg must not be null. + ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info, std::string* error_msg); + // Return info for the best oat file. OatFileInfo& GetBestInfo(); @@ -422,6 +420,9 @@ class OatFileAssistant { std::string dex_location_; + // Whether or not the parent directory of the dex file is writable. + bool dex_parent_writable_ = false; + // In a properly constructed OatFileAssistant object, isa_ should be either // the 32 or 64 bit variant for the current device. 
const InstructionSet isa_ = kNone; @@ -446,6 +447,8 @@ class OatFileAssistant { bool image_info_load_attempted_ = false; std::unique_ptr<ImageInfo> cached_image_info_; + friend class OatFileAssistantTest; + DISALLOW_COPY_AND_ASSIGN(OatFileAssistant); }; diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc index 4a738ab0a3..b2b86ee289 100644 --- a/runtime/oat_file_assistant_test.cc +++ b/runtime/oat_file_assistant_test.cc @@ -43,6 +43,38 @@ class OatFileAssistantNoDex2OatTest : public DexoptTest { } }; +class ScopedNonWritable { + public: + explicit ScopedNonWritable(const std::string& dex_location) { + is_valid_ = false; + size_t pos = dex_location.rfind('/'); + if (pos != std::string::npos) { + is_valid_ = true; + dex_parent_ = dex_location.substr(0, pos); + if (chmod(dex_parent_.c_str(), 0555) != 0) { + PLOG(ERROR) << "Could not change permissions on " << dex_parent_; + } + } + } + + bool IsSuccessful() { return is_valid_ && (access(dex_parent_.c_str(), W_OK) != 0); } + + ~ScopedNonWritable() { + if (is_valid_) { + if (chmod(dex_parent_.c_str(), 0777) != 0) { + PLOG(ERROR) << "Could not restore permissions on " << dex_parent_; + } + } + } + + private: + std::string dex_parent_; + bool is_valid_; +}; + +static bool IsExecutedAsRoot() { + return geteuid() == 0; } // Case: We have a DEX file, but no OAT file for it. // Expect: The status is kDex2OatNeeded. @@ -53,9 +85,9 @@ TEST_F(OatFileAssistantTest, DexNoOat) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile)); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, @@ -87,21 +119,134 @@ TEST_F(OatFileAssistantTest, NoDexNoOat) { EXPECT_EQ(nullptr, oat_file.get()); } +// Case: We have a DEX file and a PIC ODEX file, but no OAT file. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. +TEST_F(OatFileAssistantTest, OdexUpToDate) { + std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar"; + std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex"; + Copy(GetDexSrc1(), dex_location); + GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); +
+ OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + +// Case: We have a DEX file and a PIC ODEX file, but no OAT file. We load the dex +// file via a symlink. +// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. +TEST_F(OatFileAssistantTest, OdexUpToDateSymLink) { + std::string scratch_dir = GetScratchDir(); + std::string dex_location = GetScratchDir() + "/OdexUpToDate.jar"; + std::string odex_location = GetOdexDir() + "/OdexUpToDate.odex"; + + Copy(GetDexSrc1(), dex_location); + GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + + // Now replace the dex location with a symlink. + std::string link = scratch_dir + "/link"; + ASSERT_EQ(0, symlink(scratch_dir.c_str(), link.c_str())); + dex_location = link + "/OdexUpToDate.jar"; + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + // Case: We have a DEX file and up-to-date OAT file for it. // Expect: The status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, OatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + // Force the use of the oat location by making the dex parent not writable.
+ ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); + EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); + + EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus()); + EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OatFileStatus()); + EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); +} + +// Case: We have a DEX file and up-to-date OAT file for it. We load the dex file +// via a symlink. +// Expect: The status is kNoDexOptNeeded. +TEST_F(OatFileAssistantTest, OatUpToDateSymLink) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + + std::string real = GetScratchDir() + "/real"; + ASSERT_EQ(0, mkdir(real.c_str(), 0700)); + std::string link = GetScratchDir() + "/link"; + ASSERT_EQ(0, symlink(real.c_str(), link.c_str())); + + std::string dex_location = real + "/OatUpToDate.jar"; + + Copy(GetDexSrc1(), dex_location); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + + // Update the dex location to point to the symlink. + dex_location = link + "/OatUpToDate.jar"; + + // Force the use of the oat location by making the dex parent not writable. + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); + + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); @@ -120,19 +265,16 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOdex) { } std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOdex.jar"; - std::string oat_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat"; + std::string odex_location = GetOdexDir() + "/VdexUpToDateNoOdex.oat"; Copy(GetDexSrc1(), dex_location); // Generating and deleting the oat file should have the side effect of // creating an up-to-date vdex file.
- GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); // Even though the vdex file is up to date, because we don't have the oat // file, we can't know that the vdex depends on the boot image and is up to @@ -169,6 +311,11 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { if (!kIsVdexEnabled) { return; } + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } std::string dex_location = GetScratchDir() + "/VdexUpToDateNoOat.jar"; std::string oat_location; @@ -180,6 +327,8 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); ASSERT_EQ(0, unlink(oat_location.c_str())); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); // Even though the vdex file is up to date, because we don't have the oat @@ -195,20 +344,29 @@ TEST_F(OatFileAssistantTest, VdexUpToDateNoOat) { // Expect: The status is kNoDexOptNeeded if the profile hasn't changed, but // kDex2Oat if the profile has changed. TEST_F(OatFileAssistantTest, ProfileOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/ProfileOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeedProfile); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, false)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, false)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken, false)); EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeedProfile, true)); EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly, true)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken, true)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus()); @@ -219,10 +377,19 @@ TEST_F(OatFileAssistantTest, ProfileOatUpToDate) { // Case: We have a MultiDEX file and up-to-date OAT file for it. // Expect: The status is kNoDexOptNeeded and we load all dex files. TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. 
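+    // (When running as root, CAP_DAC_OVERRIDE bypasses file permission bits,
+    // so the ScopedNonWritable setup below cannot actually make the dex
+    // location read-only.)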
+ LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/MultiDexOatUpToDate.jar"; Copy(GetMultiDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false)); @@ -240,6 +407,12 @@ TEST_F(OatFileAssistantTest, MultiDexOatUpToDate) { // Case: We have a MultiDEX file where the non-main multdex entry is out of date. // Expect: The status is kDex2OatNeeded. TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/MultiDexNonMainOutOfDate.jar"; // Compile code for GetMultiDexSrc1. @@ -250,6 +423,9 @@ TEST_F(OatFileAssistantTest, MultiDexNonMainOutOfDate) { // is out of date. Copy(GetMultiDexSrc2(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed, false)); @@ -269,7 +445,7 @@ TEST_F(OatFileAssistantTest, StrippedMultiDexNonMainOutOfDate) { // Compile the odex from GetMultiDexSrc2, which has a different non-main // dex checksum. Copy(GetMultiDexSrc2(), dex_location); - GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kInterpretOnly); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kQuicken); // Strip the dex file. Copy(GetStrippedDexSrc1(), dex_location); @@ -287,12 +463,12 @@ TEST_F(OatFileAssistantTest, StrippedMultiDexNonMainOutOfDate) { EXPECT_EQ(OatFileAssistant::kOatDexOutOfDate, oat_file_assistant.OatFileStatus()); } -// Case: We have a MultiDEX file and up-to-date OAT file for it with relative +// Case: We have a MultiDEX file and up-to-date ODEX file for it with relative // encoded dex locations. // Expect: The oat file status is kNoDexOptNeeded. TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::string dex_location = GetScratchDir() + "/RelativeEncodedDexLocation.jar"; - std::string oat_location = GetOdexDir() + "/RelativeEncodedDexLocation.oat"; + std::string odex_location = GetOdexDir() + "/RelativeEncodedDexLocation.odex"; // Create the dex file Copy(GetMultiDexSrc1(), dex_location); @@ -301,16 +477,15 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { std::vector<std::string> args; args.push_back("--dex-file=" + dex_location); args.push_back("--dex-location=" + std::string("RelativeEncodedDexLocation.jar")); - args.push_back("--oat-file=" + oat_location); + args.push_back("--oat-file=" + odex_location); args.push_back("--compiler-filter=speed"); std::string error_msg; ASSERT_TRUE(OatFileAssistant::Dex2Oat(args, &error_msg)) << error_msg; // Verify we can load both dex files. 
- OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); + std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); ASSERT_TRUE(oat_file.get() != nullptr); EXPECT_TRUE(oat_file->IsExecutable()); @@ -322,6 +497,12 @@ TEST_F(OatFileAssistantTest, RelativeEncodedDexLocation) { // Case: We have a DEX file and an OAT file out of date with respect to the // dex checksum. TEST_F(OatFileAssistantTest, OatDexOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatDexOutOfDate.jar"; // We create a dex, generate an oat for it, then overwrite the dex with a @@ -330,9 +511,12 @@ TEST_F(OatFileAssistantTest, OatDexOutOfDate) { GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); Copy(GetDexSrc2(), dex_location); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -351,17 +535,14 @@ TEST_F(OatFileAssistantTest, VdexDexOutOfDate) { } std::string dex_location = GetScratchDir() + "/VdexDexOutOfDate.jar"; - std::string oat_location = GetOdexDir() + "/VdexDexOutOfDate.oat"; + std::string odex_location = GetOdexDir() + "/VdexDexOutOfDate.oat"; Copy(GetDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); Copy(GetDexSrc2(), dex_location); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -376,17 +557,14 @@ TEST_F(OatFileAssistantTest, VdexMultiDexNonMainOutOfDate) { } std::string dex_location = GetScratchDir() + "/VdexMultiDexNonMainOutOfDate.jar"; - std::string oat_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.oat"; + std::string odex_location = GetOdexDir() + "/VdexMultiDexNonMainOutOfDate.odex"; Copy(GetMultiDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, oat_location, CompilerFilter::kSpeed); - ASSERT_EQ(0, unlink(oat_location.c_str())); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); + ASSERT_EQ(0, unlink(odex_location.c_str())); Copy(GetMultiDexSrc2(), dex_location); - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), - kRuntimeISA, - false); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatFromScratch, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -395,6 +573,12 @@ TEST_F(OatFileAssistantTest, VdexMultiDexNonMainOutOfDate) { // Case: We have a DEX file and an OAT file out of date with respect to the // 
boot image. TEST_F(OatFileAssistantTest, OatImageOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatImageOutOfDate.jar"; Copy(GetDexSrc1(), dex_location); @@ -404,11 +588,14 @@ TEST_F(OatFileAssistantTest, OatImageOutOfDate) { /*pic*/false, /*with_alternate_image*/true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); EXPECT_EQ(OatFileAssistant::kDex2OatForBootImage, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -423,20 +610,29 @@ TEST_F(OatFileAssistantTest, OatImageOutOfDate) { // It shouldn't matter that the OAT file is out of date, because it is // verify-at-runtime. TEST_F(OatFileAssistantTest, OatVerifyAtRuntimeImageOutOfDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/OatVerifyAtRuntimeImageOutOfDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), - CompilerFilter::kVerifyAtRuntime, + CompilerFilter::kExtract, /*relocate*/true, /*pic*/false, /*with_alternate_image*/true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus()); @@ -457,7 +653,7 @@ TEST_F(OatFileAssistantTest, DexOdexNoOat) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -523,10 +719,10 @@ TEST_F(OatFileAssistantTest, StrippedDexOdexOat) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, // Can't run dex2oat because dex file is stripped. 
+ EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, // Compiling from the .vdex file oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); @@ -556,9 +752,9 @@ TEST_F(OatFileAssistantTest, ResourceOnlyDex) { EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OdexFileStatus()); @@ -586,24 +782,23 @@ TEST_F(OatFileAssistantTest, ResourceOnlyDex) { TEST_F(OatFileAssistantTest, OdexOatOverlap) { std::string dex_location = GetScratchDir() + "/OdexOatOverlap.jar"; std::string odex_location = GetOdexDir() + "/OdexOatOverlap.odex"; - std::string oat_location = GetOdexDir() + "/OdexOatOverlap.oat"; - // Create the dex and odex files + // Create the dex, the odex and the oat files. Copy(GetDexSrc1(), dex_location); GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - - // Create the oat file by copying the odex so they are located in the same - // place in memory. - Copy(odex_location, oat_location); + GenerateOatForTest(dex_location.c_str(), + CompilerFilter::kSpeed, + /*relocate*/false, + /*pic*/false, + /*with_alternate_image*/false); // Verify things don't go bad. - OatFileAssistant oat_file_assistant(dex_location.c_str(), - oat_location.c_str(), kRuntimeISA, true); + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); - // kDex2OatForRelocation is expected rather than -kDex2OatForRelocation - // based on the assumption that the oat location is more up-to-date than the odex + // -kDex2OatForRelocation is expected rather than kDex2OatForRelocation + // based on the assumption that the odex location is more up-to-date than the oat // location, even if they both need relocation. - EXPECT_EQ(OatFileAssistant::kDex2OatForRelocation, + EXPECT_EQ(-OatFileAssistant::kDex2OatForRelocation, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); @@ -621,30 +816,6 @@ TEST_F(OatFileAssistantTest, OdexOatOverlap) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file and a PIC ODEX file, but no OAT file. -// Expect: The status is kNoDexOptNeeded, because PIC needs no relocation. -TEST_F(OatFileAssistantTest, DexPicOdexNoOat) { - std::string dex_location = GetScratchDir() + "/DexPicOdexNoOat.jar"; - std::string odex_location = GetOdexDir() + "/DexPicOdexNoOat.odex"; - - // Create the dex and odex files - Copy(GetDexSrc1(), dex_location); - GeneratePicOdexForTest(dex_location, odex_location, CompilerFilter::kSpeed); - - // Verify the status. 
- OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); - - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); - EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kEverything)); - - EXPECT_FALSE(oat_file_assistant.IsInBootClassPath()); - EXPECT_EQ(OatFileAssistant::kOatUpToDate, oat_file_assistant.OdexFileStatus()); - EXPECT_EQ(OatFileAssistant::kOatCannotOpen, oat_file_assistant.OatFileStatus()); - EXPECT_TRUE(oat_file_assistant.HasOriginalDexFiles()); -} - // Case: We have a DEX file and a VerifyAtRuntime ODEX file, but no OAT file. // Expect: The status is kNoDexOptNeeded, because VerifyAtRuntime contains no code. TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { @@ -653,13 +824,13 @@ TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { // Create the dex and odex files Copy(GetDexSrc1(), dex_location); - GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kVerifyAtRuntime); + GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kExtract); // Verify the status. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kVerifyAtRuntime)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kExtract)); EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); @@ -672,11 +843,20 @@ TEST_F(OatFileAssistantTest, DexVerifyAtRuntimeOdexNoOat) { // Case: We have a DEX file and up-to-date OAT file for it. // Expect: We should load an executable dex file. TEST_F(OatFileAssistantTest, LoadOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + // Load the oat using an oat file assistant. OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); @@ -688,13 +868,22 @@ TEST_F(OatFileAssistantTest, LoadOatUpToDate) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file and up-to-date interpret-only OAT file for it. +// Case: We have a DEX file and up-to-date quicken OAT file for it. // Expect: We should still load the oat file as executable. TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadExecInterpretOnlyOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); - GenerateOatForTest(dex_location.c_str(), CompilerFilter::kInterpretOnly); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kQuicken); + + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); // Load the oat using an oat file assistant. 
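   // (The trailing bool argument is load_executable: passing true asks for an
   // oat file that can actually be executed, while tools that only inspect
   // dexopt status pass false.)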
OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); @@ -710,9 +899,19 @@ TEST_F(OatFileAssistantTest, LoadExecInterpretOnlyOatUpToDate) { // Case: We have a DEX file and up-to-date OAT file for it. // Expect: Loading non-executable should load the oat non-executable. TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } + std::string dex_location = GetScratchDir() + "/LoadNoExecOatUpToDate.jar"; Copy(GetDexSrc1(), dex_location); + + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + GenerateOatForTest(dex_location.c_str(), CompilerFilter::kSpeed); // Load the oat using an oat file assistant. @@ -726,70 +925,33 @@ TEST_F(OatFileAssistantTest, LoadNoExecOatUpToDate) { EXPECT_EQ(1u, dex_files.size()); } -// Case: We have a DEX file. -// Expect: We should load an executable dex file from an alternative oat -// location. -TEST_F(OatFileAssistantTest, LoadDexNoAlternateOat) { - std::string dex_location = GetScratchDir() + "/LoadDexNoAlternateOat.jar"; - std::string oat_location = GetScratchDir() + "/LoadDexNoAlternateOat.oat"; +// Case: We don't have a DEX file and can't write the oat file. +// Expect: We should fail to generate the oat file without crashing. +TEST_F(OatFileAssistantTest, GenNoDex) { + if (IsExecutedAsRoot()) { + // We cannot simulate non writable locations when executed as root: b/38000545. + LOG(ERROR) << "Test skipped because it's running as root"; + return; + } - Copy(GetDexSrc1(), dex_location); + std::string dex_location = GetScratchDir() + "/GenNoDex.jar"; - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); + ScopedNonWritable scoped_non_writable(dex_location); + ASSERT_TRUE(scoped_non_writable.IsSuccessful()); + + OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, true); std::string error_msg; Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); + // We should get kUpdateSucceeded from MakeUpToDate since there's nothing + // that can be done in this situation. ASSERT_EQ(OatFileAssistant::kUpdateSucceeded, - oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; - - std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); - ASSERT_TRUE(oat_file.get() != nullptr); - EXPECT_TRUE(oat_file->IsExecutable()); - std::vector<std::unique_ptr<const DexFile>> dex_files; - dex_files = oat_file_assistant.LoadDexFiles(*oat_file, dex_location.c_str()); - EXPECT_EQ(1u, dex_files.size()); - - EXPECT_TRUE(OS::FileExists(oat_location.c_str())); + oat_file_assistant.MakeUpToDate(false, &error_msg)); - // Verify it didn't create an oat in the default location. + // Verify it didn't create an oat in the default location (dalvik-cache). OatFileAssistant ofm(dex_location.c_str(), kRuntimeISA, false); EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OatFileStatus()); -} - -// Case: We have a DEX file but can't write the oat file. -// Expect: We should fail to make the oat file up to date. -TEST_F(OatFileAssistantTest, LoadDexUnwriteableAlternateOat) { - std::string dex_location = GetScratchDir() + "/LoadDexUnwriteableAlternateOat.jar"; - - // Make the oat location unwritable by inserting some non-existent - // intermediate directories. 
- std::string oat_location = GetScratchDir() + "/foo/bar/LoadDexUnwriteableAlternateOat.oat"; - - Copy(GetDexSrc1(), dex_location); - - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); - ASSERT_EQ(OatFileAssistant::kUpdateNotAttempted, - oat_file_assistant.MakeUpToDate(false, &error_msg)); - - std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile(); - ASSERT_TRUE(oat_file.get() == nullptr); -} - -// Case: We don't have a DEX file and can't write the oat file. -// Expect: We should fail to generate the oat file without crashing. -TEST_F(OatFileAssistantTest, GenNoDex) { - std::string dex_location = GetScratchDir() + "/GenNoDex.jar"; - std::string oat_location = GetScratchDir() + "/GenNoDex.oat"; - - OatFileAssistant oat_file_assistant( - dex_location.c_str(), oat_location.c_str(), kRuntimeISA, true); - std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); - EXPECT_EQ(OatFileAssistant::kUpdateNotAttempted, - oat_file_assistant.GenerateOatFile(&error_msg)); + // Verify it didn't create the odex file in the default location (../oat/isa/...odex) + EXPECT_EQ(OatFileAssistant::kOatCannotOpen, ofm.OdexFileStatus()); } // Turn an absolute path into a path relative to the current working @@ -1003,19 +1165,19 @@ TEST_F(OatFileAssistantTest, RuntimeCompilerFilterOptionUsed) { OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false); std::string error_msg; - Runtime::Current()->AddCompilerOption("--compiler-filter=interpret-only"); + Runtime::Current()->AddCompilerOption("--compiler-filter=quicken"); EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; - EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); - EXPECT_EQ(OatFileAssistant::kDex2OatForFilter, + EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded, + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); + EXPECT_EQ(-OatFileAssistant::kDex2OatForFilter, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); Runtime::Current()->AddCompilerOption("--compiler-filter=speed"); EXPECT_EQ(OatFileAssistant::kUpdateSucceeded, oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg; EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, - oat_file_assistant.GetDexOptNeeded(CompilerFilter::kInterpretOnly)); + oat_file_assistant.GetDexOptNeeded(CompilerFilter::kQuicken)); EXPECT_EQ(OatFileAssistant::kNoDexOptNeeded, oat_file_assistant.GetDexOptNeeded(CompilerFilter::kSpeed)); diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc index 6799918534..c1cf800e5d 100644 --- a/runtime/oat_file_manager.cc +++ b/runtime/oat_file_manager.cc @@ -546,8 +546,8 @@ bool OatFileManager::HasCollisions(const OatFile* oat_file, std::vector<const DexFile*> dex_files_loaded; // Try to get dex files from the given class loader. If the class loader is null, or we do - // not support one of the class loaders in the chain, conservatively compare against all - // (non-boot) oat files. + // not support one of the class loaders in the chain, we do nothing and assume the collision + // check has succeeded. 
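+  // (A "collision" here means the same class name defined by two different
+  // dex files in one class loader hierarchy, e.g. LFoo; present in both
+  // app.apk and a bundled library, where the winning definition would depend
+  // on load order.)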
bool class_loader_ok = false; { ScopedObjectAccess soa(Thread::Current()); @@ -566,37 +566,20 @@ bool OatFileManager::HasCollisions(const OatFile* oat_file, } else if (h_class_loader != nullptr) { VLOG(class_linker) << "Something unsupported with " << mirror::Class::PrettyClass(h_class_loader->GetClass()); + + // This is a class loader we don't recognize. Our earlier strategy would + // be to perform a global duplicate class check (with all loaded oat files) + // but that seems overly conservative - we have no way of knowing that + // those files are present in the same loader hierarchy. Among other + // things, it hurt GMS core and its filtering class loader. } } - // Dex files are registered late - once a class is actually being loaded. We have to compare - // against the open oat files. Take the oat_file_manager_lock_ that protects oat_files_ accesses. - ReaderMutexLock mu(Thread::Current(), *Locks::oat_file_manager_lock_); - - // Vector that holds the newly opened dex files live, this is done to prevent leaks. - std::vector<std::unique_ptr<const DexFile>> opened_dex_files; - + // Exit if we find a class loader we don't recognize. Proceed to check shared + // libraries and do a full class loader check otherwise. if (!class_loader_ok) { - // Add dex files from already loaded oat files, but skip boot. - - // Clean up the dex files. - dex_files_loaded.clear(); - - std::vector<const OatFile*> boot_oat_files = GetBootOatFiles(); - // The same OatFile can be loaded multiple times at different addresses. In this case, we don't - // need to check both against each other since they would have resolved the same way at compile - // time. - std::unordered_set<std::string> unique_locations; - for (const std::unique_ptr<const OatFile>& loaded_oat_file : oat_files_) { - DCHECK_NE(loaded_oat_file.get(), oat_file); - const std::string& location = loaded_oat_file->GetLocation(); - if (std::find(boot_oat_files.begin(), boot_oat_files.end(), loaded_oat_file.get()) == - boot_oat_files.end() && location != oat_file->GetLocation() && - unique_locations.find(location) == unique_locations.end()) { - unique_locations.insert(location); - AddDexFilesFromOat(loaded_oat_file.get(), &dex_files_loaded, &opened_dex_files); - } - } + LOG(WARNING) << "Skipping duplicate class check due to unrecognized classloader"; + return false; } // Exit if shared libraries are ok. Do a full duplicate classes check otherwise. @@ -606,6 +589,9 @@ bool OatFileManager::HasCollisions(const OatFile* oat_file, return false; } + // Vector that holds the newly opened dex files live, this is done to prevent leaks. + std::vector<std::unique_ptr<const DexFile>> opened_dex_files; + ScopedTrace st("Collision check"); // Add dex files from the oat file to check. std::vector<const DexFile*> dex_files_unloaded; @@ -629,9 +615,7 @@ std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat( Locks::mutator_lock_->AssertNotHeld(self); Runtime* const runtime = Runtime::Current(); - // TODO(calin): remove the explicit oat_location for OatFileAssistant OatFileAssistant oat_file_assistant(dex_location, - /*oat_location*/ nullptr, kRuntimeISA, !runtime->IsAotCompiler()); @@ -677,21 +661,34 @@ std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat( if (!accept_oat_file) { // Failed the collision check. Print warning. 
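+    // (Roughly, the logic below is: fallback enabled and original dex present
+    // -> extract dex files from the APK; original dex stripped -> grudgingly
+    // accept the oat file; fallback disabled -> fail to load the classes.)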
     if (Runtime::Current()->IsDexFileFallbackEnabled()) {
-      LOG(WARNING) << "Found duplicate classes, falling back to interpreter mode for "
-                   << dex_location;
+      if (!oat_file_assistant.HasOriginalDexFiles()) {
+        // We need to fall back but don't have original dex files. We have to
+        // fall back to opening the existing oat file. This is potentially
+        // unsafe so we warn about it.
+        accept_oat_file = true;
+
+        LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
+                     << "Allow oat file use. This is potentially dangerous.";
+      } else {
+        // We have to fall back and found original dex files - extract them from an APK.
+        // Also warn about this operation because it's potentially wasteful.
+        LOG(WARNING) << "Found duplicate classes, falling back to extracting from APK : "
+                     << dex_location;
+        LOG(WARNING) << "NOTE: This wastes RAM and hurts startup performance.";
+      }
     } else {
+      // TODO: We should remove this. The fact that we're here implies -Xno-dex-file-fallback
+      // was set, which means that we should never fall back. If we don't have original dex
+      // files, we should just fail resolution as the flag intended.
+      if (!oat_file_assistant.HasOriginalDexFiles()) {
+        accept_oat_file = true;
+      }
+
       LOG(WARNING) << "Found duplicate classes, dex-file-fallback disabled, will be failing to "
                       " load classes for " << dex_location;
     }
-    LOG(WARNING) << error_msg;
 
-    // However, if the app was part of /system and preopted, there is no original dex file
-    // available. In that case grudgingly accept the oat file.
-    if (!oat_file_assistant.HasOriginalDexFiles()) {
-      accept_oat_file = true;
-      LOG(WARNING) << "Dex location " << dex_location << " does not seem to include dex file. "
-                   << "Allow oat file use. This is potentially dangerous.";
-    }
+    LOG(WARNING) << error_msg;
   }
 
   if (accept_oat_file) {
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index c3a94b93a0..0921ceae05 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -1078,9 +1078,156 @@ class JvmtiFunctions {
                                            jint* extension_count_ptr,
                                            jvmtiExtensionFunctionInfo** extensions) {
     ENSURE_VALID_ENV(env);
-    // We do not have any extension functions.
-    *extension_count_ptr = 0;
-    *extensions = nullptr;
+    ENSURE_NON_NULL(extension_count_ptr);
+    ENSURE_NON_NULL(extensions);
+
+    std::vector<jvmtiExtensionFunctionInfo> ext_vector;
+
+    // Holders for allocated values.
+    std::vector<JvmtiUniquePtr<char[]>> char_buffers;
+    std::vector<JvmtiUniquePtr<jvmtiParamInfo[]>> param_buffers;
+    std::vector<JvmtiUniquePtr<jvmtiError[]>> error_buffers;
+
+    // Add a helper struct that takes an arbitrary const char*. add_extension will use Allocate
+    // appropriately.
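+    // Note: from an agent's point of view these functions are discovered via
+    // standard JVMTI, roughly:
+    //   jint n;
+    //   jvmtiExtensionFunctionInfo* fns;
+    //   jvmti->GetExtensionFunctions(&n, &fns);
+    //   // then match fns[i].id against e.g. "com.android.art.heap.get_object_heap_id".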
+    struct CParamInfo {
+      const char* name;
+      jvmtiParamKind kind;
+      jvmtiParamTypes base_type;
+      jboolean null_ok;
+    };
+
+    auto add_extension = [&](jvmtiExtensionFunction func,
+                             const char* id,
+                             const char* short_description,
+                             jint param_count,
+                             const std::vector<CParamInfo>& params,
+                             jint error_count,
+                             const std::vector<jvmtiError>& errors) {
+      jvmtiExtensionFunctionInfo func_info;
+      jvmtiError error;
+
+      func_info.func = func;
+
+      JvmtiUniquePtr<char[]> id_ptr = CopyString(env, id, &error);
+      if (id_ptr == nullptr) {
+        return error;
+      }
+      func_info.id = id_ptr.get();
+      char_buffers.push_back(std::move(id_ptr));
+
+      JvmtiUniquePtr<char[]> descr = CopyString(env, short_description, &error);
+      if (descr == nullptr) {
+        return error;
+      }
+      func_info.short_description = descr.get();
+      char_buffers.push_back(std::move(descr));
+
+      func_info.param_count = param_count;
+      if (param_count > 0) {
+        JvmtiUniquePtr<jvmtiParamInfo[]> params_ptr =
+            AllocJvmtiUniquePtr<jvmtiParamInfo[]>(env, param_count, &error);
+        if (params_ptr == nullptr) {
+          return error;
+        }
+        func_info.params = params_ptr.get();
+        param_buffers.push_back(std::move(params_ptr));
+
+        for (jint i = 0; i != param_count; ++i) {
+          JvmtiUniquePtr<char[]> param_name = CopyString(env, params[i].name, &error);
+          if (param_name == nullptr) {
+            return error;
+          }
+          func_info.params[i].name = param_name.get();
+          char_buffers.push_back(std::move(param_name));
+
+          func_info.params[i].kind = params[i].kind;
+          func_info.params[i].base_type = params[i].base_type;
+          func_info.params[i].null_ok = params[i].null_ok;
+        }
+      } else {
+        func_info.params = nullptr;
+      }
+
+      func_info.error_count = error_count;
+      if (error_count > 0) {
+        JvmtiUniquePtr<jvmtiError[]> errors_ptr =
+            AllocJvmtiUniquePtr<jvmtiError[]>(env, error_count, &error);
+        if (errors_ptr == nullptr) {
+          return error;
+        }
+        func_info.errors = errors_ptr.get();
+        error_buffers.push_back(std::move(errors_ptr));
+
+        for (jint i = 0; i != error_count; ++i) {
+          func_info.errors[i] = errors[i];
+        }
+      } else {
+        func_info.errors = nullptr;
+      }
+
+      ext_vector.push_back(func_info);
+
+      return ERR(NONE);
+    };
+
+    jvmtiError error;
+
+    // Heap extensions.
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetObjectHeapId),
+        "com.android.art.heap.get_object_heap_id",
+        "Retrieve the heap id of the object tagged with the given argument. An "
+        "arbitrary object is chosen if multiple objects exist with the same tag.",
+        2,
+        {  // NOLINT [whitespace/braces] [4]
+            { "tag", JVMTI_KIND_IN, JVMTI_TYPE_JLONG, false},
+            { "heap_id", JVMTI_KIND_OUT, JVMTI_TYPE_JINT, false}
+        },
+        1,
+        { JVMTI_ERROR_NOT_FOUND });
+    if (error != ERR(NONE)) {
+      return error;
+    }
+
+    error = add_extension(
+        reinterpret_cast<jvmtiExtensionFunction>(HeapExtensions::GetHeapName),
+        "com.android.art.heap.get_heap_name",
+        "Retrieve the name of the heap with the given id.",
+        2,
+        {  // NOLINT [whitespace/braces] [4]
+            { "heap_id", JVMTI_KIND_IN, JVMTI_TYPE_JINT, false},
+            { "heap_name", JVMTI_KIND_ALLOC_BUF, JVMTI_TYPE_CCHAR, false}
+        },
+        1,
+        { JVMTI_ERROR_ILLEGAL_ARGUMENT });
+    if (error != ERR(NONE)) {
+      return error;
+    }
+
+    // Copy into output buffer.
+ + *extension_count_ptr = ext_vector.size(); + JvmtiUniquePtr<jvmtiExtensionFunctionInfo[]> out_data = + AllocJvmtiUniquePtr<jvmtiExtensionFunctionInfo[]>(env, ext_vector.size(), &error); + if (out_data == nullptr) { + return error; + } + memcpy(out_data.get(), + ext_vector.data(), + ext_vector.size() * sizeof(jvmtiExtensionFunctionInfo)); + *extensions = out_data.release(); + + // Release all the buffer holders, we're OK now. + for (auto& holder : char_buffers) { + holder.release(); + } + for (auto& holder : param_buffers) { + holder.release(); + } + for (auto& holder : error_buffers) { + holder.release(); + } return ERR(NONE); } @@ -1358,23 +1505,26 @@ class JvmtiFunctions { static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error, char** name_ptr) { ENSURE_NON_NULL(name_ptr); + auto copy_fn = [&](const char* name_cstr) { + jvmtiError res; + JvmtiUniquePtr<char[]> copy = CopyString(env, name_cstr, &res); + if (copy == nullptr) { + *name_ptr = nullptr; + return res; + } else { + *name_ptr = copy.release(); + return OK; + } + }; switch (error) { -#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \ - jvmtiError res; \ - JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_"#e, &res); \ - if (copy == nullptr) { \ - *name_ptr = nullptr; \ - return res; \ - } else { \ - *name_ptr = copy.release(); \ - return OK; \ - } \ - } while (false) +#define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : \ + return copy_fn("JVMTI_ERROR_"#e); ERROR_CASE(NONE); ERROR_CASE(INVALID_THREAD); ERROR_CASE(INVALID_THREAD_GROUP); ERROR_CASE(INVALID_PRIORITY); ERROR_CASE(THREAD_NOT_SUSPENDED); + ERROR_CASE(THREAD_SUSPENDED); ERROR_CASE(THREAD_NOT_ALIVE); ERROR_CASE(INVALID_OBJECT); ERROR_CASE(INVALID_CLASS); @@ -1419,18 +1569,9 @@ class JvmtiFunctions { ERROR_CASE(UNATTACHED_THREAD); ERROR_CASE(INVALID_ENVIRONMENT); #undef ERROR_CASE - default: { - jvmtiError res; - JvmtiUniquePtr<char[]> copy = CopyString(env, "JVMTI_ERROR_UNKNOWN", &res); - if (copy == nullptr) { - *name_ptr = nullptr; - return res; - } else { - *name_ptr = copy.release(); - return ERR(ILLEGAL_ARGUMENT); - } - } } + + return ERR(ILLEGAL_ARGUMENT); } static jvmtiError SetVerboseFlag(jvmtiEnv* env, diff --git a/runtime/openjdkjvmti/fixed_up_dex_file.cc b/runtime/openjdkjvmti/fixed_up_dex_file.cc index 3338358796..29aebae4b6 100644 --- a/runtime/openjdkjvmti/fixed_up_dex_file.cc +++ b/runtime/openjdkjvmti/fixed_up_dex_file.cc @@ -32,10 +32,8 @@ #include "fixed_up_dex_file.h" #include "dex_file-inl.h" -// Compiler includes. -#include "dex/dex_to_dex_decompiler.h" - // Runtime includes. 
+#include "dex_to_dex_decompiler.h" #include "oat_file.h" #include "vdex_file.h" diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h index f67fffccbb..64ab3e7b2e 100644 --- a/runtime/openjdkjvmti/jvmti_weak_table-inl.h +++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h @@ -384,6 +384,23 @@ jvmtiError JvmtiWeakTable<T>::GetTaggedObjects(jvmtiEnv* jvmti_env, return ERR(NONE); } +template <typename T> +art::mirror::Object* JvmtiWeakTable<T>::Find(T tag) { + art::Thread* self = art::Thread::Current(); + art::MutexLock mu(self, allow_disallow_lock_); + Wait(self); + + for (auto& pair : tagged_objects_) { + if (tag == pair.second) { + art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>(); + if (obj != nullptr) { + return obj; + } + } + } + return nullptr; +} + } // namespace openjdkjvmti #endif // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_ diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h index eeea75aa9d..a6fd247c51 100644 --- a/runtime/openjdkjvmti/jvmti_weak_table.h +++ b/runtime/openjdkjvmti/jvmti_weak_table.h @@ -116,6 +116,10 @@ class JvmtiWeakTable : public art::gc::SystemWeakHolder { void Unlock() RELEASE(allow_disallow_lock_); void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_); + art::mirror::Object* Find(T tag) + REQUIRES_SHARED(art::Locks::mutator_lock_) + REQUIRES(!allow_disallow_lock_); + protected: // Should HandleNullSweep be called when Sweep detects the release of an object? virtual bool DoesHandleNullOnSweep() { diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc index 7fc5104bce..9b4dcaa9d0 100644 --- a/runtime/openjdkjvmti/ti_heap.cc +++ b/runtime/openjdkjvmti/ti_heap.cc @@ -1400,4 +1400,95 @@ jvmtiError HeapUtil::ForceGarbageCollection(jvmtiEnv* env ATTRIBUTE_UNUSED) { return ERR(NONE); } + +static constexpr jint kHeapIdDefault = 0; +static constexpr jint kHeapIdImage = 1; +static constexpr jint kHeapIdZygote = 2; +static constexpr jint kHeapIdApp = 3; + +jvmtiError HeapExtensions::GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...) { + if (heap_id == nullptr) { + return ERR(NULL_POINTER); + } + + art::Thread* self = art::Thread::Current(); + + auto work = [&]() REQUIRES_SHARED(art::Locks::mutator_lock_) { + ObjectTagTable* tag_table = ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get(); + art::ObjPtr<art::mirror::Object> obj = tag_table->Find(tag); + if (obj == nullptr) { + return ERR(NOT_FOUND); + } + + art::gc::Heap* const heap = art::Runtime::Current()->GetHeap(); + const art::gc::space::ContinuousSpace* const space = + heap->FindContinuousSpaceFromObject(obj, true); + jint heap_type = kHeapIdApp; + if (space != nullptr) { + if (space->IsZygoteSpace()) { + heap_type = kHeapIdZygote; + } else if (space->IsImageSpace() && heap->ObjectIsInBootImageSpace(obj)) { + // Only count objects in the boot image as HPROF_HEAP_IMAGE, this leaves app image objects + // as HPROF_HEAP_APP. b/35762934 + heap_type = kHeapIdImage; + } + } else { + const auto* los = heap->GetLargeObjectsSpace(); + if (los->Contains(obj.Ptr()) && los->IsZygoteLargeObject(self, obj.Ptr())) { + heap_type = kHeapIdZygote; + } + } + *heap_id = heap_type; + return ERR(NONE); + }; + + if (!art::Locks::mutator_lock_->IsSharedHeld(self)) { + if (!self->IsThreadSuspensionAllowable()) { + return ERR(INTERNAL); + } + art::ScopedObjectAccess soa(self); + return work(); + } else { + // We cannot use SOA in this case. 
We might be holding the lock, but may not be in the + // runnable state (e.g., during GC). + art::Locks::mutator_lock_->AssertSharedHeld(self); + // TODO: Investigate why ASSERT_SHARED_CAPABILITY doesn't work. + auto annotalysis_workaround = [&]() NO_THREAD_SAFETY_ANALYSIS { + return work(); + }; + return annotalysis_workaround(); + } +} + +static jvmtiError CopyStringAndReturn(jvmtiEnv* env, const char* in, char** out) { + jvmtiError error; + JvmtiUniquePtr<char[]> param_name = CopyString(env, in, &error); + if (param_name == nullptr) { + return error; + } + *out = param_name.release(); + return ERR(NONE); +} + +static constexpr const char* kHeapIdDefaultName = "default"; +static constexpr const char* kHeapIdImageName = "image"; +static constexpr const char* kHeapIdZygoteName = "zygote"; +static constexpr const char* kHeapIdAppName = "app"; + +jvmtiError HeapExtensions::GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...) { + switch (heap_id) { + case kHeapIdDefault: + return CopyStringAndReturn(env, kHeapIdDefaultName, heap_name); + case kHeapIdImage: + return CopyStringAndReturn(env, kHeapIdImageName, heap_name); + case kHeapIdZygote: + return CopyStringAndReturn(env, kHeapIdZygoteName, heap_name); + case kHeapIdApp: + return CopyStringAndReturn(env, kHeapIdAppName, heap_name); + + default: + return ERR(ILLEGAL_ARGUMENT); + } +} + } // namespace openjdkjvmti diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h index dccecb4aa3..b4b71ba88e 100644 --- a/runtime/openjdkjvmti/ti_heap.h +++ b/runtime/openjdkjvmti/ti_heap.h @@ -56,6 +56,12 @@ class HeapUtil { ObjectTagTable* tags_; }; +class HeapExtensions { + public: + static jvmtiError JNICALL GetObjectHeapId(jvmtiEnv* env, jlong tag, jint* heap_id, ...); + static jvmtiError JNICALL GetHeapName(jvmtiEnv* env, jint heap_id, char** heap_name, ...); +}; + } // namespace openjdkjvmti #endif // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_ diff --git a/runtime/os.h b/runtime/os.h index 46d89fb8a5..7130fc3732 100644 --- a/runtime/os.h +++ b/runtime/os.h @@ -44,7 +44,7 @@ class OS { static File* CreateEmptyFileWriteOnly(const char* name); // Open a file with the specified open(2) flags. - static File* OpenFileWithFlags(const char* name, int flags); + static File* OpenFileWithFlags(const char* name, int flags, bool auto_flush = true); // Check if a file exists. 
static bool FileExists(const char* name); diff --git a/runtime/os_linux.cc b/runtime/os_linux.cc index 1db09b4445..0add4965d1 100644 --- a/runtime/os_linux.cc +++ b/runtime/os_linux.cc @@ -51,10 +51,11 @@ File* OS::CreateEmptyFileWriteOnly(const char* name) { return art::CreateEmptyFile(name, O_WRONLY | O_TRUNC | O_NOFOLLOW | O_CLOEXEC); } -File* OS::OpenFileWithFlags(const char* name, int flags) { +File* OS::OpenFileWithFlags(const char* name, int flags, bool auto_flush) { CHECK(name != nullptr); bool read_only = ((flags & O_ACCMODE) == O_RDONLY); - std::unique_ptr<File> file(new File(name, flags, 0666, !read_only)); + bool check_usage = !read_only && auto_flush; + std::unique_ptr<File> file(new File(name, flags, 0666, check_usage)); if (!file->IsOpened()) { return nullptr; } diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc index b8669412fc..db10103c4b 100644 --- a/runtime/quick_exception_handler.cc +++ b/runtime/quick_exception_handler.cc @@ -530,7 +530,7 @@ void QuickExceptionHandler::DeoptimizeStack() { PrepareForLongJumpToInvokeStubOrInterpreterBridge(); } -void QuickExceptionHandler::DeoptimizeSingleFrame() { +void QuickExceptionHandler::DeoptimizeSingleFrame(DeoptimizationKind kind) { DCHECK(is_deoptimization_); if (VLOG_IS_ON(deopt) || kDebugExceptionDelivery) { @@ -544,6 +544,10 @@ void QuickExceptionHandler::DeoptimizeSingleFrame() { // Compiled code made an explicit deoptimization. ArtMethod* deopt_method = visitor.GetSingleFrameDeoptMethod(); DCHECK(deopt_method != nullptr); + LOG(INFO) << "Deoptimizing " + << deopt_method->PrettyMethod() + << " due to " + << GetDeoptimizationKindName(kind); if (Runtime::Current()->UseJitCompilation()) { Runtime::Current()->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor( deopt_method, visitor.GetSingleFrameDeoptQuickMethodHeader()); diff --git a/runtime/quick_exception_handler.h b/runtime/quick_exception_handler.h index 3ead7dbe64..8090f9b035 100644 --- a/runtime/quick_exception_handler.h +++ b/runtime/quick_exception_handler.h @@ -20,6 +20,7 @@ #include "base/logging.h" #include "base/macros.h" #include "base/mutex.h" +#include "deoptimization_kind.h" #include "stack_reference.h" namespace art { @@ -62,7 +63,7 @@ class QuickExceptionHandler { // the result of IsDeoptimizeable(). // - It can be either full-fragment, or partial-fragment deoptimization, depending // on whether that single frame covers full or partial fragment. - void DeoptimizeSingleFrame() REQUIRES_SHARED(Locks::mutator_lock_); + void DeoptimizeSingleFrame(DeoptimizationKind kind) REQUIRES_SHARED(Locks::mutator_lock_); void DeoptimizePartialFragmentFixup(uintptr_t return_pc) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 0125539ab0..60fa0828a0 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -262,6 +262,9 @@ Runtime::Runtime() std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u); interpreter::CheckInterpreterAsmConstants(); callbacks_.reset(new RuntimeCallbacks()); + for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) { + deoptimization_counts_[i] = 0u; + } } Runtime::~Runtime() { @@ -336,6 +339,16 @@ Runtime::~Runtime() { jit_->DeleteThreadPool(); } + // Make sure our internal threads are dead before we start tearing down things they're using. + Dbg::StopJdwp(); + delete signal_catcher_; + + // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended. 
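+  // (Note the ordering: the thread list is shut down here, before the agent
+  // and plugin Unload() calls below, so no agent-created thread is still
+  // running when its library gets unmapped.)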
+ { + ScopedTrace trace2("Delete thread list"); + thread_list_->ShutDown(); + } + // TODO Maybe do some locking. for (auto& agent : agents_) { agent.Unload(); @@ -346,15 +359,9 @@ Runtime::~Runtime() { plugin.Unload(); } - // Make sure our internal threads are dead before we start tearing down things they're using. - Dbg::StopJdwp(); - delete signal_catcher_; + // Finally delete the thread list. + delete thread_list_; - // Make sure all other non-daemon threads have terminated, and all daemon threads are suspended. - { - ScopedTrace trace2("Delete thread list"); - delete thread_list_; - } // Delete the JIT after thread list to ensure that there is no remaining threads which could be // accessing the instrumentation when we delete it. if (jit_ != nullptr) { @@ -905,6 +912,7 @@ static bool OpenDexFilesFromImage(const std::string& image_location, std::unique_ptr<VdexFile> vdex_file(VdexFile::Open(vdex_filename, false /* writable */, false /* low_4gb */, + false, /* unquicken */ &error_msg)); if (vdex_file.get() == nullptr) { return false; @@ -1570,6 +1578,23 @@ void Runtime::RegisterRuntimeNativeMethods(JNIEnv* env) { register_sun_misc_Unsafe(env); } +std::ostream& operator<<(std::ostream& os, const DeoptimizationKind& kind) { + os << GetDeoptimizationKindName(kind); + return os; +} + +void Runtime::DumpDeoptimizations(std::ostream& os) { + for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) { + if (deoptimization_counts_[i] != 0) { + os << "Number of " + << GetDeoptimizationKindName(static_cast<DeoptimizationKind>(i)) + << " deoptimizations: " + << deoptimization_counts_[i] + << "\n"; + } + } +} + void Runtime::DumpForSigQuit(std::ostream& os) { GetClassLinker()->DumpForSigQuit(os); GetInternTable()->DumpForSigQuit(os); @@ -1581,6 +1606,7 @@ void Runtime::DumpForSigQuit(std::ostream& os) { } else { os << "Running non JIT\n"; } + DumpDeoptimizations(os); TrackedAllocators::Dump(os); os << "\n"; @@ -1962,12 +1988,23 @@ void Runtime::SetInstructionSet(InstructionSet instruction_set) { } } +void Runtime::ClearInstructionSet() { + instruction_set_ = InstructionSet::kNone; +} + void Runtime::SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type) { DCHECK_LT(static_cast<int>(type), static_cast<int>(kLastCalleeSaveType)); CHECK(method != nullptr); callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method); } +void Runtime::ClearCalleeSaveMethods() { + for (size_t i = 0; i < static_cast<size_t>(kLastCalleeSaveType); ++i) { + CalleeSaveType type = static_cast<CalleeSaveType>(i); + callee_save_methods_[type] = reinterpret_cast<uintptr_t>(nullptr); + } +} + void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths, const std::string& profile_output_filename) { if (jit_.get() == nullptr) { @@ -2133,7 +2170,7 @@ void Runtime::SetFaultMessage(const std::string& message) { void Runtime::AddCurrentRuntimeFeaturesAsDex2OatArguments(std::vector<std::string>* argv) const { if (GetInstrumentation()->InterpretOnly()) { - argv->push_back("--compiler-filter=interpret-only"); + argv->push_back("--compiler-filter=quicken"); } // Make the dex2oat instruction set match that of the launching runtime. 
If we have multiple
diff --git a/runtime/runtime.h b/runtime/runtime.h
index df13b70add..a2505e2292 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -29,6 +29,7 @@
 #include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
+#include "deoptimization_kind.h"
 #include "dex_file_types.h"
 #include "experimental_flags.h"
 #include "gc_root.h"
@@ -235,6 +236,7 @@ class Runtime {
   // Detaches the current native thread from the runtime.
   void DetachCurrentThread() REQUIRES(!Locks::mutator_lock_);
 
+  void DumpDeoptimizations(std::ostream& os);
   void DumpForSigQuit(std::ostream& os);
   void DumpLockHolders(std::ostream& os);
 
@@ -356,6 +358,9 @@ class Runtime {
   }
   void SetResolutionMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
+  void ClearResolutionMethod() {
+    resolution_method_ = nullptr;
+  }
 
   ArtMethod* CreateResolutionMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -367,6 +372,10 @@ class Runtime {
     return imt_conflict_method_ != nullptr;
   }
 
+  void ClearImtConflictMethod() {
+    imt_conflict_method_ = nullptr;
+  }
+
   void FixupConflictTables();
   void SetImtConflictMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
   void SetImtUnimplementedMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -374,6 +383,10 @@ class Runtime {
   ArtMethod* CreateImtConflictMethod(LinearAlloc* linear_alloc)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void ClearImtUnimplementedMethod() {
+    imt_unimplemented_method_ = nullptr;
+  }
+
   // Returns a special method that describes all callee saves being spilled to the stack.
   enum CalleeSaveType {
     kSaveAllCalleeSaves,  // All callee-save registers.
@@ -409,8 +422,10 @@ class Runtime {
   }
 
   void SetInstructionSet(InstructionSet instruction_set);
+  void ClearInstructionSet();
 
   void SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type);
+  void ClearCalleeSaveMethods();
 
   ArtMethod* CreateCalleeSaveMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -669,6 +684,11 @@ class Runtime {
     dump_gc_performance_on_shutdown_ = value;
   }
 
+  void IncrementDeoptimizationCount(DeoptimizationKind kind) {
+    DCHECK_LE(kind, DeoptimizationKind::kLast);
+    deoptimization_counts_[static_cast<size_t>(kind)]++;
+  }
+
 private:
   static void InitPlatformSignalHandlers();
 
@@ -928,6 +948,8 @@ class Runtime {
 
   std::unique_ptr<RuntimeCallbacks> callbacks_;
 
+  std::atomic<uint32_t> deoptimization_counts_[static_cast<uint32_t>(DeoptimizationKind::kLast) + 1];
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_callbacks_test.cc b/runtime/runtime_callbacks_test.cc
index abe99e0d50..640f9ce848 100644
--- a/runtime/runtime_callbacks_test.cc
+++ b/runtime/runtime_callbacks_test.cc
@@ -335,6 +335,9 @@ class RuntimeSigQuitCallbackRuntimeCallbacksTest : public RuntimeCallbacksTest {
 };
 
 TEST_F(RuntimeSigQuitCallbackRuntimeCallbacksTest, SigQuit) {
+  // SigQuit induces a dump. ASAN isn't happy with libunwind reading memory.
+  TEST_DISABLED_FOR_MEMORY_TOOL_ASAN();
+
   // The runtime needs to be started for the signal handler.
Thread* self = Thread::Current(); diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc index 382643314c..faea7b3821 100644 --- a/runtime/signal_catcher.cc +++ b/runtime/signal_catcher.cc @@ -115,7 +115,7 @@ std::string SignalCatcher::GetStackTraceFileName() { for (uint32_t i = 0; i < kMaxRetries; ++i) { std::srand(NanoTime()); - // Sample output for PID 1234 : /data/anr-pid1234-cafeffee.txt + // Sample output for PID 1234 : /data/anr/anr-pid1234-cafeffee.txt const std::string file_name = android::base::StringPrintf( "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt", stack_trace_dir_.c_str(), @@ -135,19 +135,19 @@ std::string SignalCatcher::GetStackTraceFileName() { } void SignalCatcher::Output(const std::string& s) { - const std::string stack_trace_file = GetStackTraceFileName(); - if (stack_trace_file.empty()) { + const std::string output_file = GetStackTraceFileName(); + if (output_file.empty()) { LOG(INFO) << s; return; } ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput); - int fd = open(stack_trace_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666); + int fd = open(output_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666); if (fd == -1) { - PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'"; + PLOG(ERROR) << "Unable to open stack trace file '" << output_file << "'"; return; } - std::unique_ptr<File> file(new File(fd, stack_trace_file, true)); + std::unique_ptr<File> file(new File(fd, output_file, true)); bool success = file->WriteFully(s.data(), s.size()); if (success) { success = file->FlushCloseOrErase() == 0; @@ -155,9 +155,9 @@ void SignalCatcher::Output(const std::string& s) { file->Erase(); } if (success) { - LOG(INFO) << "Wrote stack traces to '" << stack_trace_file << "'"; + LOG(INFO) << "Wrote stack traces to '" << output_file << "'"; } else { - PLOG(ERROR) << "Failed to write stack traces to '" << stack_trace_file << "'"; + PLOG(ERROR) << "Failed to write stack traces to '" << output_file << "'"; } } diff --git a/runtime/string_reference.h b/runtime/string_reference.h index 0fc06e6389..6ba47736ec 100644 --- a/runtime/string_reference.h +++ b/runtime/string_reference.h @@ -41,7 +41,7 @@ struct StringReference { // Compare only the reference and not the string contents. 
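 // (The const added to operator() below lets the comparator be invoked on a
 // const comparator object, which the standard library requires in places such
 // as lookups on a const std::set<StringReference, StringReferenceComparator>.)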
 struct StringReferenceComparator {
-  bool operator()(const StringReference& a, const StringReference& b) {
+  bool operator()(const StringReference& a, const StringReference& b) const {
     if (a.dex_file != b.dex_file) {
       return a.dex_file < b.dex_file;
     }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 201701a510..653a9bd1d4 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -35,6 +35,7 @@
 #include "android-base/stringprintf.h"
 #include "arch/context.h"
+#include "arch/context-inl.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/bit_utils.h"
@@ -128,12 +129,12 @@ static void UnimplementedEntryPoint() {
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints);
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking);
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_active);
 
 void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) {
   CHECK(kUseReadBarrier);
   tls32_.is_gc_marking = is_marking;
-  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
+  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active */ is_marking);
   ResetQuickAllocEntryPointsForThread(is_marking);
 }
 
@@ -3413,11 +3414,10 @@ void Thread::VisitRoots(RootVisitor* visitor) {
     verifier->VisitRoots(visitor, RootInfo(kRootNativeStack, thread_id));
   }
   // Visit roots on this thread's stack
-  Context* context = GetLongJumpContext();
+  RuntimeContextType context;
   RootCallbackVisitor visitor_to_callback(visitor, thread_id);
-  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, context, visitor_to_callback);
+  ReferenceMapVisitor<RootCallbackVisitor, kPrecise> mapper(this, &context, visitor_to_callback);
   mapper.template WalkStack<StackVisitor::CountTransitions::kNo>(false);
-  ReleaseLongJumpContext(context);
   for (instrumentation::InstrumentationStackFrame& frame : *GetInstrumentationStack()) {
     visitor->VisitRootIfNonNull(&frame.this_object_, RootInfo(kRootVMInternal, thread_id));
   }
@@ -3604,4 +3604,9 @@ mirror::Object* Thread::GetPeerFromOtherThread() const {
   return peer;
 }
 
+void Thread::SetReadBarrierEntrypoints() {
+  // Make sure entrypoints aren't null.
+  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, /* is_active */ true);
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index 5251012cbb..6abde5b450 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1180,6 +1180,9 @@ class Thread {
     return false;
   }
 
+  // Set the read barrier marking entrypoints to be non-null.
+  void SetReadBarrierEntrypoints();
+
   static jobject CreateCompileTimePeer(JNIEnv* env,
                                        const char* name,
                                        bool as_daemon,
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index 2e0d866c21..b63eaa40ef 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -73,12 +73,17 @@ ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
       unregistering_count_(0),
       suspend_all_historam_("suspend all histogram", 16, 64),
       long_suspend_(false),
+      shut_down_(false),
       thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
       empty_checkpoint_barrier_(new Barrier(0)) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
 ThreadList::~ThreadList() {
+  CHECK(shut_down_);
+}
+
+void ThreadList::ShutDown() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   // Detach the current thread if necessary. If we failed to start, there might not be any threads.
// We need to detach the current thread here in case there's another thread waiting to join with @@ -102,6 +107,8 @@ ThreadList::~ThreadList() { // TODO: there's an unaddressed race here where a thread may attach during shutdown, see // Thread::Init. SuspendAllDaemonThreadsForShutdown(); + + shut_down_ = true; } bool ThreadList::Contains(Thread* thread) { @@ -1362,6 +1369,7 @@ void ThreadList::SuspendAllDaemonThreadsForShutdown() { void ThreadList::Register(Thread* self) { DCHECK_EQ(self, Thread::Current()); + CHECK(!shut_down_); if (VLOG_IS_ON(threads)) { std::ostringstream oss; @@ -1387,13 +1395,14 @@ void ThreadList::Register(Thread* self) { CHECK(!Contains(self)); list_.push_back(self); if (kUseReadBarrier) { + gc::collector::ConcurrentCopying* const cc = + Runtime::Current()->GetHeap()->ConcurrentCopyingCollector(); // Initialize according to the state of the CC collector. - bool is_gc_marking = - Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking(); - self->SetIsGcMarkingAndUpdateEntrypoints(is_gc_marking); - bool weak_ref_access_enabled = - Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled(); - self->SetWeakRefAccessEnabled(weak_ref_access_enabled); + self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking()); + if (cc->IsUsingReadBarrierEntrypoints()) { + self->SetReadBarrierEntrypoints(); + } + self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled()); } } diff --git a/runtime/thread_list.h b/runtime/thread_list.h index 70917eb0f7..14bef5e2b9 100644 --- a/runtime/thread_list.h +++ b/runtime/thread_list.h @@ -50,6 +50,8 @@ class ThreadList { explicit ThreadList(uint64_t thread_suspend_timeout_ns); ~ThreadList(); + void ShutDown(); + void DumpForSigQuit(std::ostream& os) REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_); // For thread suspend timeout dumps. @@ -219,6 +221,10 @@ class ThreadList { // Whether or not the current thread suspension is long. bool long_suspend_; + // Whether the shutdown function has been called. This is checked in the destructor. It is an + // error to destroy a ThreadList instance without first calling ShutDown(). + bool shut_down_; + // Thread suspension timeout in nanoseconds. 
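The thread_list changes above move teardown out of ~ThreadList() into an explicit ShutDown(), have the destructor merely CHECK(shut_down_), and make Register() refuse to run after shutdown. A compact Java sketch of that explicit-shutdown discipline (hypothetical Registry class, not ART code):

```java
import java.util.ArrayList;
import java.util.List;

class Registry<T> {
    private final List<T> members = new ArrayList<>();
    private boolean shutDown = false;

    // Mirrors the CHECK(!shut_down_) added to ThreadList::Register().
    synchronized void register(T member) {
        if (shutDown) {
            throw new IllegalStateException("register() called after shutDown()");
        }
        members.add(member);
    }

    // Explicit teardown; the destructor-side CHECK(shut_down_) becomes an
    // assertion that this ran before the object was discarded.
    synchronized void shutDown() {
        members.clear();
        shutDown = true;
    }
}
```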
const uint64_t thread_suspend_timeout_ns_; diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc index 945f08b58a..842aa04dfb 100644 --- a/runtime/vdex_file.cc +++ b/runtime/vdex_file.cc @@ -21,11 +21,14 @@ #include <memory> #include "base/logging.h" +#include "base/stl_util.h" #include "base/unix_file/fd_file.h" #include "dex_file.h" +#include "dex_to_dex_decompiler.h" namespace art { +constexpr uint8_t VdexFile::Header::kVdexInvalidMagic[4]; constexpr uint8_t VdexFile::Header::kVdexMagic[4]; constexpr uint8_t VdexFile::Header::kVdexVersion[4]; @@ -54,6 +57,7 @@ VdexFile::Header::Header(uint32_t number_of_dex_files, std::unique_ptr<VdexFile> VdexFile::Open(const std::string& vdex_filename, bool writable, bool low_4gb, + bool unquicken, std::string* error_msg) { if (!OS::FileExists(vdex_filename.c_str())) { *error_msg = "File " + vdex_filename + " does not exist."; @@ -78,7 +82,7 @@ std::unique_ptr<VdexFile> VdexFile::Open(const std::string& vdex_filename, return nullptr; } - return Open(vdex_file->Fd(), vdex_length, vdex_filename, writable, low_4gb, error_msg); + return Open(vdex_file->Fd(), vdex_length, vdex_filename, writable, low_4gb, unquicken, error_msg); } std::unique_ptr<VdexFile> VdexFile::Open(int file_fd, @@ -86,15 +90,17 @@ std::unique_ptr<VdexFile> VdexFile::Open(int file_fd, const std::string& vdex_filename, bool writable, bool low_4gb, + bool unquicken, std::string* error_msg) { - std::unique_ptr<MemMap> mmap(MemMap::MapFile(vdex_length, - writable ? PROT_READ | PROT_WRITE : PROT_READ, - MAP_SHARED, - file_fd, - 0 /* start offset */, - low_4gb, - vdex_filename.c_str(), - error_msg)); + std::unique_ptr<MemMap> mmap(MemMap::MapFile( + vdex_length, + (writable || unquicken) ? PROT_READ | PROT_WRITE : PROT_READ, + unquicken ? MAP_PRIVATE : MAP_SHARED, + file_fd, + 0 /* start offset */, + low_4gb, + vdex_filename.c_str(), + error_msg)); if (mmap == nullptr) { *error_msg = "Failed to mmap file " + vdex_filename + " : " + *error_msg; return nullptr; @@ -106,6 +112,16 @@ std::unique_ptr<VdexFile> VdexFile::Open(int file_fd, return nullptr; } + if (unquicken) { + std::vector<std::unique_ptr<const DexFile>> unique_ptr_dex_files; + if (!vdex->OpenAllDexFiles(&unique_ptr_dex_files, error_msg)) { + return nullptr; + } + Unquicken(MakeNonOwningPointerVector(unique_ptr_dex_files), vdex->GetQuickeningInfo()); + // Update the quickening info size to pretend there isn't any. + reinterpret_cast<Header*>(vdex->mmap_->Begin())->quickening_info_size_ = 0; + } + *error_msg = "Success"; return vdex; } @@ -148,4 +164,62 @@ bool VdexFile::OpenAllDexFiles(std::vector<std::unique_ptr<const DexFile>>* dex_ return true; } +void VdexFile::Unquicken(const std::vector<const DexFile*>& dex_files, + const ArrayRef<const uint8_t>& quickening_info) { + if (quickening_info.size() == 0) { + // If there is no quickening info, we bail early, as the code below expects at + // least the size of quickening data for each method that has a code item. 
+ return; + } + const uint8_t* quickening_info_ptr = quickening_info.data(); + const uint8_t* const quickening_info_end = quickening_info.data() + quickening_info.size(); + for (const DexFile* dex_file : dex_files) { + for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) { + const DexFile::ClassDef& class_def = dex_file->GetClassDef(i); + const uint8_t* class_data = dex_file->GetClassData(class_def); + if (class_data == nullptr) { + continue; + } + ClassDataItemIterator it(*dex_file, class_data); + // Skip fields + while (it.HasNextStaticField()) { + it.Next(); + } + while (it.HasNextInstanceField()) { + it.Next(); + } + + while (it.HasNextDirectMethod()) { + const DexFile::CodeItem* code_item = it.GetMethodCodeItem(); + if (code_item != nullptr) { + uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr); + quickening_info_ptr += sizeof(uint32_t); + optimizer::ArtDecompileDEX(*code_item, + ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size), + /* decompile_return_instruction */ false); + quickening_info_ptr += quickening_size; + } + it.Next(); + } + + while (it.HasNextVirtualMethod()) { + const DexFile::CodeItem* code_item = it.GetMethodCodeItem(); + if (code_item != nullptr) { + uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr); + quickening_info_ptr += sizeof(uint32_t); + optimizer::ArtDecompileDEX(*code_item, + ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size), + /* decompile_return_instruction */ false); + quickening_info_ptr += quickening_size; + } + it.Next(); + } + DCHECK(!it.HasNext()); + } + } + if (quickening_info_ptr != quickening_info_end) { + LOG(FATAL) << "Failed to use all quickening info"; + } +} + } // namespace art diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h index 9840555bbd..ece5491472 100644 --- a/runtime/vdex_file.h +++ b/runtime/vdex_file.h @@ -61,6 +61,8 @@ class VdexFile { uint32_t GetQuickeningInfoSize() const { return quickening_info_size_; } uint32_t GetNumberOfDexFiles() const { return number_of_dex_files_; } + static constexpr uint8_t kVdexInvalidMagic[] = { 'w', 'd', 'e', 'x' }; + private: static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' }; static constexpr uint8_t kVdexVersion[] = { '0', '0', '5', '\0' }; // access flags @@ -71,6 +73,8 @@ class VdexFile { uint32_t dex_size_; uint32_t verifier_deps_size_; uint32_t quickening_info_size_; + + friend class VdexFile; }; typedef uint32_t VdexChecksum; @@ -79,6 +83,7 @@ class VdexFile { static std::unique_ptr<VdexFile> Open(const std::string& vdex_filename, bool writable, bool low_4gb, + bool unquicken, std::string* error_msg); // Returns nullptr if the vdex file cannot be opened or is not valid. @@ -87,6 +92,7 @@ class VdexFile { const std::string& vdex_filename, bool writable, bool low_4gb, + bool unquicken, std::string* error_msg); const uint8_t* Begin() const { return mmap_->Begin(); } @@ -124,12 +130,14 @@ class VdexFile { return reinterpret_cast<const uint32_t*>(Begin() + sizeof(Header))[dex_file_index]; } - // Opens all the dex files contained in this vdex file. This is currently - // used for dumping tools only, and has not been tested for use by the - // remainder of the runtime. + // Opens all the dex files contained in this vdex file. bool OpenAllDexFiles(std::vector<std::unique_ptr<const DexFile>>* dex_files, std::string* error_msg); + // In-place unquicken the given `dex_files` based on `quickening_info`. 
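As the Unquicken() loop above shows, GetQuickeningInfo() is consumed as a flat stream with one record per method that has a code item: a uint32 payload size followed by that many bytes, with a final check that the stream was used up exactly. A small Java sketch of that framing (hypothetical reader class; dex data is little-endian):

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

class QuickeningInfoReader {
    private final ByteBuffer buf;

    QuickeningInfoReader(byte[] quickeningInfo) {
        this.buf = ByteBuffer.wrap(quickeningInfo).order(ByteOrder.LITTLE_ENDIAN);
    }

    // Next per-method record: a uint32 length followed by that many payload bytes.
    byte[] nextMethodRecord() {
        int size = buf.getInt();
        byte[] payload = new byte[size];
        buf.get(payload);
        return payload;
    }

    // Mirrors the trailing check: it is fatal to leave quickening info unused.
    void expectFullyConsumed() {
        if (buf.hasRemaining()) {
            throw new IllegalStateException("Failed to use all quickening info");
        }
    }
}
```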
+ static void Unquicken(const std::vector<const DexFile*>& dex_files, + const ArrayRef<const uint8_t>& quickening_info); + private: explicit VdexFile(MemMap* mmap) : mmap_(mmap) {} diff --git a/runtime/vdex_file_test.cc b/runtime/vdex_file_test.cc index 909e117ccc..ced6e28577 100644 --- a/runtime/vdex_file_test.cc +++ b/runtime/vdex_file_test.cc @@ -36,10 +36,12 @@ TEST_F(VdexFileTest, OpenEmptyVdex) { tmp.GetFilename(), /*writable*/false, /*low_4gb*/false, + /*quicken*/false, &error_msg); EXPECT_TRUE(vdex == nullptr); - vdex = VdexFile::Open(tmp.GetFilename(), /*writable*/false, /*low_4gb*/false, &error_msg); + vdex = VdexFile::Open( + tmp.GetFilename(), /*writable*/false, /*low_4gb*/false, /*quicken*/ false, &error_msg); EXPECT_TRUE(vdex == nullptr); } diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc index 2b0c6127af..cb9c6052a2 100644 --- a/runtime/verifier/method_verifier.cc +++ b/runtime/verifier/method_verifier.cc @@ -3764,7 +3764,7 @@ const RegType& MethodVerifier::ResolveClassAndCheckAccess(dex::TypeIndex class_i const RegType& referrer = GetDeclaringClass(); if (!referrer.IsUnresolvedTypes() && !referrer.CanAccess(*result)) { Fail(VERIFY_ERROR_ACCESS_CLASS) << "illegal class access: '" - << referrer << "' -> '" << result << "'"; + << referrer << "' -> '" << *result << "'"; } } return *result; diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java index 0e69a966f5..71167c146c 100644 --- a/test/030-bad-finalizer/src/Main.java +++ b/test/030-bad-finalizer/src/Main.java @@ -94,9 +94,7 @@ public class Main { /* spin for a bit */ long start, end; start = System.nanoTime(); - for (int i = 0; i < 1000000; i++) { - j++; - } + snooze(2000); end = System.nanoTime(); System.out.println("Finalizer done spinning."); diff --git a/test/117-nopatchoat/nopatchoat.cc b/test/117-nopatchoat/nopatchoat.cc index 3236bde5a2..2248fc4efd 100644 --- a/test/117-nopatchoat/nopatchoat.cc +++ b/test/117-nopatchoat/nopatchoat.cc @@ -56,7 +56,7 @@ class NoPatchoatTest { const OatFile* oat_file = oat_dex_file->GetOatFile(); return !oat_file->IsPic() - && CompilerFilter::IsBytecodeCompilationEnabled(oat_file->GetCompilerFilter()); + && CompilerFilter::IsAotCompilationEnabled(oat_file->GetCompilerFilter()); } }; diff --git a/test/121-modifiers/info.txt b/test/121-modifiers/info.txt index 129aee8ae6..335df53f3d 100644 --- a/test/121-modifiers/info.txt +++ b/test/121-modifiers/info.txt @@ -14,5 +14,5 @@ mv Inf.out classes/Inf.class mv NonInf.out classes/NonInf.class mv Main.class A.class A\$B.class A\$C.class classes/ dx --debug --dex --output=classes.dex classes -baksmali classes.dex +baksmali disassemble classes.dex mv out/*.smali smali/ diff --git a/test/157-void-class/run b/test/157-void-class/run index 59e852c8cd..8c6159fc4c 100755 --- a/test/157-void-class/run +++ b/test/157-void-class/run @@ -14,9 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Let the test build its own core image with --no-image and use verify-profile, +# Let the test build its own core image with --no-image and use verify, # so that the compiler does not try to initialize classes. This leaves the # java.lang.Void compile-time verified but uninitialized. 
./default-run "$@" --no-image \ --runtime-option -Ximage-compiler-option \ - --runtime-option --compiler-filter=verify-profile + --runtime-option --compiler-filter=verify diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java index 330aa7416e..a538f52fa6 100644 --- a/test/476-checker-ctor-memory-barrier/src/Main.java +++ b/test/476-checker-ctor-memory-barrier/src/Main.java @@ -17,8 +17,8 @@ // TODO: Add more tests after we can inline functions with calls. class ClassWithoutFinals { - /// CHECK-START: void ClassWithoutFinals.<init>() register (after) - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: void ClassWithoutFinals.<init>() inliner (after) + /// CHECK-NOT: ConstructorFence public ClassWithoutFinals() {} } @@ -33,17 +33,40 @@ class ClassWithFinals { // should not inline this constructor } - /// CHECK-START: void ClassWithFinals.<init>() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void ClassWithFinals.<init>() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid + + /* + * Check that the correct assembly instructions are selected for a Store/Store fence. + * + * - ARM variants: DMB ISHST (store-store fence for inner shareable domain) + * - Intel variants: no-op (store-store does not need a fence). + */ + + /// CHECK-START-ARM64: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NEXT: dmb ishst + + /// CHECK-START-ARM: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NEXT: dmb ishst + + /// CHECK-START-X86_64: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NOT: {{[slm]}}fence + + /// CHECK-START-X86: void ClassWithFinals.<init>() disassembly (after) + /// CHECK: ConstructorFence + /// CHECK-NOT: {{[slm]}}fence public ClassWithFinals() { // Exactly one constructor barrier. x = 0; } - /// CHECK-START: void ClassWithFinals.<init>(int) register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void ClassWithFinals.<init>(int) inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid public ClassWithFinals(int x) { // This should have exactly two barriers: @@ -55,11 +78,11 @@ class ClassWithFinals { } class InheritFromClassWithFinals extends ClassWithFinals { - /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void InheritFromClassWithFinals.<init>() register (after) + /// CHECK-START: void InheritFromClassWithFinals.<init>() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public InheritFromClassWithFinals() { // Should inline the super constructor. @@ -67,23 +90,23 @@ class InheritFromClassWithFinals extends ClassWithFinals { // Exactly one constructor barrier here. 
} - /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after) + /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after) /// CHECK: InvokeStaticOrDirect - /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) register (after) - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: void InheritFromClassWithFinals.<init>(boolean) inliner (after) + /// CHECK-NOT: ConstructorFence public InheritFromClassWithFinals(boolean cond) { super(cond); // should not inline the super constructor } - /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK-NOT: ConstructorFence /// CHECK: ReturnVoid - /// CHECK-START: void InheritFromClassWithFinals.<init>(int) register (after) + /// CHECK-START: void InheritFromClassWithFinals.<init>(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public InheritFromClassWithFinals(int unused) { // Should inline the super constructor and insert a memory barrier. @@ -96,21 +119,21 @@ class InheritFromClassWithFinals extends ClassWithFinals { class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals { final int y; - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() register (after) + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public HaveFinalsAndInheritFromClassWithFinals() { // Should inline the super constructor and keep the memory barrier. 
y = 0; } - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) register (after) + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(boolean) inliner (after) /// CHECK: InvokeStaticOrDirect - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid public HaveFinalsAndInheritFromClassWithFinals(boolean cond) { super(cond); @@ -118,15 +141,15 @@ class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals { y = 0; } - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) register (after) + /// CHECK-START: void HaveFinalsAndInheritFromClassWithFinals.<init>(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public HaveFinalsAndInheritFromClassWithFinals(int unused) { // Should inline the super constructor and keep both memory barriers. @@ -141,55 +164,55 @@ class HaveFinalsAndInheritFromClassWithFinals extends ClassWithFinals { public class Main { - /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after) + /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after) /// CHECK: InvokeStaticOrDirect - /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() register (after) - /// CHECK-NOT: MemoryBarrier kind:StoreStore + /// CHECK-START: ClassWithFinals Main.noInlineNoConstructorBarrier() inliner (after) + /// CHECK-NOT: ConstructorFence public static ClassWithFinals noInlineNoConstructorBarrier() { return new ClassWithFinals(false); // should not inline the constructor } - /// CHECK-START: void Main.inlineNew() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew() register (after) + /// CHECK-START: void Main.inlineNew() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew() { new ClassWithFinals(); } - /// CHECK-START: void Main.inlineNew1() register (after) - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew1() inliner (after) + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew1() register (after) + /// CHECK-START: void Main.inlineNew1() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew1() { new InheritFromClassWithFinals(); } - /// CHECK-START: void Main.inlineNew2() register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew2() inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew2() register (after) + /// CHECK-START: void Main.inlineNew2() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew2() { new HaveFinalsAndInheritFromClassWithFinals(); } -
/// CHECK-START: void Main.inlineNew3() register (after) - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore - /// CHECK: MemoryBarrier kind:StoreStore + /// CHECK-START: void Main.inlineNew3() inliner (after) + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence + /// CHECK: ConstructorFence /// CHECK-NEXT: ReturnVoid - /// CHECK-START: void Main.inlineNew3() register (after) + /// CHECK-START: void Main.inlineNew3() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect public static void inlineNew3() { new HaveFinalsAndInheritFromClassWithFinals(); diff --git a/test/530-checker-lse-ctor-fences/expected.txt b/test/530-checker-lse-ctor-fences/expected.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/530-checker-lse-ctor-fences/expected.txt diff --git a/test/530-checker-lse-ctor-fences/info.txt b/test/530-checker-lse-ctor-fences/info.txt new file mode 100644 index 0000000000..ccc7b47de9 --- /dev/null +++ b/test/530-checker-lse-ctor-fences/info.txt @@ -0,0 +1 @@ +Checker test for testing load-store elimination with final fields (constructor fences). diff --git a/test/530-checker-lse-ctor-fences/src/Main.java b/test/530-checker-lse-ctor-fences/src/Main.java new file mode 100644 index 0000000000..7755875b65 --- /dev/null +++ b/test/530-checker-lse-ctor-fences/src/Main.java @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// This base class has a single final field; +// the constructor should have one fence. +class Circle { + Circle(double radius) { + this.radius = radius; + } + public double getRadius() { + return radius; + } + public double getArea() { + return radius * radius * Math.PI; + } + + public double getCircumference() { + return 2 * Math.PI * radius; + } + + private final double radius; +} + +// This subclass adds an extra final field; +// there should be an extra constructor fence added +// (for a total of 2 after inlining). 
+class Ellipse extends Circle { + Ellipse(double vertex, double covertex) { + super(vertex); + + this.covertex = covertex; + } + + public double getVertex() { + return getRadius(); + } + + public double getCovertex() { + return covertex; + } + + @Override + public double getArea() { + return getRadius() * covertex * Math.PI; + } + + private final double covertex; +} + +class CalcCircleAreaOrCircumference { + public static final int TYPE_AREA = 0; + public static final int TYPE_CIRCUMFERENCE = 1; + + double value; + + public CalcCircleAreaOrCircumference(int type) { + this.type = type; + } + + final int type; +} + +public class Main { + + /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: ConstructorFence + /// CHECK: InstanceFieldGet + + /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (after) + /// CHECK-NOT: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: ConstructorFence + /// CHECK-NOT: InstanceFieldGet + + // Make sure the constructor fence gets eliminated when the allocation is eliminated. + static double calcCircleArea(double radius) { + return new Circle(radius).getArea(); + } + + /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: InstanceFieldSet + /// CHECK: ConstructorFence + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldGet + + /// CHECK-START: double Main.calcEllipseArea(double, double) load_store_elimination (after) + /// CHECK-NOT: NewInstance + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: ConstructorFence + /// CHECK-NOT: InstanceFieldGet + + // Multiple constructor fences can accumulate through inheritance, make sure + // they are all eliminated when the allocation is eliminated. + static double calcEllipseArea(double vertex, double covertex) { + return new Ellipse(vertex, covertex).getArea(); + } + + /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (before) + /// CHECK: NewInstance + /// CHECK: InstanceFieldSet + /// CHECK: ConstructorFence + /// CHECK: InstanceFieldGet + + /// CHECK-START: double Main.calcCircleAreaOrCircumference(double, boolean) load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK-NOT: ConstructorFence + + // + // The object allocation will not be eliminated by LSE because of aliased stores. + // However the object is still a singleton, so it never escapes the current thread. + // There should not be a constructor fence here after LSE. + static double calcCircleAreaOrCircumference(double radius, boolean area_or_circumference) { + CalcCircleAreaOrCircumference calc = + new CalcCircleAreaOrCircumference( + area_or_circumference ? CalcCircleAreaOrCircumference.TYPE_AREA : + CalcCircleAreaOrCircumference.TYPE_CIRCUMFERENCE); + + if (area_or_circumference) { + // Area + calc.value = Math.PI * Math.PI * radius; + } else { + // Circumference + calc.value = 2 * Math.PI * radius; + } + + return calc.value; + } + + /// CHECK-START: Circle Main.makeCircle(double) load_store_elimination (after) + /// CHECK: NewInstance + /// CHECK: ConstructorFence + + // The object allocation is considered a singleton by LSE, + // but we cannot eliminate the new because it is returned. + // + // The constructor fence must also not be removed because the object could escape the + // current thread (in the caller). 
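That escape hazard is the reason the fence must survive: under the Java memory model, any thread that reads a published reference is guaranteed to see the values stored to its final fields in the constructor. A deliberately simplified illustration of the pattern being protected (hypothetical classes, not part of this test); the makeCircle helper that follows plays the role of writer() by letting the instance escape to its caller:

```java
class Box {
    final int value;           // final field backed by the constructor fence
    Box(int v) { value = v; }
}

class Publisher {
    static Box shared;         // intentionally racy, unsynchronized publication

    static void writer() {
        shared = new Box(42);  // the fence orders the field store before this store
    }

    static void reader() {
        Box b = shared;
        if (b != null && b.value != 42) {
            // Unreachable under the JMM thanks to final-field semantics; it could
            // be observed on weakly ordered hardware if the fence were removed.
            throw new AssertionError("saw a half-constructed Box");
        }
    }
}
```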
+ static Circle makeCircle(double radius) { + return new Circle(radius); + } + + static void assertIntEquals(int result, int expected) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + static void assertFloatEquals(float result, float expected) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + static void assertDoubleEquals(double result, double expected) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + static void assertInstanceOf(Object result, Class<?> expected) { + if (result.getClass() != expected) { + throw new Error("Expected type: " + expected + ", found : " + result.getClass()); + } + } + + public static void main(String[] args) { + assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcCircleArea(Math.PI)); + assertDoubleEquals(Math.PI * Math.PI * Math.PI, calcEllipseArea(Math.PI, Math.PI)); + assertDoubleEquals(2 * Math.PI * Math.PI, calcCircleAreaOrCircumference(Math.PI, false)); + assertInstanceOf(makeCircle(Math.PI), Circle.class); + } + + static boolean sFlag; +} diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java index 0fe3d873ea..491a9a12de 100644 --- a/test/530-checker-lse2/src/Main.java +++ b/test/530-checker-lse2/src/Main.java @@ -76,16 +76,27 @@ public class Main { /// CHECK-DAG: Deoptimize /// CHECK-DAG: Deoptimize /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance + /// CHECK-DAG: ConstructorFence /// CHECK-DAG: NewInstance /// CHECK-DAG: NewInstance /// CHECK-DAG: NewInstance @@ -95,9 +106,14 @@ public class Main { /// CHECK-DAG: Deoptimize /// CHECK-DAG: Deoptimize /// CHECK-NOT: NewInstance + /// CHECK-NOT: ConstructorFence private float testMethod() { { + // Each of the "new" statements here will initialize an object with final fields, + // which after inlining will also retain a constructor fence. + // + // After LSE we remove the 'new-instance' and the associated constructor fence. int lI0 = (-1456058746 << mI); mD = ((double)(int)(double) mD); for (int i0 = 56 - 1; i0 >= 0; i0--) { diff --git a/test/551-checker-shifter-operand/build b/test/551-checker-shifter-operand/build index a78021f349..027a0ea5cd 100644 --- a/test/551-checker-shifter-operand/build +++ b/test/551-checker-shifter-operand/build @@ -168,7 +168,7 @@ fi if [ "${HAS_SMALI}" = "true" ]; then # Compile Smali classes - ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` + ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` # Don't bother with dexmerger if we provide our own main function in a smali file. 
if [ ${SKIP_DX_MERGER} = "false" ]; then diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java index e9673987da..bf09a6aa5e 100644 --- a/test/551-checker-shifter-operand/src/Main.java +++ b/test/551-checker-shifter-operand/src/Main.java @@ -642,6 +642,123 @@ public class Main { // Each test line below should see one merge. + // + /// CHECK-START: void Main.$opt$validateShiftInt(int, int) instruction_simplifier$after_inlining (before) + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK-NOT: Shl + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK-NOT: Shl + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK-NOT: UShr + // + // Note: simplification after inlining removes `b << 32`, `b >> 32` and `b >>> 32`. + // + /// CHECK-START: void Main.$opt$validateShiftInt(int, int) instruction_simplifier$after_inlining (after) + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK-NOT: Shl + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK-NOT: Shl + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK-NOT: UShr + // + // Note: simplification followed by GVN exposes the common subexpressions between shifts with larger distance + // `b << 62`, `b << 63` etc. and the equivalent smaller distances. + // + /// CHECK-START: void Main.$opt$validateShiftInt(int, int) GVN (after) + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK: Shl + /// CHECK-NOT: Shl + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK: Shr + /// CHECK-NOT: Shl + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK: UShr + /// CHECK-NOT: UShr + // /// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after) /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp @@ -670,14 +787,7 @@ public class Main { /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp /// CHECK-NOT: DataProcWithShifterOp - // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by generic simplifier. 
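The deleted DataProcWithShifterOp expectations and the removed note reflect Java's shift semantics: an int shift uses only the low five bits of its distance and a long shift the low six, so `b << 32`, `b >> 32` and `b >>> 32` are identities that the generic simplifier now removes before the ARM-specific passes run. A quick runnable check of the rule:

```java
public class ShiftMask {
    public static void main(String[] args) {
        int b = 0x12345678;
        // int shifts use (distance & 31): 32 acts as 0, 33 acts as 1.
        System.out.println((b << 32) == b);           // true
        System.out.println((b >>> 33) == (b >>> 1));  // true

        long l = 0x0123456789abcdefL;
        // long shifts use (distance & 63): 64 acts as 0, 65 acts as 1.
        System.out.println((l << 64) == l);           // true
        System.out.println((l >>> 65) == (l >>> 1));  // true
    }
}
```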
/// CHECK-START-ARM: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm (after) /// CHECK-NOT: Shl @@ -712,14 +822,7 @@ public class Main { /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp - /// CHECK: DataProcWithShifterOp /// CHECK-NOT: DataProcWithShifterOp - // Note: `b << 32`, `b >> 32` and `b >>> 32` are optimized away by generic simplifier. /// CHECK-START-ARM64: void Main.$opt$validateShiftInt(int, int) instruction_simplifier_arm64 (after) /// CHECK-NOT: Shl diff --git a/test/569-checker-pattern-replacement/src/Main.java b/test/569-checker-pattern-replacement/src/Main.java index 345e9fd222..26d87b1f8a 100644 --- a/test/569-checker-pattern-replacement/src/Main.java +++ b/test/569-checker-pattern-replacement/src/Main.java @@ -331,7 +331,7 @@ public class Main { /// CHECK-START: double Main.constructBase() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructBase() { @@ -347,7 +347,7 @@ public class Main { /// CHECK-START: double Main.constructBase(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(int) inliner (after) /// CHECK-DAG: <<Value:i\d+>> ParameterValue @@ -371,7 +371,7 @@ public class Main { /// CHECK-START: double Main.constructBaseWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructBaseWith0() { @@ -387,7 +387,7 @@ public class Main { /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: java.lang.String Main.constructBase(java.lang.String) inliner (after) /// CHECK-DAG: <<Value:l\d+>> ParameterValue @@ -411,7 +411,7 @@ public class Main { /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: java.lang.String Main.constructBaseWithNullString() inliner (after) /// CHECK-NOT: InstanceFieldSet @@ -431,7 +431,7 @@ public class Main { /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(double, java.lang.Object) inliner (after) /// CHECK-DAG: <<DValue:d\d+>> ParameterValue @@ -460,7 +460,7 @@ public class Main { /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(int, double, java.lang.Object) inliner (after) /// CHECK-DAG: <<IValue:i\d+>> ParameterValue @@ -493,7 +493,7 @@ public class Main { /// CHECK-START: double Main.constructBaseWith0DoubleNull(double) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double 
Main.constructBaseWith0DoubleNull(double) inliner (after) /// CHECK-DAG: <<DValue:d\d+>> ParameterValue @@ -543,7 +543,7 @@ public class Main { /// CHECK-START: double Main.constructBase(double) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(double) inliner (after) /// CHECK-DAG: <<Value:d\d+>> ParameterValue @@ -567,7 +567,7 @@ public class Main { /// CHECK-START: double Main.constructBaseWith0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructBaseWith0d() { @@ -605,7 +605,7 @@ public class Main { /// CHECK-START: double Main.constructBase(int, long) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructBase(int, long) inliner (after) /// CHECK-DAG: <<IValue:i\d+>> ParameterValue @@ -628,7 +628,7 @@ public class Main { /// CHECK-START: double Main.constructDerived() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerived() { @@ -644,7 +644,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(int) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(int) inliner (after) /// CHECK-DAG: <<Value:i\d+>> ParameterValue @@ -668,7 +668,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWith0() { @@ -684,7 +684,7 @@ public class Main { /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: java.lang.String Main.constructDerived(java.lang.String) inliner (after) /// CHECK-NOT: InstanceFieldSet @@ -702,7 +702,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(double) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(double) inliner (after) /// CHECK-DAG: <<Value:d\d+>> ParameterValue @@ -726,7 +726,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWith0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWith0d() { @@ -744,7 +744,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object) inliner (after) /// CHECK-DAG: <<DValue:d\d+>> ParameterValue @@ -794,7 +794,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(float) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(float) inliner (after) /// 
CHECK-DAG: <<Value:f\d+>> ParameterValue @@ -821,7 +821,7 @@ public class Main { /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-START: double Main.constructDerived(int, double, java.lang.Object, float) inliner (after) /// CHECK-DAG: <<IValue:i\d+>> ParameterValue @@ -852,7 +852,7 @@ public class Main { /// CHECK-START: int Main.constructBaseWithFinalField() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructBaseWithFinalField() { @@ -873,7 +873,7 @@ public class Main { /// CHECK-DAG: <<Value:i\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: int Main.constructBaseWithFinalField(int) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -892,7 +892,7 @@ public class Main { /// CHECK-START: int Main.constructBaseWithFinalFieldWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructBaseWithFinalFieldWith0() { @@ -907,7 +907,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalField() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalField() { @@ -928,7 +928,7 @@ public class Main { /// CHECK-DAG: <<Value:i\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: double Main.constructDerivedWithFinalField(int) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -947,7 +947,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalFieldWith0() { @@ -968,7 +968,7 @@ public class Main { /// CHECK-DAG: <<Value:d\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: double Main.constructDerivedWithFinalField(double) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -987,7 +987,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalFieldWith0d() { @@ -1009,7 +1009,7 @@ public class Main { /// CHECK-DAG: <<Value:d\d+>> ParameterValue /// CHECK-DAG: <<Obj:l\d+>> NewInstance /// CHECK-DAG: InstanceFieldSet [<<Obj>>,<<Value>>] - /// CHECK-DAG: MemoryBarrier + /// CHECK-DAG: ConstructorFence /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner (after) /// CHECK-DAG: InstanceFieldSet @@ -1017,8 +1017,8 @@ public class Main { /// CHECK-NOT: InstanceFieldSet /// CHECK-START: double Main.constructDerivedWithFinalField(int, double) inliner 
(after) - /// CHECK-DAG: MemoryBarrier - /// CHECK-NOT: MemoryBarrier + /// CHECK-DAG: ConstructorFence + /// CHECK-NOT: ConstructorFence public static double constructDerivedWithFinalField(int intValue, double doubleValue) { DerivedWithFinalField d = new DerivedWithFinalField(intValue, doubleValue); @@ -1034,7 +1034,7 @@ public class Main { /// CHECK-START: double Main.constructDerivedWithFinalFieldWith0And0d() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static double constructDerivedWithFinalFieldWith0And0d() { @@ -1049,7 +1049,7 @@ public class Main { /// CHECK-START: int Main.constructDerivedInSecondDex() inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDex() { @@ -1070,7 +1070,7 @@ public class Main { /// CHECK-DAG: InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init> /// CHECK-START: int Main.constructDerivedInSecondDex(int) inliner (after) - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDex(int intValue) { @@ -1091,7 +1091,7 @@ public class Main { /// CHECK-DAG: InvokeStaticOrDirect [<<Obj>>,<<Value>>{{(,[ij]\d+)?}}] method_name:DerivedInSecondDex.<init> /// CHECK-START: int Main.constructDerivedInSecondDexWith0() inliner (after) - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDexWith0() { @@ -1107,7 +1107,7 @@ public class Main { /// CHECK-START: int Main.constructDerivedInSecondDex(long) inliner (after) /// CHECK-NOT: InvokeStaticOrDirect - /// CHECK-NOT: MemoryBarrier + /// CHECK-NOT: ConstructorFence /// CHECK-NOT: InstanceFieldSet public static int constructDerivedInSecondDex(long dummy) { diff --git a/test/595-profile-saving/run b/test/595-profile-saving/run index 068ad03ce0..fce6ac15d8 100644 --- a/test/595-profile-saving/run +++ b/test/595-profile-saving/run @@ -15,13 +15,13 @@ # limitations under the License. # Use -# --compiler-filter=interpret-only to make sure that the test is not compiled AOT +# --compiler-filter=quicken to make sure that the test is not compiled AOT # and to make sure the test is not compiled when loaded (by PathClassLoader) # -Xjitsaveprofilinginfo to enable profile saving # -Xusejit:false to disable jit and only test profiles. exec ${RUN} \ - -Xcompiler-option --compiler-filter=interpret-only \ - --runtime-option '-Xcompiler-option --compiler-filter=interpret-only' \ + -Xcompiler-option --compiler-filter=quicken \ + --runtime-option '-Xcompiler-option --compiler-filter=quicken' \ --runtime-option -Xjitsaveprofilinginfo \ --runtime-option -Xusejit:false \ "${@}" diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java index 2d9daf1d43..0080ffa464 100644 --- a/test/618-checker-induction/src/Main.java +++ b/test/618-checker-induction/src/Main.java @@ -468,6 +468,19 @@ public class Main { return sum; } + // Ensure double induction does not "overshoot" the subscript range. + private static int getIncr2(int[] arr) { + for (int i = 0; i < 12; ) { + arr[i++] = 30; + arr[i++] = 29; + } + int sum = 0; + for (int i = 0; i < 12; i++) { + sum += arr[i]; + } + return sum; + } + // TODO: handle as closed/empty eventually? 
static int mainIndexReturnedN(int n) { int i; @@ -869,6 +882,7 @@ public class Main { expectEquals(1, periodicReturned9()); expectEquals(0, periodicReturned10()); expectEquals(21, getSum21()); + expectEquals(354, getIncr2(new int[12])); for (int n = -4; n < 4; n++) { int tc = (n <= 0) ? 0 : n; expectEquals(tc, mainIndexReturnedN(n)); diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java index 2b30986ab3..520e7c367c 100644 --- a/test/623-checker-loop-regressions/src/Main.java +++ b/test/623-checker-loop-regressions/src/Main.java @@ -280,7 +280,17 @@ public class Main { } } - // If vectorized, string encoding should be dealt with. + /// CHECK-START: void Main.string2Bytes(char[], java.lang.String) loop_optimization (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.string2Bytes(char[], java.lang.String) loop_optimization (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore loop:<<Loop>> outer_loop:none + // + // NOTE: should correctly deal with compressed and uncompressed cases. private static void string2Bytes(char[] a, String b) { int min = Math.min(a.length, b.length()); for (int i = 0; i < min; i++) { @@ -310,6 +320,37 @@ } } + /// CHECK-START: void Main.oneBoth(short[], char[]) loop_optimization (before) + /// CHECK-DAG: <<One:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<One>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<One>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.oneBoth(short[], char[]) loop_optimization (after) + /// CHECK-DAG: <<One:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + // Bug b/37764324: integral same-length packed types can be mixed freely. + private static void oneBoth(short[] a, char[] b) { + for (int i = 0; i < Math.min(a.length, b.length); i++) { + a[i] = 1; + b[i] = 1; + } + } + + // Bug b/37768917: potential dynamic BCE vs. loop optimizations + // case should be dealt with correctly (used to DCHECK fail).
+ private static void arrayInTripCount(int[] a, byte[] b, int n) { + for (int k = 0; k < n; k++) { + for (int i = 0, u = a[0]; i < u; i++) { + b[i] += 2; + } + } + } + public static void main(String[] args) { expectEquals(10, earlyExitFirst(-1)); for (int i = 0; i <= 10; i++) { @@ -390,9 +431,28 @@ public class Main { for (int i = 0; i < aa.length; i++) { expectEquals(aa[i], bb.charAt(i)); } + String cc = "\u1010\u2020llo world how are y\u3030\u4040"; + string2Bytes(aa, cc); + for (int i = 0; i < aa.length; i++) { + expectEquals(aa[i], cc.charAt(i)); + } envUsesInCond(); + short[] dd = new short[23]; + oneBoth(dd, aa); + for (int i = 0; i < aa.length; i++) { + expectEquals(aa[i], 1); + expectEquals(dd[i], 1); + } + + xx[0] = 10; + byte[] bt = new byte[10]; + arrayInTripCount(xx, bt, 20); + for (int i = 0; i < bt.length; i++) { + expectEquals(40, bt[i]); + } + System.out.println("passed"); } diff --git a/test/628-vdex/run b/test/628-vdex/run index 4cbcea3b7e..bf0ac910c1 100644 --- a/test/628-vdex/run +++ b/test/628-vdex/run @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -exec ${RUN} -Xcompiler-option --compiler-filter=verify-profile --vdex "${@}" +exec ${RUN} -Xcompiler-option --compiler-filter=verify --vdex "${@}" diff --git a/test/634-vdex-duplicate/run b/test/634-vdex-duplicate/run index 1ccb84150b..571ccd90e1 100644 --- a/test/634-vdex-duplicate/run +++ b/test/634-vdex-duplicate/run @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -exec ${RUN} -Xcompiler-option --compiler-filter=verify-profile --vdex-filter speed --vdex "${@}" +exec ${RUN} -Xcompiler-option --compiler-filter=verify --vdex-filter speed --vdex "${@}" diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java index 0f7452b045..10b20b83b0 100644 --- a/test/640-checker-byte-simd/src/Main.java +++ b/test/640-checker-byte-simd/src/Main.java @@ -179,6 +179,11 @@ public class Main { a[i] >>>= 33; // 1, since & 31 } + static void shl9() { + for (int i = 0; i < 128; i++) + a[i] <<= 9; // yields all-zeros + } + // // Loop bounds. // @@ -259,6 +264,10 @@ public class Main { shr33(); for (int i = 0; i < 128; i++) { expectEquals((byte) 0x09, a[i], "shr33"); + } + shl9(); + for (int i = 0; i < 128; i++) { + expectEquals((byte) 0x00, a[i], "shl9"); a[i] = (byte) 0xf0; // reset } not(); diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java index ba1e142668..97048eb951 100644 --- a/test/640-checker-int-simd/src/Main.java +++ b/test/640-checker-int-simd/src/Main.java @@ -76,6 +76,7 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START: void Main.div(int) loop_optimization (after) + /// CHECK-NOT: VecDiv // // Not supported on any architecture. // @@ -159,14 +160,81 @@ public class Main { // Shift sanity. // + // Expose constants to optimizing compiler, but not to front-end. 
+ public static int $opt$inline$IntConstant32() { return 32; } + public static int $opt$inline$IntConstant33() { return 33; } + public static int $opt$inline$IntConstantMinus254() { return -254; } + + /// CHECK-START: void Main.shr32() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 32 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr32() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr32() loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>> outer_loop:none static void shr32() { + // TODO: remove a[i] = a[i] altogether? for (int i = 0; i < 128; i++) - a[i] >>>= 32; // 0, since & 31 + a[i] >>>= $opt$inline$IntConstant32(); // 0, since & 31 } + /// CHECK-START: void Main.shr33() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 33 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr33() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr33() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none static void shr33() { for (int i = 0; i < 128; i++) - a[i] >>>= 33; // 1, since & 31 + a[i] >>>= $opt$inline$IntConstant33(); // 1, since & 31 + } + + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant -254 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: 
<<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:i\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none + static void shrMinus254() { + for (int i = 0; i < 128; i++) + a[i] >>>= $opt$inline$IntConstantMinus254(); // 2, since & 31 } // @@ -240,9 +308,14 @@ public class Main { for (int i = 0; i < 128; i++) { expectEquals(0x1fffffff, a[i], "shr33"); } + shrMinus254(); + for (int i = 0; i < 128; i++) { + expectEquals(0x07ffffff, a[i], "shrMinus254"); + } + // Bit-wise not operator. not(); for (int i = 0; i < 128; i++) { - expectEquals(0xe0000000, a[i], "not"); + expectEquals(0xf8000000, a[i], "not"); } // Done. System.out.println("passed"); diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java index 56411821f1..e42c716d19 100644 --- a/test/640-checker-long-simd/src/Main.java +++ b/test/640-checker-long-simd/src/Main.java @@ -74,6 +74,7 @@ public class Main { /// CHECK-DAG: ArraySet loop:<<Loop>> outer_loop:none // /// CHECK-START: void Main.div(long) loop_optimization (after) + /// CHECK-NOT: VecDiv // // Not supported on any architecture. // @@ -157,14 +158,81 @@ public class Main { // Shift sanity. // + // Expose constants to optimizing compiler, but not to front-end. + public static int $opt$inline$IntConstant64() { return 64; } + public static int $opt$inline$IntConstant65() { return 65; } + public static int $opt$inline$IntConstantMinus254() { return -254; } + + /// CHECK-START: void Main.shr64() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 64 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr64() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<Get>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr64() loop_optimization (after) + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>> outer_loop:none static void shr64() { + // TODO: remove a[i] = a[i] altogether? 
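+    // (64 & 63 == 0, so the simplifier drops the shift entirely and the loop
+    // body degenerates to a[i] = a[i]; hence the TODO above and the
+    // VecLoad/VecStore-only pattern in the ARM64 checker lines.)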
for (int i = 0; i < 128; i++) - a[i] >>>= 64; // 0, since & 63 + a[i] >>>= $opt$inline$IntConstant64(); // 0, since & 63 } + /// CHECK-START: void Main.shr65() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 65 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shr65() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shr65() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none static void shr65() { for (int i = 0; i < 128; i++) - a[i] >>>= 65; // 1, since & 63 + a[i] >>>= $opt$inline$IntConstant65(); // 1, since & 63 + } + + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (before) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant -254 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START: void Main.shrMinus254() instruction_simplifier$after_inlining (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:j\d+>> UShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},{{i\d+}},<<UShr>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.shrMinus254() loop_optimization (after) + /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>> outer_loop:none + static void shrMinus254() { + for (int i = 0; i < 128; i++) + a[i] >>>= $opt$inline$IntConstantMinus254(); // 2, since & 63 } // @@ -238,9 +306,14 @@ public class Main { for (int i = 0; i < 128; i++) { expectEquals(0x1fffffffffffffffL, a[i], "shr65"); } + shrMinus254(); + for (int i = 0; i < 128; i++) { + expectEquals(0x07ffffffffffffffL, a[i], "shrMinus254"); + } + // Bit-wise not operator. not(); for (int i = 0; i < 128; i++) { - expectEquals(0xe000000000000000L, a[i], "not"); + expectEquals(0xf800000000000000L, a[i], "not"); } // Done. 
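    // The expected values above follow from Java's shift semantics (JLS 15.19):
    // only the low 5 bits of the shift distance are used for int operands and
    // only the low 6 bits for long operands. A standalone sanity check of the
    // distances these tests exercise (not part of the test itself):
    //
    //   assert (-1 >>> 33) == (-1 >>> 1);      // 33 & 31 == 1
    //   assert (-1 >>> -254) == (-1 >>> 2);    // -254 & 31 == 2
    //   assert (-1L >>> 65) == (-1L >>> 1);    // 65 & 63 == 1
    //   assert (-1L >>> -254) == (-1L >>> 2);  // -254 & 63 == 2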
System.out.println("passed"); diff --git a/test/648-inline-caches-unresolved/expected.txt b/test/648-inline-caches-unresolved/expected.txt new file mode 100644 index 0000000000..4e6a4384c5 --- /dev/null +++ b/test/648-inline-caches-unresolved/expected.txt @@ -0,0 +1 @@ +Subclass diff --git a/test/648-inline-caches-unresolved/info.txt b/test/648-inline-caches-unresolved/info.txt new file mode 100644 index 0000000000..8fc604281c --- /dev/null +++ b/test/648-inline-caches-unresolved/info.txt @@ -0,0 +1 @@ +Test for inlining with inline cache into an unresolved method. diff --git a/test/648-inline-caches-unresolved/profile b/test/648-inline-caches-unresolved/profile new file mode 100644 index 0000000000..92c0a41cab --- /dev/null +++ b/test/648-inline-caches-unresolved/profile @@ -0,0 +1 @@ +LMain;->inlineMonomorphicUnresolvedSuper(Ljava/lang/Object;)Ljava/lang/String;+LSubclass; diff --git a/test/648-inline-caches-unresolved/run b/test/648-inline-caches-unresolved/run new file mode 100644 index 0000000000..fb70d22867 --- /dev/null +++ b/test/648-inline-caches-unresolved/run @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Copyright (C) 2017 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +exec ${RUN} $@ --secondary --profile diff --git a/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java b/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java new file mode 100644 index 0000000000..dd3be00633 --- /dev/null +++ b/test/648-inline-caches-unresolved/src-dex2oat-unresolved/UnresolvedSuperClass.java @@ -0,0 +1,21 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class UnresolvedSuperClass { + public void superMethod() { + System.out.println("UnresolvedClass.superMethod()"); + } +} diff --git a/test/648-inline-caches-unresolved/src/Main.java b/test/648-inline-caches-unresolved/src/Main.java new file mode 100644 index 0000000000..4e8aeec171 --- /dev/null +++ b/test/648-inline-caches-unresolved/src/Main.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +public class Main extends UnresolvedSuperClass { + public static String inlineMonomorphicUnresolvedSuper(Object o) { + return o.toString(); + } + + public static void main(String[] args) { + System.out.println(inlineMonomorphicUnresolvedSuper(new Subclass())); + } +} + +class Subclass { + public String toString() { + return "Subclass"; + } +} diff --git a/test/649-vdex-duplicate-method/classes.dex b/test/649-vdex-duplicate-method/classes.dex Binary files differnew file mode 100644 index 0000000000..8036a2f896 --- /dev/null +++ b/test/649-vdex-duplicate-method/classes.dex diff --git a/test/649-vdex-duplicate-method/expected.txt b/test/649-vdex-duplicate-method/expected.txt new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/test/649-vdex-duplicate-method/expected.txt @@ -0,0 +1 @@ +0 diff --git a/test/649-vdex-duplicate-method/info.txt b/test/649-vdex-duplicate-method/info.txt new file mode 100644 index 0000000000..d2c995914b --- /dev/null +++ b/test/649-vdex-duplicate-method/info.txt @@ -0,0 +1 @@ +Regression test for unquickening a vdex that has duplicate methods. diff --git a/test/650-checker-inline-access-thunks/expected.txt b/test/650-checker-inline-access-thunks/expected.txt new file mode 100644 index 0000000000..d81cc0710e --- /dev/null +++ b/test/650-checker-inline-access-thunks/expected.txt @@ -0,0 +1 @@ +42 diff --git a/test/650-checker-inline-access-thunks/info.txt b/test/650-checker-inline-access-thunks/info.txt new file mode 100644 index 0000000000..e1a1eb275c --- /dev/null +++ b/test/650-checker-inline-access-thunks/info.txt @@ -0,0 +1 @@ +Test that access thunks for nested classes are inlined. diff --git a/test/650-checker-inline-access-thunks/src/Main.java b/test/650-checker-inline-access-thunks/src/Main.java new file mode 100644 index 0000000000..17f581910e --- /dev/null +++ b/test/650-checker-inline-access-thunks/src/Main.java @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public class Main { + public static void main(String[] args) { + Main m = new Main(); + Nested n = new Nested(); + n.$noinline$setPrivateIntField(m, 42); + System.out.println(n.$noinline$getPrivateIntField(m)); + } + + private int privateIntField; + + private static class Nested { + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (before) + /// CHECK: InvokeStaticOrDirect + + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (before) + /// CHECK-NOT: InstanceFieldSet + + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (after) + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START: void Main$Nested.$noinline$setPrivateIntField(Main, int) inliner (after) + /// CHECK: InstanceFieldSet + + public void $noinline$setPrivateIntField(Main m, int value) { + m.privateIntField = value; + } + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (before) + /// CHECK: InvokeStaticOrDirect + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (before) + /// CHECK-NOT: InstanceFieldGet + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (after) + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START: int Main$Nested.$noinline$getPrivateIntField(Main) inliner (after) + /// CHECK: InstanceFieldGet + + public int $noinline$getPrivateIntField(Main m) { + return m.privateIntField; + } + } +} diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc index 8695e0c371..21dcf98ba7 100644 --- a/test/901-hello-ti-agent/basics.cc +++ b/test/901-hello-ti-agent/basics.cc @@ -176,5 +176,22 @@ extern "C" JNIEXPORT jboolean JNICALL Java_art_Test901_checkUnattached( return res == JVMTI_ERROR_UNATTACHED_THREAD; } +extern "C" JNIEXPORT jstring JNICALL Java_art_Test901_getErrorName( + JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jint error) { + char* name; + jvmtiError res = jvmti_env->GetErrorName(static_cast<jvmtiError>(error), &name); + if (JvmtiErrorToException(env, jvmti_env, res)) { + return nullptr; + } + + jstring ret_string = env->NewStringUTF(name); + jvmtiError dealloc = jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name)); + if (JvmtiErrorToException(env, jvmti_env, dealloc)) { + return nullptr; + } + + return ret_string; +} + } // namespace Test901HelloTi } // namespace art diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt index eb5b6a2f93..4177ffc4dc 100644 --- a/test/901-hello-ti-agent/expected.txt +++ b/test/901-hello-ti-agent/expected.txt @@ -10,4 +10,67 @@ Received expected error for unattached JVMTI calls 4 8 JVMTI_ERROR_ILLEGAL_ARGUMENT +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +0 = JVMTI_ERROR_NONE +9 times JVMTI_ERROR_ILLEGAL_ARGUMENT +10 = JVMTI_ERROR_INVALID_THREAD +11 = JVMTI_ERROR_INVALID_THREAD_GROUP +12 = JVMTI_ERROR_INVALID_PRIORITY +13 = JVMTI_ERROR_THREAD_NOT_SUSPENDED +14 = JVMTI_ERROR_THREAD_SUSPENDED +15 = JVMTI_ERROR_THREAD_NOT_ALIVE +4 times JVMTI_ERROR_ILLEGAL_ARGUMENT +20 = JVMTI_ERROR_INVALID_OBJECT +21 = JVMTI_ERROR_INVALID_CLASS +22 = JVMTI_ERROR_CLASS_NOT_PREPARED +23 = JVMTI_ERROR_INVALID_METHODID +24 = JVMTI_ERROR_INVALID_LOCATION +25 = JVMTI_ERROR_INVALID_FIELDID +5 times JVMTI_ERROR_ILLEGAL_ARGUMENT +31 = JVMTI_ERROR_NO_MORE_FRAMES +32 = JVMTI_ERROR_OPAQUE_FRAME +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +34 = JVMTI_ERROR_TYPE_MISMATCH +35 = JVMTI_ERROR_INVALID_SLOT +4 times JVMTI_ERROR_ILLEGAL_ARGUMENT +40 = 
JVMTI_ERROR_DUPLICATE +41 = JVMTI_ERROR_NOT_FOUND +8 times JVMTI_ERROR_ILLEGAL_ARGUMENT +50 = JVMTI_ERROR_INVALID_MONITOR +51 = JVMTI_ERROR_NOT_MONITOR_OWNER +52 = JVMTI_ERROR_INTERRUPT +7 times JVMTI_ERROR_ILLEGAL_ARGUMENT +60 = JVMTI_ERROR_INVALID_CLASS_FORMAT +61 = JVMTI_ERROR_CIRCULAR_CLASS_DEFINITION +62 = JVMTI_ERROR_FAILS_VERIFICATION +63 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_ADDED +64 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED +65 = JVMTI_ERROR_INVALID_TYPESTATE +66 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED +67 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_DELETED +68 = JVMTI_ERROR_UNSUPPORTED_VERSION +69 = JVMTI_ERROR_NAMES_DONT_MATCH +70 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED +71 = JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED +7 times JVMTI_ERROR_ILLEGAL_ARGUMENT +79 = JVMTI_ERROR_UNMODIFIABLE_CLASS +18 times JVMTI_ERROR_ILLEGAL_ARGUMENT +98 = JVMTI_ERROR_NOT_AVAILABLE +99 = JVMTI_ERROR_MUST_POSSESS_CAPABILITY +100 = JVMTI_ERROR_NULL_POINTER +101 = JVMTI_ERROR_ABSENT_INFORMATION +102 = JVMTI_ERROR_INVALID_EVENT_TYPE +103 = JVMTI_ERROR_ILLEGAL_ARGUMENT +104 = JVMTI_ERROR_NATIVE_METHOD +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +106 = JVMTI_ERROR_CLASS_LOADER_UNSUPPORTED +3 times JVMTI_ERROR_ILLEGAL_ARGUMENT +110 = JVMTI_ERROR_OUT_OF_MEMORY +111 = JVMTI_ERROR_ACCESS_DENIED +112 = JVMTI_ERROR_WRONG_PHASE +113 = JVMTI_ERROR_INTERNAL +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT +115 = JVMTI_ERROR_UNATTACHED_THREAD +116 = JVMTI_ERROR_INVALID_ENVIRONMENT +1 times JVMTI_ERROR_ILLEGAL_ARGUMENT VMDeath diff --git a/test/901-hello-ti-agent/src/art/Test901.java b/test/901-hello-ti-agent/src/art/Test901.java index eef2188612..7d853a7d51 100644 --- a/test/901-hello-ti-agent/src/art/Test901.java +++ b/test/901-hello-ti-agent/src/art/Test901.java @@ -32,6 +32,8 @@ public class Test901 { set(2); // CLASS set(4); // JNI set(8); // Error. + + testErrorNames(); } private static void set(int i) { @@ -44,7 +46,39 @@ public class Test901 { } } + private static void testErrorNames() { + int consecutiveErrors = 0; + String lastError = null; + for (int i = -1; i <= 117; i++) { + String errorName = null; + String error = null; + try { + errorName = getErrorName(i); + } catch (RuntimeException e) { + error = e.getMessage(); + } + + if (lastError != null && + (errorName != null || (error != null && !lastError.equals(error)))) { + System.out.println(consecutiveErrors + " times " + lastError); + lastError = null; + consecutiveErrors = 0; + } + + if (errorName != null) { + System.out.println(i + " = " + errorName); + } else { + lastError = error; + consecutiveErrors++; + } + } + if (consecutiveErrors > 0) { + System.out.println(consecutiveErrors + " times " + lastError); + } + } + private static native boolean checkLivePhase(); private static native void setVerboseFlag(int flag, boolean value); private static native boolean checkUnattached(); + private static native String getErrorName(int error); } diff --git a/test/911-get-stack-trace/src/art/PrintThread.java b/test/911-get-stack-trace/src/art/PrintThread.java index f50a66b963..fee5ba00ab 100644 --- a/test/911-get-stack-trace/src/art/PrintThread.java +++ b/test/911-get-stack-trace/src/art/PrintThread.java @@ -41,7 +41,8 @@ public class PrintThread { // We have to ignore some threads when printing all stack traces. These are threads that may or // may not exist depending on the environment. 
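  // The matcher below is compiled once and reused. A sketch of the intended
  // consultation pattern, assuming reset-per-name (the helper is hypothetical):
  //
  //   static boolean shouldIgnore(String threadName) {
  //     return IGNORE_THREADS.reset(threadName).find();
  //   }
  //
  // Matcher is not thread-safe, so this single shared instance only works if
  // names are checked from one thread at a time.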
public final static String IGNORE_THREAD_NAME_REGEX = - "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main"; + "Binder:|RenderThread|hwuiTask|Jit thread pool worker|Instr:|JDWP|Profile Saver|main|" + + "queued-work-looper"; public final static Matcher IGNORE_THREADS = Pattern.compile(IGNORE_THREAD_NAME_REGEX).matcher(""); @@ -88,4 +89,4 @@ public class PrintThread { } public static native String[][] getStackTrace(Thread thread, int start, int max); -}
\ No newline at end of file +} diff --git a/test/912-classes/src/art/Test912Art.java b/test/912-classes/src/art/Test912Art.java index 6da3cadefe..a1e7ff2005 100644 --- a/test/912-classes/src/art/Test912Art.java +++ b/test/912-classes/src/art/Test912Art.java @@ -39,7 +39,7 @@ public class Test912Art { // run in configurations where dex2oat didn't verify the class itself. So explicitly // check whether the class has been already loaded, and skip then. // TODO: Add multiple configurations to the run script once that becomes easier to do. - if (hasJit() && !isLoadedClass("art.Test912Art$ClassD")) { + if (hasJit() && !isLoadedClass("Lart/Test912Art$ClassD;")) { testClassEventsJit(); } } diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt index 702b247819..b128d1cb70 100644 --- a/test/913-heaps/expected.txt +++ b/test/913-heaps/expected.txt @@ -385,3 +385,10 @@ root@root --(thread)--> 1@1000 [size=16, length=-1] 5@1002 --(field@10)--> 1@1000 [size=16, length=-1] 5@1002 --(field@9)--> 6@1000 [size=16, length=-1] --- + +default +image +zygote +app + +3 diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc index e319f7d98c..f39c5f16d7 100644 --- a/test/913-heaps/heaps.cc +++ b/test/913-heaps/heaps.cc @@ -817,5 +817,192 @@ extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getGcFinishes(JNIEnv* env ATT return result; } +using GetObjectHeapId = jvmtiError(*)(jvmtiEnv*, jlong, jint*, ...); +static GetObjectHeapId gGetObjectHeapIdFn = nullptr; + +using GetHeapName = jvmtiError(*)(jvmtiEnv*, jint, char**, ...); +static GetHeapName gGetHeapNameFn = nullptr; + +static void FreeExtensionFunctionInfo(jvmtiExtensionFunctionInfo* extensions, jint count) { + for (size_t i = 0; i != static_cast<size_t>(count); ++i) { + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].id)); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].short_description)); + for (size_t j = 0; j != static_cast<size_t>(extensions[i].param_count); ++j) { + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params[j].name)); + } + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].params)); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(extensions[i].errors)); + } +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkForExtensionApis( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) { + jint extension_count; + jvmtiExtensionFunctionInfo* extensions; + jvmtiError result = jvmti_env->GetExtensionFunctions(&extension_count, &extensions); + if (JvmtiErrorToException(env, jvmti_env, result)) { + return; + } + + for (size_t i = 0; i != static_cast<size_t>(extension_count); ++i) { + if (strcmp("com.android.art.heap.get_object_heap_id", extensions[i].id) == 0) { + CHECK(gGetObjectHeapIdFn == nullptr); + gGetObjectHeapIdFn = reinterpret_cast<GetObjectHeapId>(extensions[i].func); + + CHECK_EQ(extensions[i].param_count, 2); + + CHECK_EQ(strcmp("tag", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JLONG); + CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("heap_id", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_JINT); + CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_OUT); + CHECK_EQ(extensions[i].params[1].null_ok, false); + + CHECK_EQ(extensions[i].error_count, 1); + CHECK(extensions[i].errors != nullptr); + CHECK(extensions[i].errors[0] == JVMTI_ERROR_NOT_FOUND); + + continue; + } + + if 
(strcmp("com.android.art.heap.get_heap_name", extensions[i].id) == 0) { + CHECK(gGetHeapNameFn == nullptr); + gGetHeapNameFn = reinterpret_cast<GetHeapName>(extensions[i].func); + + CHECK_EQ(extensions[i].param_count, 2); + + CHECK_EQ(strcmp("heap_id", extensions[i].params[0].name), 0); + CHECK_EQ(extensions[i].params[0].base_type, JVMTI_TYPE_JINT); + CHECK_EQ(extensions[i].params[0].kind, JVMTI_KIND_IN); + + CHECK_EQ(strcmp("heap_name", extensions[i].params[1].name), 0); + CHECK_EQ(extensions[i].params[1].base_type, JVMTI_TYPE_CCHAR); + CHECK_EQ(extensions[i].params[1].kind, JVMTI_KIND_ALLOC_BUF); + CHECK_EQ(extensions[i].params[1].null_ok, false); + + CHECK_EQ(extensions[i].error_count, 1); + CHECK(extensions[i].errors != nullptr); + CHECK(extensions[i].errors[0] == JVMTI_ERROR_ILLEGAL_ARGUMENT); + } + } + + CHECK(gGetObjectHeapIdFn != nullptr); + CHECK(gGetHeapNameFn != nullptr); + + FreeExtensionFunctionInfo(extensions, extension_count); +} + +extern "C" JNIEXPORT jint JNICALL Java_art_Test913_getObjectHeapId( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) { + CHECK(gGetObjectHeapIdFn != nullptr); + jint heap_id; + jvmtiError result = gGetObjectHeapIdFn(jvmti_env, tag, &heap_id); + JvmtiErrorToException(env, jvmti_env, result); + return heap_id; +} + +extern "C" JNIEXPORT jstring JNICALL Java_art_Test913_getHeapName( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jint heap_id) { + CHECK(gGetHeapNameFn != nullptr); + char* heap_name; + jvmtiError result = gGetHeapNameFn(jvmti_env, heap_id, &heap_name); + if (JvmtiErrorToException(env, jvmti_env, result)) { + return nullptr; + } + jstring ret = env->NewStringUTF(heap_name); + jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(heap_name)); + return ret; +} + +extern "C" JNIEXPORT void JNICALL Java_art_Test913_checkGetObjectHeapIdInCallback( + JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag, jint heap_id) { + CHECK(gGetObjectHeapIdFn != nullptr); + + { + struct GetObjectHeapIdCallbacks { + static jint JNICALL FollowReferencesCallback( + jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED, + const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED, + jlong class_tag ATTRIBUTE_UNUSED, + jlong referrer_class_tag ATTRIBUTE_UNUSED, + jlong size ATTRIBUTE_UNUSED, + jlong* tag_ptr, + jlong* referrer_tag_ptr ATTRIBUTE_UNUSED, + jint length ATTRIBUTE_UNUSED, + void* user_data) { + if (*tag_ptr != 0) { + GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data); + if (*tag_ptr == p->check_callback_tag) { + jint tag_heap_id; + jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id); + CHECK_EQ(result, JVMTI_ERROR_NONE); + CHECK_EQ(tag_heap_id, p->check_callback_id); + return JVMTI_VISIT_ABORT; + } + } + + return JVMTI_VISIT_OBJECTS; // Continue visiting. 
+ } + + jlong check_callback_tag; + jint check_callback_id; + }; + + jvmtiHeapCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks)); + callbacks.heap_reference_callback = GetObjectHeapIdCallbacks::FollowReferencesCallback; + + GetObjectHeapIdCallbacks ffc; + ffc.check_callback_tag = tag; + ffc.check_callback_id = heap_id; + + jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, nullptr, &callbacks, &ffc); + if (JvmtiErrorToException(env, jvmti_env, ret)) { + return; + } + } + + { + struct GetObjectHeapIdCallbacks { + static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED, + jlong size ATTRIBUTE_UNUSED, + jlong* tag_ptr, + jint length ATTRIBUTE_UNUSED, + void* user_data) { + if (*tag_ptr != 0) { + GetObjectHeapIdCallbacks* p = reinterpret_cast<GetObjectHeapIdCallbacks*>(user_data); + if (*tag_ptr == p->check_callback_tag) { + jint tag_heap_id; + jvmtiError result = gGetObjectHeapIdFn(jvmti_env, *tag_ptr, &tag_heap_id); + CHECK_EQ(result, JVMTI_ERROR_NONE); + CHECK_EQ(tag_heap_id, p->check_callback_id); + return JVMTI_VISIT_ABORT; + } + } + + return 0; // Continue visiting. + } + + jlong check_callback_tag; + jint check_callback_id; + }; + + jvmtiHeapCallbacks callbacks; + memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks)); + callbacks.heap_iteration_callback = GetObjectHeapIdCallbacks::HeapIterationCallback; + + GetObjectHeapIdCallbacks ffc; + ffc.check_callback_tag = tag; + ffc.check_callback_id = heap_id; + + jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc); + if (JvmtiErrorToException(env, jvmti_env, ret)) { + return; + } + } +} + } // namespace Test913Heaps } // namespace art diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java index 8800b1a4d7..6694aad868 100644 --- a/test/913-heaps/src/art/Test913.java +++ b/test/913-heaps/src/art/Test913.java @@ -16,6 +16,9 @@ package art; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -44,6 +47,8 @@ public class Test913 { }; t.start(); cdl1.await(); + + doExtensionTests(); } public static void runFollowReferences() throws Exception { @@ -215,6 +220,50 @@ public class Test913 { System.out.println(getTag(floatObject)); } + static ArrayList<Object> extensionTestHolder; + + private static void doExtensionTests() { + checkForExtensionApis(); + + extensionTestHolder = new ArrayList<>(); + System.out.println(); + + try { + getHeapName(-1); + System.out.println("Expected failure for -1"); + } catch (Exception e) { + } + System.out.println(getHeapName(0)); + System.out.println(getHeapName(1)); + System.out.println(getHeapName(2)); + System.out.println(getHeapName(3)); + try { + getHeapName(4); + System.out.println("Expected failure for 4"); + } catch (Exception e) { + } + + System.out.println(); + + setTag(Object.class, 100000); + int objectClassHeapId = getObjectHeapId(100000); + int objClassExpectedHeapId = hasImage() ?
1 : 3; + if (objectClassHeapId != objClassExpectedHeapId) { + throw new RuntimeException("Expected object class in heap " + objClassExpectedHeapId + + " but received " + objectClassHeapId); + } + + A a = new A(); + extensionTestHolder.add(a); + setTag(a, 100001); + System.out.println(getObjectHeapId(100001)); + + checkGetObjectHeapIdInCallback(100000, objClassExpectedHeapId); + checkGetObjectHeapIdInCallback(100001, 3); + + extensionTestHolder = null; + } + private static void runGc() { clearStats(); forceGarbageCollection(); @@ -233,6 +282,24 @@ public class Test913 { System.out.println((s > 0) + " " + (f > 0)); } + private static boolean hasImage() { + try { + int pid = Integer.parseInt(new File("/proc/self").getCanonicalFile().getName()); + BufferedReader reader = new BufferedReader(new FileReader("/proc/" + pid + "/maps")); + String line; + while ((line = reader.readLine()) != null) { + if (line.endsWith(".art")) { + reader.close(); + return true; + } + } + reader.close(); + return false; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + private static class TestConfig { private Class<?> klass = null; private int heapFilter = 0; @@ -642,6 +709,11 @@ public class Test913 { private static native int getGcFinishes(); private static native void forceGarbageCollection(); + private static native void checkForExtensionApis(); + private static native int getObjectHeapId(long tag); + private static native String getHeapName(int heapId); + private static native void checkGetObjectHeapIdInCallback(long tag, int heapId); + public static native String[] followReferences(int heapFilter, Class<?> klassFilter, Object initialObject, int stopAfter, int followSet, Object jniRef); public static native String[] followReferencesString(Object initialObject); diff --git a/test/etc/default-build b/test/etc/default-build index 744c38bb6d..0508b85529 100755 --- a/test/etc/default-build +++ b/test/etc/default-build @@ -82,9 +82,9 @@ JACK_EXPERIMENTAL_ARGS["method-handles"]="-D jack.java.source.version=1.7 -D jac JACK_EXPERIMENTAL_ARGS[${DEFAULT_EXPERIMENT}]="-D jack.java.source.version=1.8 -D jack.android.min-api-level=24" declare -A SMALI_EXPERIMENTAL_ARGS -SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api-level 24" -SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api-level 26" -SMALI_EXPERIMENTAL_ARGS["agents"]="--api-level 26" +SMALI_EXPERIMENTAL_ARGS["default-methods"]="--api 24" +SMALI_EXPERIMENTAL_ARGS["method-handles"]="--api 26" +SMALI_EXPERIMENTAL_ARGS["agents"]="--api 26" declare -A JAVAC_EXPERIMENTAL_ARGS JAVAC_EXPERIMENTAL_ARGS["default-methods"]="-source 1.8 -target 1.8" @@ -275,7 +275,7 @@ fi if [ "${HAS_SMALI}" = "true" -a ${NEED_DEX} = "true" ]; then # Compile Smali classes - ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` + ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes.dex `find smali -name '*.smali'` # Don't bother with dexmerger if we provide our own main function in a smali file. if [ ${SKIP_DX_MERGER} = "false" ]; then @@ -287,7 +287,7 @@ fi if [ "${HAS_SMALI_MULTIDEX}" = "true" -a ${NEED_DEX} = "true" ]; then # Compile Smali classes - ${SMALI} -JXmx512m ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'` + ${SMALI} -JXmx512m assemble ${SMALI_ARGS} --output smali_classes2.dex `find smali-multidex -name '*.smali'` # Don't bother with dexmerger if we provide our own main function in a smali file. 
if [ ${HAS_SRC_MULTIDEX} = "true" ]; then diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar index b34da08716..f75055674e 100755 --- a/test/etc/run-test-jar +++ b/test/etc/run-test-jar @@ -469,15 +469,15 @@ fi if [ "$INTERPRETER" = "y" ]; then INT_OPTS="-Xint" if [ "$VERIFY" = "y" ] ; then - INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only" - COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only" + INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=quicken" + COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=quicken" elif [ "$VERIFY" = "s" ]; then - INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-at-runtime" - COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-at-runtime" + INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=extract" + COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=extract" DEX_VERIFY="${DEX_VERIFY} -Xverify:softfail" else # VERIFY = "n" - INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none" - COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none" + INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=assume-verified" + COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=assume-verified" DEX_VERIFY="${DEX_VERIFY} -Xverify:none" fi fi @@ -485,11 +485,11 @@ fi if [ "$JIT" = "y" ]; then INT_OPTS="-Xusejit:true" if [ "$VERIFY" = "y" ] ; then - INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=interpret-only" - COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=interpret-only" + INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=quicken" + COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=quicken" else - INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=verify-none" - COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=verify-none" + INT_OPTS="${INT_OPTS} -Xcompiler-option --compiler-filter=assume-verified" + COMPILE_FLAGS="${COMPILE_FLAGS} --compiler-filter=assume-verified" DEX_VERIFY="${DEX_VERIFY} -Xverify:none" fi fi @@ -564,6 +564,11 @@ if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then profman_cmdline="${ANDROID_ROOT}/bin/profman \ --apk=$DEX_LOCATION/$TEST_NAME.jar \ --dex-location=$DEX_LOCATION/$TEST_NAME.jar" + if [ -f $DEX_LOCATION/$TEST_NAME-ex.jar ]; then + profman_cmdline="${profman_cmdline} \ + --apk=$DEX_LOCATION/$TEST_NAME-ex.jar \ + --dex-location=$DEX_LOCATION/$TEST_NAME-ex.jar" + fi COMPILE_FLAGS="${COMPILE_FLAGS} --profile-file=$DEX_LOCATION/$TEST_NAME.prof" FLAGS="${FLAGS} -Xcompiler-option --profile-file=$DEX_LOCATION/$TEST_NAME.prof" if [ "$PROFILE" = "y" ]; then @@ -582,7 +587,11 @@ if [ "$PREBUILD" = "y" ]; then app_image="--base=0x4000 --app-image-file=$DEX_LOCATION/oat/$ISA/$TEST_NAME.art" fi - dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/dex2oatd \ + dex2oat_binary=dex2oatd + if [[ "$TEST_IS_NDEBUG" = "y" ]]; then + dex2oat_binary=dex2oat + fi + dex2oat_cmdline="$INVOKE_WITH $ANDROID_ROOT/bin/$dex2oat_binary \ $COMPILE_FLAGS \ --boot-image=${BOOT_IMAGE} \ --dex-file=$DEX_LOCATION/$TEST_NAME.jar \ @@ -603,7 +612,9 @@ if [ "$PREBUILD" = "y" ]; then # Use -k 1m to SIGKILL it a minute later if it hasn't ended. 
dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}" fi - if [ "$TEST_VDEX" = "y" ]; then + if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then + vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex" + elif [ "$TEST_VDEX" = "y" ]; then vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex" fi fi diff --git a/test/knownfailures.json b/test/knownfailures.json index 3fdd9b3eb6..659b814561 100644 --- a/test/knownfailures.json +++ b/test/knownfailures.json @@ -108,20 +108,19 @@ "non-deterministic. Same for 913."] }, { - "tests": "961-default-iface-resolution-gen", + "tests": ["961-default-iface-resolution-gen", + "964-default-iface-init-gen", + "968-default-partial-compile-gen"], "variant": "gcstress", - "description": ["961-default-iface-resolution-gen and", - "964-default-iface-init-genare very long tests that", + "description": ["961-default-iface-resolution-gen,", + "968-default-partial-compile-gen and", + "964-default-iface-init-gen are very long tests that", "often will take more than the timeout to run when", "gcstress is enabled. This is because gcstress slows", "down allocations significantly which these tests do a", "lot."] }, { - "tests": "964-default-iface-init-gen", - "variant": "gcstress" - }, - { "tests": "154-gc-loop", "variant": "gcstress | jit & debug", "description": ["154-gc-loop depends GC not happening too often"], @@ -329,11 +328,6 @@ "variant": "interpreter | optimizing | regalloc_gc | jit" }, { - "tests": ["912-classes"], - "bug": "http://b/36344364", - "variant": "no-dex2oat | relocate-npatchoat" - }, - { "tests": ["476-clinit-inline-static-invoke", "496-checker-inlining-class-loader", "508-referrer-method", @@ -667,9 +661,28 @@ "bug": "b/37636792" }, { + "tests": [ + "536-checker-needs-access-check", + "537-checker-inline-and-unverified", + "569-checker-pattern-replacement", + "586-checker-null-array-get" + ], + "description": [ + "Tests that have verify-at-runtime classes, but being compiled when using vdex." + ], + "variant": "speed-profile" + }, + { "tests": "648-many-direct-methods", - "variant": "optimizing", - "description": "Test disabled with AOT because of dex2oatd timeouts.", + "variant": "debug", + "description": "Test disabled in debug mode because of dex2oatd timeouts.", "bug": "b/33650497" + }, + { + "tests": "640-checker-integer-valueof", + "description": [ + "The java.lang.Integer.valueOf intrinsic is not supported in PIC mode." + ], + "variant": "optimizing & pictest | speed-profile & pictest" } ] diff --git a/test/run-test b/test/run-test index f60f766751..933a7febac 100755 --- a/test/run-test +++ b/test/run-test @@ -46,7 +46,7 @@ export RUN="${progdir}/etc/run-test-jar" export DEX_LOCATION=/data/run-test/${test_dir} export NEED_DEX="true" export USE_JACK="true" -export SMALI_ARGS="--experimental" +export SMALI_ARGS="" # If dx was not set by the environment variable, assume it is in the path. 
if [ -z "$DX" ]; then diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py index a80924639b..9a437cc822 100755 --- a/test/testrunner/testrunner.py +++ b/test/testrunner/testrunner.py @@ -497,7 +497,11 @@ def run_test(command, test, test_variant, test_name): test_skipped = True else: test_skipped = False - proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE, universal_newlines=True) + if gdb: + proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, universal_newlines=True) + else: + proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout = subprocess.PIPE, + universal_newlines=True) script_output = proc.communicate(timeout=timeout)[0] test_passed = not proc.wait() @@ -46,17 +46,17 @@ function find_libdir() { fi } -function replace_compiler_filter_with_interepret_only() { - ARGS_WITH_INTERPRET_ONLY=("$@") +function replace_compiler_filter_with_quicken() { + ARGS_WITH_QUICKEN=("$@") found="false" ((index=0)) while ((index <= $#)); do - what="${ARGS_WITH_INTERPRET_ONLY[$index]}" + what="${ARGS_WITH_QUICKEN[$index]}" case "$what" in --compiler-filter=*) - ARGS_WITH_INTERPRET_ONLY[$index]="--compiler-filter=interpret-only" + ARGS_WITH_QUICKEN[$index]="--compiler-filter=quicken" found="true" ;; esac @@ -65,7 +65,7 @@ function replace_compiler_filter_with_interepret_only() { shift done if [ "$found" != "true" ]; then - ARGS_WITH_INTERPRET_ONLY=(-Xcompiler-option --compiler-filter=interpret-only "${ARGS_WITH_INTERPRET_ONLY[@]}") + ARGS_WITH_QUICKEN=(-Xcompiler-option --compiler-filter=quicken "${ARGS_WITH_QUICKEN[@]}") fi } @@ -224,10 +224,10 @@ if [ "$JIT_PROFILE" = "yes" ]; then PROFILE_PATH="$ANDROID_DATA/primary.prof" touch $PROFILE_PATH - # Replace the compiler filter with interpret-only so that we + # Replace the compiler filter with quicken so that we # can capture the profile. - ARGS_WITH_INTERPRET_ONLY= - replace_compiler_filter_with_interepret_only "$@" + ARGS_WITH_QUICKEN= + replace_compiler_filter_with_quicken "$@" run_art -Xjitsaveprofilinginfo \ -Xps-min-methods-to-save:1 \ @@ -235,7 +235,7 @@ if [ "$JIT_PROFILE" = "yes" ]; then -Xps-min-notification-before-wake:10 \ -Xps-profile-path:$PROFILE_PATH \ -Xusejit:true \ - "${ARGS_WITH_INTERPRET_ONLY[@]}" \ + "${ARGS_WITH_QUICKEN[@]}" \ "&>" "$ANDROID_DATA/profile_gen.log" EXIT_STATUS=$? diff --git a/tools/cpp-define-generator/constant_card_table.def b/tools/cpp-define-generator/constant_card_table.def new file mode 100644 index 0000000000..ae3e8f399f --- /dev/null +++ b/tools/cpp-define-generator/constant_card_table.def @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Export heap values. + +#if defined(DEFINE_INCLUDE_DEPENDENCIES) +#include "gc/accounting/card_table.h" +#endif + +// Size of references to the heap on the stack. 
+DEFINE_EXPR(CARD_TABLE_CARD_SHIFT, size_t, art::gc::accounting::CardTable::kCardShift) + diff --git a/tools/cpp-define-generator/offsets_all.def b/tools/cpp-define-generator/offsets_all.def index 13371a1f71..b8947de2dc 100644 --- a/tools/cpp-define-generator/offsets_all.def +++ b/tools/cpp-define-generator/offsets_all.def @@ -49,6 +49,7 @@ // TODO: MIRROR_STRING offsets (depends on header size) #include "offset_dexcache.def" #include "constant_dexcache.def" +#include "constant_card_table.def" #include "constant_heap.def" #include "constant_lockword.def" #include "constant_globals.def" diff --git a/tools/dexfuzz/src/dexfuzz/executors/Executor.java b/tools/dexfuzz/src/dexfuzz/executors/Executor.java index 2bcf3a1a77..074672d0ff 100644 --- a/tools/dexfuzz/src/dexfuzz/executors/Executor.java +++ b/tools/dexfuzz/src/dexfuzz/executors/Executor.java @@ -117,7 +117,7 @@ public abstract class Executor { commandBuilder.append("--runtime-arg -classpath "); commandBuilder.append("--runtime-arg ").append(programName).append(" "); commandBuilder.append("--dex-file=").append(programName).append(" "); - commandBuilder.append("--compiler-filter=interpret-only --runtime-arg -Xnorelocate "); + commandBuilder.append("--compiler-filter=quicken --runtime-arg -Xnorelocate "); ExecutionResult verificationResult = device.executeCommand(commandBuilder.toString(), true, outputConsumer, errorConsumer); diff --git a/tools/run-jdwp-tests.sh b/tools/run-jdwp-tests.sh index 07c300e7a7..d48d8579be 100755 --- a/tools/run-jdwp-tests.sh +++ b/tools/run-jdwp-tests.sh @@ -132,8 +132,8 @@ if [[ "$image" != "" ]]; then vm_args="--vm-arg $image" fi if $use_jit; then - vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only" - debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=interpret-only" + vm_args="$vm_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=quicken" + debuggee_args="$debuggee_args -Xcompiler-option --compiler-filter=quicken" fi vm_args="$vm_args --vm-arg -Xusejit:$use_jit" debuggee_args="$debuggee_args -Xusejit:$use_jit" diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh index 729a3e5ac4..b860a6273f 100755 --- a/tools/run-libcore-tests.sh +++ b/tools/run-libcore-tests.sh @@ -127,7 +127,7 @@ vogar_args="$vogar_args --toolchain jack --language JO" # JIT settings. if $use_jit; then - vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=interpret-only" + vogar_args="$vogar_args --vm-arg -Xcompiler-option --vm-arg --compiler-filter=quicken" fi vogar_args="$vogar_args --vm-arg -Xusejit:$use_jit"