40 files changed, 501 insertions(+), 689 deletions(-)
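Reviewer note: the core of this change is the verification-skip plumbing between dex2oat, the compiler callbacks, and the AOT class linker. The stand-alone sketch below condenses that flow for readers of the hunks that follow; it uses simplified stand-in types (a ClassStatus enum, ClassReference as a plain pair, a string return instead of verifier::FailureKind) rather than the real ART classes, so treat it as an illustration of the intended control flow, not as code from the patch.

#include <cstdio>
#include <map>
#include <utility>

// Simplified stand-ins for the ART types touched by this patch.
enum class ClassStatus { kNotReady, kRetryVerificationAtRuntime, kVerified, kInitialized };
using ClassReference = std::pair<int, unsigned>;  // (dex file id, class def index)

// Models CompilerDriver: it remembers the status of classes it has already processed,
// which is what the new CompilerDriver::CanAssumeVerified() consults.
class CompilerDriver {
 public:
  void SetClassStatus(ClassReference ref, ClassStatus status) { compiled_classes_[ref] = status; }
  bool CanAssumeVerified(ClassReference ref) const {
    auto it = compiled_classes_.find(ref);
    return it != compiled_classes_.end() && it->second >= ClassStatus::kVerified;
  }
 private:
  std::map<ClassReference, ClassStatus> compiled_classes_;
};

// Models QuickCompilerCallbacks: the driver is only consulted when class unloading is
// enabled, because otherwise a previously verified class can never come back for
// re-verification and the lookup would be pure overhead.
class CompilerCallbacks {
 public:
  void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* driver) {
    does_class_unloading_ = does_class_unloading;
    compiler_driver_ = driver;
  }
  bool CanAssumeVerified(ClassReference ref) const {
    return does_class_unloading_ && compiler_driver_->CanAssumeVerified(ref);
  }
 private:
  bool does_class_unloading_ = false;
  CompilerDriver* compiler_driver_ = nullptr;
};

// Models AotClassLinker::PerformClassVerification: skip the verifier when the callbacks
// vouch for the class, otherwise fall back to the normal ClassLinker path.
const char* PerformClassVerification(const CompilerCallbacks& callbacks, ClassReference ref) {
  if (callbacks.CanAssumeVerified(ref)) {
    return "kNoFailure (re-verification skipped)";
  }
  return "ran full verification";
}

int main() {
  CompilerDriver driver;
  CompilerCallbacks callbacks;
  // What dex2oat now does when compiling dex files individually.
  callbacks.SetDoesClassUnloading(true, &driver);

  ClassReference ref{0, 0};
  std::printf("first pass:  %s\n", PerformClassVerification(callbacks, ref));
  driver.SetClassStatus(ref, ClassStatus::kVerified);  // Recorded before the class is unloaded.
  std::printf("second pass: %s\n", PerformClassVerification(callbacks, ref));
  return 0;
}

In the actual patch the same gating appears twice: dex2oat only calls SetDoesClassUnloading(true, driver) when ShouldCompileDexFilesIndividually() returns true, and QuickCompilerCallbacks::CanAssumeVerified() returns false outright when class unloading is disabled.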
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc index 872f7ea15d..c7e9f4fc07 100644 --- a/compiler/dex/quick_compiler_callbacks.cc +++ b/compiler/dex/quick_compiler_callbacks.cc @@ -16,6 +16,7 @@ #include "quick_compiler_callbacks.h" +#include "driver/compiler_driver.h" #include "verification_results.h" #include "verifier/method_verifier-inl.h" @@ -33,4 +34,17 @@ void QuickCompilerCallbacks::ClassRejected(ClassReference ref) { } } +bool QuickCompilerCallbacks::CanAssumeVerified(ClassReference ref) { + // If we don't have class unloading enabled in the compiler, we will never see classes that were + // previously verified. Return false to avoid overhead from the lookup in the compiler driver. + if (!does_class_unloading_) { + return false; + } + DCHECK(compiler_driver_ != nullptr); + // In the case of the quicken filter: avoiding verification of quickened instructions, which the + // verifier doesn't currently support. + // In the case of the verify filter, avoiding verifying twice. + return compiler_driver_->CanAssumeVerified(ref); +} + } // namespace art diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h index a3a6c0972c..578aff45e5 100644 --- a/compiler/dex/quick_compiler_callbacks.h +++ b/compiler/dex/quick_compiler_callbacks.h @@ -22,6 +22,7 @@ namespace art { +class CompilerDriver; class VerificationResults; class QuickCompilerCallbacks FINAL : public CompilerCallbacks { @@ -53,8 +54,19 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks { verification_results_ = verification_results; } + bool CanAssumeVerified(ClassReference ref) OVERRIDE; + + void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver) + OVERRIDE { + does_class_unloading_ = does_class_unloading; + compiler_driver_ = compiler_driver; + DCHECK(!does_class_unloading || compiler_driver_ != nullptr); + } + private: VerificationResults* verification_results_ = nullptr; + bool does_class_unloading_ = false; + CompilerDriver* compiler_driver_ = nullptr; std::unique_ptr<verifier::VerifierDeps> verifier_deps_; }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 6e087c5785..037e45840d 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -3040,4 +3040,10 @@ void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& de } } +bool CompilerDriver::CanAssumeVerified(ClassReference ref) const { + mirror::Class::Status existing = mirror::Class::kStatusNotReady; + compiled_classes_.Get(DexFileReference(ref.first, ref.second), &existing); + return existing >= mirror::Class::kStatusVerified; +} + } // namespace art diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index d9886a2fba..ba4581c9f4 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -377,6 +377,8 @@ class CompilerDriver { return profile_compilation_info_; } + bool CanAssumeVerified(ClassReference ref) const; + private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 4da3e0df39..392d57c0f2 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -23,6 +23,7 @@ #include "art_method-inl.h" #include "class_linker-inl.h" #include "common_compiler_test.h" +#include "compiler_callbacks.h" #include 
"dex_file.h" #include "dex_file_types.h" #include "gc/heap.h" @@ -368,7 +369,9 @@ TEST_F(CompilerDriverVerifyTest, VerifyCompilation) { // Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the // driver. -TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) { +// Test that checks that classes can be assumed as verified if unloading mode is enabled and +// the class status is at least verified. +TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) { Thread* const self = Thread::Current(); jobject class_loader; std::vector<const DexFile*> dex_files; @@ -382,6 +385,7 @@ TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) { dex_file = dex_files.front(); } compiler_driver_->SetDexFilesForOatFile(dex_files); + callbacks_->SetDoesClassUnloading(true, compiler_driver_.get()); ClassReference ref(dex_file, 0u); // Test that the status is read from the compiler driver as expected. for (size_t i = mirror::Class::kStatusRetryVerificationAtRuntime; @@ -397,6 +401,12 @@ TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) { mirror::Class::Status status = {}; ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status)); EXPECT_EQ(status, expected_status); + + // Check that we can assume verified if we are a status that is at least verified. + if (status >= mirror::Class::kStatusVerified) { + // Check that the class can be assumed as verified in the compiler driver. + EXPECT_TRUE(callbacks_->CanAssumeVerified(ref)) << status; + } } } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 76a243f793..51101f104a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -1210,14 +1210,14 @@ bool OptimizingCompiler::JitCompile(Thread* self, uint8_t* stack_map_data = nullptr; uint8_t* method_info_data = nullptr; uint8_t* roots_data = nullptr; - code_cache->ReserveData(self, - stack_map_size, - method_info_size, - number_of_roots, - method, - &stack_map_data, - &method_info_data, - &roots_data); + uint32_t data_size = code_cache->ReserveData(self, + stack_map_size, + method_info_size, + number_of_roots, + method, + &stack_map_data, + &method_info_data, + &roots_data); if (stack_map_data == nullptr || roots_data == nullptr) { return false; } @@ -1238,6 +1238,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), code_allocator.GetSize(), + data_size, osr, roots, codegen->GetGraph()->HasShouldDeoptimizeFlag(), diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 5ad011d8f9..3e373d16fb 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -554,6 +554,14 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN } void HScheduler::Schedule(HGraph* graph) { + // We run lsa here instead of in a separate pass to better control whether we + // should run the analysis or not. + LoadStoreAnalysis lsa(graph); + if (!only_optimize_loop_blocks_ || graph->HasLoops()) { + lsa.Run(); + scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector()); + } + for (HBasicBlock* block : graph->GetReversePostOrder()) { if (IsSchedulable(block)) { Schedule(block); @@ -566,14 +574,6 @@ void HScheduler::Schedule(HBasicBlock* block) { // Build the scheduling graph. 
scheduling_graph_.Clear(); - - // Only perform LSA/HeapLocation analysis on the basic block that - // is going to get instruction scheduled. - HeapLocationCollector heap_location_collector(block->GetGraph()); - heap_location_collector.VisitBasicBlock(block); - heap_location_collector.BuildAliasingMatrix(); - scheduling_graph_.SetHeapLocationCollector(heap_location_collector); - for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index ff193e933a..a6036dab5f 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1774,13 +1774,11 @@ class Dex2Oat FINAL { bool ShouldCompileDexFilesIndividually() const { // Compile individually if we are not building an image, not using any compilation, and are // using multidex. - // This means extract, verify, and quicken will use the individual compilation mode (to reduce + // This means extract, verify, and quicken, will use the individual compilation mode (to reduce // RAM used by the compiler). - // TODO: Still do it for app images to get testing coverage. Note that this will generate empty - // app images. return !IsImage() && dex_files_.size() > 1 && - !CompilerFilter::IsAnyCompilationEnabled(compiler_options_->GetCompilerFilter()); + !CompilerFilter::IsAotCompilationEnabled(compiler_options_->GetCompilerFilter()); } // Set up and create the compiler driver and then invoke it to compile all the dex files. @@ -1856,6 +1854,16 @@ class Dex2Oat FINAL { profile_compilation_info_.get())); driver_->SetDexFilesForOatFile(dex_files_); + const bool compile_individually = ShouldCompileDexFilesIndividually(); + if (compile_individually) { + // Set the compiler driver in the callbacks so that we can avoid re-verification. This not + // only helps performance but also prevents reverifying quickened bytecodes. Attempting + // to verify quickened bytecode causes verification failures. + // Only set the compiler driver if we are doing separate compilation since there is a bit + // of overhead when checking if a class was previously verified. + callbacks_->SetDoesClassUnloading(true, driver_.get()); + } + // Setup vdex for compilation. if (!DoEagerUnquickeningOfVdex() && input_vdex_file_ != nullptr) { callbacks_->SetVerifierDeps( @@ -1872,7 +1880,7 @@ class Dex2Oat FINAL { callbacks_->SetVerifierDeps(new verifier::VerifierDeps(dex_files_)); } // Invoke the compilation. - if (ShouldCompileDexFilesIndividually()) { + if (compile_individually) { CompileDexFilesIndividually(); // Return a null classloader since we already released it. return nullptr; @@ -2966,6 +2974,8 @@ class ScopedGlobalRef { static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) { dex2oat.LoadClassProfileDescriptors(); + // Keep the class loader that was used for compilation live for the rest of the compilation + // process. ScopedGlobalRef class_loader(dex2oat.Compile()); if (!dex2oat.WriteOutputFiles()) { @@ -3014,6 +3024,8 @@ static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) { } static dex2oat::ReturnCode CompileApp(Dex2Oat& dex2oat) { + // Keep the class loader that was used for compilation live for the rest of the compilation + // process. 
ScopedGlobalRef class_loader(dex2oat.Compile()); if (!dex2oat.WriteOutputFiles()) { diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index 12bceb3c1f..6a9d979d52 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -41,6 +41,7 @@ namespace art { static constexpr size_t kMaxMethodIds = 65535; +static constexpr bool kDebugArgs = false; using android::base::StringPrintf; @@ -55,7 +56,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { } protected: - int GenerateOdexForTestWithStatus(const std::string& dex_location, + int GenerateOdexForTestWithStatus(const std::vector<std::string>& dex_locations, const std::string& odex_location, CompilerFilter::Filter filter, std::string* error_msg, @@ -63,7 +64,10 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { bool use_fd = false) { std::unique_ptr<File> oat_file; std::vector<std::string> args; - args.push_back("--dex-file=" + dex_location); + // Add dex file args. + for (const std::string& dex_location : dex_locations) { + args.push_back("--dex-file=" + dex_location); + } if (use_fd) { oat_file.reset(OS::CreateEmptyFile(odex_location.c_str())); CHECK(oat_file != nullptr) << odex_location; @@ -93,7 +97,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { bool use_fd = false, std::function<void(const OatFile&)> check_oat = [](const OatFile&) {}) { std::string error_msg; - int status = GenerateOdexForTestWithStatus(dex_location, + int status = GenerateOdexForTestWithStatus({dex_location}, odex_location, filter, &error_msg, @@ -187,6 +191,14 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { CHECK(android_root != nullptr); argv.push_back("--android-root=" + std::string(android_root)); + if (kDebugArgs) { + std::string all_args; + for (const std::string& arg : argv) { + all_args += arg + " "; + } + LOG(ERROR) << all_args; + } + int link[2]; if (pipe(link) == -1) { @@ -951,7 +963,7 @@ class Dex2oatReturnCodeTest : public Dex2oatTest { Copy(GetTestDexFileName(), dex_location); std::string error_msg; - return GenerateOdexForTestWithStatus(dex_location, + return GenerateOdexForTestWithStatus({dex_location}, odex_location, CompilerFilter::kSpeed, &error_msg, @@ -1107,4 +1119,72 @@ TEST_F(Dex2oatClassLoaderContextTest, ChainContext) { RunTest(context.c_str(), expected_classpath_key.c_str(), true); } +class Dex2oatDeterminism : public Dex2oatTest {}; + +TEST_F(Dex2oatDeterminism, UnloadCompile) { + if (!kUseReadBarrier && + gc::kCollectorTypeDefault != gc::kCollectorTypeCMS && + gc::kCollectorTypeDefault != gc::kCollectorTypeMS) { + LOG(INFO) << "Test requires determinism support."; + return; + } + Runtime* const runtime = Runtime::Current(); + std::string out_dir = GetScratchDir(); + const std::string base_oat_name = out_dir + "/base.oat"; + const std::string base_vdex_name = out_dir + "/base.vdex"; + const std::string unload_oat_name = out_dir + "/unload.oat"; + const std::string unload_vdex_name = out_dir + "/unload.vdex"; + const std::string no_unload_oat_name = out_dir + "/nounload.oat"; + const std::string no_unload_vdex_name = out_dir + "/nounload.vdex"; + const std::string app_image_name = out_dir + "/unload.art"; + std::string error_msg; + const std::vector<gc::space::ImageSpace*>& spaces = runtime->GetHeap()->GetBootImageSpaces(); + ASSERT_GT(spaces.size(), 0u); + const std::string image_location = spaces[0]->GetImageLocation(); + // Without passing in an app image, it will unload in between compilations. 
+ const int res = GenerateOdexForTestWithStatus( + GetLibCoreDexFileNames(), + base_oat_name, + CompilerFilter::Filter::kQuicken, + &error_msg, + {"--force-determinism", "--avoid-storing-invocation"}); + EXPECT_EQ(res, 0); + Copy(base_oat_name, unload_oat_name); + Copy(base_vdex_name, unload_vdex_name); + std::unique_ptr<File> unload_oat(OS::OpenFileForReading(unload_oat_name.c_str())); + std::unique_ptr<File> unload_vdex(OS::OpenFileForReading(unload_vdex_name.c_str())); + ASSERT_TRUE(unload_oat != nullptr); + ASSERT_TRUE(unload_vdex != nullptr); + EXPECT_GT(unload_oat->GetLength(), 0u); + EXPECT_GT(unload_vdex->GetLength(), 0u); + // Regenerate with an app image to disable the dex2oat unloading and verify that the output is + // the same. + const int res2 = GenerateOdexForTestWithStatus( + GetLibCoreDexFileNames(), + base_oat_name, + CompilerFilter::Filter::kQuicken, + &error_msg, + {"--force-determinism", "--avoid-storing-invocation", "--app-image-file=" + app_image_name}); + EXPECT_EQ(res2, 0); + Copy(base_oat_name, no_unload_oat_name); + Copy(base_vdex_name, no_unload_vdex_name); + std::unique_ptr<File> no_unload_oat(OS::OpenFileForReading(no_unload_oat_name.c_str())); + std::unique_ptr<File> no_unload_vdex(OS::OpenFileForReading(no_unload_vdex_name.c_str())); + ASSERT_TRUE(no_unload_oat != nullptr); + ASSERT_TRUE(no_unload_vdex != nullptr); + EXPECT_GT(no_unload_oat->GetLength(), 0u); + EXPECT_GT(no_unload_vdex->GetLength(), 0u); + // Verify that both of the files are the same (odex and vdex). + EXPECT_EQ(unload_oat->GetLength(), no_unload_oat->GetLength()); + EXPECT_EQ(unload_vdex->GetLength(), no_unload_vdex->GetLength()); + EXPECT_EQ(unload_oat->Compare(no_unload_oat.get()), 0) + << unload_oat_name << " " << no_unload_oat_name; + EXPECT_EQ(unload_vdex->Compare(no_unload_vdex.get()), 0) + << unload_vdex_name << " " << no_unload_vdex_name; + // App image file. 
+ std::unique_ptr<File> app_image_file(OS::OpenFileForReading(app_image_name.c_str())); + ASSERT_TRUE(app_image_file != nullptr); + EXPECT_GT(app_image_file->GetLength(), 0u); +} + } // namespace art diff --git a/runtime/aot_class_linker.cc b/runtime/aot_class_linker.cc index b1bc3f8f2e..9396565eee 100644 --- a/runtime/aot_class_linker.cc +++ b/runtime/aot_class_linker.cc @@ -16,10 +16,12 @@ #include "aot_class_linker.h" +#include "class_reference.h" +#include "compiler_callbacks.h" #include "handle_scope-inl.h" -#include "mirror/class.h" -#include "mirror/object-inl.h" +#include "mirror/class-inl.h" #include "runtime.h" +#include "verifier/verifier_enums.h" namespace art { @@ -66,4 +68,18 @@ bool AotClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass, } return success; } + +verifier::FailureKind AotClassLinker::PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) { + Runtime* const runtime = Runtime::Current(); + CompilerCallbacks* callbacks = runtime->GetCompilerCallbacks(); + if (callbacks->CanAssumeVerified(ClassReference(&klass->GetDexFile(), + klass->GetDexClassDefIndex()))) { + return verifier::FailureKind::kNoFailure; + } + return ClassLinker::PerformClassVerification(self, klass, log_level, error_msg); +} + } // namespace art diff --git a/runtime/aot_class_linker.h b/runtime/aot_class_linker.h index 11bea86fc4..927b53302b 100644 --- a/runtime/aot_class_linker.h +++ b/runtime/aot_class_linker.h @@ -27,6 +27,16 @@ class AotClassLinker : public ClassLinker { explicit AotClassLinker(InternTable *intern_table); ~AotClassLinker(); + protected: + // Overridden version of PerformClassVerification allows skipping verification if the class was + // previously verified but unloaded. 
+ verifier::FailureKind PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) + OVERRIDE + REQUIRES_SHARED(Locks::mutator_lock_); + bool InitializeClass(Thread *self, Handle<mirror::Class> klass, bool can_run_clinit, diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index fc14da1067..9756f5787f 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -3694,7 +3694,8 @@ bool ClassLinker::IsDexFileRegistered(Thread* self, const DexFile& dex_file) { ObjPtr<mirror::DexCache> ClassLinker::FindDexCache(Thread* self, const DexFile& dex_file) { ReaderMutexLock mu(self, *Locks::dex_lock_); - ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, FindDexCacheDataLocked(dex_file)); + DexCacheData dex_cache_data = FindDexCacheDataLocked(dex_file); + ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, dex_cache_data); if (dex_cache != nullptr) { return dex_cache; } @@ -3704,7 +3705,8 @@ ObjPtr<mirror::DexCache> ClassLinker::FindDexCache(Thread* self, const DexFile& LOG(FATAL_WITHOUT_ABORT) << "Registered dex file " << data.dex_file->GetLocation(); } } - LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation(); + LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation() + << " " << &dex_file << " " << dex_cache_data.dex_file; UNREACHABLE(); } @@ -4280,13 +4282,7 @@ verifier::FailureKind ClassLinker::VerifyClass( std::string error_msg; verifier::FailureKind verifier_failure = verifier::FailureKind::kNoFailure; if (!preverified) { - Runtime* runtime = Runtime::Current(); - verifier_failure = verifier::MethodVerifier::VerifyClass(self, - klass.Get(), - runtime->GetCompilerCallbacks(), - runtime->IsAotCompiler(), - log_level, - &error_msg); + verifier_failure = PerformClassVerification(self, klass, log_level, &error_msg); } // Verification is done, grab the lock again. 
@@ -4354,6 +4350,19 @@ verifier::FailureKind ClassLinker::VerifyClass( return verifier_failure; } +verifier::FailureKind ClassLinker::PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) { + Runtime* const runtime = Runtime::Current(); + return verifier::MethodVerifier::VerifyClass(self, + klass.Get(), + runtime->GetCompilerCallbacks(), + runtime->IsAotCompiler(), + log_level, + error_msg); +} + bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file, ObjPtr<mirror::Class> klass, mirror::Class::Status& oat_file_class_status) { diff --git a/runtime/class_linker.h b/runtime/class_linker.h index bf14aebb52..2fbbe79e47 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -710,6 +710,12 @@ class ClassLinker { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::dex_lock_); + virtual verifier::FailureKind PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) + REQUIRES_SHARED(Locks::mutator_lock_); + private: class LinkInterfaceMethodsHelper; diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h index 806653a265..c51bb5e176 100644 --- a/runtime/compiler_callbacks.h +++ b/runtime/compiler_callbacks.h @@ -22,6 +22,8 @@ namespace art { +class CompilerDriver; + namespace verifier { class MethodVerifier; @@ -49,6 +51,13 @@ class CompilerCallbacks { virtual verifier::VerifierDeps* GetVerifierDeps() const = 0; virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {} + virtual bool CanAssumeVerified(ClassReference ref ATTRIBUTE_UNUSED) { + return false; + } + + virtual void SetDoesClassUnloading(bool does_class_unloading ATTRIBUTE_UNUSED, + CompilerDriver* compiler_driver ATTRIBUTE_UNUSED) {} + bool IsBootImage() { return mode_ == CallbackMode::kCompileBootImage; } diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc index 726bddd334..b298db674c 100644 --- a/runtime/entrypoints/quick/quick_field_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc @@ -301,6 +301,7 @@ extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj) { extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED, mirror::Object* obj, uint32_t offset) { + // Used only in connection with non-volatile loads. DCHECK(kEmitCompilerReadBarrier); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset; mirror::HeapReference<mirror::Object>* ref_addr = @@ -308,9 +309,10 @@ extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUS constexpr ReadBarrierOption kReadBarrierOption = kUseReadBarrier ? 
kWithReadBarrier : kWithoutReadBarrier; mirror::Object* result = - ReadBarrier::Barrier<mirror::Object, kReadBarrierOption>(obj, - MemberOffset(offset), - ref_addr); + ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kReadBarrierOption>( + obj, + MemberOffset(offset), + ref_addr); return result; } diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc index fd0cd5f0b2..7d01af02a3 100644 --- a/runtime/fault_handler.cc +++ b/runtime/fault_handler.cc @@ -79,8 +79,7 @@ static mirror::Class* SafeGetDeclaringClass(ArtMethod* method) static mirror::Class* SafeGetClass(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) { char* obj_cls = reinterpret_cast<char*>(obj) + mirror::Object::ClassOffset().SizeValue(); - mirror::HeapReference<mirror::Class> cls = - mirror::HeapReference<mirror::Class>::FromMirrorPtr(nullptr); + mirror::HeapReference<mirror::Class> cls; ssize_t rc = SafeCopy(&cls, obj_cls, sizeof(cls)); CHECK_NE(-1, rc); diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index 290199579b..1b3d0dadae 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -115,8 +115,8 @@ class ModUnionUpdateObjectReferencesVisitor { } private: - template<bool kPoisonReferences> - void MarkReference(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) const + template<typename CompressedReferenceType> + void MarkReference(CompressedReferenceType* obj_ptr) const REQUIRES_SHARED(Locks::mutator_lock_) { // Only add the reference if it is non null and fits our criteria. mirror::Object* ref = obj_ptr->AsMirrorPtr(); diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index 92b4360123..280c0b1d69 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -48,16 +48,21 @@ SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap( CHECK(mem_map != nullptr); uintptr_t* bitmap_begin = reinterpret_cast<uintptr_t*>(mem_map->Begin()); const size_t bitmap_size = ComputeBitmapSize(heap_capacity); - return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin); + return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin, heap_capacity); } template<size_t kAlignment> -SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, - size_t bitmap_size, const void* heap_begin) +SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, + MemMap* mem_map, + uintptr_t* bitmap_begin, + size_t bitmap_size, + const void* heap_begin, + size_t heap_capacity) : mem_map_(mem_map), bitmap_begin_(reinterpret_cast<Atomic<uintptr_t>*>(bitmap_begin)), bitmap_size_(bitmap_size), heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)), + heap_limit_(reinterpret_cast<uintptr_t>(heap_begin) + heap_capacity), name_(name) { CHECK(bitmap_begin_ != nullptr); CHECK_NE(bitmap_size, 0U); @@ -89,6 +94,7 @@ void SpaceBitmap<kAlignment>::SetHeapLimit(uintptr_t new_end) { if (new_size < bitmap_size_) { bitmap_size_ = new_size; } + heap_limit_ = new_end; // Not sure if doing this trim is necessary, since nothing past the end of the heap capacity // should be marked. 
} diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index 2fe6394c0f..b49e0b7f89 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -163,6 +163,7 @@ class SpaceBitmap { void SetHeapSize(size_t bytes) { // TODO: Un-map the end of the mem map. + heap_limit_ = heap_begin_ + bytes; bitmap_size_ = OffsetToIndex(bytes) * sizeof(intptr_t); CHECK_EQ(HeapSize(), bytes); } @@ -173,7 +174,7 @@ class SpaceBitmap { // The maximum address which the bitmap can span. (HeapBegin() <= object < HeapLimit()). uint64_t HeapLimit() const { - return static_cast<uint64_t>(HeapBegin()) + HeapSize(); + return heap_limit_; } // Set the max address which can covered by the bitmap. @@ -196,8 +197,12 @@ class SpaceBitmap { private: // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1, // however, we document that this is expected on heap_end_ - SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size, - const void* heap_begin); + SpaceBitmap(const std::string& name, + MemMap* mem_map, + uintptr_t* bitmap_begin, + size_t bitmap_size, + const void* heap_begin, + size_t heap_capacity); template<bool kSetBit> bool Modify(const mirror::Object* obj); @@ -211,10 +216,13 @@ class SpaceBitmap { // Size of this bitmap. size_t bitmap_size_; - // The base address of the heap, which corresponds to the word containing the first bit in the - // bitmap. + // The start address of the memory covered by the bitmap, which corresponds to the word + // containing the first bit in the bitmap. const uintptr_t heap_begin_; + // The end address of the memory covered by the bitmap. This may not be on a word boundary. + uintptr_t heap_limit_; + // Name of this bitmap. std::string name_; }; diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 9b24885f53..52b355dedd 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -2464,6 +2464,7 @@ mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { } bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) { + // TODO: Explain why this is here. What release operation does it pair with? QuasiAtomic::ThreadFenceAcquire(); accounting::ObjectStack* alloc_stack = GetAllocationStack(); return alloc_stack->Contains(ref); @@ -2624,9 +2625,8 @@ bool ConcurrentCopying::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror } } while (!field->CasWeakRelaxed(from_ref, to_ref)); } else { - QuasiAtomic::ThreadFenceRelease(); - field->Assign(to_ref); - QuasiAtomic::ThreadFenceSequentiallyConsistent(); + // TODO: Why is this seq_cst when the above is relaxed? Document memory ordering. + field->Assign</* kIsVolatile */ true>(to_ref); } } return true; diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index dec206be30..f722e8d855 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -113,7 +113,7 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark virtual mirror::Object* IsMarked(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) = 0; // Returns true if the given heap reference is null or is already marked. If it's already marked, - // update the reference (uses a CAS if do_atomic_update is true. Otherwise, returns false. + // update the reference (uses a CAS if do_atomic_update is true). 
Otherwise, returns false. virtual bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj, bool do_atomic_update) REQUIRES_SHARED(Locks::mutator_lock_) = 0; diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h index 78fb2d24ae..7db5d2ca20 100644 --- a/runtime/gc/collector/semi_space-inl.h +++ b/runtime/gc/collector/semi_space-inl.h @@ -38,9 +38,8 @@ inline mirror::Object* SemiSpace::GetForwardingAddressInFromSpace(mirror::Object // Used to mark and copy objects. Any newly-marked objects who are in the from space Get moved to // the to-space and have their forward address updated. Objects which have been newly marked are // pushed on the mark stack. -template<bool kPoisonReferences> -inline void SemiSpace::MarkObject( - mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) { +template<typename CompressedReferenceType> +inline void SemiSpace::MarkObject(CompressedReferenceType* obj_ptr) { mirror::Object* obj = obj_ptr->AsMirrorPtr(); if (obj == nullptr) { return; @@ -73,9 +72,8 @@ inline void SemiSpace::MarkObject( } } -template<bool kPoisonReferences> -inline void SemiSpace::MarkObjectIfNotInToSpace( - mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) { +template<typename CompressedReferenceType> +inline void SemiSpace::MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr) { if (!to_space_->HasAddress(obj_ptr->AsMirrorPtr())) { MarkObject(obj_ptr); } diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index fd52da3947..6d4d789a72 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -97,13 +97,13 @@ class SemiSpace : public GarbageCollector { // Find the default mark bitmap. void FindDefaultMarkBitmap(); - // Updates obj_ptr if the object has moved. - template<bool kPoisonReferences> - void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) + // Updates obj_ptr if the object has moved. Takes either an ObjectReference or a HeapReference. + template<typename CompressedReferenceType> + void MarkObject(CompressedReferenceType* obj_ptr) REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_); - template<bool kPoisonReferences> - void MarkObjectIfNotInToSpace(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) + template<typename CompressedReferenceType> + void MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr) REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_); virtual mirror::Object* MarkObject(mirror::Object* root) OVERRIDE diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc index 79b775a510..9baa016dcd 100644 --- a/runtime/gc/space/large_object_space_test.cc +++ b/runtime/gc/space/large_object_space_test.cc @@ -38,12 +38,19 @@ void LargeObjectSpaceTest::LargeObjectTest() { Thread* const self = Thread::Current(); for (size_t i = 0; i < 2; ++i) { LargeObjectSpace* los = nullptr; + const size_t capacity = 128 * MB; if (i == 0) { los = space::LargeObjectMapSpace::Create("large object space"); } else { - los = space::FreeListSpace::Create("large object space", nullptr, 128 * MB); + los = space::FreeListSpace::Create("large object space", nullptr, capacity); } + // Make sure the bitmap is not empty and actually covers at least how much we expect. 
+ CHECK_LT(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin()), + static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit())); + CHECK_LE(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin() + capacity), + static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit())); + static const size_t num_allocations = 64; static const size_t max_allocation_size = 0x100000; std::vector<std::pair<mirror::Object*, size_t>> requests; diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc index c2a8de3aec..c99412785e 100644 --- a/runtime/gc/space/malloc_space.cc +++ b/runtime/gc/space/malloc_space.cc @@ -191,14 +191,10 @@ ZygoteSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool l VLOG(heap) << "Size " << GetMemMap()->Size(); VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit); VLOG(heap) << "Capacity " << PrettySize(capacity); - // Remap the tail. Pass MAP_PRIVATE since we don't want to share the same ashmem as the zygote - // space. + // Remap the tail. std::string error_msg; - std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), - alloc_space_name, - PROT_READ | PROT_WRITE, - MAP_PRIVATE, - &error_msg)); + std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name, + PROT_READ | PROT_WRITE, &error_msg)); CHECK(mem_map.get() != nullptr) << error_msg; void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity, low_memory_mode); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index b228e28a22..5b942f2e73 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -141,11 +141,8 @@ static inline bool DoFastInvoke(Thread* self, return false; } else { jit::Jit* jit = Runtime::Current()->GetJit(); - if (jit != nullptr) { - if (type == kVirtual) { - jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); - } - jit->AddSamples(self, sf_method, 1, /*with_backedges*/false); + if (jit != nullptr && type == kVirtual) { + jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); } if (called_method->IsIntrinsic()) { if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data, @@ -182,11 +179,8 @@ static inline bool DoInvoke(Thread* self, return false; } else { jit::Jit* jit = Runtime::Current()->GetJit(); - if (jit != nullptr) { - if (type == kVirtual || type == kInterface) { - jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); - } - jit->AddSamples(self, sf_method, 1, /*with_backedges*/false); + if (jit != nullptr && (type == kVirtual || type == kInterface)) { + jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); } // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT. 
if (type == kVirtual || type == kInterface) { diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 5955b9001a..88254a8cdc 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -280,7 +280,6 @@ extern "C" size_t MterpInvokeVirtualQuick(Thread* self, if (jit != nullptr) { jit->InvokeVirtualOrInterface( receiver, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), called_method); - jit->AddSamples(self, shadow_frame->GetMethod(), 1, /*with_backedges*/false); } return !self->IsExceptionPending(); } diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index 2c7282115f..ce06a036ec 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -1438,7 +1438,11 @@ void UnstartedRuntime::UnstartedUnsafeCompareAndSwapObject( mirror::HeapReference<mirror::Object>* field_addr = reinterpret_cast<mirror::HeapReference<mirror::Object>*>( reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset)); - ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>( + ReadBarrier::Barrier< + mirror::Object, + /* kIsVolatile */ false, + kWithReadBarrier, + /* kAlwaysUpdateField */ true>( obj, MemberOffset(offset), field_addr); diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 31319f1106..47ace7fa71 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -47,13 +47,9 @@ namespace jit { static constexpr int kProtAll = PROT_READ | PROT_WRITE | PROT_EXEC; static constexpr int kProtData = PROT_READ | PROT_WRITE; static constexpr int kProtCode = PROT_READ | PROT_EXEC; -static constexpr int kProtReadOnly = PROT_READ; -static constexpr int kProtNone = PROT_NONE; static constexpr size_t kCodeSizeLogThreshold = 50 * KB; static constexpr size_t kStackMapSizeLogThreshold = 50 * KB; -static constexpr size_t kMinMapSpacingPages = 1; -static constexpr size_t kMaxMapSpacingPages = 128; #define CHECKED_MPROTECT(memory, size, prot) \ do { \ @@ -64,39 +60,12 @@ static constexpr size_t kMaxMapSpacingPages = 128; } \ } while (false) \ -static MemMap* SplitMemMap(MemMap* existing_map, - const char* name, - size_t split_offset, - int split_prot, - std::string* error_msg, - bool use_ashmem, - unique_fd* shmem_fd = nullptr) { - std::string error_str; - uint8_t* divider = existing_map->Begin() + split_offset; - MemMap* new_map = existing_map->RemapAtEnd(divider, - name, - split_prot, - MAP_SHARED, - &error_str, - use_ashmem, - shmem_fd); - if (new_map == nullptr) { - std::ostringstream oss; - oss << "Failed to create spacing for " << name << ": " - << error_str << " offset=" << split_offset; - *error_msg = oss.str(); - return nullptr; - } - return new_map; -} - JitCodeCache* JitCodeCache::Create(size_t initial_capacity, size_t max_capacity, bool generate_debug_info, std::string* error_msg) { ScopedTrace trace(__PRETTY_FUNCTION__); - CHECK_GT(max_capacity, initial_capacity); - CHECK_GE(max_capacity - kMaxMapSpacingPages * kPageSize, initial_capacity); + CHECK_GE(max_capacity, initial_capacity); // Generating debug information is for using the Linux perf tool on // host which does not work with ashmem. @@ -106,10 +75,6 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, // With 'perf', we want a 1-1 mapping between an address and a method. bool garbage_collect_code = !generate_debug_info; - // We only use two mappings (separating rw from rx) if we are able to use ashmem. 
- // See the above comment for debug information and not using ashmem. - bool use_two_mappings = use_ashmem; - // We need to have 32 bit offsets from method headers in code cache which point to things // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. // Ensure we're below 1 GB to be safe. @@ -121,10 +86,6 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, return nullptr; } - // Align both capacities to page size, as that's the unit mspaces use. - initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); - max_capacity = RoundDown(max_capacity, 2 * kPageSize); - std::string error_str; // Map name specific for android_os_Debug.cpp accounting. // Map in low 4gb to simplify accessing root tables for x86_64. @@ -146,138 +107,35 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, return nullptr; } - // Create a region for JIT data and executable code. This will be - // laid out as: - // - // +----------------+ -------------------- - // | code_sync_map_ | ^ code_sync_size ^ - // | | v | - // +----------------+ -- | - // : : ^ | - // : post_code_map : | post_code_size | - // : [padding] : v | - // +----------------+ - | - // | | ^ | - // | code_map | | code_size | total_mapping_size - // | [JIT Code] | v | - // +----------------+ - | - // : : ^ | - // : pre_code_map : | pre_code_size | - // : [padding] : v | - // +----------------+ - | - // | | ^ | - // | data_map | | data_size | - // | [Jit Data] | v v - // +----------------+ -------------------- - // - // The code_sync_map_ contains a page that we use flush CPU instruction - // pipelines (see FlushInstructionPipelines()). - // - // The padding regions - pre_code_map and post_code_map - exist to - // put some random distance between the writable JIT code mapping - // and the executable mapping. The padding is discarded at the end - // of this function. - // - size_t data_size = (max_capacity - kMaxMapSpacingPages * kPageSize) / 2; - size_t pre_code_size = - GetRandomNumber(kMinMapSpacingPages, kMaxMapSpacingPages - 1) * kPageSize; - size_t code_size = max_capacity - data_size - kMaxMapSpacingPages * kPageSize; - size_t code_sync_size = kPageSize; - size_t post_code_size = kMaxMapSpacingPages * kPageSize - pre_code_size - code_sync_size; - DCHECK_EQ(data_size, code_size); - DCHECK_EQ(pre_code_size + post_code_size + code_sync_size, kMaxMapSpacingPages * kPageSize); - DCHECK_EQ(data_size + pre_code_size + code_size + post_code_size + code_sync_size, max_capacity); - - // Create pre-code padding region after data region, discarded after - // code and data regions are set-up. - std::unique_ptr<MemMap> pre_code_map(SplitMemMap(data_map.get(), - "jit-code-cache-padding", - data_size, - kProtNone, - error_msg, - use_ashmem)); - if (pre_code_map == nullptr) { - return nullptr; - } - DCHECK_EQ(data_map->Size(), data_size); - DCHECK_EQ(pre_code_map->Size(), pre_code_size + code_size + post_code_size + code_sync_size); - - // Create code region. - unique_fd writable_code_fd; - std::unique_ptr<MemMap> code_map(SplitMemMap(pre_code_map.get(), - "jit-code-cache", - pre_code_size, - use_two_mappings ? kProtCode : kProtAll, - error_msg, - use_ashmem, - &writable_code_fd)); - if (code_map == nullptr) { - return nullptr; - } - DCHECK_EQ(pre_code_map->Size(), pre_code_size); - DCHECK_EQ(code_map->Size(), code_size + post_code_size + code_sync_size); - - // Padding after code region, discarded after code and data regions - // are set-up. 
- std::unique_ptr<MemMap> post_code_map(SplitMemMap(code_map.get(), - "jit-code-cache-padding", - code_size, - kProtNone, - error_msg, - use_ashmem)); - if (post_code_map == nullptr) { - return nullptr; - } - DCHECK_EQ(code_map->Size(), code_size); - DCHECK_EQ(post_code_map->Size(), post_code_size + code_sync_size); + // Align both capacities to page size, as that's the unit mspaces use. + initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); + max_capacity = RoundDown(max_capacity, 2 * kPageSize); + + // Data cache is 1 / 2 of the map. + // TODO: Make this variable? + size_t data_size = max_capacity / 2; + size_t code_size = max_capacity - data_size; + DCHECK_EQ(code_size + data_size, max_capacity); + uint8_t* divider = data_map->Begin() + data_size; - std::unique_ptr<MemMap> code_sync_map(SplitMemMap(post_code_map.get(), - "jit-code-sync", - post_code_size, - kProtCode, - error_msg, - use_ashmem)); - if (code_sync_map == nullptr) { + MemMap* code_map = + data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem); + if (code_map == nullptr) { + std::ostringstream oss; + oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; + *error_msg = oss.str(); return nullptr; } - DCHECK_EQ(post_code_map->Size(), post_code_size); - DCHECK_EQ(code_sync_map->Size(), code_sync_size); - - std::unique_ptr<MemMap> writable_code_map; - if (use_two_mappings) { - // Allocate the R/W view. - writable_code_map.reset(MemMap::MapFile(code_size, - kProtData, - MAP_SHARED, - writable_code_fd.get(), - /* start */ 0, - /* low_4gb */ true, - "jit-writable-code", - &error_str)); - if (writable_code_map == nullptr) { - std::ostringstream oss; - oss << "Failed to create writable code cache: " << error_str << " size=" << code_size; - *error_msg = oss.str(); - return nullptr; - } - } + DCHECK_EQ(code_map->Begin(), divider); data_size = initial_capacity / 2; code_size = initial_capacity - data_size; DCHECK_EQ(code_size + data_size, initial_capacity); - return new JitCodeCache(writable_code_map.release(), - code_map.release(), - data_map.release(), - code_sync_map.release(), - code_size, - data_size, - max_capacity, - garbage_collect_code); + return new JitCodeCache( + code_map, data_map.release(), code_size, data_size, max_capacity, garbage_collect_code); } -JitCodeCache::JitCodeCache(MemMap* writable_code_map, - MemMap* executable_code_map, +JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map, - MemMap* code_sync_map, size_t initial_code_capacity, size_t initial_data_capacity, size_t max_capacity, @@ -285,10 +143,8 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, : lock_("Jit code cache", kJitCodeCacheLock), lock_cond_("Jit code cache condition variable", lock_), collection_in_progress_(false), + code_map_(code_map), data_map_(data_map), - executable_code_map_(executable_code_map), - writable_code_map_(writable_code_map), - code_sync_map_(code_sync_map), max_capacity_(max_capacity), current_capacity_(initial_code_capacity + initial_data_capacity), code_end_(initial_code_capacity), @@ -308,8 +164,7 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, inline_cache_cond_("Jit inline cache condition variable", lock_) { DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity); - MemMap* writable_map = GetWritableMemMap(); - code_mspace_ = create_mspace_with_base(writable_map->Begin(), code_end_, false /*locked*/); + code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); data_mspace_ = 
create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); if (code_mspace_ == nullptr || data_mspace_ == nullptr) { @@ -318,10 +173,7 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, SetFootprintLimit(current_capacity_); - if (writable_code_map_ != nullptr) { - CHECKED_MPROTECT(writable_code_map_->Begin(), writable_code_map_->Size(), kProtReadOnly); - } - CHECKED_MPROTECT(executable_code_map_->Begin(), executable_code_map_->Size(), kProtCode); + CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode); CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData); VLOG(jit) << "Created jit code cache: initial data size=" @@ -331,7 +183,7 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, } bool JitCodeCache::ContainsPc(const void* ptr) const { - return executable_code_map_->Begin() <= ptr && ptr < executable_code_map_->End(); + return code_map_->Begin() <= ptr && ptr < code_map_->End(); } bool JitCodeCache::ContainsMethod(ArtMethod* method) { @@ -344,96 +196,27 @@ bool JitCodeCache::ContainsMethod(ArtMethod* method) { return false; } -/* This method is only for CHECK/DCHECK that pointers are within to a region. */ -static bool IsAddressInMap(const void* addr, - const MemMap* mem_map, - const char* check_name) { - if (addr == nullptr || mem_map->HasAddress(addr)) { - return true; - } - LOG(ERROR) << "Is" << check_name << "Address " << addr - << " not in [" << reinterpret_cast<void*>(mem_map->Begin()) - << ", " << reinterpret_cast<void*>(mem_map->Begin() + mem_map->Size()) << ")"; - return false; -} - -bool JitCodeCache::IsDataAddress(const void* raw_addr) const { - return IsAddressInMap(raw_addr, data_map_.get(), "Data"); -} - -bool JitCodeCache::IsExecutableAddress(const void* raw_addr) const { - return IsAddressInMap(raw_addr, executable_code_map_.get(), "Executable"); -} - -bool JitCodeCache::IsWritableAddress(const void* raw_addr) const { - return IsAddressInMap(raw_addr, GetWritableMemMap(), "Writable"); -} - -// Convert one address within the source map to the same offset within the destination map. 
-static void* ConvertAddress(const void* source_address, - const MemMap* source_map, - const MemMap* destination_map) { - DCHECK(source_map->HasAddress(source_address)) << source_address; - ptrdiff_t offset = reinterpret_cast<const uint8_t*>(source_address) - source_map->Begin(); - uintptr_t address = reinterpret_cast<uintptr_t>(destination_map->Begin()) + offset; - return reinterpret_cast<void*>(address); -} - -template <typename T> -T* JitCodeCache::ToExecutableAddress(T* writable_address) const { - CHECK(IsWritableAddress(writable_address)); - if (writable_address == nullptr) { - return nullptr; - } - void* executable_address = ConvertAddress(writable_address, - GetWritableMemMap(), - executable_code_map_.get()); - CHECK(IsExecutableAddress(executable_address)); - return reinterpret_cast<T*>(executable_address); -} - -void* JitCodeCache::ToWritableAddress(const void* executable_address) const { - CHECK(IsExecutableAddress(executable_address)); - if (executable_address == nullptr) { - return nullptr; - } - void* writable_address = ConvertAddress(executable_address, - executable_code_map_.get(), - GetWritableMemMap()); - CHECK(IsWritableAddress(writable_address)); - return writable_address; -} - class ScopedCodeCacheWrite : ScopedTrace { public: - explicit ScopedCodeCacheWrite(JitCodeCache* code_cache) - : ScopedTrace("ScopedCodeCacheWrite") { + explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false) + : ScopedTrace("ScopedCodeCacheWrite"), + code_map_(code_map), + only_for_tlb_shootdown_(only_for_tlb_shootdown) { ScopedTrace trace("mprotect all"); - int prot_to_start_writing = kProtAll; - if (code_cache->writable_code_map_ == nullptr) { - // If there is only one mapping, use the executable mapping and toggle between rwx and rx. - prot_to_start_writing = kProtAll; - prot_to_stop_writing_ = kProtCode; - } else { - // If there are two mappings, use the writable mapping and toggle between rw and r. - prot_to_start_writing = kProtData; - prot_to_stop_writing_ = kProtReadOnly; - } - writable_map_ = code_cache->GetWritableMemMap(); - // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to - // one page. - size_ = writable_map_->Size(); - CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_start_writing); + CHECKED_MPROTECT( + code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtAll); } ~ScopedCodeCacheWrite() { ScopedTrace trace("mprotect code"); - CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_stop_writing_); + CHECKED_MPROTECT( + code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtCode); } - private: - int prot_to_stop_writing_; - MemMap* writable_map_; - size_t size_; + MemMap* const code_map_; + + // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to + // one page. 
+ const bool only_for_tlb_shootdown_; DISALLOW_COPY_AND_ASSIGN(ScopedCodeCacheWrite); }; @@ -448,6 +231,7 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -462,6 +246,7 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, fp_spill_mask, code, code_size, + data_size, osr, roots, has_should_deoptimize_flag, @@ -479,6 +264,7 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, fp_spill_mask, code, code_size, + data_size, osr, roots, has_should_deoptimize_flag, @@ -540,10 +326,8 @@ static void FillRootTable(uint8_t* roots_data, Handle<mirror::ObjectArray<mirror } } -uint8_t* JitCodeCache::GetRootTable(const void* code_ptr, uint32_t* number_of_roots) { - CHECK(IsExecutableAddress(code_ptr)); +static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) { OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); - // GetOptimizedCodeInfoPtr uses offsets relative to the EXECUTABLE address. uint8_t* data = method_header->GetOptimizedCodeInfoPtr(); uint32_t roots = GetNumberOfRoots(data); if (number_of_roots != nullptr) { @@ -588,8 +372,6 @@ static inline void ProcessWeakClass(GcRoot<mirror::Class>* root_ptr, void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) { MutexLock mu(Thread::Current(), lock_); for (const auto& entry : method_code_map_) { - // GetRootTable takes an EXECUTABLE address. - CHECK(IsExecutableAddress(entry.first)); uint32_t number_of_roots = 0; uint8_t* roots_data = GetRootTable(entry.first, &number_of_roots); GcRoot<mirror::Object>* roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data); @@ -627,19 +409,17 @@ void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) { } } -void JitCodeCache::FreeCodeAndData(const void* code_ptr) { - CHECK(IsExecutableAddress(code_ptr)); +void JitCodeCache::FreeCode(const void* code_ptr) { + uintptr_t allocation = FromCodeToAllocation(code_ptr); // Notify native debugger that we are about to remove the code. // It does nothing if we are not using native debugger. DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr)); - // GetRootTable takes an EXECUTABLE address. FreeData(GetRootTable(code_ptr)); - FreeRawCode(reinterpret_cast<uint8_t*>(FromCodeToAllocation(code_ptr))); + FreeCode(reinterpret_cast<uint8_t*>(allocation)); } void JitCodeCache::FreeAllMethodHeaders( const std::unordered_set<OatQuickMethodHeader*>& method_headers) { - // method_headers are expected to be in the executable region. { MutexLock mu(Thread::Current(), *Locks::cha_lock_); Runtime::Current()->GetClassLinker()->GetClassHierarchyAnalysis() @@ -651,9 +431,9 @@ void JitCodeCache::FreeAllMethodHeaders( // so it's possible for the same method_header to start representing // different compile code. MutexLock mu(Thread::Current(), lock_); - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); for (const OatQuickMethodHeader* method_header : method_headers) { - FreeCodeAndData(method_header->GetCode()); + FreeCode(method_header->GetCode()); } } @@ -670,10 +450,9 @@ void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) { // with the classlinker_classes_lock_ held, and suspending ourselves could // lead to a deadlock. 
{ - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); for (auto it = method_code_map_.begin(); it != method_code_map_.end();) { if (alloc.ContainsUnsafe(it->second)) { - CHECK(IsExecutableAddress(OatQuickMethodHeader::FromCodePointer(it->first))); method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first)); it = method_code_map_.erase(it); } else { @@ -765,129 +544,6 @@ static void ClearMethodCounter(ArtMethod* method, bool was_warm) { method->SetCounter(std::min(jit_warmup_threshold - 1, 1)); } -static void FlushInstructionPiplines(uint8_t* sync_page) { - // After updating the JIT code cache we need to force all CPUs to - // flush their instruction pipelines. In the absence of system call - // to do this explicitly, we can achieve this indirectly by toggling - // permissions on an executable page. This should send an IPI to - // each core to update the TLB entry with the interrupt raised on - // each core causing the instruction pipeline to be flushed. - CHECKED_MPROTECT(sync_page, kPageSize, kProtAll); - // Ensure the sync_page is present otherwise a TLB update may not be - // necessary. - sync_page[0] = 0; - CHECKED_MPROTECT(sync_page, kPageSize, kProtCode); -} - -#ifdef __aarch64__ - -static void FlushJitCodeCacheRange(uint8_t* code_ptr, - uint8_t* writable_ptr, - size_t code_size) { - // Cache maintenance instructions can cause permission faults when a - // page is not present (e.g. swapped out or not backed). These - // faults should be handled by the kernel, but a bug in some Linux - // kernels may surface these permission faults to user-land which - // does not currently deal with them (b/63885946). To work around - // this, we read a value from each page to fault it in before - // attempting to perform cache maintenance operations. - // - // For reference, this behavior is caused by this commit: - // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c - - // The cache-line size could be probed for from the CPU, but - // assuming a safe lower bound is safe for CPUs that have different - // cache-line sizes for big and little cores. - static const uintptr_t kSafeCacheLineSize = 32; - - // Ensure stores are present in L1 data cache. - __asm __volatile("dsb ish" ::: "memory"); - - volatile uint8_t mutant; - - // Push dirty cache-lines out to the point of unification (PoU). The - // point of unification is the first point in the cache/memory - // hierarchy where the instruction cache and data cache have the - // same view of memory. The PoU is where an instruction fetch will - // fetch the new code generated by the JIT. - // - // See: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch11s04.html - uintptr_t writable_addr = RoundDown(reinterpret_cast<uintptr_t>(writable_ptr), - kSafeCacheLineSize); - uintptr_t writable_end = RoundUp(reinterpret_cast<uintptr_t>(writable_ptr) + code_size, - kSafeCacheLineSize); - while (writable_addr < writable_end) { - // Read from the cache-line to minimize the chance that a cache - // maintenance instruction causes a fault (see kernel bug comment - // above). 
- mutant = *reinterpret_cast<const uint8_t*>(writable_addr); - - // Flush cache-line - __asm volatile("dc cvau, %0" :: "r"(writable_addr) : "memory"); - writable_addr += kSafeCacheLineSize; - } - - __asm __volatile("dsb ish" ::: "memory"); - - uintptr_t code_addr = RoundDown(reinterpret_cast<uintptr_t>(code_ptr), kSafeCacheLineSize); - const uintptr_t code_end = RoundUp(reinterpret_cast<uintptr_t>(code_ptr) + code_size, - kSafeCacheLineSize); - while (code_addr < code_end) { - // Read from the cache-line to minimize the chance that a cache - // maintenance instruction causes a fault (see kernel bug comment - // above). - mutant = *reinterpret_cast<const uint8_t*>(code_addr); - - // Invalidating the data cache line is only strictly necessary - // when the JIT code cache has two mappings (the default). We know - // this cache line is clean so this is just invalidating it (using - // "dc ivac" would be preferable, but counts as a write and this - // memory may not be mapped write permission). - __asm volatile("dc cvau, %0" :: "r"(code_addr) : "memory"); - - // Invalidate the instruction cache line to force instructions in - // range to be re-fetched following update. - __asm volatile("ic ivau, %0" :: "r"(code_addr) : "memory"); - - code_addr += kSafeCacheLineSize; - } - - // Wait for code cache invalidations to complete. - __asm __volatile("dsb ish" ::: "memory"); - - // Reset fetched instruction stream. - __asm __volatile("isb"); -} - -#else // __aarch64 - -static void FlushJitCodeCacheRange(uint8_t* code_ptr, - uint8_t* writable_ptr, - size_t code_size) { - if (writable_ptr != code_ptr) { - // When there are two mappings of the JIT code cache, RX and - // RW, flush the RW version first as we've just dirtied the - // cache lines with new code. Flushing the RX version first - // can cause a permission fault as the those addresses are not - // writable, but can appear dirty in the cache. There is a lot - // of potential subtlety here depending on how the cache is - // indexed and tagged. - // - // Flushing the RX version after the RW version is just - // invalidating cachelines in the instruction cache. This is - // necessary as the instruction cache will often have a - // different set of cache lines present and because the JIT - // code cache can start a new function at any boundary within - // a cache-line. 
- FlushDataCache(reinterpret_cast<char*>(writable_ptr), - reinterpret_cast<char*>(writable_ptr + code_size)); - } - FlushInstructionCache(reinterpret_cast<char*>(code_ptr), - reinterpret_cast<char*>(code_ptr + code_size)); -} - -#endif // __aarch64 - uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, ArtMethod* method, uint8_t* stack_map, @@ -898,6 +554,7 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -917,37 +574,35 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, MutexLock mu(self, lock_); WaitForPotentialCollectionToComplete(self); { - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); memory = AllocateCode(total_size); if (memory == nullptr) { return nullptr; } - uint8_t* writable_ptr = memory + header_size; - code_ptr = ToExecutableAddress(writable_ptr); - - std::copy(code, code + code_size, writable_ptr); - OatQuickMethodHeader* writable_method_header = - OatQuickMethodHeader::FromCodePointer(writable_ptr); - // We need to be able to write the OatQuickMethodHeader, so we use writable_method_header. - // Otherwise, the offsets encoded in OatQuickMethodHeader are used relative to an executable - // address, so we use code_ptr. - new (writable_method_header) OatQuickMethodHeader( + code_ptr = memory + header_size; + + std::copy(code, code + code_size, code_ptr); + method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); + new (method_header) OatQuickMethodHeader( code_ptr - stack_map, code_ptr - method_info, frame_size_in_bytes, core_spill_mask, fp_spill_mask, code_size); - - FlushJitCodeCacheRange(code_ptr, writable_ptr, code_size); - FlushInstructionPiplines(code_sync_map_->Begin()); - + // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may + // trigger a segfault if a page fault occurs when requesting a cache maintenance operation. + // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and + // 6P) stop being supported or their kernels are fixed. + // + // For reference, this behavior is caused by this commit: + // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c + FlushInstructionCache(reinterpret_cast<char*>(code_ptr), + reinterpret_cast<char*>(code_ptr + code_size)); DCHECK(!Runtime::Current()->IsAotCompiler()); if (has_should_deoptimize_flag) { - writable_method_header->SetHasShouldDeoptimizeFlag(); + method_header->SetHasShouldDeoptimizeFlag(); } - // All the pointers exported from the cache are executable addresses. - method_header = ToExecutableAddress(writable_method_header); } number_of_compilations_++; @@ -986,14 +641,16 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, // but below we still make the compiled code valid for the method. MutexLock mu(self, lock_); // Fill the root table before updating the entry point. - CHECK(IsDataAddress(roots_data)); DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data); DCHECK_LE(roots_data, stack_map); FillRootTable(roots_data, roots); - - // Ensure the updates to the root table are visible with a store fence. - QuasiAtomic::ThreadFenceSequentiallyConsistent(); - + { + // Flush data cache, as compiled code references literals in it. + // We also need a TLB shootdown to act as memory barrier across cores. 
+ ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true); + FlushDataCache(reinterpret_cast<char*>(roots_data), + reinterpret_cast<char*>(roots_data + data_size)); + } method_code_map_.Put(code_ptr, method); if (osr) { number_of_osr_compilations_++; @@ -1041,11 +698,11 @@ bool JitCodeCache::RemoveMethod(ArtMethod* method, bool release_memory) { bool in_cache = false; { - ScopedCodeCacheWrite ccw(this); + ScopedCodeCacheWrite ccw(code_map_.get()); for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) { if (code_iter->second == method) { if (release_memory) { - FreeCodeAndData(code_iter->first); + FreeCode(code_iter->first); } code_iter = method_code_map_.erase(code_iter); in_cache = true; @@ -1099,10 +756,10 @@ void JitCodeCache::NotifyMethodRedefined(ArtMethod* method) { profiling_infos_.erase(profile); } method->SetProfilingInfo(nullptr); - ScopedCodeCacheWrite ccw(this); + ScopedCodeCacheWrite ccw(code_map_.get()); for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) { if (code_iter->second == method) { - FreeCodeAndData(code_iter->first); + FreeCode(code_iter->first); code_iter = method_code_map_.erase(code_iter); continue; } @@ -1168,7 +825,6 @@ void JitCodeCache::ClearData(Thread* self, uint8_t* stack_map_data, uint8_t* roots_data) { DCHECK_EQ(FromStackMapToRoots(stack_map_data), roots_data); - CHECK(IsDataAddress(roots_data)); MutexLock mu(self, lock_); FreeData(reinterpret_cast<uint8_t*>(roots_data)); } @@ -1290,11 +946,11 @@ void JitCodeCache::NotifyCollectionDone(Thread* self) { void JitCodeCache::SetFootprintLimit(size_t new_footprint) { size_t per_space_footprint = new_footprint / 2; - CHECK(IsAlignedParam(per_space_footprint, kPageSize)); + DCHECK(IsAlignedParam(per_space_footprint, kPageSize)); DCHECK_EQ(per_space_footprint * 2, new_footprint); mspace_set_footprint_limit(data_mspace_, per_space_footprint); { - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); mspace_set_footprint_limit(code_mspace_, per_space_footprint); } } @@ -1370,8 +1026,8 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { number_of_collections_++; live_bitmap_.reset(CodeCacheBitmap::Create( "code-cache-bitmap", - reinterpret_cast<uintptr_t>(executable_code_map_->Begin()), - reinterpret_cast<uintptr_t>(executable_code_map_->Begin() + current_capacity_ / 2))); + reinterpret_cast<uintptr_t>(code_map_->Begin()), + reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2))); collection_in_progress_ = true; } } @@ -1443,16 +1099,14 @@ void JitCodeCache::RemoveUnmarkedCode(Thread* self) { std::unordered_set<OatQuickMethodHeader*> method_headers; { MutexLock mu(self, lock_); - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); // Iterate over all compiled code and remove entries that are not marked. 
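As background for the FlushInstructionCache call and the data-cache flush added in this hunk: once new machine code has been copied into the cache, the data cache must be cleaned and the instruction cache invalidated over that range before the code may run. A hedged sketch of that step using the GCC/Clang __builtin___clear_cache builtin; whether ART's FlushInstructionCache wrapper is built on this builtin is not shown in this diff, and CopyAndFlushCode is an illustrative name:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Copy freshly generated machine code into an executable buffer and make it
// safe to execute: __builtin___clear_cache performs the required data-cache
// clean and instruction-cache invalidation over [begin, end) on architectures
// that need it, and is effectively a no-op on x86, which keeps the two
// coherent in hardware.
static void CopyAndFlushCode(uint8_t* target, const uint8_t* code, size_t size) {
  std::memcpy(target, code, size);
  __builtin___clear_cache(reinterpret_cast<char*>(target),
                          reinterpret_cast<char*>(target + size));
}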
for (auto it = method_code_map_.begin(); it != method_code_map_.end();) { const void* code_ptr = it->first; - CHECK(IsExecutableAddress(code_ptr)); uintptr_t allocation = FromCodeToAllocation(code_ptr); if (GetLiveBitmap()->Test(allocation)) { ++it; } else { - CHECK(IsExecutableAddress(it->first)); method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first)); it = method_code_map_.erase(it); } @@ -1495,7 +1149,6 @@ void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) { for (const auto& it : method_code_map_) { ArtMethod* method = it.second; const void* code_ptr = it.first; - CHECK(IsExecutableAddress(code_ptr)); const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) { GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr)); @@ -1521,7 +1174,6 @@ void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) { // Free all profiling infos of methods not compiled nor being compiled. auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(), [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS { - CHECK(IsDataAddress(info)); const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode(); // We have previously cleared the ProfilingInfo pointer in the ArtMethod in the hope // that the compiled code would not get revived. As mutator threads run concurrently, @@ -1582,7 +1234,6 @@ OatQuickMethodHeader* JitCodeCache::LookupMethodHeader(uintptr_t pc, ArtMethod* --it; const void* code_ptr = it->first; - CHECK(IsExecutableAddress(code_ptr)); OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); if (!method_header->Contains(pc)) { return nullptr; @@ -1665,7 +1316,6 @@ ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self ATTRIBUTE_UNU // store in the ArtMethod's ProfilingInfo pointer. QuasiAtomic::ThreadFenceRelease(); - CHECK(IsDataAddress(info)); method->SetProfilingInfo(info); profiling_infos_.push_back(info); histogram_profiling_info_memory_use_.AddValue(profile_info_size); @@ -1678,8 +1328,7 @@ void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_S if (code_mspace_ == mspace) { size_t result = code_end_; code_end_ += increment; - MemMap* writable_map = GetWritableMemMap(); - return reinterpret_cast<void*>(result + writable_map->Begin()); + return reinterpret_cast<void*>(result + code_map_->Begin()); } else { DCHECK_EQ(data_mspace_, mspace); size_t result = data_end_; @@ -1831,7 +1480,6 @@ void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSE size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) { MutexLock mu(Thread::Current(), lock_); - CHECK(IsExecutableAddress(ptr)); return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr))); } @@ -1867,27 +1515,22 @@ uint8_t* JitCodeCache::AllocateCode(size_t code_size) { size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment); // Ensure the header ends up at expected instruction alignment. 
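The LookupMethodHeader hunk above keeps the usual ordered-map idiom for mapping an arbitrary pc back to the compiled code that contains it: take the greatest start address not above pc, then check against that region's size. A stand-alone sketch of the same idiom with std::map and hypothetical names (ART's SafeMap and OatQuickMethodHeader are not reproduced here):

#include <cstddef>
#include <cstdint>
#include <map>

// Compiled regions keyed by start address; the mapped value is the code size.
static std::map<uintptr_t, size_t> code_regions;

// Return the start address of the region containing pc, or 0 if there is none.
static uintptr_t FindCodeContaining(uintptr_t pc) {
  auto it = code_regions.upper_bound(pc);  // first region starting strictly after pc
  if (it == code_regions.begin()) {
    return 0;
  }
  --it;  // greatest start address <= pc
  return (pc < it->first + it->second) ? it->first : 0;
}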
DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment); - CHECK(IsWritableAddress(result)); used_memory_for_code_ += mspace_usable_size(result); return result; } -void JitCodeCache::FreeRawCode(void* code) { - CHECK(IsExecutableAddress(code)); - void* writable_code = ToWritableAddress(code); - used_memory_for_code_ -= mspace_usable_size(writable_code); - mspace_free(code_mspace_, writable_code); +void JitCodeCache::FreeCode(uint8_t* code) { + used_memory_for_code_ -= mspace_usable_size(code); + mspace_free(code_mspace_, code); } uint8_t* JitCodeCache::AllocateData(size_t data_size) { void* result = mspace_malloc(data_mspace_, data_size); - CHECK(IsDataAddress(reinterpret_cast<uint8_t*>(result))); used_memory_for_data_ += mspace_usable_size(result); return reinterpret_cast<uint8_t*>(result); } void JitCodeCache::FreeData(uint8_t* data) { - CHECK(IsDataAddress(data)); used_memory_for_data_ -= mspace_usable_size(data); mspace_free(data_mspace_, data); } diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 175501f915..daa1d616a6 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -113,6 +113,7 @@ class JitCodeCache { size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -228,8 +229,6 @@ class JitCodeCache { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr); - // The GC needs to disallow the reading of inline caches when it processes them, // to avoid having a class being used while it is being deleted. void AllowInlineCacheAccess() REQUIRES(!lock_); @@ -248,13 +247,9 @@ class JitCodeCache { } private: - friend class ScopedCodeCacheWrite; - // Take ownership of maps. JitCodeCache(MemMap* code_map, MemMap* data_map, - MemMap* writable_code_map, - MemMap* code_sync_map, size_t initial_code_capacity, size_t initial_data_capacity, size_t max_capacity, @@ -272,6 +267,7 @@ class JitCodeCache { size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -296,7 +292,7 @@ class JitCodeCache { REQUIRES(!Locks::cha_lock_); // Free in the mspace allocations for `code_ptr`. - void FreeCodeAndData(const void* code_ptr) REQUIRES(lock_); + void FreeCode(const void* code_ptr) REQUIRES(lock_); // Number of bytes allocated in the code cache. size_t CodeCacheSizeLocked() REQUIRES(lock_); @@ -329,7 +325,7 @@ class JitCodeCache { bool CheckLiveCompiledCodeHasProfilingInfo() REQUIRES(lock_); - void FreeRawCode(void* code) REQUIRES(lock_); + void FreeCode(uint8_t* code) REQUIRES(lock_); uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_); void FreeData(uint8_t* data) REQUIRES(lock_); uint8_t* AllocateData(size_t data_size) REQUIRES(lock_); @@ -339,61 +335,25 @@ class JitCodeCache { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - MemMap* GetWritableMemMap() const { - if (writable_code_map_ == nullptr) { - // The system required us to map the JIT Code Cache RWX (see - // JitCodeCache::Create()). - return executable_code_map_.get(); - } else { - // Executable code is mapped RX, and writable code is mapped RW - // to the underlying same memory, but at a different address. 
- return writable_code_map_.get(); - } - } - - bool IsDataAddress(const void* raw_addr) const; - - bool IsExecutableAddress(const void* raw_addr) const; - - bool IsWritableAddress(const void* raw_addr) const; - - template <typename T> - T* ToExecutableAddress(T* writable_address) const; - - void* ToWritableAddress(const void* executable_address) const; - // Lock for guarding allocations, collections, and the method_code_map_. Mutex lock_; // Condition to wait on during collection. ConditionVariable lock_cond_ GUARDED_BY(lock_); // Whether there is a code cache collection in progress. bool collection_in_progress_ GUARDED_BY(lock_); - // JITting methods obviously requires both write and execute permissions on a region of memory. - // In tye typical (non-debugging) case, we separate the memory mapped view that can write the code - // from a view that the runtime uses to execute the code. Having these two views eliminates any - // single address region having rwx permissions. An attacker could still write the writable - // address and then execute the executable address. We allocate the mappings with a random - // address relationship to each other which makes the attacker need two addresses rather than - // just one. In the debugging case there is no file descriptor to back the - // shared memory, and hence we have to use a single mapping. + // Mem map which holds code. + std::unique_ptr<MemMap> code_map_; // Mem map which holds data (stack maps and profiling info). std::unique_ptr<MemMap> data_map_; - // Mem map which holds a non-writable view of code for JIT. - std::unique_ptr<MemMap> executable_code_map_; - // Mem map which holds a non-executable view of code for JIT. - std::unique_ptr<MemMap> writable_code_map_; - // Mem map which holds one executable page that we use for flushing instruction - // fetch buffers. The code on this page is never executed. - std::unique_ptr<MemMap> code_sync_map_; // The opaque mspace for allocating code. void* code_mspace_ GUARDED_BY(lock_); // The opaque mspace for allocating data. void* data_mspace_ GUARDED_BY(lock_); // Bitmap for collecting code and data. std::unique_ptr<CodeCacheBitmap> live_bitmap_; - // Holds non-writable compiled code associated to the ArtMethod. + // Holds compiled code associated to the ArtMethod. SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_); - // Holds non-writable osr compiled code associated to the ArtMethod. + // Holds osr compiled code associated to the ArtMethod. SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_); // ProfilingInfo objects we have allocated. std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_); diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 4e82480aca..743604cc47 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -496,7 +496,7 @@ MemMap::~MemMap() { MEMORY_TOOL_MAKE_UNDEFINED(base_begin_, base_size_); int result = munmap(base_begin_, base_size_); if (result == -1) { - PLOG(FATAL) << "munmap failed: " << BaseBegin() << "..." 
<< BaseEnd(); + PLOG(FATAL) << "munmap failed"; } } @@ -535,13 +535,8 @@ MemMap::MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_ } } -MemMap* MemMap::RemapAtEnd(uint8_t* new_end, - const char* tail_name, - int tail_prot, - int sharing_flags, - std::string* error_msg, - bool use_ashmem, - unique_fd* shmem_fd) { +MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, + std::string* error_msg, bool use_ashmem) { use_ashmem = use_ashmem && !kIsTargetLinux; DCHECK_GE(new_end, Begin()); DCHECK_LE(new_end, End()); @@ -560,12 +555,6 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, size_ = new_end - reinterpret_cast<uint8_t*>(begin_); base_size_ = new_base_end - reinterpret_cast<uint8_t*>(base_begin_); DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_); - if (base_size_ == 0u) { - // All pages in this MemMap have been handed out. Invalidate base - // pointer to prevent the destructor calling munmap() on - // zero-length region (which can't succeed). - base_begin_ = nullptr; - } size_t tail_size = old_end - new_end; uint8_t* tail_base_begin = new_base_end; size_t tail_base_size = old_base_end - new_base_end; @@ -573,14 +562,14 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, DCHECK_ALIGNED(tail_base_size, kPageSize); unique_fd fd; - int flags = MAP_ANONYMOUS | sharing_flags; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; if (use_ashmem) { // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are // prefixed "dalvik-". std::string debug_friendly_name("dalvik-"); debug_friendly_name += tail_name; fd.reset(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size)); - flags = MAP_FIXED | sharing_flags; + flags = MAP_PRIVATE | MAP_FIXED; if (fd.get() == -1) { *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", tail_name, strerror(errno)); @@ -614,9 +603,6 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, fd.get()); return nullptr; } - if (shmem_fd != nullptr) { - shmem_fd->reset(fd.release()); - } return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false); } diff --git a/runtime/mem_map.h b/runtime/mem_map.h index d8908ad603..5603963eac 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -25,7 +25,6 @@ #include <string> #include "android-base/thread_annotations.h" -#include "android-base/unique_fd.h" namespace art { @@ -38,8 +37,6 @@ namespace art { #define USE_ART_LOW_4G_ALLOCATOR 0 #endif -using android::base::unique_fd; - #ifdef __linux__ static constexpr bool kMadviseZeroes = true; #else @@ -171,14 +168,11 @@ class MemMap { } // Unmap the pages at end and remap them to create another memory map. - // sharing_flags should be either MAP_PRIVATE or MAP_SHARED. MemMap* RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, - int sharing_flags, std::string* error_msg, - bool use_ashmem = true, - unique_fd* shmem_fd = nullptr); + bool use_ashmem = true); static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map) REQUIRES(!MemMap::mem_maps_lock_); diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc index 99bf0045c4..a4ebb16d09 100644 --- a/runtime/mem_map_test.cc +++ b/runtime/mem_map_test.cc @@ -74,7 +74,6 @@ class MemMapTest : public CommonRuntimeTest { MemMap* m1 = m0->RemapAtEnd(base0 + page_size, "MemMapTest_RemapAtEndTest_map1", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg); // Check the states of the two maps. 
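The sharing_flags and shmem_fd parameters dropped from RemapAtEnd above existed only to build the dual-view JIT mapping that this change removes: the same physical pages mapped once read+write for the compiler and once read+execute for the runtime, at unrelated addresses. A hedged, Linux-only sketch of that general technique using memfd_create (glibc 2.27 or newer; the real code used ashmem and MemMap, which are not shown here):

#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>
#include <cstdio>

int main() {
  const size_t kSize = 4096;
  int fd = memfd_create("jit-demo", 0);
  if (fd < 0 || ftruncate(fd, kSize) != 0) {
    return 1;
  }
  // Two views of the same pages: one the JIT writes through, one the runtime
  // executes from. A write through the RW view is visible through the RX view.
  void* rw = mmap(nullptr, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  void* rx = mmap(nullptr, kSize, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
  if (rw == MAP_FAILED || rx == MAP_FAILED) {
    return 1;
  }
  static_cast<unsigned char*>(rw)[0] = 0xc3;  // e.g. an x86 'ret', written via the RW view only
  std::printf("rw=%p rx=%p byte seen through rx: %#x\n",
              rw, rx, static_cast<unsigned>(static_cast<unsigned char*>(rx)[0]));
  close(fd);
  return 0;
}

With the dual view gone, the cache reverts to a single mapping whose permissions are toggled by ScopedCodeCacheWrite, which is what the jit_code_cache.cc hunks earlier in this patch implement.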
EXPECT_EQ(m0->Begin(), base0) << error_msg; @@ -457,7 +456,6 @@ TEST_F(MemMapTest, AlignBy) { std::unique_ptr<MemMap> m1(m0->RemapAtEnd(base0 + 3 * page_size, "MemMapTest_AlignByTest_map1", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg)); uint8_t* base1 = m1->Begin(); ASSERT_TRUE(base1 != nullptr) << error_msg; @@ -467,7 +465,6 @@ TEST_F(MemMapTest, AlignBy) { std::unique_ptr<MemMap> m2(m1->RemapAtEnd(base1 + 4 * page_size, "MemMapTest_AlignByTest_map2", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg)); uint8_t* base2 = m2->Begin(); ASSERT_TRUE(base2 != nullptr) << error_msg; @@ -477,7 +474,6 @@ TEST_F(MemMapTest, AlignBy) { std::unique_ptr<MemMap> m3(m2->RemapAtEnd(base2 + 3 * page_size, "MemMapTest_AlignByTest_map1", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg)); uint8_t* base3 = m3->Begin(); ASSERT_TRUE(base3 != nullptr) << error_msg; diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 4f8952d8c2..6eb200d0d3 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -716,11 +716,10 @@ inline T* Object::GetFieldObject(MemberOffset field_offset) { } uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr); - T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr); - if (kIsVolatile) { - // TODO: Refactor to use a SequentiallyConsistent load instead. - QuasiAtomic::ThreadFenceAcquire(); // Ensure visibility of operations preceding store. - } + T* result = ReadBarrier::Barrier<T, kIsVolatile, kReadBarrierOption>( + this, + field_offset, + objref_addr); if (kVerifyFlags & kVerifyReads) { VerifyObject(result); } @@ -756,15 +755,7 @@ inline void Object::SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, } uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr); - if (kIsVolatile) { - // TODO: Refactor to use a SequentiallyConsistent store instead. - QuasiAtomic::ThreadFenceRelease(); // Ensure that prior accesses are visible before store. - objref_addr->Assign(new_value.Ptr()); - QuasiAtomic::ThreadFenceSequentiallyConsistent(); - // Ensure this store occurs before any volatile loads. 
- } else { - objref_addr->Assign(new_value.Ptr()); - } + objref_addr->Assign<kIsVolatile>(new_value.Ptr()); } template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags, @@ -835,13 +826,12 @@ inline bool Object::CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref, new_ref); return success; } @@ -877,13 +867,12 @@ inline bool Object::CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrie if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref); return success; } @@ -907,13 +896,12 @@ inline bool Object::CasFieldWeakRelaxedObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref, new_ref); return success; } @@ -937,13 +925,12 @@ inline bool Object::CasFieldWeakReleaseObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + 
field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeWeakRelease(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeWeakRelease(old_ref, new_ref); return success; } diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h index 69365af7fd..f0769409d4 100644 --- a/runtime/mirror/object-readbarrier-inl.h +++ b/runtime/mirror/object-readbarrier-inl.h @@ -211,13 +211,12 @@ inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref, new_ref); return success; } @@ -241,13 +240,12 @@ inline bool Object::CasFieldStrongReleaseObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeStrongRelease(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeStrongRelease(old_ref, new_ref); return success; } diff --git a/runtime/mirror/object_reference-inl.h b/runtime/mirror/object_reference-inl.h index 22fb83cb5c..60f3ce153f 100644 --- a/runtime/mirror/object_reference-inl.h +++ b/runtime/mirror/object_reference-inl.h @@ -30,17 +30,10 @@ void ObjectReference<kPoisonReferences, MirrorType>::Assign(ObjPtr<MirrorType> p } template<class MirrorType> -HeapReference<MirrorType> HeapReference<MirrorType>::FromObjPtr(ObjPtr<MirrorType> ptr) { - return HeapReference<MirrorType>(ptr.Ptr()); -} - -template<class MirrorType> bool HeapReference<MirrorType>::CasWeakRelaxed(MirrorType* expected_ptr, MirrorType* new_ptr) { - HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_ptr)); - HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_ptr)); - Atomic<uint32_t>* atomic_reference = reinterpret_cast<Atomic<uint32_t>*>(&this->reference_); - return atomic_reference->CompareExchangeWeakRelaxed(expected_ref.reference_, - new_ref.reference_); + return reference_.CompareExchangeWeakRelaxed( + Compression::Compress(expected_ptr), + Compression::Compress(new_ptr)); } } // namespace mirror diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h index a96a120d68..c62ee6cb61 
100644 --- a/runtime/mirror/object_reference.h +++ b/runtime/mirror/object_reference.h @@ -17,6 +17,7 @@ #ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_ #define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_ +#include "atomic.h" #include "base/mutex.h" // For Locks::mutator_lock_. #include "globals.h" #include "obj_ptr.h" @@ -30,20 +31,43 @@ class Object; // extra platform specific padding. #define MANAGED PACKED(4) +template<bool kPoisonReferences, class MirrorType> +class PtrCompression { + public: + // Compress reference to its bit representation. + static uint32_t Compress(MirrorType* mirror_ptr) { + uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr); + return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits); + } + + // Uncompress an encoded reference from its bit representation. + static MirrorType* Decompress(uint32_t ref) { + uintptr_t as_bits = kPoisonReferences ? -ref : ref; + return reinterpret_cast<MirrorType*>(as_bits); + } + + // Convert an ObjPtr to a compressed reference. + static uint32_t Compress(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_) { + return Compress(ptr.Ptr()); + } +}; + // Value type representing a reference to a mirror::Object of type MirrorType. template<bool kPoisonReferences, class MirrorType> class MANAGED ObjectReference { + private: + using Compression = PtrCompression<kPoisonReferences, MirrorType>; + public: - MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) { - return UnCompress(); + MirrorType* AsMirrorPtr() const { + return Compression::Decompress(reference_); } - void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) { - reference_ = Compress(other); + void Assign(MirrorType* other) { + reference_ = Compression::Compress(other); } - void Assign(ObjPtr<MirrorType> ptr) - REQUIRES_SHARED(Locks::mutator_lock_); + void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_); void Clear() { reference_ = 0; @@ -58,48 +82,71 @@ class MANAGED ObjectReference { return reference_; } - protected: - explicit ObjectReference(MirrorType* mirror_ptr) - REQUIRES_SHARED(Locks::mutator_lock_) - : reference_(Compress(mirror_ptr)) { - } - - // Compress reference to its bit representation. - static uint32_t Compress(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) { - uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr); - return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits); + static ObjectReference<kPoisonReferences, MirrorType> FromMirrorPtr(MirrorType* mirror_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + return ObjectReference<kPoisonReferences, MirrorType>(mirror_ptr); } - // Uncompress an encoded reference from its bit representation. - MirrorType* UnCompress() const REQUIRES_SHARED(Locks::mutator_lock_) { - uintptr_t as_bits = kPoisonReferences ? -reference_ : reference_; - return reinterpret_cast<MirrorType*>(as_bits); + protected: + explicit ObjectReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) + : reference_(Compression::Compress(mirror_ptr)) { } - friend class Object; - // The encoded reference to a mirror::Object. uint32_t reference_; }; // References between objects within the managed heap. +// Similar API to ObjectReference, but not a value type. Supports atomic access. 
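The PtrCompression helper introduced above centralizes the 32-bit compression and optional poisoning of heap references. A stand-alone illustration of the arithmetic; the templates below mirror Compress/Decompress but are not the ART class, and the address is synthetic because references only round-trip when the managed heap sits in the low 4 GiB:

#include <cassert>
#include <cstdint>
#include <cstdio>

// With poisoning enabled, the stored 32-bit value is the two's-complement
// negation of the pointer bits, so accidentally using the raw stored value as
// a pointer points at an implausible address and faults quickly.
template <bool kPoison>
uint32_t Compress(const void* ptr) {
  uintptr_t as_bits = reinterpret_cast<uintptr_t>(ptr);
  return static_cast<uint32_t>(kPoison ? -as_bits : as_bits);
}

template <bool kPoison>
void* Decompress(uint32_t ref) {
  uintptr_t as_bits = kPoison ? -ref : ref;
  return reinterpret_cast<void*>(as_bits);
}

int main() {
  void* fake_obj = reinterpret_cast<void*>(0x12345678u);  // synthetic low-heap address
  uint32_t poisoned = Compress<true>(fake_obj);
  std::printf("raw=%p stored=0x%08x\n", fake_obj, static_cast<unsigned>(poisoned));
  assert(Decompress<true>(poisoned) == fake_obj);  // the round trip is exact
  return 0;
}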
template<class MirrorType> -class MANAGED HeapReference : public ObjectReference<kPoisonHeapReferences, MirrorType> { +class MANAGED HeapReference { + private: + using Compression = PtrCompression<kPoisonHeapReferences, MirrorType>; + public: + HeapReference() REQUIRES_SHARED(Locks::mutator_lock_) : HeapReference(nullptr) {} + + template <bool kIsVolatile = false> + MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) { + return Compression::Decompress( + kIsVolatile ? reference_.LoadSequentiallyConsistent() : reference_.LoadJavaData()); + } + + template <bool kIsVolatile = false> + void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) { + if (kIsVolatile) { + reference_.StoreSequentiallyConsistent(Compression::Compress(other)); + } else { + reference_.StoreJavaData(Compression::Compress(other)); + } + } + + template <bool kIsVolatile = false> + void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_); + + void Clear() { + reference_.StoreJavaData(0); + DCHECK(IsNull()); + } + + bool IsNull() const { + return reference_.LoadJavaData() == 0; + } + static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) { return HeapReference<MirrorType>(mirror_ptr); } - static HeapReference<MirrorType> FromObjPtr(ObjPtr<MirrorType> ptr) - REQUIRES_SHARED(Locks::mutator_lock_); - bool CasWeakRelaxed(MirrorType* old_ptr, MirrorType* new_ptr) REQUIRES_SHARED(Locks::mutator_lock_); private: explicit HeapReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) - : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {} + : reference_(Compression::Compress(mirror_ptr)) {} + + // The encoded reference to a mirror::Object. Atomically updateable. + Atomic<uint32_t> reference_; }; static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize, diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc index 761362fcb2..b2bdeed5cb 100644 --- a/runtime/native/sun_misc_Unsafe.cc +++ b/runtime/native/sun_misc_Unsafe.cc @@ -67,10 +67,12 @@ static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaOb if (kUseReadBarrier) { // Need to make sure the reference stored in the field is a to-space one before attempting the // CAS or the CAS could fail incorrectly. + // Note that the read barrier load does NOT need to be volatile. mirror::HeapReference<mirror::Object>* field_addr = reinterpret_cast<mirror::HeapReference<mirror::Object>*>( reinterpret_cast<uint8_t*>(obj.Ptr()) + static_cast<size_t>(offset)); - ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>( + ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kWithReadBarrier, + /* kAlwaysUpdateField */ true>( obj.Ptr(), MemberOffset(offset), field_addr); @@ -112,6 +114,7 @@ static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong of jint newValue) { ScopedFastNativeObjectAccess soa(env); ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(javaObj); + // TODO: A release store is likely to be faster on future processors. QuasiAtomic::ThreadFenceRelease(); // JNI must use non transactional mode. obj->SetField32<false>(MemberOffset(offset), newValue); diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index b0935c0b56..642459924e 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -33,7 +33,8 @@ namespace art { // Disabled for performance reasons. 
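The reworked HeapReference above stores the compressed reference in an Atomic<uint32_t> and selects the memory ordering with a kIsVolatile template parameter, instead of wrapping plain loads and stores in explicit fences as the object-inl.h code removed earlier in this patch did. A minimal sketch of that access pattern with std::atomic; relaxed stands in here for ART's plain "Java data" access, and the class name is illustrative:

#include <atomic>
#include <cstdint>

// The field is always an atomic 32-bit cell; the template parameter picks the
// ordering: relaxed for ordinary Java field accesses, sequentially consistent
// for volatile ones.
class CompressedRef {
 public:
  template <bool kIsVolatile = false>
  uint32_t Load() const {
    return bits_.load(kIsVolatile ? std::memory_order_seq_cst
                                  : std::memory_order_relaxed);
  }

  template <bool kIsVolatile = false>
  void Store(uint32_t value) {
    bits_.store(value, kIsVolatile ? std::memory_order_seq_cst
                                   : std::memory_order_relaxed);
  }

 private:
  std::atomic<uint32_t> bits_{0};
};

Keeping the cell atomic even for non-volatile accesses is what lets the CAS helpers in object-inl.h and object-readbarrier-inl.h compare-and-exchange the same 32-bit word without reinterpreting a plain field.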
static constexpr bool kCheckDebugDisallowReadBarrierCount = false; -template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField> +template <typename MirrorType, bool kIsVolatile, ReadBarrierOption kReadBarrierOption, + bool kAlwaysUpdateField> inline MirrorType* ReadBarrier::Barrier( mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) { constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; @@ -55,7 +56,7 @@ inline MirrorType* ReadBarrier::Barrier( } ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>( fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr)); - MirrorType* ref = ref_addr->AsMirrorPtr(); + MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>(); MirrorType* old_ref = ref; if (is_gray) { // Slow-path. @@ -71,9 +72,9 @@ inline MirrorType* ReadBarrier::Barrier( return ref; } else if (kUseBrooksReadBarrier) { // To be implemented. - return ref_addr->AsMirrorPtr(); + return ref_addr->template AsMirrorPtr<kIsVolatile>(); } else if (kUseTableLookupReadBarrier) { - MirrorType* ref = ref_addr->AsMirrorPtr(); + MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>(); MirrorType* old_ref = ref; // The heap or the collector can be null at startup. TODO: avoid the need for this null check. gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -93,7 +94,7 @@ inline MirrorType* ReadBarrier::Barrier( } } else { // No read barrier. - return ref_addr->AsMirrorPtr(); + return ref_addr->template AsMirrorPtr<kIsVolatile>(); } } diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h index d36acbcdc4..8a106aabb0 100644 --- a/runtime/read_barrier.h +++ b/runtime/read_barrier.h @@ -46,9 +46,13 @@ class ReadBarrier { // fast-debug environment. DECLARE_RUNTIME_DEBUG_FLAG(kEnableReadBarrierInvariantChecks); + // Return the reference at ref_addr, invoking read barrier as appropriate. + // Ref_addr is an address within obj. // It's up to the implementation whether the given field gets updated whereas the return value // must be an updated reference unless kAlwaysUpdateField is true. - template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier, + template <typename MirrorType, + bool kIsVolatile, + ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kAlwaysUpdateField = false> ALWAYS_INLINE static MirrorType* Barrier( mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) |
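A side note on the ref_addr->template AsMirrorPtr<kIsVolatile>() spelling used in the read barrier hunks above: because ref_addr has a type that depends on a template parameter, the compiler cannot tell on its own that AsMirrorPtr is a member template, so the template keyword is required before the explicit argument list. A minimal stand-alone example of the syntax, with illustrative names:

#include <cstdint>

struct Cell {
  template <bool kIsVolatile = false>
  uint32_t Get() const { return value; }
  uint32_t value = 0;
};

template <typename T, bool kIsVolatile>
uint32_t ReadThrough(const T* cell) {
  // Without the 'template' keyword, cell->Get<kIsVolatile>() would be parsed
  // as a pair of comparisons against a member named Get.
  return cell->template Get<kIsVolatile>();
}

int main() {
  Cell c;
  c.value = 42;
  return ReadThrough<Cell, true>(&c) == 42 ? 0 : 1;
}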