40 files changed, 501 insertions(+), 689 deletions(-)
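Reviewer note: the core of this change is the verification-skip plumbing between dex2oat, the compiler callbacks, and the AOT class linker. The stand-alone sketch below condenses that flow for readers of the hunks that follow; it uses simplified stand-in types (a ClassStatus enum, ClassReference as a plain pair, a string return instead of verifier::FailureKind) rather than the real ART classes, so treat it as an illustration of the intended control flow, not as code from the patch.

#include <cstdio>
#include <map>
#include <utility>

// Simplified stand-ins for the ART types touched by this patch.
enum class ClassStatus { kNotReady, kRetryVerificationAtRuntime, kVerified, kInitialized };
using ClassReference = std::pair<int, unsigned>;  // (dex file id, class def index)

// Models CompilerDriver: it remembers the status of classes it has already processed,
// which is what the new CompilerDriver::CanAssumeVerified() consults.
class CompilerDriver {
 public:
  void SetClassStatus(ClassReference ref, ClassStatus status) { compiled_classes_[ref] = status; }
  bool CanAssumeVerified(ClassReference ref) const {
    auto it = compiled_classes_.find(ref);
    return it != compiled_classes_.end() && it->second >= ClassStatus::kVerified;
  }
 private:
  std::map<ClassReference, ClassStatus> compiled_classes_;
};

// Models QuickCompilerCallbacks: the driver is only consulted when class unloading is
// enabled, because otherwise a previously verified class can never come back for
// re-verification and the lookup would be pure overhead.
class CompilerCallbacks {
 public:
  void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* driver) {
    does_class_unloading_ = does_class_unloading;
    compiler_driver_ = driver;
  }
  bool CanAssumeVerified(ClassReference ref) const {
    return does_class_unloading_ && compiler_driver_->CanAssumeVerified(ref);
  }
 private:
  bool does_class_unloading_ = false;
  CompilerDriver* compiler_driver_ = nullptr;
};

// Models AotClassLinker::PerformClassVerification: skip the verifier when the callbacks
// vouch for the class, otherwise fall back to the normal ClassLinker path.
const char* PerformClassVerification(const CompilerCallbacks& callbacks, ClassReference ref) {
  if (callbacks.CanAssumeVerified(ref)) {
    return "kNoFailure (re-verification skipped)";
  }
  return "ran full verification";
}

int main() {
  CompilerDriver driver;
  CompilerCallbacks callbacks;
  // What dex2oat now does when compiling dex files individually.
  callbacks.SetDoesClassUnloading(true, &driver);

  ClassReference ref{0, 0};
  std::printf("first pass:  %s\n", PerformClassVerification(callbacks, ref));
  driver.SetClassStatus(ref, ClassStatus::kVerified);  // Recorded before the class is unloaded.
  std::printf("second pass: %s\n", PerformClassVerification(callbacks, ref));
  return 0;
}

In the actual patch the same gating appears twice: dex2oat only calls SetDoesClassUnloading(true, driver) when ShouldCompileDexFilesIndividually() returns true, and QuickCompilerCallbacks::CanAssumeVerified() returns false outright when class unloading is disabled.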
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc index 872f7ea15d..c7e9f4fc07 100644 --- a/compiler/dex/quick_compiler_callbacks.cc +++ b/compiler/dex/quick_compiler_callbacks.cc @@ -16,6 +16,7 @@ #include "quick_compiler_callbacks.h" +#include "driver/compiler_driver.h" #include "verification_results.h" #include "verifier/method_verifier-inl.h" @@ -33,4 +34,17 @@ void QuickCompilerCallbacks::ClassRejected(ClassReference ref) { } } +bool QuickCompilerCallbacks::CanAssumeVerified(ClassReference ref) { + // If we don't have class unloading enabled in the compiler, we will never see classes that were + // previously verified. Return false to avoid overhead from the lookup in the compiler driver. + if (!does_class_unloading_) { + return false; + } + DCHECK(compiler_driver_ != nullptr); + // In the case of the quicken filter: avoiding verification of quickened instructions, which the + // verifier doesn't currently support. + // In the case of the verify filter, avoiding verifying twice. + return compiler_driver_->CanAssumeVerified(ref); +} + } // namespace art diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h index a3a6c0972c..578aff45e5 100644 --- a/compiler/dex/quick_compiler_callbacks.h +++ b/compiler/dex/quick_compiler_callbacks.h @@ -22,6 +22,7 @@ namespace art { +class CompilerDriver; class VerificationResults; class QuickCompilerCallbacks FINAL : public CompilerCallbacks { @@ -53,8 +54,19 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks { verification_results_ = verification_results; } + bool CanAssumeVerified(ClassReference ref) OVERRIDE; + + void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver) + OVERRIDE { + does_class_unloading_ = does_class_unloading; + compiler_driver_ = compiler_driver; + DCHECK(!does_class_unloading || compiler_driver_ != nullptr); + } + private: VerificationResults* verification_results_ = nullptr; + bool does_class_unloading_ = false; + CompilerDriver* compiler_driver_ = nullptr; std::unique_ptr<verifier::VerifierDeps> verifier_deps_; }; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 6e087c5785..037e45840d 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -3040,4 +3040,10 @@ void CompilerDriver::SetDexFilesForOatFile(const std::vector<const DexFile*>& de } } +bool CompilerDriver::CanAssumeVerified(ClassReference ref) const { + mirror::Class::Status existing = mirror::Class::kStatusNotReady; + compiled_classes_.Get(DexFileReference(ref.first, ref.second), &existing); + return existing >= mirror::Class::kStatusVerified; +} + } // namespace art diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h index d9886a2fba..ba4581c9f4 100644 --- a/compiler/driver/compiler_driver.h +++ b/compiler/driver/compiler_driver.h @@ -377,6 +377,8 @@ class CompilerDriver { return profile_compilation_info_; } + bool CanAssumeVerified(ClassReference ref) const; + private: void PreCompile(jobject class_loader, const std::vector<const DexFile*>& dex_files, diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 4da3e0df39..392d57c0f2 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -23,6 +23,7 @@ #include "art_method-inl.h" #include "class_linker-inl.h" #include "common_compiler_test.h" +#include "compiler_callbacks.h" #include 
"dex_file.h" #include "dex_file_types.h" #include "gc/heap.h" @@ -368,7 +369,9 @@ TEST_F(CompilerDriverVerifyTest, VerifyCompilation) { // Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the // driver. -TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) { +// Test that checks that classes can be assumed as verified if unloading mode is enabled and +// the class status is at least verified. +TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatusCheckVerified) { Thread* const self = Thread::Current(); jobject class_loader; std::vector<const DexFile*> dex_files; @@ -382,6 +385,7 @@ TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) { dex_file = dex_files.front(); } compiler_driver_->SetDexFilesForOatFile(dex_files); + callbacks_->SetDoesClassUnloading(true, compiler_driver_.get()); ClassReference ref(dex_file, 0u); // Test that the status is read from the compiler driver as expected. for (size_t i = mirror::Class::kStatusRetryVerificationAtRuntime; @@ -397,6 +401,12 @@ TEST_F(CompilerDriverVerifyTest, RetryVerifcationStatus) { mirror::Class::Status status = {}; ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status)); EXPECT_EQ(status, expected_status); + + // Check that we can assume verified if we are a status that is at least verified. + if (status >= mirror::Class::kStatusVerified) { + // Check that the class can be assumed as verified in the compiler driver. + EXPECT_TRUE(callbacks_->CanAssumeVerified(ref)) << status; + } } } diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 76a243f793..51101f104a 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -1210,14 +1210,14 @@ bool OptimizingCompiler::JitCompile(Thread* self, uint8_t* stack_map_data = nullptr; uint8_t* method_info_data = nullptr; uint8_t* roots_data = nullptr; - code_cache->ReserveData(self, - stack_map_size, - method_info_size, - number_of_roots, - method, - &stack_map_data, - &method_info_data, - &roots_data); + uint32_t data_size = code_cache->ReserveData(self, + stack_map_size, + method_info_size, + number_of_roots, + method, + &stack_map_data, + &method_info_data, + &roots_data); if (stack_map_data == nullptr || roots_data == nullptr) { return false; } @@ -1238,6 +1238,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), code_allocator.GetSize(), + data_size, osr, roots, codegen->GetGraph()->HasShouldDeoptimizeFlag(), diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 5ad011d8f9..3e373d16fb 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -554,6 +554,14 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN } void HScheduler::Schedule(HGraph* graph) { + // We run lsa here instead of in a separate pass to better control whether we + // should run the analysis or not. + LoadStoreAnalysis lsa(graph); + if (!only_optimize_loop_blocks_ || graph->HasLoops()) { + lsa.Run(); + scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector()); + } + for (HBasicBlock* block : graph->GetReversePostOrder()) { if (IsSchedulable(block)) { Schedule(block); @@ -566,14 +574,6 @@ void HScheduler::Schedule(HBasicBlock* block) { // Build the scheduling graph. 
scheduling_graph_.Clear(); - - // Only perform LSA/HeapLocation analysis on the basic block that - // is going to get instruction scheduled. - HeapLocationCollector heap_location_collector(block->GetGraph()); - heap_location_collector.VisitBasicBlock(block); - heap_location_collector.BuildAliasingMatrix(); - scheduling_graph_.SetHeapLocationCollector(heap_location_collector); - for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); CHECK_EQ(instruction->GetBlock(), block) diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index ff193e933a..a6036dab5f 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -1774,13 +1774,11 @@ class Dex2Oat FINAL { bool ShouldCompileDexFilesIndividually() const { // Compile individually if we are not building an image, not using any compilation, and are // using multidex. - // This means extract, verify, and quicken will use the individual compilation mode (to reduce + // This means extract, verify, and quicken, will use the individual compilation mode (to reduce // RAM used by the compiler). - // TODO: Still do it for app images to get testing coverage. Note that this will generate empty - // app images. return !IsImage() && dex_files_.size() > 1 && - !CompilerFilter::IsAnyCompilationEnabled(compiler_options_->GetCompilerFilter()); + !CompilerFilter::IsAotCompilationEnabled(compiler_options_->GetCompilerFilter()); } // Set up and create the compiler driver and then invoke it to compile all the dex files. @@ -1856,6 +1854,16 @@ class Dex2Oat FINAL { profile_compilation_info_.get())); driver_->SetDexFilesForOatFile(dex_files_); + const bool compile_individually = ShouldCompileDexFilesIndividually(); + if (compile_individually) { + // Set the compiler driver in the callbacks so that we can avoid re-verification. This not + // only helps performance but also prevents reverifying quickened bytecodes. Attempting + // to verify quickened bytecode causes verification failures. + // Only set the compiler driver if we are doing separate compilation since there is a bit + // of overhead when checking if a class was previously verified. + callbacks_->SetDoesClassUnloading(true, driver_.get()); + } + // Setup vdex for compilation. if (!DoEagerUnquickeningOfVdex() && input_vdex_file_ != nullptr) { callbacks_->SetVerifierDeps( @@ -1872,7 +1880,7 @@ class Dex2Oat FINAL { callbacks_->SetVerifierDeps(new verifier::VerifierDeps(dex_files_)); } // Invoke the compilation. - if (ShouldCompileDexFilesIndividually()) { + if (compile_individually) { CompileDexFilesIndividually(); // Return a null classloader since we already released it. return nullptr; @@ -2966,6 +2974,8 @@ class ScopedGlobalRef { static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) { dex2oat.LoadClassProfileDescriptors(); + // Keep the class loader that was used for compilation live for the rest of the compilation + // process. ScopedGlobalRef class_loader(dex2oat.Compile()); if (!dex2oat.WriteOutputFiles()) { @@ -3014,6 +3024,8 @@ static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) { } static dex2oat::ReturnCode CompileApp(Dex2Oat& dex2oat) { + // Keep the class loader that was used for compilation live for the rest of the compilation + // process. 
ScopedGlobalRef class_loader(dex2oat.Compile()); if (!dex2oat.WriteOutputFiles()) { diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc index 12bceb3c1f..6a9d979d52 100644 --- a/dex2oat/dex2oat_test.cc +++ b/dex2oat/dex2oat_test.cc @@ -41,6 +41,7 @@ namespace art { static constexpr size_t kMaxMethodIds = 65535; +static constexpr bool kDebugArgs = false; using android::base::StringPrintf; @@ -55,7 +56,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { } protected: - int GenerateOdexForTestWithStatus(const std::string& dex_location, + int GenerateOdexForTestWithStatus(const std::vector<std::string>& dex_locations, const std::string& odex_location, CompilerFilter::Filter filter, std::string* error_msg, @@ -63,7 +64,10 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { bool use_fd = false) { std::unique_ptr<File> oat_file; std::vector<std::string> args; - args.push_back("--dex-file=" + dex_location); + // Add dex file args. + for (const std::string& dex_location : dex_locations) { + args.push_back("--dex-file=" + dex_location); + } if (use_fd) { oat_file.reset(OS::CreateEmptyFile(odex_location.c_str())); CHECK(oat_file != nullptr) << odex_location; @@ -93,7 +97,7 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { bool use_fd = false, std::function<void(const OatFile&)> check_oat = [](const OatFile&) {}) { std::string error_msg; - int status = GenerateOdexForTestWithStatus(dex_location, + int status = GenerateOdexForTestWithStatus({dex_location}, odex_location, filter, &error_msg, @@ -187,6 +191,14 @@ class Dex2oatTest : public Dex2oatEnvironmentTest { CHECK(android_root != nullptr); argv.push_back("--android-root=" + std::string(android_root)); + if (kDebugArgs) { + std::string all_args; + for (const std::string& arg : argv) { + all_args += arg + " "; + } + LOG(ERROR) << all_args; + } + int link[2]; if (pipe(link) == -1) { @@ -951,7 +963,7 @@ class Dex2oatReturnCodeTest : public Dex2oatTest { Copy(GetTestDexFileName(), dex_location); std::string error_msg; - return GenerateOdexForTestWithStatus(dex_location, + return GenerateOdexForTestWithStatus({dex_location}, odex_location, CompilerFilter::kSpeed, &error_msg, @@ -1107,4 +1119,72 @@ TEST_F(Dex2oatClassLoaderContextTest, ChainContext) { RunTest(context.c_str(), expected_classpath_key.c_str(), true); } +class Dex2oatDeterminism : public Dex2oatTest {}; + +TEST_F(Dex2oatDeterminism, UnloadCompile) { + if (!kUseReadBarrier && + gc::kCollectorTypeDefault != gc::kCollectorTypeCMS && + gc::kCollectorTypeDefault != gc::kCollectorTypeMS) { + LOG(INFO) << "Test requires determinism support."; + return; + } + Runtime* const runtime = Runtime::Current(); + std::string out_dir = GetScratchDir(); + const std::string base_oat_name = out_dir + "/base.oat"; + const std::string base_vdex_name = out_dir + "/base.vdex"; + const std::string unload_oat_name = out_dir + "/unload.oat"; + const std::string unload_vdex_name = out_dir + "/unload.vdex"; + const std::string no_unload_oat_name = out_dir + "/nounload.oat"; + const std::string no_unload_vdex_name = out_dir + "/nounload.vdex"; + const std::string app_image_name = out_dir + "/unload.art"; + std::string error_msg; + const std::vector<gc::space::ImageSpace*>& spaces = runtime->GetHeap()->GetBootImageSpaces(); + ASSERT_GT(spaces.size(), 0u); + const std::string image_location = spaces[0]->GetImageLocation(); + // Without passing in an app image, it will unload in between compilations. 
+ const int res = GenerateOdexForTestWithStatus( + GetLibCoreDexFileNames(), + base_oat_name, + CompilerFilter::Filter::kQuicken, + &error_msg, + {"--force-determinism", "--avoid-storing-invocation"}); + EXPECT_EQ(res, 0); + Copy(base_oat_name, unload_oat_name); + Copy(base_vdex_name, unload_vdex_name); + std::unique_ptr<File> unload_oat(OS::OpenFileForReading(unload_oat_name.c_str())); + std::unique_ptr<File> unload_vdex(OS::OpenFileForReading(unload_vdex_name.c_str())); + ASSERT_TRUE(unload_oat != nullptr); + ASSERT_TRUE(unload_vdex != nullptr); + EXPECT_GT(unload_oat->GetLength(), 0u); + EXPECT_GT(unload_vdex->GetLength(), 0u); + // Regenerate with an app image to disable the dex2oat unloading and verify that the output is + // the same. + const int res2 = GenerateOdexForTestWithStatus( + GetLibCoreDexFileNames(), + base_oat_name, + CompilerFilter::Filter::kQuicken, + &error_msg, + {"--force-determinism", "--avoid-storing-invocation", "--app-image-file=" + app_image_name}); + EXPECT_EQ(res2, 0); + Copy(base_oat_name, no_unload_oat_name); + Copy(base_vdex_name, no_unload_vdex_name); + std::unique_ptr<File> no_unload_oat(OS::OpenFileForReading(no_unload_oat_name.c_str())); + std::unique_ptr<File> no_unload_vdex(OS::OpenFileForReading(no_unload_vdex_name.c_str())); + ASSERT_TRUE(no_unload_oat != nullptr); + ASSERT_TRUE(no_unload_vdex != nullptr); + EXPECT_GT(no_unload_oat->GetLength(), 0u); + EXPECT_GT(no_unload_vdex->GetLength(), 0u); + // Verify that both of the files are the same (odex and vdex). + EXPECT_EQ(unload_oat->GetLength(), no_unload_oat->GetLength()); + EXPECT_EQ(unload_vdex->GetLength(), no_unload_vdex->GetLength()); + EXPECT_EQ(unload_oat->Compare(no_unload_oat.get()), 0) + << unload_oat_name << " " << no_unload_oat_name; + EXPECT_EQ(unload_vdex->Compare(no_unload_vdex.get()), 0) + << unload_vdex_name << " " << no_unload_vdex_name; + // App image file. 
+ std::unique_ptr<File> app_image_file(OS::OpenFileForReading(app_image_name.c_str())); + ASSERT_TRUE(app_image_file != nullptr); + EXPECT_GT(app_image_file->GetLength(), 0u); +} + } // namespace art diff --git a/runtime/aot_class_linker.cc b/runtime/aot_class_linker.cc index b1bc3f8f2e..9396565eee 100644 --- a/runtime/aot_class_linker.cc +++ b/runtime/aot_class_linker.cc @@ -16,10 +16,12 @@ #include "aot_class_linker.h" +#include "class_reference.h" +#include "compiler_callbacks.h" #include "handle_scope-inl.h" -#include "mirror/class.h" -#include "mirror/object-inl.h" +#include "mirror/class-inl.h" #include "runtime.h" +#include "verifier/verifier_enums.h" namespace art { @@ -66,4 +68,18 @@ bool AotClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass, } return success; } + +verifier::FailureKind AotClassLinker::PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) { + Runtime* const runtime = Runtime::Current(); + CompilerCallbacks* callbacks = runtime->GetCompilerCallbacks(); + if (callbacks->CanAssumeVerified(ClassReference(&klass->GetDexFile(), + klass->GetDexClassDefIndex()))) { + return verifier::FailureKind::kNoFailure; + } + return ClassLinker::PerformClassVerification(self, klass, log_level, error_msg); +} + } // namespace art diff --git a/runtime/aot_class_linker.h b/runtime/aot_class_linker.h index 11bea86fc4..927b53302b 100644 --- a/runtime/aot_class_linker.h +++ b/runtime/aot_class_linker.h @@ -27,6 +27,16 @@ class AotClassLinker : public ClassLinker { explicit AotClassLinker(InternTable *intern_table); ~AotClassLinker(); + protected: + // Overridden version of PerformClassVerification allows skipping verification if the class was + // previously verified but unloaded. 
+ verifier::FailureKind PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) + OVERRIDE + REQUIRES_SHARED(Locks::mutator_lock_); + bool InitializeClass(Thread *self, Handle<mirror::Class> klass, bool can_run_clinit, diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index fc14da1067..9756f5787f 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -3694,7 +3694,8 @@ bool ClassLinker::IsDexFileRegistered(Thread* self, const DexFile& dex_file) { ObjPtr<mirror::DexCache> ClassLinker::FindDexCache(Thread* self, const DexFile& dex_file) { ReaderMutexLock mu(self, *Locks::dex_lock_); - ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, FindDexCacheDataLocked(dex_file)); + DexCacheData dex_cache_data = FindDexCacheDataLocked(dex_file); + ObjPtr<mirror::DexCache> dex_cache = DecodeDexCache(self, dex_cache_data); if (dex_cache != nullptr) { return dex_cache; } @@ -3704,7 +3705,8 @@ ObjPtr<mirror::DexCache> ClassLinker::FindDexCache(Thread* self, const DexFile& LOG(FATAL_WITHOUT_ABORT) << "Registered dex file " << data.dex_file->GetLocation(); } } - LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation(); + LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation() + << " " << &dex_file << " " << dex_cache_data.dex_file; UNREACHABLE(); } @@ -4280,13 +4282,7 @@ verifier::FailureKind ClassLinker::VerifyClass( std::string error_msg; verifier::FailureKind verifier_failure = verifier::FailureKind::kNoFailure; if (!preverified) { - Runtime* runtime = Runtime::Current(); - verifier_failure = verifier::MethodVerifier::VerifyClass(self, - klass.Get(), - runtime->GetCompilerCallbacks(), - runtime->IsAotCompiler(), - log_level, - &error_msg); + verifier_failure = PerformClassVerification(self, klass, log_level, &error_msg); } // Verification is done, grab the lock again. 
@@ -4354,6 +4350,19 @@ verifier::FailureKind ClassLinker::VerifyClass( return verifier_failure; } +verifier::FailureKind ClassLinker::PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) { + Runtime* const runtime = Runtime::Current(); + return verifier::MethodVerifier::VerifyClass(self, + klass.Get(), + runtime->GetCompilerCallbacks(), + runtime->IsAotCompiler(), + log_level, + error_msg); +} + bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file, ObjPtr<mirror::Class> klass, mirror::Class::Status& oat_file_class_status) { diff --git a/runtime/class_linker.h b/runtime/class_linker.h index bf14aebb52..2fbbe79e47 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -710,6 +710,12 @@ class ClassLinker { REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::dex_lock_); + virtual verifier::FailureKind PerformClassVerification(Thread* self, + Handle<mirror::Class> klass, + verifier::HardFailLogMode log_level, + std::string* error_msg) + REQUIRES_SHARED(Locks::mutator_lock_); + private: class LinkInterfaceMethodsHelper; diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h index 806653a265..c51bb5e176 100644 --- a/runtime/compiler_callbacks.h +++ b/runtime/compiler_callbacks.h @@ -22,6 +22,8 @@ namespace art { +class CompilerDriver; + namespace verifier { class MethodVerifier; @@ -49,6 +51,13 @@ class CompilerCallbacks { virtual verifier::VerifierDeps* GetVerifierDeps() const = 0; virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {} + virtual bool CanAssumeVerified(ClassReference ref ATTRIBUTE_UNUSED) { + return false; + } + + virtual void SetDoesClassUnloading(bool does_class_unloading ATTRIBUTE_UNUSED, + CompilerDriver* compiler_driver ATTRIBUTE_UNUSED) {} + bool IsBootImage() { return mode_ == CallbackMode::kCompileBootImage; } diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc index 726bddd334..b298db674c 100644 --- a/runtime/entrypoints/quick/quick_field_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc @@ -301,6 +301,7 @@ extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj) { extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUSED, mirror::Object* obj, uint32_t offset) { + // Used only in connection with non-volatile loads. DCHECK(kEmitCompilerReadBarrier); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(obj) + offset; mirror::HeapReference<mirror::Object>* ref_addr = @@ -308,9 +309,10 @@ extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref ATTRIBUTE_UNUS constexpr ReadBarrierOption kReadBarrierOption = kUseReadBarrier ? 
kWithReadBarrier : kWithoutReadBarrier; mirror::Object* result = - ReadBarrier::Barrier<mirror::Object, kReadBarrierOption>(obj, - MemberOffset(offset), - ref_addr); + ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kReadBarrierOption>( + obj, + MemberOffset(offset), + ref_addr); return result; } diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc index fd0cd5f0b2..7d01af02a3 100644 --- a/runtime/fault_handler.cc +++ b/runtime/fault_handler.cc @@ -79,8 +79,7 @@ static mirror::Class* SafeGetDeclaringClass(ArtMethod* method) static mirror::Class* SafeGetClass(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) { char* obj_cls = reinterpret_cast<char*>(obj) + mirror::Object::ClassOffset().SizeValue(); - mirror::HeapReference<mirror::Class> cls = - mirror::HeapReference<mirror::Class>::FromMirrorPtr(nullptr); + mirror::HeapReference<mirror::Class> cls; ssize_t rc = SafeCopy(&cls, obj_cls, sizeof(cls)); CHECK_NE(-1, rc); diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc index 290199579b..1b3d0dadae 100644 --- a/runtime/gc/accounting/mod_union_table.cc +++ b/runtime/gc/accounting/mod_union_table.cc @@ -115,8 +115,8 @@ class ModUnionUpdateObjectReferencesVisitor { } private: - template<bool kPoisonReferences> - void MarkReference(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) const + template<typename CompressedReferenceType> + void MarkReference(CompressedReferenceType* obj_ptr) const REQUIRES_SHARED(Locks::mutator_lock_) { // Only add the reference if it is non null and fits our criteria. mirror::Object* ref = obj_ptr->AsMirrorPtr(); diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc index 92b4360123..280c0b1d69 100644 --- a/runtime/gc/accounting/space_bitmap.cc +++ b/runtime/gc/accounting/space_bitmap.cc @@ -48,16 +48,21 @@ SpaceBitmap<kAlignment>* SpaceBitmap<kAlignment>::CreateFromMemMap( CHECK(mem_map != nullptr); uintptr_t* bitmap_begin = reinterpret_cast<uintptr_t*>(mem_map->Begin()); const size_t bitmap_size = ComputeBitmapSize(heap_capacity); - return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin); + return new SpaceBitmap(name, mem_map, bitmap_begin, bitmap_size, heap_begin, heap_capacity); } template<size_t kAlignment> -SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, - size_t bitmap_size, const void* heap_begin) +SpaceBitmap<kAlignment>::SpaceBitmap(const std::string& name, + MemMap* mem_map, + uintptr_t* bitmap_begin, + size_t bitmap_size, + const void* heap_begin, + size_t heap_capacity) : mem_map_(mem_map), bitmap_begin_(reinterpret_cast<Atomic<uintptr_t>*>(bitmap_begin)), bitmap_size_(bitmap_size), heap_begin_(reinterpret_cast<uintptr_t>(heap_begin)), + heap_limit_(reinterpret_cast<uintptr_t>(heap_begin) + heap_capacity), name_(name) { CHECK(bitmap_begin_ != nullptr); CHECK_NE(bitmap_size, 0U); @@ -89,6 +94,7 @@ void SpaceBitmap<kAlignment>::SetHeapLimit(uintptr_t new_end) { if (new_size < bitmap_size_) { bitmap_size_ = new_size; } + heap_limit_ = new_end; // Not sure if doing this trim is necessary, since nothing past the end of the heap capacity // should be marked. 
} diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h index 2fe6394c0f..b49e0b7f89 100644 --- a/runtime/gc/accounting/space_bitmap.h +++ b/runtime/gc/accounting/space_bitmap.h @@ -163,6 +163,7 @@ class SpaceBitmap { void SetHeapSize(size_t bytes) { // TODO: Un-map the end of the mem map. + heap_limit_ = heap_begin_ + bytes; bitmap_size_ = OffsetToIndex(bytes) * sizeof(intptr_t); CHECK_EQ(HeapSize(), bytes); } @@ -173,7 +174,7 @@ class SpaceBitmap { // The maximum address which the bitmap can span. (HeapBegin() <= object < HeapLimit()). uint64_t HeapLimit() const { - return static_cast<uint64_t>(HeapBegin()) + HeapSize(); + return heap_limit_; } // Set the max address which can covered by the bitmap. @@ -196,8 +197,12 @@ class SpaceBitmap { private: // TODO: heap_end_ is initialized so that the heap bitmap is empty, this doesn't require the -1, // however, we document that this is expected on heap_end_ - SpaceBitmap(const std::string& name, MemMap* mem_map, uintptr_t* bitmap_begin, size_t bitmap_size, - const void* heap_begin); + SpaceBitmap(const std::string& name, + MemMap* mem_map, + uintptr_t* bitmap_begin, + size_t bitmap_size, + const void* heap_begin, + size_t heap_capacity); template<bool kSetBit> bool Modify(const mirror::Object* obj); @@ -211,10 +216,13 @@ class SpaceBitmap { // Size of this bitmap. size_t bitmap_size_; - // The base address of the heap, which corresponds to the word containing the first bit in the - // bitmap. + // The start address of the memory covered by the bitmap, which corresponds to the word + // containing the first bit in the bitmap. const uintptr_t heap_begin_; + // The end address of the memory covered by the bitmap. This may not be on a word boundary. + uintptr_t heap_limit_; + // Name of this bitmap. std::string name_; }; diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index 9b24885f53..52b355dedd 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -2464,6 +2464,7 @@ mirror::Object* ConcurrentCopying::IsMarked(mirror::Object* from_ref) { } bool ConcurrentCopying::IsOnAllocStack(mirror::Object* ref) { + // TODO: Explain why this is here. What release operation does it pair with? QuasiAtomic::ThreadFenceAcquire(); accounting::ObjectStack* alloc_stack = GetAllocationStack(); return alloc_stack->Contains(ref); @@ -2624,9 +2625,8 @@ bool ConcurrentCopying::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror } } while (!field->CasWeakRelaxed(from_ref, to_ref)); } else { - QuasiAtomic::ThreadFenceRelease(); - field->Assign(to_ref); - QuasiAtomic::ThreadFenceSequentiallyConsistent(); + // TODO: Why is this seq_cst when the above is relaxed? Document memory ordering. + field->Assign</* kIsVolatile */ true>(to_ref); } } return true; diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h index dec206be30..f722e8d855 100644 --- a/runtime/gc/collector/garbage_collector.h +++ b/runtime/gc/collector/garbage_collector.h @@ -113,7 +113,7 @@ class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public Mark virtual mirror::Object* IsMarked(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) = 0; // Returns true if the given heap reference is null or is already marked. If it's already marked, - // update the reference (uses a CAS if do_atomic_update is true. Otherwise, returns false. + // update the reference (uses a CAS if do_atomic_update is true). 
Otherwise, returns false. virtual bool IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj, bool do_atomic_update) REQUIRES_SHARED(Locks::mutator_lock_) = 0; diff --git a/runtime/gc/collector/semi_space-inl.h b/runtime/gc/collector/semi_space-inl.h index 78fb2d24ae..7db5d2ca20 100644 --- a/runtime/gc/collector/semi_space-inl.h +++ b/runtime/gc/collector/semi_space-inl.h @@ -38,9 +38,8 @@ inline mirror::Object* SemiSpace::GetForwardingAddressInFromSpace(mirror::Object // Used to mark and copy objects. Any newly-marked objects who are in the from space Get moved to // the to-space and have their forward address updated. Objects which have been newly marked are // pushed on the mark stack. -template<bool kPoisonReferences> -inline void SemiSpace::MarkObject( - mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) { +template<typename CompressedReferenceType> +inline void SemiSpace::MarkObject(CompressedReferenceType* obj_ptr) { mirror::Object* obj = obj_ptr->AsMirrorPtr(); if (obj == nullptr) { return; @@ -73,9 +72,8 @@ inline void SemiSpace::MarkObject( } } -template<bool kPoisonReferences> -inline void SemiSpace::MarkObjectIfNotInToSpace( - mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) { +template<typename CompressedReferenceType> +inline void SemiSpace::MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr) { if (!to_space_->HasAddress(obj_ptr->AsMirrorPtr())) { MarkObject(obj_ptr); } diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h index fd52da3947..6d4d789a72 100644 --- a/runtime/gc/collector/semi_space.h +++ b/runtime/gc/collector/semi_space.h @@ -97,13 +97,13 @@ class SemiSpace : public GarbageCollector { // Find the default mark bitmap. void FindDefaultMarkBitmap(); - // Updates obj_ptr if the object has moved. - template<bool kPoisonReferences> - void MarkObject(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) + // Updates obj_ptr if the object has moved. Takes either an ObjectReference or a HeapReference. + template<typename CompressedReferenceType> + void MarkObject(CompressedReferenceType* obj_ptr) REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_); - template<bool kPoisonReferences> - void MarkObjectIfNotInToSpace(mirror::ObjectReference<kPoisonReferences, mirror::Object>* obj_ptr) + template<typename CompressedReferenceType> + void MarkObjectIfNotInToSpace(CompressedReferenceType* obj_ptr) REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_); virtual mirror::Object* MarkObject(mirror::Object* root) OVERRIDE diff --git a/runtime/gc/space/large_object_space_test.cc b/runtime/gc/space/large_object_space_test.cc index 79b775a510..9baa016dcd 100644 --- a/runtime/gc/space/large_object_space_test.cc +++ b/runtime/gc/space/large_object_space_test.cc @@ -38,12 +38,19 @@ void LargeObjectSpaceTest::LargeObjectTest() { Thread* const self = Thread::Current(); for (size_t i = 0; i < 2; ++i) { LargeObjectSpace* los = nullptr; + const size_t capacity = 128 * MB; if (i == 0) { los = space::LargeObjectMapSpace::Create("large object space"); } else { - los = space::FreeListSpace::Create("large object space", nullptr, 128 * MB); + los = space::FreeListSpace::Create("large object space", nullptr, capacity); } + // Make sure the bitmap is not empty and actually covers at least how much we expect. 
+ CHECK_LT(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin()), + static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit())); + CHECK_LE(static_cast<uintptr_t>(los->GetLiveBitmap()->HeapBegin() + capacity), + static_cast<uintptr_t>(los->GetLiveBitmap()->HeapLimit())); + static const size_t num_allocations = 64; static const size_t max_allocation_size = 0x100000; std::vector<std::pair<mirror::Object*, size_t>> requests; diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc index c2a8de3aec..c99412785e 100644 --- a/runtime/gc/space/malloc_space.cc +++ b/runtime/gc/space/malloc_space.cc @@ -191,14 +191,10 @@ ZygoteSpace* MallocSpace::CreateZygoteSpace(const char* alloc_space_name, bool l VLOG(heap) << "Size " << GetMemMap()->Size(); VLOG(heap) << "GrowthLimit " << PrettySize(growth_limit); VLOG(heap) << "Capacity " << PrettySize(capacity); - // Remap the tail. Pass MAP_PRIVATE since we don't want to share the same ashmem as the zygote - // space. + // Remap the tail. std::string error_msg; - std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), - alloc_space_name, - PROT_READ | PROT_WRITE, - MAP_PRIVATE, - &error_msg)); + std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name, + PROT_READ | PROT_WRITE, &error_msg)); CHECK(mem_map.get() != nullptr) << error_msg; void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity, low_memory_mode); diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h index b228e28a22..5b942f2e73 100644 --- a/runtime/interpreter/interpreter_common.h +++ b/runtime/interpreter/interpreter_common.h @@ -141,11 +141,8 @@ static inline bool DoFastInvoke(Thread* self, return false; } else { jit::Jit* jit = Runtime::Current()->GetJit(); - if (jit != nullptr) { - if (type == kVirtual) { - jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); - } - jit->AddSamples(self, sf_method, 1, /*with_backedges*/false); + if (jit != nullptr && type == kVirtual) { + jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); } if (called_method->IsIntrinsic()) { if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data, @@ -182,11 +179,8 @@ static inline bool DoInvoke(Thread* self, return false; } else { jit::Jit* jit = Runtime::Current()->GetJit(); - if (jit != nullptr) { - if (type == kVirtual || type == kInterface) { - jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); - } - jit->AddSamples(self, sf_method, 1, /*with_backedges*/false); + if (jit != nullptr && (type == kVirtual || type == kInterface)) { + jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method); } // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT. 
if (type == kVirtual || type == kInterface) { diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc index 5955b9001a..88254a8cdc 100644 --- a/runtime/interpreter/mterp/mterp.cc +++ b/runtime/interpreter/mterp/mterp.cc @@ -280,7 +280,6 @@ extern "C" size_t MterpInvokeVirtualQuick(Thread* self, if (jit != nullptr) { jit->InvokeVirtualOrInterface( receiver, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), called_method); - jit->AddSamples(self, shadow_frame->GetMethod(), 1, /*with_backedges*/false); } return !self->IsExceptionPending(); } diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc index 2c7282115f..ce06a036ec 100644 --- a/runtime/interpreter/unstarted_runtime.cc +++ b/runtime/interpreter/unstarted_runtime.cc @@ -1438,7 +1438,11 @@ void UnstartedRuntime::UnstartedUnsafeCompareAndSwapObject( mirror::HeapReference<mirror::Object>* field_addr = reinterpret_cast<mirror::HeapReference<mirror::Object>*>( reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset)); - ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>( + ReadBarrier::Barrier< + mirror::Object, + /* kIsVolatile */ false, + kWithReadBarrier, + /* kAlwaysUpdateField */ true>( obj, MemberOffset(offset), field_addr); diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index 31319f1106..47ace7fa71 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -47,13 +47,9 @@ namespace jit { static constexpr int kProtAll = PROT_READ | PROT_WRITE | PROT_EXEC; static constexpr int kProtData = PROT_READ | PROT_WRITE; static constexpr int kProtCode = PROT_READ | PROT_EXEC; -static constexpr int kProtReadOnly = PROT_READ; -static constexpr int kProtNone = PROT_NONE; static constexpr size_t kCodeSizeLogThreshold = 50 * KB; static constexpr size_t kStackMapSizeLogThreshold = 50 * KB; -static constexpr size_t kMinMapSpacingPages = 1; -static constexpr size_t kMaxMapSpacingPages = 128; #define CHECKED_MPROTECT(memory, size, prot) \ do { \ @@ -64,39 +60,12 @@ static constexpr size_t kMaxMapSpacingPages = 128; } \ } while (false) \ -static MemMap* SplitMemMap(MemMap* existing_map, - const char* name, - size_t split_offset, - int split_prot, - std::string* error_msg, - bool use_ashmem, - unique_fd* shmem_fd = nullptr) { - std::string error_str; - uint8_t* divider = existing_map->Begin() + split_offset; - MemMap* new_map = existing_map->RemapAtEnd(divider, - name, - split_prot, - MAP_SHARED, - &error_str, - use_ashmem, - shmem_fd); - if (new_map == nullptr) { - std::ostringstream oss; - oss << "Failed to create spacing for " << name << ": " - << error_str << " offset=" << split_offset; - *error_msg = oss.str(); - return nullptr; - } - return new_map; -} - JitCodeCache* JitCodeCache::Create(size_t initial_capacity, size_t max_capacity, bool generate_debug_info, std::string* error_msg) { ScopedTrace trace(__PRETTY_FUNCTION__); - CHECK_GT(max_capacity, initial_capacity); - CHECK_GE(max_capacity - kMaxMapSpacingPages * kPageSize, initial_capacity); + CHECK_GE(max_capacity, initial_capacity); // Generating debug information is for using the Linux perf tool on // host which does not work with ashmem. @@ -106,10 +75,6 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, // With 'perf', we want a 1-1 mapping between an address and a method. bool garbage_collect_code = !generate_debug_info; - // We only use two mappings (separating rw from rx) if we are able to use ashmem. 
- // See the above comment for debug information and not using ashmem. - bool use_two_mappings = use_ashmem; - // We need to have 32 bit offsets from method headers in code cache which point to things // in the data cache. If the maps are more than 4G apart, having multiple maps wouldn't work. // Ensure we're below 1 GB to be safe. @@ -121,10 +86,6 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, return nullptr; } - // Align both capacities to page size, as that's the unit mspaces use. - initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); - max_capacity = RoundDown(max_capacity, 2 * kPageSize); - std::string error_str; // Map name specific for android_os_Debug.cpp accounting. // Map in low 4gb to simplify accessing root tables for x86_64. @@ -146,138 +107,35 @@ JitCodeCache* JitCodeCache::Create(size_t initial_capacity, return nullptr; } - // Create a region for JIT data and executable code. This will be - // laid out as: - // - // +----------------+ -------------------- - // | code_sync_map_ | ^ code_sync_size ^ - // | | v | - // +----------------+ -- | - // : : ^ | - // : post_code_map : | post_code_size | - // : [padding] : v | - // +----------------+ - | - // | | ^ | - // | code_map | | code_size | total_mapping_size - // | [JIT Code] | v | - // +----------------+ - | - // : : ^ | - // : pre_code_map : | pre_code_size | - // : [padding] : v | - // +----------------+ - | - // | | ^ | - // | data_map | | data_size | - // | [Jit Data] | v v - // +----------------+ -------------------- - // - // The code_sync_map_ contains a page that we use flush CPU instruction - // pipelines (see FlushInstructionPipelines()). - // - // The padding regions - pre_code_map and post_code_map - exist to - // put some random distance between the writable JIT code mapping - // and the executable mapping. The padding is discarded at the end - // of this function. - // - size_t data_size = (max_capacity - kMaxMapSpacingPages * kPageSize) / 2; - size_t pre_code_size = - GetRandomNumber(kMinMapSpacingPages, kMaxMapSpacingPages - 1) * kPageSize; - size_t code_size = max_capacity - data_size - kMaxMapSpacingPages * kPageSize; - size_t code_sync_size = kPageSize; - size_t post_code_size = kMaxMapSpacingPages * kPageSize - pre_code_size - code_sync_size; - DCHECK_EQ(data_size, code_size); - DCHECK_EQ(pre_code_size + post_code_size + code_sync_size, kMaxMapSpacingPages * kPageSize); - DCHECK_EQ(data_size + pre_code_size + code_size + post_code_size + code_sync_size, max_capacity); - - // Create pre-code padding region after data region, discarded after - // code and data regions are set-up. - std::unique_ptr<MemMap> pre_code_map(SplitMemMap(data_map.get(), - "jit-code-cache-padding", - data_size, - kProtNone, - error_msg, - use_ashmem)); - if (pre_code_map == nullptr) { - return nullptr; - } - DCHECK_EQ(data_map->Size(), data_size); - DCHECK_EQ(pre_code_map->Size(), pre_code_size + code_size + post_code_size + code_sync_size); - - // Create code region. - unique_fd writable_code_fd; - std::unique_ptr<MemMap> code_map(SplitMemMap(pre_code_map.get(), - "jit-code-cache", - pre_code_size, - use_two_mappings ? kProtCode : kProtAll, - error_msg, - use_ashmem, - &writable_code_fd)); - if (code_map == nullptr) { - return nullptr; - } - DCHECK_EQ(pre_code_map->Size(), pre_code_size); - DCHECK_EQ(code_map->Size(), code_size + post_code_size + code_sync_size); - - // Padding after code region, discarded after code and data regions - // are set-up. 
- std::unique_ptr<MemMap> post_code_map(SplitMemMap(code_map.get(), - "jit-code-cache-padding", - code_size, - kProtNone, - error_msg, - use_ashmem)); - if (post_code_map == nullptr) { - return nullptr; - } - DCHECK_EQ(code_map->Size(), code_size); - DCHECK_EQ(post_code_map->Size(), post_code_size + code_sync_size); + // Align both capacities to page size, as that's the unit mspaces use. + initial_capacity = RoundDown(initial_capacity, 2 * kPageSize); + max_capacity = RoundDown(max_capacity, 2 * kPageSize); + + // Data cache is 1 / 2 of the map. + // TODO: Make this variable? + size_t data_size = max_capacity / 2; + size_t code_size = max_capacity - data_size; + DCHECK_EQ(code_size + data_size, max_capacity); + uint8_t* divider = data_map->Begin() + data_size; - std::unique_ptr<MemMap> code_sync_map(SplitMemMap(post_code_map.get(), - "jit-code-sync", - post_code_size, - kProtCode, - error_msg, - use_ashmem)); - if (code_sync_map == nullptr) { + MemMap* code_map = + data_map->RemapAtEnd(divider, "jit-code-cache", kProtAll, &error_str, use_ashmem); + if (code_map == nullptr) { + std::ostringstream oss; + oss << "Failed to create read write execute cache: " << error_str << " size=" << max_capacity; + *error_msg = oss.str(); return nullptr; } - DCHECK_EQ(post_code_map->Size(), post_code_size); - DCHECK_EQ(code_sync_map->Size(), code_sync_size); - - std::unique_ptr<MemMap> writable_code_map; - if (use_two_mappings) { - // Allocate the R/W view. - writable_code_map.reset(MemMap::MapFile(code_size, - kProtData, - MAP_SHARED, - writable_code_fd.get(), - /* start */ 0, - /* low_4gb */ true, - "jit-writable-code", - &error_str)); - if (writable_code_map == nullptr) { - std::ostringstream oss; - oss << "Failed to create writable code cache: " << error_str << " size=" << code_size; - *error_msg = oss.str(); - return nullptr; - } - } + DCHECK_EQ(code_map->Begin(), divider); data_size = initial_capacity / 2; code_size = initial_capacity - data_size; DCHECK_EQ(code_size + data_size, initial_capacity); - return new JitCodeCache(writable_code_map.release(), - code_map.release(), - data_map.release(), - code_sync_map.release(), - code_size, - data_size, - max_capacity, - garbage_collect_code); + return new JitCodeCache( + code_map, data_map.release(), code_size, data_size, max_capacity, garbage_collect_code); } -JitCodeCache::JitCodeCache(MemMap* writable_code_map, - MemMap* executable_code_map, +JitCodeCache::JitCodeCache(MemMap* code_map, MemMap* data_map, - MemMap* code_sync_map, size_t initial_code_capacity, size_t initial_data_capacity, size_t max_capacity, @@ -285,10 +143,8 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, : lock_("Jit code cache", kJitCodeCacheLock), lock_cond_("Jit code cache condition variable", lock_), collection_in_progress_(false), + code_map_(code_map), data_map_(data_map), - executable_code_map_(executable_code_map), - writable_code_map_(writable_code_map), - code_sync_map_(code_sync_map), max_capacity_(max_capacity), current_capacity_(initial_code_capacity + initial_data_capacity), code_end_(initial_code_capacity), @@ -308,8 +164,7 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, inline_cache_cond_("Jit inline cache condition variable", lock_) { DCHECK_GE(max_capacity, initial_code_capacity + initial_data_capacity); - MemMap* writable_map = GetWritableMemMap(); - code_mspace_ = create_mspace_with_base(writable_map->Begin(), code_end_, false /*locked*/); + code_mspace_ = create_mspace_with_base(code_map_->Begin(), code_end_, false /*locked*/); data_mspace_ = 
create_mspace_with_base(data_map_->Begin(), data_end_, false /*locked*/); if (code_mspace_ == nullptr || data_mspace_ == nullptr) { @@ -318,10 +173,7 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, SetFootprintLimit(current_capacity_); - if (writable_code_map_ != nullptr) { - CHECKED_MPROTECT(writable_code_map_->Begin(), writable_code_map_->Size(), kProtReadOnly); - } - CHECKED_MPROTECT(executable_code_map_->Begin(), executable_code_map_->Size(), kProtCode); + CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode); CHECKED_MPROTECT(data_map_->Begin(), data_map_->Size(), kProtData); VLOG(jit) << "Created jit code cache: initial data size=" @@ -331,7 +183,7 @@ JitCodeCache::JitCodeCache(MemMap* writable_code_map, } bool JitCodeCache::ContainsPc(const void* ptr) const { - return executable_code_map_->Begin() <= ptr && ptr < executable_code_map_->End(); + return code_map_->Begin() <= ptr && ptr < code_map_->End(); } bool JitCodeCache::ContainsMethod(ArtMethod* method) { @@ -344,96 +196,27 @@ bool JitCodeCache::ContainsMethod(ArtMethod* method) { return false; } -/* This method is only for CHECK/DCHECK that pointers are within to a region. */ -static bool IsAddressInMap(const void* addr, - const MemMap* mem_map, - const char* check_name) { - if (addr == nullptr || mem_map->HasAddress(addr)) { - return true; - } - LOG(ERROR) << "Is" << check_name << "Address " << addr - << " not in [" << reinterpret_cast<void*>(mem_map->Begin()) - << ", " << reinterpret_cast<void*>(mem_map->Begin() + mem_map->Size()) << ")"; - return false; -} - -bool JitCodeCache::IsDataAddress(const void* raw_addr) const { - return IsAddressInMap(raw_addr, data_map_.get(), "Data"); -} - -bool JitCodeCache::IsExecutableAddress(const void* raw_addr) const { - return IsAddressInMap(raw_addr, executable_code_map_.get(), "Executable"); -} - -bool JitCodeCache::IsWritableAddress(const void* raw_addr) const { - return IsAddressInMap(raw_addr, GetWritableMemMap(), "Writable"); -} - -// Convert one address within the source map to the same offset within the destination map. 
-static void* ConvertAddress(const void* source_address, - const MemMap* source_map, - const MemMap* destination_map) { - DCHECK(source_map->HasAddress(source_address)) << source_address; - ptrdiff_t offset = reinterpret_cast<const uint8_t*>(source_address) - source_map->Begin(); - uintptr_t address = reinterpret_cast<uintptr_t>(destination_map->Begin()) + offset; - return reinterpret_cast<void*>(address); -} - -template <typename T> -T* JitCodeCache::ToExecutableAddress(T* writable_address) const { - CHECK(IsWritableAddress(writable_address)); - if (writable_address == nullptr) { - return nullptr; - } - void* executable_address = ConvertAddress(writable_address, - GetWritableMemMap(), - executable_code_map_.get()); - CHECK(IsExecutableAddress(executable_address)); - return reinterpret_cast<T*>(executable_address); -} - -void* JitCodeCache::ToWritableAddress(const void* executable_address) const { - CHECK(IsExecutableAddress(executable_address)); - if (executable_address == nullptr) { - return nullptr; - } - void* writable_address = ConvertAddress(executable_address, - executable_code_map_.get(), - GetWritableMemMap()); - CHECK(IsWritableAddress(writable_address)); - return writable_address; -} - class ScopedCodeCacheWrite : ScopedTrace { public: - explicit ScopedCodeCacheWrite(JitCodeCache* code_cache) - : ScopedTrace("ScopedCodeCacheWrite") { + explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false) + : ScopedTrace("ScopedCodeCacheWrite"), + code_map_(code_map), + only_for_tlb_shootdown_(only_for_tlb_shootdown) { ScopedTrace trace("mprotect all"); - int prot_to_start_writing = kProtAll; - if (code_cache->writable_code_map_ == nullptr) { - // If there is only one mapping, use the executable mapping and toggle between rwx and rx. - prot_to_start_writing = kProtAll; - prot_to_stop_writing_ = kProtCode; - } else { - // If there are two mappings, use the writable mapping and toggle between rw and r. - prot_to_start_writing = kProtData; - prot_to_stop_writing_ = kProtReadOnly; - } - writable_map_ = code_cache->GetWritableMemMap(); - // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to - // one page. - size_ = writable_map_->Size(); - CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_start_writing); + CHECKED_MPROTECT( + code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtAll); } ~ScopedCodeCacheWrite() { ScopedTrace trace("mprotect code"); - CHECKED_MPROTECT(writable_map_->Begin(), size_, prot_to_stop_writing_); + CHECKED_MPROTECT( + code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtCode); } - private: - int prot_to_stop_writing_; - MemMap* writable_map_; - size_t size_; + MemMap* const code_map_; + + // If we're using ScopedCacheWrite only for TLB shootdown, we limit the scope of mprotect to + // one page. 
+ const bool only_for_tlb_shootdown_; DISALLOW_COPY_AND_ASSIGN(ScopedCodeCacheWrite); }; @@ -448,6 +231,7 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -462,6 +246,7 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, fp_spill_mask, code, code_size, + data_size, osr, roots, has_should_deoptimize_flag, @@ -479,6 +264,7 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, fp_spill_mask, code, code_size, + data_size, osr, roots, has_should_deoptimize_flag, @@ -540,10 +326,8 @@ static void FillRootTable(uint8_t* roots_data, Handle<mirror::ObjectArray<mirror } } -uint8_t* JitCodeCache::GetRootTable(const void* code_ptr, uint32_t* number_of_roots) { - CHECK(IsExecutableAddress(code_ptr)); +static uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr) { OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); - // GetOptimizedCodeInfoPtr uses offsets relative to the EXECUTABLE address. uint8_t* data = method_header->GetOptimizedCodeInfoPtr(); uint32_t roots = GetNumberOfRoots(data); if (number_of_roots != nullptr) { @@ -588,8 +372,6 @@ static inline void ProcessWeakClass(GcRoot<mirror::Class>* root_ptr, void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) { MutexLock mu(Thread::Current(), lock_); for (const auto& entry : method_code_map_) { - // GetRootTable takes an EXECUTABLE address. - CHECK(IsExecutableAddress(entry.first)); uint32_t number_of_roots = 0; uint8_t* roots_data = GetRootTable(entry.first, &number_of_roots); GcRoot<mirror::Object>* roots = reinterpret_cast<GcRoot<mirror::Object>*>(roots_data); @@ -627,19 +409,17 @@ void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) { } } -void JitCodeCache::FreeCodeAndData(const void* code_ptr) { - CHECK(IsExecutableAddress(code_ptr)); +void JitCodeCache::FreeCode(const void* code_ptr) { + uintptr_t allocation = FromCodeToAllocation(code_ptr); // Notify native debugger that we are about to remove the code. // It does nothing if we are not using native debugger. DeleteJITCodeEntryForAddress(reinterpret_cast<uintptr_t>(code_ptr)); - // GetRootTable takes an EXECUTABLE address. FreeData(GetRootTable(code_ptr)); - FreeRawCode(reinterpret_cast<uint8_t*>(FromCodeToAllocation(code_ptr))); + FreeCode(reinterpret_cast<uint8_t*>(allocation)); } void JitCodeCache::FreeAllMethodHeaders( const std::unordered_set<OatQuickMethodHeader*>& method_headers) { - // method_headers are expected to be in the executable region. { MutexLock mu(Thread::Current(), *Locks::cha_lock_); Runtime::Current()->GetClassLinker()->GetClassHierarchyAnalysis() @@ -651,9 +431,9 @@ void JitCodeCache::FreeAllMethodHeaders( // so it's possible for the same method_header to start representing // different compile code. MutexLock mu(Thread::Current(), lock_); - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); for (const OatQuickMethodHeader* method_header : method_headers) { - FreeCodeAndData(method_header->GetCode()); + FreeCode(method_header->GetCode()); } } @@ -670,10 +450,9 @@ void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) { // with the classlinker_classes_lock_ held, and suspending ourselves could // lead to a deadlock. 
{ - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); for (auto it = method_code_map_.begin(); it != method_code_map_.end();) { if (alloc.ContainsUnsafe(it->second)) { - CHECK(IsExecutableAddress(OatQuickMethodHeader::FromCodePointer(it->first))); method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first)); it = method_code_map_.erase(it); } else { @@ -765,129 +544,6 @@ static void ClearMethodCounter(ArtMethod* method, bool was_warm) { method->SetCounter(std::min(jit_warmup_threshold - 1, 1)); } -static void FlushInstructionPiplines(uint8_t* sync_page) { - // After updating the JIT code cache we need to force all CPUs to - // flush their instruction pipelines. In the absence of system call - // to do this explicitly, we can achieve this indirectly by toggling - // permissions on an executable page. This should send an IPI to - // each core to update the TLB entry with the interrupt raised on - // each core causing the instruction pipeline to be flushed. - CHECKED_MPROTECT(sync_page, kPageSize, kProtAll); - // Ensure the sync_page is present otherwise a TLB update may not be - // necessary. - sync_page[0] = 0; - CHECKED_MPROTECT(sync_page, kPageSize, kProtCode); -} - -#ifdef __aarch64__ - -static void FlushJitCodeCacheRange(uint8_t* code_ptr, - uint8_t* writable_ptr, - size_t code_size) { - // Cache maintenance instructions can cause permission faults when a - // page is not present (e.g. swapped out or not backed). These - // faults should be handled by the kernel, but a bug in some Linux - // kernels may surface these permission faults to user-land which - // does not currently deal with them (b/63885946). To work around - // this, we read a value from each page to fault it in before - // attempting to perform cache maintenance operations. - // - // For reference, this behavior is caused by this commit: - // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c - - // The cache-line size could be probed for from the CPU, but - // assuming a safe lower bound is safe for CPUs that have different - // cache-line sizes for big and little cores. - static const uintptr_t kSafeCacheLineSize = 32; - - // Ensure stores are present in L1 data cache. - __asm __volatile("dsb ish" ::: "memory"); - - volatile uint8_t mutant; - - // Push dirty cache-lines out to the point of unification (PoU). The - // point of unification is the first point in the cache/memory - // hierarchy where the instruction cache and data cache have the - // same view of memory. The PoU is where an instruction fetch will - // fetch the new code generated by the JIT. - // - // See: http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.den0024a/ch11s04.html - uintptr_t writable_addr = RoundDown(reinterpret_cast<uintptr_t>(writable_ptr), - kSafeCacheLineSize); - uintptr_t writable_end = RoundUp(reinterpret_cast<uintptr_t>(writable_ptr) + code_size, - kSafeCacheLineSize); - while (writable_addr < writable_end) { - // Read from the cache-line to minimize the chance that a cache - // maintenance instruction causes a fault (see kernel bug comment - // above). 
- mutant = *reinterpret_cast<const uint8_t*>(writable_addr); - - // Flush cache-line - __asm volatile("dc cvau, %0" :: "r"(writable_addr) : "memory"); - writable_addr += kSafeCacheLineSize; - } - - __asm __volatile("dsb ish" ::: "memory"); - - uintptr_t code_addr = RoundDown(reinterpret_cast<uintptr_t>(code_ptr), kSafeCacheLineSize); - const uintptr_t code_end = RoundUp(reinterpret_cast<uintptr_t>(code_ptr) + code_size, - kSafeCacheLineSize); - while (code_addr < code_end) { - // Read from the cache-line to minimize the chance that a cache - // maintenance instruction causes a fault (see kernel bug comment - // above). - mutant = *reinterpret_cast<const uint8_t*>(code_addr); - - // Invalidating the data cache line is only strictly necessary - // when the JIT code cache has two mappings (the default). We know - // this cache line is clean so this is just invalidating it (using - // "dc ivac" would be preferable, but counts as a write and this - // memory may not be mapped write permission). - __asm volatile("dc cvau, %0" :: "r"(code_addr) : "memory"); - - // Invalidate the instruction cache line to force instructions in - // range to be re-fetched following update. - __asm volatile("ic ivau, %0" :: "r"(code_addr) : "memory"); - - code_addr += kSafeCacheLineSize; - } - - // Wait for code cache invalidations to complete. - __asm __volatile("dsb ish" ::: "memory"); - - // Reset fetched instruction stream. - __asm __volatile("isb"); -} - -#else // __aarch64 - -static void FlushJitCodeCacheRange(uint8_t* code_ptr, - uint8_t* writable_ptr, - size_t code_size) { - if (writable_ptr != code_ptr) { - // When there are two mappings of the JIT code cache, RX and - // RW, flush the RW version first as we've just dirtied the - // cache lines with new code. Flushing the RX version first - // can cause a permission fault as the those addresses are not - // writable, but can appear dirty in the cache. There is a lot - // of potential subtlety here depending on how the cache is - // indexed and tagged. - // - // Flushing the RX version after the RW version is just - // invalidating cachelines in the instruction cache. This is - // necessary as the instruction cache will often have a - // different set of cache lines present and because the JIT - // code cache can start a new function at any boundary within - // a cache-line. 
- FlushDataCache(reinterpret_cast<char*>(writable_ptr), - reinterpret_cast<char*>(writable_ptr + code_size)); - } - FlushInstructionCache(reinterpret_cast<char*>(code_ptr), - reinterpret_cast<char*>(code_ptr + code_size)); -} - -#endif // __aarch64 - uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, ArtMethod* method, uint8_t* stack_map, @@ -898,6 +554,7 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -917,37 +574,35 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, MutexLock mu(self, lock_); WaitForPotentialCollectionToComplete(self); { - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); memory = AllocateCode(total_size); if (memory == nullptr) { return nullptr; } - uint8_t* writable_ptr = memory + header_size; - code_ptr = ToExecutableAddress(writable_ptr); - - std::copy(code, code + code_size, writable_ptr); - OatQuickMethodHeader* writable_method_header = - OatQuickMethodHeader::FromCodePointer(writable_ptr); - // We need to be able to write the OatQuickMethodHeader, so we use writable_method_header. - // Otherwise, the offsets encoded in OatQuickMethodHeader are used relative to an executable - // address, so we use code_ptr. - new (writable_method_header) OatQuickMethodHeader( + code_ptr = memory + header_size; + + std::copy(code, code + code_size, code_ptr); + method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); + new (method_header) OatQuickMethodHeader( code_ptr - stack_map, code_ptr - method_info, frame_size_in_bytes, core_spill_mask, fp_spill_mask, code_size); - - FlushJitCodeCacheRange(code_ptr, writable_ptr, code_size); - FlushInstructionPiplines(code_sync_map_->Begin()); - + // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may + // trigger a segfault if a page fault occurs when requesting a cache maintenance operation. + // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and + // 6P) stop being supported or their kernels are fixed. + // + // For reference, this behavior is caused by this commit: + // https://android.googlesource.com/kernel/msm/+/3fbe6bc28a6b9939d0650f2f17eb5216c719950c + FlushInstructionCache(reinterpret_cast<char*>(code_ptr), + reinterpret_cast<char*>(code_ptr + code_size)); DCHECK(!Runtime::Current()->IsAotCompiler()); if (has_should_deoptimize_flag) { - writable_method_header->SetHasShouldDeoptimizeFlag(); + method_header->SetHasShouldDeoptimizeFlag(); } - // All the pointers exported from the cache are executable addresses. - method_header = ToExecutableAddress(writable_method_header); } number_of_compilations_++; @@ -986,14 +641,16 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, // but below we still make the compiled code valid for the method. MutexLock mu(self, lock_); // Fill the root table before updating the entry point. - CHECK(IsDataAddress(roots_data)); DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data); DCHECK_LE(roots_data, stack_map); FillRootTable(roots_data, roots); - - // Ensure the updates to the root table are visible with a store fence. - QuasiAtomic::ThreadFenceSequentiallyConsistent(); - + { + // Flush data cache, as compiled code references literals in it. + // We also need a TLB shootdown to act as memory barrier across cores. 
+ ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true); + FlushDataCache(reinterpret_cast<char*>(roots_data), + reinterpret_cast<char*>(roots_data + data_size)); + } method_code_map_.Put(code_ptr, method); if (osr) { number_of_osr_compilations_++; @@ -1041,11 +698,11 @@ bool JitCodeCache::RemoveMethod(ArtMethod* method, bool release_memory) { bool in_cache = false; { - ScopedCodeCacheWrite ccw(this); + ScopedCodeCacheWrite ccw(code_map_.get()); for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) { if (code_iter->second == method) { if (release_memory) { - FreeCodeAndData(code_iter->first); + FreeCode(code_iter->first); } code_iter = method_code_map_.erase(code_iter); in_cache = true; @@ -1099,10 +756,10 @@ void JitCodeCache::NotifyMethodRedefined(ArtMethod* method) { profiling_infos_.erase(profile); } method->SetProfilingInfo(nullptr); - ScopedCodeCacheWrite ccw(this); + ScopedCodeCacheWrite ccw(code_map_.get()); for (auto code_iter = method_code_map_.begin(); code_iter != method_code_map_.end();) { if (code_iter->second == method) { - FreeCodeAndData(code_iter->first); + FreeCode(code_iter->first); code_iter = method_code_map_.erase(code_iter); continue; } @@ -1168,7 +825,6 @@ void JitCodeCache::ClearData(Thread* self, uint8_t* stack_map_data, uint8_t* roots_data) { DCHECK_EQ(FromStackMapToRoots(stack_map_data), roots_data); - CHECK(IsDataAddress(roots_data)); MutexLock mu(self, lock_); FreeData(reinterpret_cast<uint8_t*>(roots_data)); } @@ -1290,11 +946,11 @@ void JitCodeCache::NotifyCollectionDone(Thread* self) { void JitCodeCache::SetFootprintLimit(size_t new_footprint) { size_t per_space_footprint = new_footprint / 2; - CHECK(IsAlignedParam(per_space_footprint, kPageSize)); + DCHECK(IsAlignedParam(per_space_footprint, kPageSize)); DCHECK_EQ(per_space_footprint * 2, new_footprint); mspace_set_footprint_limit(data_mspace_, per_space_footprint); { - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); mspace_set_footprint_limit(code_mspace_, per_space_footprint); } } @@ -1370,8 +1026,8 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { number_of_collections_++; live_bitmap_.reset(CodeCacheBitmap::Create( "code-cache-bitmap", - reinterpret_cast<uintptr_t>(executable_code_map_->Begin()), - reinterpret_cast<uintptr_t>(executable_code_map_->Begin() + current_capacity_ / 2))); + reinterpret_cast<uintptr_t>(code_map_->Begin()), + reinterpret_cast<uintptr_t>(code_map_->Begin() + current_capacity_ / 2))); collection_in_progress_ = true; } } @@ -1443,16 +1099,14 @@ void JitCodeCache::RemoveUnmarkedCode(Thread* self) { std::unordered_set<OatQuickMethodHeader*> method_headers; { MutexLock mu(self, lock_); - ScopedCodeCacheWrite scc(this); + ScopedCodeCacheWrite scc(code_map_.get()); // Iterate over all compiled code and remove entries that are not marked. 
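As background for the FlushInstructionCache call and the data-cache flush added in this hunk: once new machine code has been copied into the cache, the data cache must be cleaned and the instruction cache invalidated over that range before the code may run. A hedged sketch of that step using the GCC/Clang __builtin___clear_cache builtin; whether ART's FlushInstructionCache wrapper is built on this builtin is not shown in this diff, and CopyAndFlushCode is an illustrative name:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Copy freshly generated machine code into an executable buffer and make it
// safe to execute: __builtin___clear_cache performs the required data-cache
// clean and instruction-cache invalidation over [begin, end) on architectures
// that need it, and is effectively a no-op on x86, which keeps the two
// coherent in hardware.
static void CopyAndFlushCode(uint8_t* target, const uint8_t* code, size_t size) {
  std::memcpy(target, code, size);
  __builtin___clear_cache(reinterpret_cast<char*>(target),
                          reinterpret_cast<char*>(target + size));
}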
for (auto it = method_code_map_.begin(); it != method_code_map_.end();) { const void* code_ptr = it->first; - CHECK(IsExecutableAddress(code_ptr)); uintptr_t allocation = FromCodeToAllocation(code_ptr); if (GetLiveBitmap()->Test(allocation)) { ++it; } else { - CHECK(IsExecutableAddress(it->first)); method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first)); it = method_code_map_.erase(it); } @@ -1495,7 +1149,6 @@ void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) { for (const auto& it : method_code_map_) { ArtMethod* method = it.second; const void* code_ptr = it.first; - CHECK(IsExecutableAddress(code_ptr)); const OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); if (method_header->GetEntryPoint() == method->GetEntryPointFromQuickCompiledCode()) { GetLiveBitmap()->AtomicTestAndSet(FromCodeToAllocation(code_ptr)); @@ -1521,7 +1174,6 @@ void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) { // Free all profiling infos of methods not compiled nor being compiled. auto profiling_kept_end = std::remove_if(profiling_infos_.begin(), profiling_infos_.end(), [this] (ProfilingInfo* info) NO_THREAD_SAFETY_ANALYSIS { - CHECK(IsDataAddress(info)); const void* ptr = info->GetMethod()->GetEntryPointFromQuickCompiledCode(); // We have previously cleared the ProfilingInfo pointer in the ArtMethod in the hope // that the compiled code would not get revived. As mutator threads run concurrently, @@ -1582,7 +1234,6 @@ OatQuickMethodHeader* JitCodeCache::LookupMethodHeader(uintptr_t pc, ArtMethod* --it; const void* code_ptr = it->first; - CHECK(IsExecutableAddress(code_ptr)); OatQuickMethodHeader* method_header = OatQuickMethodHeader::FromCodePointer(code_ptr); if (!method_header->Contains(pc)) { return nullptr; @@ -1665,7 +1316,6 @@ ProfilingInfo* JitCodeCache::AddProfilingInfoInternal(Thread* self ATTRIBUTE_UNU // store in the ArtMethod's ProfilingInfo pointer. QuasiAtomic::ThreadFenceRelease(); - CHECK(IsDataAddress(info)); method->SetProfilingInfo(info); profiling_infos_.push_back(info); histogram_profiling_info_memory_use_.AddValue(profile_info_size); @@ -1678,8 +1328,7 @@ void* JitCodeCache::MoreCore(const void* mspace, intptr_t increment) NO_THREAD_S if (code_mspace_ == mspace) { size_t result = code_end_; code_end_ += increment; - MemMap* writable_map = GetWritableMemMap(); - return reinterpret_cast<void*>(result + writable_map->Begin()); + return reinterpret_cast<void*>(result + code_map_->Begin()); } else { DCHECK_EQ(data_mspace_, mspace); size_t result = data_end_; @@ -1831,7 +1480,6 @@ void JitCodeCache::DoneCompiling(ArtMethod* method, Thread* self ATTRIBUTE_UNUSE size_t JitCodeCache::GetMemorySizeOfCodePointer(const void* ptr) { MutexLock mu(Thread::Current(), lock_); - CHECK(IsExecutableAddress(ptr)); return mspace_usable_size(reinterpret_cast<const void*>(FromCodeToAllocation(ptr))); } @@ -1867,27 +1515,22 @@ uint8_t* JitCodeCache::AllocateCode(size_t code_size) { size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment); // Ensure the header ends up at expected instruction alignment. 
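The LookupMethodHeader hunk above keeps the usual ordered-map idiom for mapping an arbitrary pc back to the compiled code that contains it: take the greatest start address not above pc, then check against that region's size. A stand-alone sketch of the same idiom with std::map and hypothetical names (ART's SafeMap and OatQuickMethodHeader are not reproduced here):

#include <cstddef>
#include <cstdint>
#include <map>

// Compiled regions keyed by start address; the mapped value is the code size.
static std::map<uintptr_t, size_t> code_regions;

// Return the start address of the region containing pc, or 0 if there is none.
static uintptr_t FindCodeContaining(uintptr_t pc) {
  auto it = code_regions.upper_bound(pc);  // first region starting strictly after pc
  if (it == code_regions.begin()) {
    return 0;
  }
  --it;  // greatest start address <= pc
  return (pc < it->first + it->second) ? it->first : 0;
}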
DCHECK_ALIGNED_PARAM(reinterpret_cast<uintptr_t>(result + header_size), alignment); - CHECK(IsWritableAddress(result)); used_memory_for_code_ += mspace_usable_size(result); return result; } -void JitCodeCache::FreeRawCode(void* code) { - CHECK(IsExecutableAddress(code)); - void* writable_code = ToWritableAddress(code); - used_memory_for_code_ -= mspace_usable_size(writable_code); - mspace_free(code_mspace_, writable_code); +void JitCodeCache::FreeCode(uint8_t* code) { + used_memory_for_code_ -= mspace_usable_size(code); + mspace_free(code_mspace_, code); } uint8_t* JitCodeCache::AllocateData(size_t data_size) { void* result = mspace_malloc(data_mspace_, data_size); - CHECK(IsDataAddress(reinterpret_cast<uint8_t*>(result))); used_memory_for_data_ += mspace_usable_size(result); return reinterpret_cast<uint8_t*>(result); } void JitCodeCache::FreeData(uint8_t* data) { - CHECK(IsDataAddress(data)); used_memory_for_data_ -= mspace_usable_size(data); mspace_free(data_mspace_, data); } diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 175501f915..daa1d616a6 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -113,6 +113,7 @@ class JitCodeCache { size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -228,8 +229,6 @@ class JitCodeCache { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - uint8_t* GetRootTable(const void* code_ptr, uint32_t* number_of_roots = nullptr); - // The GC needs to disallow the reading of inline caches when it processes them, // to avoid having a class being used while it is being deleted. void AllowInlineCacheAccess() REQUIRES(!lock_); @@ -248,13 +247,9 @@ class JitCodeCache { } private: - friend class ScopedCodeCacheWrite; - // Take ownership of maps. JitCodeCache(MemMap* code_map, MemMap* data_map, - MemMap* writable_code_map, - MemMap* code_sync_map, size_t initial_code_capacity, size_t initial_data_capacity, size_t max_capacity, @@ -272,6 +267,7 @@ class JitCodeCache { size_t fp_spill_mask, const uint8_t* code, size_t code_size, + size_t data_size, bool osr, Handle<mirror::ObjectArray<mirror::Object>> roots, bool has_should_deoptimize_flag, @@ -296,7 +292,7 @@ class JitCodeCache { REQUIRES(!Locks::cha_lock_); // Free in the mspace allocations for `code_ptr`. - void FreeCodeAndData(const void* code_ptr) REQUIRES(lock_); + void FreeCode(const void* code_ptr) REQUIRES(lock_); // Number of bytes allocated in the code cache. size_t CodeCacheSizeLocked() REQUIRES(lock_); @@ -329,7 +325,7 @@ class JitCodeCache { bool CheckLiveCompiledCodeHasProfilingInfo() REQUIRES(lock_); - void FreeRawCode(void* code) REQUIRES(lock_); + void FreeCode(uint8_t* code) REQUIRES(lock_); uint8_t* AllocateCode(size_t code_size) REQUIRES(lock_); void FreeData(uint8_t* data) REQUIRES(lock_); uint8_t* AllocateData(size_t data_size) REQUIRES(lock_); @@ -339,61 +335,25 @@ class JitCodeCache { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - MemMap* GetWritableMemMap() const { - if (writable_code_map_ == nullptr) { - // The system required us to map the JIT Code Cache RWX (see - // JitCodeCache::Create()). - return executable_code_map_.get(); - } else { - // Executable code is mapped RX, and writable code is mapped RW - // to the underlying same memory, but at a different address. 
- return writable_code_map_.get(); - } - } - - bool IsDataAddress(const void* raw_addr) const; - - bool IsExecutableAddress(const void* raw_addr) const; - - bool IsWritableAddress(const void* raw_addr) const; - - template <typename T> - T* ToExecutableAddress(T* writable_address) const; - - void* ToWritableAddress(const void* executable_address) const; - // Lock for guarding allocations, collections, and the method_code_map_. Mutex lock_; // Condition to wait on during collection. ConditionVariable lock_cond_ GUARDED_BY(lock_); // Whether there is a code cache collection in progress. bool collection_in_progress_ GUARDED_BY(lock_); - // JITting methods obviously requires both write and execute permissions on a region of memory. - // In tye typical (non-debugging) case, we separate the memory mapped view that can write the code - // from a view that the runtime uses to execute the code. Having these two views eliminates any - // single address region having rwx permissions. An attacker could still write the writable - // address and then execute the executable address. We allocate the mappings with a random - // address relationship to each other which makes the attacker need two addresses rather than - // just one. In the debugging case there is no file descriptor to back the - // shared memory, and hence we have to use a single mapping. + // Mem map which holds code. + std::unique_ptr<MemMap> code_map_; // Mem map which holds data (stack maps and profiling info). std::unique_ptr<MemMap> data_map_; - // Mem map which holds a non-writable view of code for JIT. - std::unique_ptr<MemMap> executable_code_map_; - // Mem map which holds a non-executable view of code for JIT. - std::unique_ptr<MemMap> writable_code_map_; - // Mem map which holds one executable page that we use for flushing instruction - // fetch buffers. The code on this page is never executed. - std::unique_ptr<MemMap> code_sync_map_; // The opaque mspace for allocating code. void* code_mspace_ GUARDED_BY(lock_); // The opaque mspace for allocating data. void* data_mspace_ GUARDED_BY(lock_); // Bitmap for collecting code and data. std::unique_ptr<CodeCacheBitmap> live_bitmap_; - // Holds non-writable compiled code associated to the ArtMethod. + // Holds compiled code associated to the ArtMethod. SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_); - // Holds non-writable osr compiled code associated to the ArtMethod. + // Holds osr compiled code associated to the ArtMethod. SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_); // ProfilingInfo objects we have allocated. std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_); diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc index 4e82480aca..743604cc47 100644 --- a/runtime/mem_map.cc +++ b/runtime/mem_map.cc @@ -496,7 +496,7 @@ MemMap::~MemMap() { MEMORY_TOOL_MAKE_UNDEFINED(base_begin_, base_size_); int result = munmap(base_begin_, base_size_); if (result == -1) { - PLOG(FATAL) << "munmap failed: " << BaseBegin() << "..." 
<< BaseEnd(); + PLOG(FATAL) << "munmap failed"; } } @@ -535,13 +535,8 @@ MemMap::MemMap(const std::string& name, uint8_t* begin, size_t size, void* base_ } } -MemMap* MemMap::RemapAtEnd(uint8_t* new_end, - const char* tail_name, - int tail_prot, - int sharing_flags, - std::string* error_msg, - bool use_ashmem, - unique_fd* shmem_fd) { +MemMap* MemMap::RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, + std::string* error_msg, bool use_ashmem) { use_ashmem = use_ashmem && !kIsTargetLinux; DCHECK_GE(new_end, Begin()); DCHECK_LE(new_end, End()); @@ -560,12 +555,6 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, size_ = new_end - reinterpret_cast<uint8_t*>(begin_); base_size_ = new_base_end - reinterpret_cast<uint8_t*>(base_begin_); DCHECK_LE(begin_ + size_, reinterpret_cast<uint8_t*>(base_begin_) + base_size_); - if (base_size_ == 0u) { - // All pages in this MemMap have been handed out. Invalidate base - // pointer to prevent the destructor calling munmap() on - // zero-length region (which can't succeed). - base_begin_ = nullptr; - } size_t tail_size = old_end - new_end; uint8_t* tail_base_begin = new_base_end; size_t tail_base_size = old_base_end - new_base_end; @@ -573,14 +562,14 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, DCHECK_ALIGNED(tail_base_size, kPageSize); unique_fd fd; - int flags = MAP_ANONYMOUS | sharing_flags; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; if (use_ashmem) { // android_os_Debug.cpp read_mapinfo assumes all ashmem regions associated with the VM are // prefixed "dalvik-". std::string debug_friendly_name("dalvik-"); debug_friendly_name += tail_name; fd.reset(ashmem_create_region(debug_friendly_name.c_str(), tail_base_size)); - flags = MAP_FIXED | sharing_flags; + flags = MAP_PRIVATE | MAP_FIXED; if (fd.get() == -1) { *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", tail_name, strerror(errno)); @@ -614,9 +603,6 @@ MemMap* MemMap::RemapAtEnd(uint8_t* new_end, fd.get()); return nullptr; } - if (shmem_fd != nullptr) { - shmem_fd->reset(fd.release()); - } return new MemMap(tail_name, actual, tail_size, actual, tail_base_size, tail_prot, false); } diff --git a/runtime/mem_map.h b/runtime/mem_map.h index d8908ad603..5603963eac 100644 --- a/runtime/mem_map.h +++ b/runtime/mem_map.h @@ -25,7 +25,6 @@ #include <string> #include "android-base/thread_annotations.h" -#include "android-base/unique_fd.h" namespace art { @@ -38,8 +37,6 @@ namespace art { #define USE_ART_LOW_4G_ALLOCATOR 0 #endif -using android::base::unique_fd; - #ifdef __linux__ static constexpr bool kMadviseZeroes = true; #else @@ -171,14 +168,11 @@ class MemMap { } // Unmap the pages at end and remap them to create another memory map. - // sharing_flags should be either MAP_PRIVATE or MAP_SHARED. MemMap* RemapAtEnd(uint8_t* new_end, const char* tail_name, int tail_prot, - int sharing_flags, std::string* error_msg, - bool use_ashmem = true, - unique_fd* shmem_fd = nullptr); + bool use_ashmem = true); static bool CheckNoGaps(MemMap* begin_map, MemMap* end_map) REQUIRES(!MemMap::mem_maps_lock_); diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc index 99bf0045c4..a4ebb16d09 100644 --- a/runtime/mem_map_test.cc +++ b/runtime/mem_map_test.cc @@ -74,7 +74,6 @@ class MemMapTest : public CommonRuntimeTest { MemMap* m1 = m0->RemapAtEnd(base0 + page_size, "MemMapTest_RemapAtEndTest_map1", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg); // Check the states of the two maps. 
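The sharing_flags and shmem_fd parameters dropped from RemapAtEnd above existed only to build the dual-view JIT mapping that this change removes: the same physical pages mapped once read+write for the compiler and once read+execute for the runtime, at unrelated addresses. A hedged, Linux-only sketch of that general technique using memfd_create (glibc 2.27 or newer; the real code used ashmem and MemMap, which are not shown here):

#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>
#include <cstdio>

int main() {
  const size_t kSize = 4096;
  int fd = memfd_create("jit-demo", 0);
  if (fd < 0 || ftruncate(fd, kSize) != 0) {
    return 1;
  }
  // Two views of the same pages: one the JIT writes through, one the runtime
  // executes from. A write through the RW view is visible through the RX view.
  void* rw = mmap(nullptr, kSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  void* rx = mmap(nullptr, kSize, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
  if (rw == MAP_FAILED || rx == MAP_FAILED) {
    return 1;
  }
  static_cast<unsigned char*>(rw)[0] = 0xc3;  // e.g. an x86 'ret', written via the RW view only
  std::printf("rw=%p rx=%p byte seen through rx: %#x\n",
              rw, rx, static_cast<unsigned>(static_cast<unsigned char*>(rx)[0]));
  close(fd);
  return 0;
}

With the dual view gone, the cache reverts to a single mapping whose permissions are toggled by ScopedCodeCacheWrite, which is what the jit_code_cache.cc hunks earlier in this patch implement.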
EXPECT_EQ(m0->Begin(), base0) << error_msg; @@ -457,7 +456,6 @@ TEST_F(MemMapTest, AlignBy) { std::unique_ptr<MemMap> m1(m0->RemapAtEnd(base0 + 3 * page_size, "MemMapTest_AlignByTest_map1", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg)); uint8_t* base1 = m1->Begin(); ASSERT_TRUE(base1 != nullptr) << error_msg; @@ -467,7 +465,6 @@ TEST_F(MemMapTest, AlignBy) { std::unique_ptr<MemMap> m2(m1->RemapAtEnd(base1 + 4 * page_size, "MemMapTest_AlignByTest_map2", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg)); uint8_t* base2 = m2->Begin(); ASSERT_TRUE(base2 != nullptr) << error_msg; @@ -477,7 +474,6 @@ TEST_F(MemMapTest, AlignBy) { std::unique_ptr<MemMap> m3(m2->RemapAtEnd(base2 + 3 * page_size, "MemMapTest_AlignByTest_map1", PROT_READ | PROT_WRITE, - MAP_PRIVATE, &error_msg)); uint8_t* base3 = m3->Begin(); ASSERT_TRUE(base3 != nullptr) << error_msg; diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h index 4f8952d8c2..6eb200d0d3 100644 --- a/runtime/mirror/object-inl.h +++ b/runtime/mirror/object-inl.h @@ -716,11 +716,10 @@ inline T* Object::GetFieldObject(MemberOffset field_offset) { } uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); HeapReference<T>* objref_addr = reinterpret_cast<HeapReference<T>*>(raw_addr); - T* result = ReadBarrier::Barrier<T, kReadBarrierOption>(this, field_offset, objref_addr); - if (kIsVolatile) { - // TODO: Refactor to use a SequentiallyConsistent load instead. - QuasiAtomic::ThreadFenceAcquire(); // Ensure visibility of operations preceding store. - } + T* result = ReadBarrier::Barrier<T, kIsVolatile, kReadBarrierOption>( + this, + field_offset, + objref_addr); if (kVerifyFlags & kVerifyReads) { VerifyObject(result); } @@ -756,15 +755,7 @@ inline void Object::SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, } uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); HeapReference<Object>* objref_addr = reinterpret_cast<HeapReference<Object>*>(raw_addr); - if (kIsVolatile) { - // TODO: Refactor to use a SequentiallyConsistent store instead. - QuasiAtomic::ThreadFenceRelease(); // Ensure that prior accesses are visible before store. - objref_addr->Assign(new_value.Ptr()); - QuasiAtomic::ThreadFenceSequentiallyConsistent(); - // Ensure this store occurs before any volatile loads. 
- } else { - objref_addr->Assign(new_value.Ptr()); - } + objref_addr->Assign<kIsVolatile>(new_value.Ptr()); } template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags, @@ -835,13 +826,12 @@ inline bool Object::CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeWeakSequentiallyConsistent(old_ref, new_ref); return success; } @@ -877,13 +867,12 @@ inline bool Object::CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrie if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeStrongSequentiallyConsistent(old_ref, new_ref); return success; } @@ -907,13 +896,12 @@ inline bool Object::CasFieldWeakRelaxedObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeWeakRelaxed(old_ref, new_ref); return success; } @@ -937,13 +925,12 @@ inline bool Object::CasFieldWeakReleaseObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + 
field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeWeakRelease(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeWeakRelease(old_ref, new_ref); return success; } diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h index 69365af7fd..f0769409d4 100644 --- a/runtime/mirror/object-readbarrier-inl.h +++ b/runtime/mirror/object-readbarrier-inl.h @@ -211,13 +211,12 @@ inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeStrongRelaxed(old_ref, new_ref); return success; } @@ -241,13 +240,12 @@ inline bool Object::CasFieldStrongReleaseObjectWithoutWriteBarrier( if (kTransactionActive) { Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true); } - HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value)); - HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value)); + uint32_t old_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(old_value)); + uint32_t new_ref(PtrCompression<kPoisonHeapReferences, Object>::Compress(new_value)); uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value(); Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr); - bool success = atomic_addr->CompareExchangeStrongRelease(old_ref.reference_, - new_ref.reference_); + bool success = atomic_addr->CompareExchangeStrongRelease(old_ref, new_ref); return success; } diff --git a/runtime/mirror/object_reference-inl.h b/runtime/mirror/object_reference-inl.h index 22fb83cb5c..60f3ce153f 100644 --- a/runtime/mirror/object_reference-inl.h +++ b/runtime/mirror/object_reference-inl.h @@ -30,17 +30,10 @@ void ObjectReference<kPoisonReferences, MirrorType>::Assign(ObjPtr<MirrorType> p } template<class MirrorType> -HeapReference<MirrorType> HeapReference<MirrorType>::FromObjPtr(ObjPtr<MirrorType> ptr) { - return HeapReference<MirrorType>(ptr.Ptr()); -} - -template<class MirrorType> bool HeapReference<MirrorType>::CasWeakRelaxed(MirrorType* expected_ptr, MirrorType* new_ptr) { - HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_ptr)); - HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_ptr)); - Atomic<uint32_t>* atomic_reference = reinterpret_cast<Atomic<uint32_t>*>(&this->reference_); - return atomic_reference->CompareExchangeWeakRelaxed(expected_ref.reference_, - new_ref.reference_); + return reference_.CompareExchangeWeakRelaxed( + Compression::Compress(expected_ptr), + Compression::Compress(new_ptr)); } } // namespace mirror diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h index a96a120d68..c62ee6cb61 
100644 --- a/runtime/mirror/object_reference.h +++ b/runtime/mirror/object_reference.h @@ -17,6 +17,7 @@ #ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_ #define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_H_ +#include "atomic.h" #include "base/mutex.h" // For Locks::mutator_lock_. #include "globals.h" #include "obj_ptr.h" @@ -30,20 +31,43 @@ class Object; // extra platform specific padding. #define MANAGED PACKED(4) +template<bool kPoisonReferences, class MirrorType> +class PtrCompression { + public: + // Compress reference to its bit representation. + static uint32_t Compress(MirrorType* mirror_ptr) { + uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr); + return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits); + } + + // Uncompress an encoded reference from its bit representation. + static MirrorType* Decompress(uint32_t ref) { + uintptr_t as_bits = kPoisonReferences ? -ref : ref; + return reinterpret_cast<MirrorType*>(as_bits); + } + + // Convert an ObjPtr to a compressed reference. + static uint32_t Compress(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_) { + return Compress(ptr.Ptr()); + } +}; + // Value type representing a reference to a mirror::Object of type MirrorType. template<bool kPoisonReferences, class MirrorType> class MANAGED ObjectReference { + private: + using Compression = PtrCompression<kPoisonReferences, MirrorType>; + public: - MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) { - return UnCompress(); + MirrorType* AsMirrorPtr() const { + return Compression::Decompress(reference_); } - void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) { - reference_ = Compress(other); + void Assign(MirrorType* other) { + reference_ = Compression::Compress(other); } - void Assign(ObjPtr<MirrorType> ptr) - REQUIRES_SHARED(Locks::mutator_lock_); + void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_); void Clear() { reference_ = 0; @@ -58,48 +82,71 @@ class MANAGED ObjectReference { return reference_; } - protected: - explicit ObjectReference(MirrorType* mirror_ptr) - REQUIRES_SHARED(Locks::mutator_lock_) - : reference_(Compress(mirror_ptr)) { - } - - // Compress reference to its bit representation. - static uint32_t Compress(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) { - uintptr_t as_bits = reinterpret_cast<uintptr_t>(mirror_ptr); - return static_cast<uint32_t>(kPoisonReferences ? -as_bits : as_bits); + static ObjectReference<kPoisonReferences, MirrorType> FromMirrorPtr(MirrorType* mirror_ptr) + REQUIRES_SHARED(Locks::mutator_lock_) { + return ObjectReference<kPoisonReferences, MirrorType>(mirror_ptr); } - // Uncompress an encoded reference from its bit representation. - MirrorType* UnCompress() const REQUIRES_SHARED(Locks::mutator_lock_) { - uintptr_t as_bits = kPoisonReferences ? -reference_ : reference_; - return reinterpret_cast<MirrorType*>(as_bits); + protected: + explicit ObjectReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) + : reference_(Compression::Compress(mirror_ptr)) { } - friend class Object; - // The encoded reference to a mirror::Object. uint32_t reference_; }; // References between objects within the managed heap. +// Similar API to ObjectReference, but not a value type. Supports atomic access. 
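The PtrCompression helper introduced above centralizes the 32-bit compression and optional poisoning of heap references. A stand-alone illustration of the arithmetic; the templates below mirror Compress/Decompress but are not the ART class, and the address is synthetic because references only round-trip when the managed heap sits in the low 4 GiB:

#include <cassert>
#include <cstdint>
#include <cstdio>

// With poisoning enabled, the stored 32-bit value is the two's-complement
// negation of the pointer bits, so accidentally using the raw stored value as
// a pointer points at an implausible address and faults quickly.
template <bool kPoison>
uint32_t Compress(const void* ptr) {
  uintptr_t as_bits = reinterpret_cast<uintptr_t>(ptr);
  return static_cast<uint32_t>(kPoison ? -as_bits : as_bits);
}

template <bool kPoison>
void* Decompress(uint32_t ref) {
  uintptr_t as_bits = kPoison ? -ref : ref;
  return reinterpret_cast<void*>(as_bits);
}

int main() {
  void* fake_obj = reinterpret_cast<void*>(0x12345678u);  // synthetic low-heap address
  uint32_t poisoned = Compress<true>(fake_obj);
  std::printf("raw=%p stored=0x%08x\n", fake_obj, static_cast<unsigned>(poisoned));
  assert(Decompress<true>(poisoned) == fake_obj);  // the round trip is exact
  return 0;
}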
template<class MirrorType> -class MANAGED HeapReference : public ObjectReference<kPoisonHeapReferences, MirrorType> { +class MANAGED HeapReference { + private: + using Compression = PtrCompression<kPoisonHeapReferences, MirrorType>; + public: + HeapReference() REQUIRES_SHARED(Locks::mutator_lock_) : HeapReference(nullptr) {} + + template <bool kIsVolatile = false> + MirrorType* AsMirrorPtr() const REQUIRES_SHARED(Locks::mutator_lock_) { + return Compression::Decompress( + kIsVolatile ? reference_.LoadSequentiallyConsistent() : reference_.LoadJavaData()); + } + + template <bool kIsVolatile = false> + void Assign(MirrorType* other) REQUIRES_SHARED(Locks::mutator_lock_) { + if (kIsVolatile) { + reference_.StoreSequentiallyConsistent(Compression::Compress(other)); + } else { + reference_.StoreJavaData(Compression::Compress(other)); + } + } + + template <bool kIsVolatile = false> + void Assign(ObjPtr<MirrorType> ptr) REQUIRES_SHARED(Locks::mutator_lock_); + + void Clear() { + reference_.StoreJavaData(0); + DCHECK(IsNull()); + } + + bool IsNull() const { + return reference_.LoadJavaData() == 0; + } + static HeapReference<MirrorType> FromMirrorPtr(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) { return HeapReference<MirrorType>(mirror_ptr); } - static HeapReference<MirrorType> FromObjPtr(ObjPtr<MirrorType> ptr) - REQUIRES_SHARED(Locks::mutator_lock_); - bool CasWeakRelaxed(MirrorType* old_ptr, MirrorType* new_ptr) REQUIRES_SHARED(Locks::mutator_lock_); private: explicit HeapReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_) - : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {} + : reference_(Compression::Compress(mirror_ptr)) {} + + // The encoded reference to a mirror::Object. Atomically updateable. + Atomic<uint32_t> reference_; }; static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize, diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc index 761362fcb2..b2bdeed5cb 100644 --- a/runtime/native/sun_misc_Unsafe.cc +++ b/runtime/native/sun_misc_Unsafe.cc @@ -67,10 +67,12 @@ static jboolean Unsafe_compareAndSwapObject(JNIEnv* env, jobject, jobject javaOb if (kUseReadBarrier) { // Need to make sure the reference stored in the field is a to-space one before attempting the // CAS or the CAS could fail incorrectly. + // Note that the read barrier load does NOT need to be volatile. mirror::HeapReference<mirror::Object>* field_addr = reinterpret_cast<mirror::HeapReference<mirror::Object>*>( reinterpret_cast<uint8_t*>(obj.Ptr()) + static_cast<size_t>(offset)); - ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>( + ReadBarrier::Barrier<mirror::Object, /* kIsVolatile */ false, kWithReadBarrier, + /* kAlwaysUpdateField */ true>( obj.Ptr(), MemberOffset(offset), field_addr); @@ -112,6 +114,7 @@ static void Unsafe_putOrderedInt(JNIEnv* env, jobject, jobject javaObj, jlong of jint newValue) { ScopedFastNativeObjectAccess soa(env); ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(javaObj); + // TODO: A release store is likely to be faster on future processors. QuasiAtomic::ThreadFenceRelease(); // JNI must use non transactional mode. obj->SetField32<false>(MemberOffset(offset), newValue); diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h index b0935c0b56..642459924e 100644 --- a/runtime/read_barrier-inl.h +++ b/runtime/read_barrier-inl.h @@ -33,7 +33,8 @@ namespace art { // Disabled for performance reasons. 
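The reworked HeapReference above stores the compressed reference in an Atomic<uint32_t> and selects the memory ordering with a kIsVolatile template parameter, instead of wrapping plain loads and stores in explicit fences as the object-inl.h code removed earlier in this patch did. A minimal sketch of that access pattern with std::atomic; relaxed stands in here for ART's plain "Java data" access, and the class name is illustrative:

#include <atomic>
#include <cstdint>

// The field is always an atomic 32-bit cell; the template parameter picks the
// ordering: relaxed for ordinary Java field accesses, sequentially consistent
// for volatile ones.
class CompressedRef {
 public:
  template <bool kIsVolatile = false>
  uint32_t Load() const {
    return bits_.load(kIsVolatile ? std::memory_order_seq_cst
                                  : std::memory_order_relaxed);
  }

  template <bool kIsVolatile = false>
  void Store(uint32_t value) {
    bits_.store(value, kIsVolatile ? std::memory_order_seq_cst
                                   : std::memory_order_relaxed);
  }

 private:
  std::atomic<uint32_t> bits_{0};
};

Keeping the cell atomic even for non-volatile accesses is what lets the CAS helpers in object-inl.h and object-readbarrier-inl.h compare-and-exchange the same 32-bit word without reinterpreting a plain field.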
static constexpr bool kCheckDebugDisallowReadBarrierCount = false; -template <typename MirrorType, ReadBarrierOption kReadBarrierOption, bool kAlwaysUpdateField> +template <typename MirrorType, bool kIsVolatile, ReadBarrierOption kReadBarrierOption, + bool kAlwaysUpdateField> inline MirrorType* ReadBarrier::Barrier( mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) { constexpr bool with_read_barrier = kReadBarrierOption == kWithReadBarrier; @@ -55,7 +56,7 @@ inline MirrorType* ReadBarrier::Barrier( } ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>( fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr)); - MirrorType* ref = ref_addr->AsMirrorPtr(); + MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>(); MirrorType* old_ref = ref; if (is_gray) { // Slow-path. @@ -71,9 +72,9 @@ inline MirrorType* ReadBarrier::Barrier( return ref; } else if (kUseBrooksReadBarrier) { // To be implemented. - return ref_addr->AsMirrorPtr(); + return ref_addr->template AsMirrorPtr<kIsVolatile>(); } else if (kUseTableLookupReadBarrier) { - MirrorType* ref = ref_addr->AsMirrorPtr(); + MirrorType* ref = ref_addr->template AsMirrorPtr<kIsVolatile>(); MirrorType* old_ref = ref; // The heap or the collector can be null at startup. TODO: avoid the need for this null check. gc::Heap* heap = Runtime::Current()->GetHeap(); @@ -93,7 +94,7 @@ inline MirrorType* ReadBarrier::Barrier( } } else { // No read barrier. - return ref_addr->AsMirrorPtr(); + return ref_addr->template AsMirrorPtr<kIsVolatile>(); } } diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h index d36acbcdc4..8a106aabb0 100644 --- a/runtime/read_barrier.h +++ b/runtime/read_barrier.h @@ -46,9 +46,13 @@ class ReadBarrier { // fast-debug environment. DECLARE_RUNTIME_DEBUG_FLAG(kEnableReadBarrierInvariantChecks); + // Return the reference at ref_addr, invoking read barrier as appropriate. + // Ref_addr is an address within obj. // It's up to the implementation whether the given field gets updated whereas the return value // must be an updated reference unless kAlwaysUpdateField is true. - template <typename MirrorType, ReadBarrierOption kReadBarrierOption = kWithReadBarrier, + template <typename MirrorType, + bool kIsVolatile, + ReadBarrierOption kReadBarrierOption = kWithReadBarrier, bool kAlwaysUpdateField = false> ALWAYS_INLINE static MirrorType* Barrier( mirror::Object* obj, MemberOffset offset, mirror::HeapReference<MirrorType>* ref_addr) |
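A side note on the ref_addr->template AsMirrorPtr<kIsVolatile>() spelling used in the read barrier hunks above: because ref_addr has a type that depends on a template parameter, the compiler cannot tell on its own that AsMirrorPtr is a member template, so the template keyword is required before the explicit argument list. A minimal stand-alone example of the syntax, with illustrative names:

#include <cstdint>

struct Cell {
  template <bool kIsVolatile = false>
  uint32_t Get() const { return value; }
  uint32_t value = 0;
};

template <typename T, bool kIsVolatile>
uint32_t ReadThrough(const T* cell) {
  // Without the 'template' keyword, cell->Get<kIsVolatile>() would be parsed
  // as a pair of comparisons against a member named Get.
  return cell->template Get<kIsVolatile>();
}

int main() {
  Cell c;
  c.value = 42;
  return ReadThrough<Cell, true>(&c) == 42 ? 0 : 1;
}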