Merge "Replace invoke kind kDexCacheViaMethod with kRuntimeCall."
diff --git a/build/art.go b/build/art.go
index b33b565..f52c635 100644
--- a/build/art.go
+++ b/build/art.go
@@ -87,7 +87,7 @@
 				"-DART_STACK_OVERFLOW_GAP_arm64=8192",
 				"-DART_STACK_OVERFLOW_GAP_mips=16384",
 				"-DART_STACK_OVERFLOW_GAP_mips64=16384",
-				"-DART_STACK_OVERFLOW_GAP_x86=12288",
+				"-DART_STACK_OVERFLOW_GAP_x86=16384",
 				"-DART_STACK_OVERFLOW_GAP_x86_64=20480")
 	} else {
 		cflags = append(cflags,
@@ -170,12 +170,23 @@
 		}
 		Cflags  []string
 		Asflags []string
+		Sanitize struct {
+		  Recover []string
+		}
 	}
 
 	p := &props{}
 	p.Cflags, p.Asflags = globalFlags(ctx)
 	p.Target.Android.Cflags = deviceFlags(ctx)
 	p.Target.Host.Cflags = hostFlags(ctx)
+
+	if envTrue(ctx, "ART_DEX_FILE_ACCESS_TRACKING") {
+		p.Cflags = append(p.Cflags, "-DART_DEX_FILE_ACCESS_TRACKING")
+		p.Sanitize.Recover = []string {
+			"address",
+		}
+	}
+
 	ctx.AppendProperties(p)
 }
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 39edd1e..a1ee68f 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -33,7 +33,7 @@
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 1573062..2db99cd 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -28,7 +28,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "mirror/dex_cache.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace optimizer {
diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc
index e691a67..2572291 100644
--- a/compiler/dex/inline_method_analyser.cc
+++ b/compiler/dex/inline_method_analyser.cc
@@ -433,8 +433,11 @@
     // Native or abstract.
     return false;
   }
-  return AnalyseMethodCode(
-      code_item, method->ToMethodReference(), method->IsStatic(), method, result);
+  return AnalyseMethodCode(code_item,
+                           MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
+                           method->IsStatic(),
+                           method,
+                           result);
 }
 
 bool InlineMethodAnalyser::AnalyseMethodCode(const DexFile::CodeItem* code_item,
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 0338cfd..b87cb61 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -17,12 +17,13 @@
 #include "verification_results.h"
 
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "base/mutex-inl.h"
+#include "base/stl_util.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "runtime.h"
 #include "thread.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/atomic_method_ref_map-inl.h"
 #include "verified_method.h"
 #include "verifier/method_verifier-inl.h"
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index e6a47ba..528b0a2 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -21,7 +21,7 @@
 
 #include "base/logging.h"
 #include "compiled_method.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 #include "utils/dedupe_set-inl.h"
 #include "utils/swap_space.h"
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index fc5f847..69f853a 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1000,7 +1000,8 @@
   if (profile_compilation_info_ == nullptr) {
     return false;
   }
-  bool result = profile_compilation_info_->ContainsMethod(method_ref);
+  // TODO: Revisit compiling all startup methods. b/36457259
+  bool result = profile_compilation_info_->IsStartupOrHotMethod(method_ref);
 
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] "
@@ -2239,7 +2240,7 @@
  public:
   explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
-  void Visit(size_t class_def_index) OVERRIDE {
+  void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
     ATRACE_CALL();
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
@@ -2254,123 +2255,89 @@
     Handle<mirror::Class> klass(
         hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader)));
 
-    if (klass != nullptr && !SkipClass(manager_->GetClassLoader(), dex_file, klass.Get())) {
-      TryInitializeClass(klass, class_loader);
-    }
-    // Clear any class not found or verification exceptions.
-    soa.Self()->ClearException();
-  }
-
-  // A helper function for initializing klass.
-  void TryInitializeClass(Handle<mirror::Class> klass, Handle<mirror::ClassLoader>& class_loader)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    const DexFile& dex_file = klass->GetDexFile();
-    const DexFile::ClassDef* class_def = klass->GetClassDef();
-    const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def->class_idx_);
-    const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
-    ScopedObjectAccessUnchecked soa(Thread::Current());
-    StackHandleScope<3> hs(soa.Self());
-
-    mirror::Class::Status old_status = klass->GetStatus();;
-    // Only try to initialize classes that were successfully verified.
-    if (klass->IsVerified()) {
-      // Attempt to initialize the class but bail if we either need to initialize the super-class
-      // or static fields.
-      manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
-      old_status = klass->GetStatus();
-      if (!klass->IsInitialized()) {
-        // We don't want non-trivial class initialization occurring on multiple threads due to
-        // deadlock problems. For example, a parent class is initialized (holding its lock) that
-        // refers to a sub-class in its static/class initializer causing it to try to acquire the
-        // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
-        // after first initializing its parents, whose locks are acquired. This leads to a
-        // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
-        // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
-        // than use a special Object for the purpose we use the Class of java.lang.Class.
-        Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
-        ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
-        // Attempt to initialize allowing initialization of parent classes but still not static
-        // fields.
-        bool is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self());
-        if (is_superclass_initialized) {
+    if (klass != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) {
+      // Only try to initialize classes that were successfully verified.
+      if (klass->IsVerified()) {
+        // Attempt to initialize the class but bail if we either need to initialize the super-class
+        // or static fields.
+        manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
+        if (!klass->IsInitialized()) {
+          // We don't want non-trivial class initialization occurring on multiple threads due to
+          // deadlock problems. For example, a parent class is initialized (holding its lock) that
+          // refers to a sub-class in its static/class initializer causing it to try to acquire the
+          // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
+          // after first initializing its parents, whose locks are acquired. This leads to a
+          // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
+          // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
+          // than use a special Object for the purpose we use the Class of java.lang.Class.
+          Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
+          ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
+          // Attempt to initialize allowing initialization of parent classes but still not static
+          // fields.
           manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true);
-        }
-        old_status = klass->GetStatus();
-        // If superclass cannot be initialized, no need to proceed.
-        if (!klass->IsInitialized() &&
-            is_superclass_initialized &&
-            manager_->GetCompiler()->IsImageClass(descriptor)) {
-          bool can_init_static_fields = false;
-          if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) {
+          if (!klass->IsInitialized()) {
             // We need to initialize static fields, we only do this for image classes that aren't
             // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-            can_init_static_fields = !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
-          } else {
-            can_init_static_fields = manager_->GetCompiler()->GetCompilerOptions().IsAppImage() &&
-                !soa.Self()->IsExceptionPending() &&
-                NoClinitInDependency(klass, soa.Self(), &class_loader);
-            // TODO The checking for clinit can be removed since it's already
-            // checked when init superclass. Currently keep it because it contains
-            // processing of intern strings. Will be removed later when intern strings
-            // and clinit are both initialized.
-          }
+            bool can_init_static_fields =
+                manager_->GetCompiler()->GetCompilerOptions().IsBootImage() &&
+                manager_->GetCompiler()->IsImageClass(descriptor) &&
+                !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
+            if (can_init_static_fields) {
+              VLOG(compiler) << "Initializing: " << descriptor;
+              // TODO multithreading support. We should ensure the current compilation thread has
+              // exclusive access to the runtime and the transaction. To achieve this, we could use
+              // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
+              // checks in Thread::AssertThreadSuspensionIsAllowable.
+              Runtime* const runtime = Runtime::Current();
+              Transaction transaction;
 
-          if (can_init_static_fields) {
-            VLOG(compiler) << "Initializing: " << descriptor;
-            // TODO multithreading support. We should ensure the current compilation thread has
-            // exclusive access to the runtime and the transaction. To achieve this, we could use
-            // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
-            // checks in Thread::AssertThreadSuspensionIsAllowable.
-            Runtime* const runtime = Runtime::Current();
-            Transaction transaction;
+              // Run the class initializer in transaction mode.
+              runtime->EnterTransactionMode(&transaction);
+              const mirror::Class::Status old_status = klass->GetStatus();
+              bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
+                                                                           true);
+              // TODO we detach transaction from runtime to indicate we quit the transactional
+              // mode which prevents the GC from visiting objects modified during the transaction.
+              // Ensure GC is not run so we don't access freed objects when aborting transaction.
 
-            // Run the class initializer in transaction mode.
-            runtime->EnterTransactionMode(&transaction);
-            bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
-                                                                         true);
-            // TODO we detach transaction from runtime to indicate we quit the transactional
-            // mode which prevents the GC from visiting objects modified during the transaction.
-            // Ensure GC is not run so don't access freed objects when aborting transaction.
+              {
+                ScopedAssertNoThreadSuspension ants("Transaction end");
+                runtime->ExitTransactionMode();
 
-            {
-              ScopedAssertNoThreadSuspension ants("Transaction end");
-              runtime->ExitTransactionMode();
+                if (!success) {
+                  CHECK(soa.Self()->IsExceptionPending());
+                  mirror::Throwable* exception = soa.Self()->GetException();
+                  VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+                      << exception->Dump();
+                  std::ostream* file_log = manager_->GetCompiler()->
+                      GetCompilerOptions().GetInitFailureOutput();
+                  if (file_log != nullptr) {
+                    *file_log << descriptor << "\n";
+                    *file_log << exception->Dump() << "\n";
+                  }
+                  soa.Self()->ClearException();
+                  transaction.Rollback();
+                  CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+                }
+              }
 
               if (!success) {
-                CHECK(soa.Self()->IsExceptionPending());
-                mirror::Throwable* exception = soa.Self()->GetException();
-                VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
-                               << exception->Dump();
-                std::ostream* file_log = manager_->GetCompiler()->
-                    GetCompilerOptions().GetInitFailureOutput();
-                if (file_log != nullptr) {
-                  *file_log << descriptor << "\n";
-                  *file_log << exception->Dump() << "\n";
-                }
-                soa.Self()->ClearException();
-                transaction.Rollback();
-                CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
-              }
-            }
-
-            if (!success) {
-              // On failure, still intern strings of static fields and seen in <clinit>, as these
-              // will be created in the zygote. This is separated from the transaction code just
-              // above as we will allocate strings, so must be allowed to suspend.
-              if (&klass->GetDexFile() == manager_->GetDexFile()) {
+                // On failure, still intern strings of static fields and seen in <clinit>, as these
+                // will be created in the zygote. This is separated from the transaction code just
+                // above as we will allocate strings, so must be allowed to suspend.
                 InternStrings(klass, class_loader);
               }
             }
           }
+          soa.Self()->AssertNoPendingException();
         }
-        soa.Self()->AssertNoPendingException();
       }
+      // Record the final class status if necessary.
+      ClassReference ref(manager_->GetDexFile(), class_def_index);
+      manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus());
     }
-    // Record the final class status if necessary.
-    ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
-    // Back up the status before doing initialization for static encoded fields,
-    // because the static encoded branch wants to keep the status to uninitialized.
-    manager_->GetCompiler()->RecordClassStatus(ref, old_status);
+    // Clear any class not found or verification exceptions.
+    soa.Self()->ClearException();
   }
 
  private:
@@ -2425,136 +2392,6 @@
     }
   }
 
-  bool ResolveTypesOfMethods(Thread* self, ArtMethod* m)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-      auto rtn_type = m->GetReturnType(true);
-      if (rtn_type == nullptr) {
-        self->ClearException();
-        return false;
-      }
-      const DexFile::TypeList* types = m->GetParameterTypeList();
-      if (types != nullptr) {
-        for (uint32_t i = 0; i < types->Size(); ++i) {
-          dex::TypeIndex param_type_idx = types->GetTypeItem(i).type_idx_;
-          auto param_type = m->GetClassFromTypeIndex(param_type_idx, true);
-          if (param_type == nullptr) {
-            self->ClearException();
-            return false;
-          }
-        }
-      }
-      return true;
-  }
-
-  // Pre resolve types mentioned in all method signatures before start a transaction
-  // since ResolveType doesn't work in transaction mode.
-  bool PreResolveTypes(Thread* self, const Handle<mirror::Class>& klass)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-      PointerSize pointer_size = manager_->GetClassLinker()->GetImagePointerSize();
-      for (ArtMethod& m : klass->GetMethods(pointer_size)) {
-        if (!ResolveTypesOfMethods(self, &m)) {
-          return false;
-        }
-      }
-      if (klass->IsInterface()) {
-        return true;
-      } else if (klass->HasSuperClass()) {
-        StackHandleScope<1> hs(self);
-        MutableHandle<mirror::Class> super_klass(hs.NewHandle<mirror::Class>(klass->GetSuperClass()));
-        for (int i = super_klass->GetVTableLength() - 1; i >= 0; --i) {
-          ArtMethod* m = klass->GetVTableEntry(i, pointer_size);
-          ArtMethod* super_m = super_klass->GetVTableEntry(i, pointer_size);
-          if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
-            return false;
-          }
-        }
-        for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-          super_klass.Assign(klass->GetIfTable()->GetInterface(i));
-          if (klass->GetClassLoader() != super_klass->GetClassLoader()) {
-            uint32_t num_methods = super_klass->NumVirtualMethods();
-            for (uint32_t j = 0; j < num_methods; ++j) {
-              ArtMethod* m = klass->GetIfTable()->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>(
-                  j, pointer_size);
-              ArtMethod* super_m = super_klass->GetVirtualMethod(j, pointer_size);
-              if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
-                return false;
-              }
-            }
-          }
-        }
-      }
-      return true;
-  }
-
-  // Initialize the klass's dependencies recursively before initializing itself.
-  // Checking for interfaces is also necessary since interfaces can contain
-  // both default methods and static encoded fields.
-  bool InitializeDependencies(const Handle<mirror::Class>& klass,
-                              Handle<mirror::ClassLoader> class_loader,
-                              Thread* self)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (klass->HasSuperClass()) {
-      ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
-      if (!handle_scope_super->IsInitialized()) {
-        this->TryInitializeClass(handle_scope_super, class_loader);
-        if (!handle_scope_super->IsInitialized()) {
-          return false;
-        }
-      }
-    }
-
-    uint32_t num_if = klass->NumDirectInterfaces();
-    for (size_t i = 0; i < num_if; i++) {
-      ObjPtr<mirror::Class>
-          interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
-
-      TryInitializeClass(handle_interface, class_loader);
-
-      if (!handle_interface->IsInitialized()) {
-        return false;
-      }
-    }
-
-    return PreResolveTypes(self, klass);
-  }
-
-  // In this phase the classes containing class initializers are ignored. Make sure no
-  // clinit appears in kalss's super class chain and interfaces.
-  bool NoClinitInDependency(const Handle<mirror::Class>& klass,
-                            Thread* self,
-                            Handle<mirror::ClassLoader>* class_loader)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* clinit =
-        klass->FindClassInitializer(manager_->GetClassLinker()->GetImagePointerSize());
-    if (clinit != nullptr) {
-      VLOG(compiler) << klass->PrettyClass() << ' ' << clinit->PrettyMethod(true);
-      return false;
-    }
-    if (klass->HasSuperClass()) {
-      ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
-      if (!NoClinitInDependency(handle_scope_super, self, class_loader))
-        return false;
-    }
-
-    uint32_t num_if = klass->NumDirectInterfaces();
-    for (size_t i = 0; i < num_if; i++) {
-      ObjPtr<mirror::Class>
-          interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
-      StackHandleScope<1> hs(self);
-      Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
-      if (!NoClinitInDependency(handle_interface, self, class_loader))
-        return false;
-    }
-
-    return true;
-  }
-
   const ParallelCompilationManager* const manager_;
 };
 
@@ -2574,10 +2411,7 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
                                      init_thread_pool);
-
-  if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) {
-    // Set the concurrency thread to 1 to support initialization for App Images since transaction
-    // doesn't support multithreading now.
+  if (GetCompilerOptions().IsBootImage()) {
     // TODO: remove this when transactional mode supports multithreading.
     init_thread_count = 1U;
   }
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 26ea39f..4b979d8 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -239,8 +239,14 @@
 
     ProfileCompilationInfo info;
     for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
-      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
-      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+      profile_info_.AddMethodIndex(dex_file->GetLocation(),
+                                   dex_file->GetLocationChecksum(),
+                                   1,
+                                   dex_file->NumMethodIds());
+      profile_info_.AddMethodIndex(dex_file->GetLocation(),
+                                   dex_file->GetLocationChecksum(),
+                                   2,
+                                   dex_file->NumMethodIds());
     }
     return &profile_info_;
   }
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 28c35e9..738f5a2 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -34,7 +34,7 @@
 #include "leb128.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_pool.h"
 #include "utils.h"
 
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index a12d849..2283b39 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -33,6 +33,7 @@
 #include "base/enums.h"
 #include "base/length_prefixed_array.h"
 #include "base/macros.h"
+#include "class_table.h"
 #include "driver/compiler_driver.h"
 #include "image.h"
 #include "lock_word.h"
@@ -60,7 +61,6 @@
 }  // namespace mirror
 
 class ClassLoaderVisitor;
-class ClassTable;
 class ImtConflictTable;
 
 static constexpr int kInvalidFd = -1;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a414f11..1364018 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -58,7 +58,7 @@
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -536,7 +536,7 @@
 void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                               Location runtime_type_index_location,
                                                               Location runtime_return_location) {
-  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
   DCHECK_EQ(cls->InputCount(), 1u);
   LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary(
       cls, LocationSummary::kCallOnMainOnly);
@@ -546,7 +546,7 @@
 }
 
 void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
-  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
   LocationSummary* locations = cls->GetLocations();
   MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
   if (cls->NeedsAccessCheck()) {
@@ -585,6 +585,9 @@
 }
 
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
+  for (HEnvironment* env = instruction->GetEnvironment(); env != nullptr; env = env->GetParent()) {
+    env->AllocateLocations();
+  }
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
   LocationSummary* locations = instruction->GetLocations();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 1dcee29..7bf43f7 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -31,6 +31,7 @@
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
+#include "stack.h"
 #include "stack_map_stream.h"
 #include "string_reference.h"
 #include "type_reference.h"
@@ -543,7 +544,7 @@
       case HLoadString::LoadKind::kBssEntry:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnSlowPath;
-      case HLoadString::LoadKind::kDexCacheViaMethod:
+      case HLoadString::LoadKind::kRuntimeCall:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnMainOnly;
       case HLoadString::LoadKind::kJitTableAddress:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index b0f0aba..8300f81 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -7134,7 +7134,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -7142,7 +7142,7 @@
 
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -7195,7 +7195,7 @@
 // move.
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -7267,7 +7267,7 @@
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7329,7 +7329,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -7339,7 +7339,7 @@
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(R0));
   } else {
     locations->SetOut(Location::RequiresRegister());
@@ -7426,7 +7426,7 @@
   }
 
   // TODO: Consider re-adding the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index c65245e..a84f8f3 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4841,7 +4841,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -4849,7 +4849,7 @@
 
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -4894,7 +4894,7 @@
 // move.
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -4976,7 +4976,7 @@
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -5032,7 +5032,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -5041,7 +5041,7 @@
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
   } else {
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index afe9f5c..d5e3723 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -7249,7 +7249,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -7257,7 +7257,7 @@
 
 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConventionARMVIXL calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -7310,7 +7310,7 @@
 // move.
 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -7372,7 +7372,7 @@
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7441,7 +7441,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -7451,7 +7451,7 @@
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(LocationFrom(r0));
   } else {
     locations->SetOut(Location::RequiresRegister());
@@ -7529,7 +7529,7 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index defe67a..8560e3e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -7016,12 +7016,12 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = false;
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       fallback_load = false;
       break;
   }
   if (fallback_load) {
-    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+    desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall;
   }
   return desired_string_load_kind;
 }
@@ -7050,12 +7050,12 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = false;
       break;
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       fallback_load = false;
       break;
   }
   if (fallback_load) {
-    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+    desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall;
   }
   return desired_class_load_kind;
 }
@@ -7256,7 +7256,7 @@
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
@@ -7310,7 +7310,7 @@
 // move.
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -7329,7 +7329,7 @@
       base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
       break;
     case HLoadClass::LoadKind::kReferrersClass:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
       break;
     default:
@@ -7407,7 +7407,7 @@
       __ SetReorder(reordering);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7467,13 +7467,13 @@
       }
       FALLTHROUGH_INTENDED;
     // We need an extra register for PC-relative dex cache accesses.
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       locations->SetInAt(0, Location::RequiresRegister());
       break;
     default:
       break;
   }
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   } else {
@@ -7589,7 +7589,7 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5bb12d7..da43c4e 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -951,7 +951,7 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(graph->GetArena()),
+      assembler_(graph->GetArena(), &isa_features),
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4873,11 +4873,11 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   if (fallback_load) {
-    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+    desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall;
   }
   return desired_string_load_kind;
 }
@@ -4899,11 +4899,11 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   if (fallback_load) {
-    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+    desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall;
   }
   return desired_class_load_kind;
 }
@@ -5047,7 +5047,7 @@
 
 void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
@@ -5084,7 +5084,7 @@
 // move.
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -5095,7 +5095,7 @@
   GpuRegister out = out_loc.AsRegister<GpuRegister>();
   GpuRegister current_method_reg = ZERO;
   if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+      load_kind == HLoadClass::LoadKind::kRuntimeCall) {
       current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
   }
 
@@ -5149,7 +5149,7 @@
                                                           cls->GetClass()));
       GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
       break;
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -5198,7 +5198,7 @@
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   } else {
@@ -5272,7 +5272,7 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 662b941..2e8af21 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -314,6 +314,9 @@
                                  uint32_t num_entries,
                                  HBasicBlock* switch_block,
                                  HBasicBlock* default_block);
+  int32_t VecAddress(LocationSummary* locations,
+                     size_t size,
+                     /* out */ GpuRegister* adjusted_base);
 
   Mips64Assembler* const assembler_;
   CodeGeneratorMIPS64* const codegen_;
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 50b95c1..af9e89e 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_mips64.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace mips64 {
@@ -22,12 +23,72 @@
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<Mips64Assembler*>(GetAssembler())->  // NOLINT
 
+VectorRegister VectorRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegister());
+  return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>());
+}
+
 void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>());
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>());
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ ReplicateFPToVectorRegister(dst,
+                                     locations->InAt(0).AsFpuRegister<FpuRegister>(),
+                                     /* is_double */ false);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ ReplicateFPToVectorRegister(dst,
+                                     locations->InAt(0).AsFpuRegister<FpuRegister>(),
+                                     /* is_double */ true);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
@@ -51,13 +112,23 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        instruction->IsVecNot() ? Location::kOutputOverlap
+                                                : Location::kNoOutputOverlap);
+      break;
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        (instruction->IsVecNeg() || instruction->IsVecAbs())
+                            ? Location::kOutputOverlap
+                            : Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -70,7 +141,18 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  Primitive::Type from = instruction->GetInputType();
+  Primitive::Type to = instruction->GetResultType();
+  if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+    DCHECK_EQ(4u, instruction->GetVectorLength());
+    __ Ffint_sW(dst, src);
+  } else {
+    LOG(FATAL) << "Unsupported SIMD type";
+    UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
@@ -78,7 +160,45 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, ZERO);
+      __ SubvB(dst, dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, ZERO);
+      __ SubvH(dst, dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);
+      __ SubvW(dst, dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, ZERO);
+      __ SubvD(dst, dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);
+      __ FsubW(dst, dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, ZERO);
+      __ FsubD(dst, dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
@@ -86,7 +206,47 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, ZERO);       // all zeroes
+      __ Add_aB(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, ZERO);       // all zeroes
+      __ Add_aH(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);       // all zeroes
+      __ Add_aW(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, ZERO);       // all zeroes
+      __ Add_aD(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ LdiW(dst, -1);          // all ones
+      __ SrliW(dst, dst, 1);
+      __ AndV(dst, dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ LdiD(dst, -1);          // all ones
+      __ SrliD(dst, dst, 1);
+      __ AndV(dst, dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
@@ -94,7 +254,30 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:  // special case boolean-not
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ LdiB(dst, 1);
+      __ XorV(dst, dst, src);
+      break;
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ NorV(dst, src, src);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector binary operations.
@@ -106,9 +289,12 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -121,7 +307,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ AddvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ AddvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ AddvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ AddvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FaddW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FaddD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
@@ -129,7 +348,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()
+            ? __ Aver_uB(dst, lhs, rhs)
+            : __ Ave_uB(dst, lhs, rhs);
+      } else {
+        instruction->IsRounded()
+            ? __ Aver_sB(dst, lhs, rhs)
+            : __ Ave_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()
+            ? __ Aver_uH(dst, lhs, rhs)
+            : __ Ave_uH(dst, lhs, rhs);
+      } else {
+        instruction->IsRounded()
+            ? __ Aver_sH(dst, lhs, rhs)
+            : __ Ave_sH(dst, lhs, rhs);
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
@@ -137,7 +389,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SubvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SubvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SubvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SubvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FsubW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FsubD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
@@ -145,7 +430,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ MulvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ MulvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ MulvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ MulvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FmulW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FmulD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
@@ -153,7 +471,23 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FdivW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FdivD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
@@ -177,7 +511,27 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ AndV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
@@ -193,7 +547,27 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ OrV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
@@ -201,7 +575,27 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ XorV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector shift operations.
@@ -213,7 +607,9 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -226,7 +622,32 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SlliB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SlliH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SlliW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SlliD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
@@ -234,7 +655,32 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SraiB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SraiH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SraiW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SraiD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
@@ -242,7 +688,32 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SrliB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SrliH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SrliW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SrliD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
@@ -253,20 +724,143 @@
   LOG(FATAL) << "No SIMD for " << instr->GetId();
 }
 
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      if (is_load) {
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to prepare register and offset for vector memory operations. Returns the offset and sets
+// the output parameter adjusted_base to the original base or to a reserved temporary register (AT).
+int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations,
+                                                   size_t size,
+                                                   /* out */ GpuRegister* adjusted_base) {
+  GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>();
+  Location index = locations->InAt(1);
+  int scale = TIMES_1;
+  switch (size) {
+    case 2: scale = TIMES_2; break;
+    case 4: scale = TIMES_4; break;
+    case 8: scale = TIMES_8; break;
+    default: break;
+  }
+  int32_t offset = mirror::Array::DataOffset(size).Int32Value();
+
+  if (index.IsConstant()) {
+    offset += index.GetConstant()->AsIntConstant()->GetValue() << scale;
+    __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale);
+    *adjusted_base = base;
+  } else {
+    GpuRegister index_reg = index.AsRegister<GpuRegister>();
+    if (scale != TIMES_1) {
+      __ Dlsa(AT, index_reg, base, scale);
+    } else {
+      __ Daddu(AT, base, index_reg);
+    }
+    *adjusted_base = AT;
+  }
+  return offset;
+}
+
 void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VectorRegister reg = VectorRegisterFrom(locations->Out());
+  GpuRegister base;
+  int32_t offset = VecAddress(locations, size, &base);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ LdB(reg, base, offset);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      // Loading 8 bytes (needed if dealing with compressed strings in StringCharAt) from unaligned
+      // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned
+      // loads and stores.
+      // TODO: Implement support for StringCharAt.
+      DCHECK(!instruction->IsStringCharAt());
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ LdH(reg, base, offset);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ LdW(reg, base, offset);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ LdD(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VectorRegister reg = VectorRegisterFrom(locations->InAt(2));
+  GpuRegister base;
+  int32_t offset = VecAddress(locations, size, &base);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ StB(reg, base, offset);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ StH(reg, base, offset);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ StW(reg, base, offset);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ StD(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index eadcc15..ca921b8 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -6044,7 +6044,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -6052,7 +6052,7 @@
 
 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -6106,7 +6106,7 @@
 // move.
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -6166,7 +6166,7 @@
       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -6229,7 +6229,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -6243,7 +6243,7 @@
       load_kind == HLoadString::LoadKind::kBssEntry) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(EAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 99e3def..148f551 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5460,7 +5460,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -5468,7 +5468,7 @@
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     // Custom calling convention: RAX serves as both input and output.
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -5519,7 +5519,7 @@
 // move.
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -5630,7 +5630,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -5639,7 +5639,7 @@
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(RAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 7c833cf..c0ec58f 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -1132,11 +1132,27 @@
                                                   /*out*/bool* needs_taken_test) const {
   DCHECK(info != nullptr);
   DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic);
-  // Count period.
+  // Count period and detect all-invariants.
   int64_t period = 1;
-  for (HInductionVarAnalysis::InductionInfo* p = info;
-       p->induction_class == HInductionVarAnalysis::kPeriodic;
-       p = p->op_b, ++period) {}
+  bool all_invariants = true;
+  HInductionVarAnalysis::InductionInfo* p = info;
+  for (; p->induction_class == HInductionVarAnalysis::kPeriodic; p = p->op_b, ++period) {
+    DCHECK_EQ(p->op_a->induction_class, HInductionVarAnalysis::kInvariant);
+    if (p->op_a->operation != HInductionVarAnalysis::kFetch) {
+      all_invariants = false;
+    }
+  }
+  DCHECK_EQ(p->induction_class, HInductionVarAnalysis::kInvariant);
+  if (p->operation != HInductionVarAnalysis::kFetch) {
+    all_invariants = false;
+  }
+  // Don't rely on FP arithmetic to be precise, unless the full period
+  // consists of pre-computed expressions only.
+  if (info->type == Primitive::kPrimFloat || info->type == Primitive::kPrimDouble) {
+    if (!all_invariants) {
+      return false;
+    }
+  }
   // Handle any periodic(x, periodic(.., y)) for known maximum index value m.
   int64_t m = 0;
   if (IsConstant(trip->op_a, kExact, &m) && m >= 1) {
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index b1d2727..b664d41 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -25,7 +25,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "nodes.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 8fc6f45..ae5f8d1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -28,7 +28,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/arm/assembler_arm.h"
 
 namespace art {
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5d5610e..37d7981 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -28,7 +28,7 @@
 #include "mirror/reference.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/arm64/assembler_arm64.h"
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 43c0759..3c9b613 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -26,7 +26,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 #include "aarch32/constants-aarch32.h"
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index c9b378b..4cea6df 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -23,6 +23,7 @@
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utils/mips/assembler_mips.h"
 #include "utils/mips/constants_mips.h"
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 9474ec5..d785567 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -23,6 +23,7 @@
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utils/mips64/assembler_mips64.h"
 #include "utils/mips64/constants_mips64.h"
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index a61dc27..6b4851d 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -31,7 +31,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/constants_x86.h"
 
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7643f680..ef98b7b 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -31,7 +31,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/constants_x86_64.h"
 
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 94787c9..c3aa976 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -811,6 +811,11 @@
     }
     return true;
   } else if (instruction->IsArrayGet()) {
+    // Deal with vector restrictions.
+    if (instruction->AsArrayGet()->IsStringCharAt() &&
+        HasVectorRestrictions(restrictions, kNoStringCharAt)) {
+      return false;
+    }
     // Accept a right-hand-side array base[index] for
     // (1) exact matching vector type,
     // (2) loop-invariant base,
@@ -1072,9 +1077,36 @@
       }
       return false;
     case kMips:
-    case kMips64:
       // TODO: implement MIPS SIMD.
       return false;
+    case kMips64:
+      if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
+        switch (type) {
+          case Primitive::kPrimBoolean:
+          case Primitive::kPrimByte:
+            *restrictions |= kNoDiv | kNoMinMax;
+            return TrySetVectorLength(16);
+          case Primitive::kPrimChar:
+          case Primitive::kPrimShort:
+            *restrictions |= kNoDiv | kNoMinMax | kNoStringCharAt;
+            return TrySetVectorLength(8);
+          case Primitive::kPrimInt:
+            *restrictions |= kNoDiv | kNoMinMax;
+            return TrySetVectorLength(4);
+          case Primitive::kPrimLong:
+            *restrictions |= kNoDiv | kNoMinMax;
+            return TrySetVectorLength(2);
+          case Primitive::kPrimFloat:
+            *restrictions |= kNoMinMax;
+            return TrySetVectorLength(4);
+          case Primitive::kPrimDouble:
+            *restrictions |= kNoMinMax;
+            return TrySetVectorLength(2);
+          default:
+            break;
+        }  // switch type
+      }
+      return false;
     default:
       return false;
   }  // switch instruction set
@@ -1270,9 +1302,10 @@
         // corresponding new scalar instructions in the loop. The instruction will get an
         // environment while being inserted from the instruction map in original program order.
         DCHECK(vector_mode_ == kSequential);
+        size_t num_args = invoke->GetNumberOfArguments();
         HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
             global_allocator_,
-            invoke->GetNumberOfArguments(),
+            num_args,
             invoke->GetType(),
             invoke->GetDexPc(),
             invoke->GetDexMethodIndex(),
@@ -1282,8 +1315,14 @@
             invoke->GetTargetMethod(),
             invoke->GetClinitCheckRequirement());
         HInputsRef inputs = invoke->GetInputs();
-        for (size_t index = 0; index < inputs.size(); ++index) {
-          new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index]));
+        size_t num_inputs = inputs.size();
+        DCHECK_LE(num_args, num_inputs);
+        DCHECK_EQ(num_inputs, new_invoke->GetInputs().size());  // both invokes agree
+        for (size_t index = 0; index < num_inputs; ++index) {
+          HInstruction* new_input = index < num_args
+              ? vector_map_->Get(inputs[index])
+              : inputs[index];  // beyond arguments: just pass through
+          new_invoke->SetArgumentAt(index, new_input);
         }
         new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
                                  kNeedsEnvironmentOrCache,
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 35298d4..75a42f3 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -72,6 +72,7 @@
     kNoUnroundedHAdd = 64,   // no unrounded halving add
     kNoAbs           = 128,  // no absolute value
     kNoMinMax        = 256,  // no min/max
+    kNoStringCharAt  = 512,  // no StringCharAt
   };
 
   /*
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index a82c1ba..e53209f 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2690,7 +2690,7 @@
 void HLoadClass::SetLoadKind(LoadKind load_kind) {
   SetPackedField<LoadKindField>(load_kind);
 
-  if (load_kind != LoadKind::kDexCacheViaMethod &&
+  if (load_kind != LoadKind::kRuntimeCall &&
       load_kind != LoadKind::kReferrersClass) {
     RemoveAsUserOfInput(0u);
     SetRawInputAt(0u, nullptr);
@@ -2714,8 +2714,8 @@
       return os << "BssEntry";
     case HLoadClass::LoadKind::kJitTableAddress:
       return os << "JitTableAddress";
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
-      return os << "DexCacheViaMethod";
+    case HLoadClass::LoadKind::kRuntimeCall:
+      return os << "RuntimeCall";
     default:
       LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
@@ -2743,10 +2743,10 @@
 
 void HLoadString::SetLoadKind(LoadKind load_kind) {
   // Once sharpened, the load kind should not be changed again.
-  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
   SetPackedField<LoadKindField>(load_kind);
 
-  if (load_kind != LoadKind::kDexCacheViaMethod) {
+  if (load_kind != LoadKind::kRuntimeCall) {
     RemoveAsUserOfInput(0u);
     SetRawInputAt(0u, nullptr);
   }
@@ -2766,8 +2766,8 @@
       return os << "BssEntry";
     case HLoadString::LoadKind::kJitTableAddress:
       return os << "JitTableAddress";
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      return os << "DexCacheViaMethod";
+    case HLoadString::LoadKind::kRuntimeCall:
+      return os << "RuntimeCall";
     default:
       LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 99ad071..74bb2ab 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1790,7 +1790,7 @@
                              uint32_t dex_pc,
                              HInstruction* holder)
      : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)),
-       locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)),
+       locations_(arena->Adapter(kArenaAllocEnvironmentLocations)),
        parent_(nullptr),
        method_(method),
        dex_pc_(dex_pc),
@@ -1804,6 +1804,11 @@
                      to_copy.GetDexPc(),
                      holder) {}
 
+  void AllocateLocations() {
+    DCHECK(locations_.empty());
+    locations_.resize(vregs_.size());
+  }
+
   void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) {
     if (parent_ != nullptr) {
       parent_->SetAndCopyParentChain(allocator, parent);
@@ -5685,12 +5690,11 @@
     // Load from the root table associated with the JIT compiled method.
     kJitTableAddress,
 
-    // Load from resolved types array accessed through the class loaded from
-    // the compiled method's own ArtMethod*. This is the default access type when
-    // all other types are unavailable.
-    kDexCacheViaMethod,
+    // Load using a simple runtime call. This is the fall-back load kind when
+    // the codegen is unable to use another appropriate kind.
+    kRuntimeCall,
 
-    kLast = kDexCacheViaMethod
+    kLast = kRuntimeCall
   };
 
   HLoadClass(HCurrentMethod* current_method,
@@ -5711,7 +5715,7 @@
     DCHECK(!is_referrers_class || !needs_access_check);
 
     SetPackedField<LoadKindField>(
-        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
+        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kRuntimeCall);
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagIsInBootImage>(false);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
@@ -5745,7 +5749,7 @@
   bool CanCallRuntime() const {
     return NeedsAccessCheck() ||
            MustGenerateClinitCheck() ||
-           GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+           GetLoadKind() == LoadKind::kRuntimeCall ||
            GetLoadKind() == LoadKind::kBssEntry;
   }
 
@@ -5755,7 +5759,7 @@
            // If the class is in the boot image, the lookup in the runtime call cannot throw.
            // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and
            // PIC and subsequently avoids a DCE behavior dependency on the PIC option.
-           ((GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+           ((GetLoadKind() == LoadKind::kRuntimeCall ||
              GetLoadKind() == LoadKind::kBssEntry) &&
             !IsInBootImage());
   }
@@ -5774,7 +5778,7 @@
   const DexFile& GetDexFile() const { return dex_file_; }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
-    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+    return GetLoadKind() == LoadKind::kRuntimeCall;
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -5825,12 +5829,12 @@
     return load_kind == LoadKind::kReferrersClass ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == LoadKind::kBssEntry ||
-        load_kind == LoadKind::kDexCacheViaMethod;
+        load_kind == LoadKind::kRuntimeCall;
   }
 
   void SetLoadKindInternal(LoadKind load_kind);
 
-  // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
+  // The special input is the HCurrentMethod for kRuntimeCall or kReferrersClass.
   // For other load kinds it's empty or possibly some architecture-specific instruction
   // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
@@ -5839,7 +5843,7 @@
   // - The compiling method's dex file if the class is defined there too.
   // - The compiling method's dex file if the class is referenced there.
   // - The dex file where the class is defined. When the load kind can only be
-  //   kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`.
+  //   kBssEntry or kRuntimeCall, we cannot emit code for this `HLoadClass`.
   const dex::TypeIndex type_index_;
   const DexFile& dex_file_;
 
@@ -5882,12 +5886,11 @@
     // Load from the root table associated with the JIT compiled method.
     kJitTableAddress,
 
-    // Load from resolved strings array accessed through the class loaded from
-    // the compiled method's own ArtMethod*. This is the default access type when
-    // all other types are unavailable.
-    kDexCacheViaMethod,
+    // Load using a simple runtime call. This is the fall-back load kind when
+    // the codegen is unable to use another appropriate kind.
+    kRuntimeCall,
 
-    kLast = kDexCacheViaMethod,
+    kLast = kRuntimeCall,
   };
 
   HLoadString(HCurrentMethod* current_method,
@@ -5898,7 +5901,7 @@
         special_input_(HUserRecord<HInstruction*>(current_method)),
         string_index_(string_index),
         dex_file_(dex_file) {
-    SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
+    SetPackedField<LoadKindField>(LoadKind::kRuntimeCall);
   }
 
   void SetLoadKind(LoadKind load_kind);
@@ -5942,7 +5945,7 @@
   }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
-    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+    return GetLoadKind() == LoadKind::kRuntimeCall;
   }
 
   bool CanBeNull() const OVERRIDE { return false; }
@@ -5976,7 +5979,7 @@
 
   void SetLoadKindInternal(LoadKind load_kind);
 
-  // The special input is the HCurrentMethod for kDexCacheViaMethod.
+  // The special input is the HCurrentMethod for kRuntimeCall.
   // For other load kinds it's empty or possibly some architecture-specific instruction
   // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 300f4c6..2fd7b03 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -20,7 +20,7 @@
 #include "linear_order.h"
 #include "register_allocation_resolver.h"
 #include "ssa_liveness_analysis.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 1a89567..832a7e1 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -288,6 +288,11 @@
   last_visited_latency_ = kArmIntegerOpLatency;
 }
 
+void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
+}
+
 void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArmMulIntegerLatency;
 }
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index 8d5e4f3..897e97d 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -17,7 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
 
+#ifdef ART_USE_OLD_ARM_BACKEND
+#include "code_generator_arm.h"
+#else
 #include "code_generator_arm_vixl.h"
+#endif
 #include "scheduler.h"
 
 namespace art {
@@ -99,6 +103,7 @@
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 558dcc4..83b487f 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "scheduler_arm64.h"
 #include "code_generator_utils.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace arm64 {
@@ -43,6 +44,13 @@
   last_visited_latency_ = kArm64IntegerOpLatency + 2;
 }
 
+void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) {
+  // Although the code generated is a simple `add` instruction, we found through empirical results
+  // that spacing it from its use in memory accesses was beneficial.
+  last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2;
+}
+
 void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArm64MulIntegerLatency;
 }
@@ -192,5 +200,148 @@
   }
 }
 
+// Sets the latency of a simple SIMD arithmetic operation from its packed element type.
+void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *instr) {
+  // Floating-point lanes and integer lanes use different latency constants.
+  const bool is_fp = Primitive::IsFloatingPointType(instr->GetPackedType());
+  last_visited_latency_ = is_fp ? kArm64SIMDFloatingPointOpLatency
+                                : kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
+    HVecReplicateScalar* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDReplicateOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) {
+  if (instr->GetPackedType() == Primitive::kPrimBoolean) {
+    last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency;
+  }
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+// Sets the latency of a vector multiplication from its packed element type.
+void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) {
+  // Floating-point and integer multiplies are costed with separate constants.
+  const bool is_fp = Primitive::IsFloatingPointType(instr->GetPackedType());
+  last_visited_latency_ = is_fp ? kArm64SIMDMulFloatingPointLatency
+                                : kArm64SIMDMulIntegerLatency;
+}
+
+// Sets the latency of a vector division; only float and double lanes are expected here.
+void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) {
+  const Primitive::Type packed_type = instr->GetPackedType();
+  const bool is_float = (packed_type == Primitive::kPrimFloat);
+  // Anything that is not a float must be a double.
+  DCHECK(is_float || packed_type == Primitive::kPrimDouble);
+  last_visited_latency_ = is_float ? kArm64SIMDDivFloatLatency : kArm64SIMDDivDoubleLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
+    HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDMulIntegerLatency;
+}
+
+// Adds the cost of forming the address of a vector memory access to the internal latency.
+void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction,
+                                                     size_t size ATTRIBUTE_UNUSED) {
+  // Input 1 is the index; a constant index adds no internal latency.
+  if (!instruction->InputAt(1)->IsConstant()) {
+    last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency;
+  }
+}
+
+// Models a SIMD load: the address computation is internal latency and the load itself is the
+// visible latency. String.charAt() loads with string compression pay an extra load and branch.
+void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) {
+  last_visited_internal_latency_ = 0;
+  const Primitive::Type packed_type = instr->GetPackedType();
+  const bool is_compressible_char_at = packed_type == Primitive::kPrimChar &&
+                                       mirror::kUseStringCompression &&
+                                       instr->IsStringCharAt();
+  if (is_compressible_char_at) {
+    // Set latencies for the uncompressed case.
+    last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency;
+  }
+  // The address computation and the load latency are common to both cases.
+  HandleVecAddress(instr, Primitive::ComponentSize(packed_type));
+  last_visited_latency_ = kArm64SIMDMemoryLoadLatency;
+}
+
+// Models a SIMD store: address computation is internal latency, the store is the visible one.
+void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) {
+  last_visited_internal_latency_ = 0;
+  HandleVecAddress(instr, Primitive::ComponentSize(instr->GetPackedType()));
+  last_visited_latency_ = kArm64SIMDMemoryStoreLatency;
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 7a33720..63d5b7d 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -42,6 +42,18 @@
 static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
 static constexpr uint32_t kArm64MulIntegerLatency = 6;
 static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
+
+static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
+static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
+static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
+static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
+static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
+static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
+static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
+static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
+static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
+static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
 
 class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
  public:
@@ -52,29 +64,54 @@
 
 // We add a second unused parameter to be able to use this macro like the others
 // defined in `nodes.h`.
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
-  M(ArrayGet         , unused)                   \
-  M(ArrayLength      , unused)                   \
-  M(ArraySet         , unused)                   \
-  M(BinaryOperation  , unused)                   \
-  M(BoundsCheck      , unused)                   \
-  M(Div              , unused)                   \
-  M(InstanceFieldGet , unused)                   \
-  M(InstanceOf       , unused)                   \
-  M(Invoke           , unused)                   \
-  M(LoadString       , unused)                   \
-  M(Mul              , unused)                   \
-  M(NewArray         , unused)                   \
-  M(NewInstance      , unused)                   \
-  M(Rem              , unused)                   \
-  M(StaticFieldGet   , unused)                   \
-  M(SuspendCheck     , unused)                   \
-  M(TypeConversion   , unused)
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
+  M(ArrayGet             , unused)                   \
+  M(ArrayLength          , unused)                   \
+  M(ArraySet             , unused)                   \
+  M(BinaryOperation      , unused)                   \
+  M(BoundsCheck          , unused)                   \
+  M(Div                  , unused)                   \
+  M(InstanceFieldGet     , unused)                   \
+  M(InstanceOf           , unused)                   \
+  M(Invoke               , unused)                   \
+  M(LoadString           , unused)                   \
+  M(Mul                  , unused)                   \
+  M(NewArray             , unused)                   \
+  M(NewInstance          , unused)                   \
+  M(Rem                  , unused)                   \
+  M(StaticFieldGet       , unused)                   \
+  M(SuspendCheck         , unused)                   \
+  M(TypeConversion       , unused)                   \
+  M(VecReplicateScalar   , unused)                   \
+  M(VecSetScalars        , unused)                   \
+  M(VecSumReduce         , unused)                   \
+  M(VecCnv               , unused)                   \
+  M(VecNeg               , unused)                   \
+  M(VecAbs               , unused)                   \
+  M(VecNot               , unused)                   \
+  M(VecAdd               , unused)                   \
+  M(VecHalvingAdd        , unused)                   \
+  M(VecSub               , unused)                   \
+  M(VecMul               , unused)                   \
+  M(VecDiv               , unused)                   \
+  M(VecMin               , unused)                   \
+  M(VecMax               , unused)                   \
+  M(VecAnd               , unused)                   \
+  M(VecAndNot            , unused)                   \
+  M(VecOr                , unused)                   \
+  M(VecXor               , unused)                   \
+  M(VecShl               , unused)                   \
+  M(VecShr               , unused)                   \
+  M(VecUShr              , unused)                   \
+  M(VecMultiplyAccumulate, unused)                   \
+  M(VecLoad              , unused)                   \
+  M(VecStore             , unused)
 
 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
@@ -85,6 +122,10 @@
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleSimpleArithmeticSIMD(HVecOperation *instr);
+  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
 };
 
 class HSchedulerARM64 : public HScheduler {
@@ -101,6 +142,8 @@
         return true;
       FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
         return true;
+      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
+        return true;
       default:
         return HScheduler::IsSchedulable(instruction);
     }
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 7b8104b..106b709 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -159,7 +159,7 @@
                                                        CompilerDriver* compiler_driver,
                                                        const DexCompilationUnit& dex_compilation_unit) {
   Handle<mirror::Class> klass = load_class->GetClass();
-  DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
+  DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
       << load_class->GetLoadKind();
   DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
@@ -185,7 +185,7 @@
       DCHECK(!runtime->UseJitCompilation());
       if (!compiler_driver->GetSupportBootImageFixup()) {
         // compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+        desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
       } else if ((klass != nullptr) &&
                  compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) {
         is_in_boot_image = true;
@@ -210,7 +210,7 @@
           // this `HLoadClass` hasn't been executed in the interpreter.
           // Fallback to the dex cache.
           // TODO(ngeoffray): Generate HDeoptimize instead.
-          desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+          desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
         }
       } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
         // AOT app compilation. Check if the class is in the boot image.
@@ -229,7 +229,7 @@
   }
 
   if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) {
-    if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) ||
+    if ((load_kind == HLoadClass::LoadKind::kRuntimeCall) ||
         (load_kind == HLoadClass::LoadKind::kBssEntry)) {
       // We actually cannot reference this class, we're forced to bail.
       // We cannot reference this class with Bss, as the entrypoint will lookup the class
@@ -241,7 +241,7 @@
 }
 
 void HSharpening::ProcessLoadString(HLoadString* load_string) {
-  DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
 
   const DexFile& dex_file = load_string->GetDexFile();
   dex::StringIndex string_index = load_string->GetStringIndex();
@@ -268,7 +268,7 @@
         desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
       } else {
         // compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+        desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
       }
     } else if (runtime->UseJitCompilation()) {
       DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
@@ -280,7 +280,7 @@
           desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
         }
       } else {
-        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+        desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 4c0979e..b390508 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -23,7 +23,7 @@
 #include "base/array_ref.h"
 #include "dedupe_set-inl.h"
 #include "gtest/gtest.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index c03b98c..b8b800a 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1795,6 +1795,17 @@
   EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
 }
 
+void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst,
+                                                  FpuRegister src,
+                                                  bool is_double) {
+  // Float or double in FPU register Fx can be considered as 0th element in vector register Wx.
+  if (is_double) {
+    SplatiD(dst, static_cast<VectorRegister>(src), 0);
+  } else {
+    SplatiW(dst, static_cast<VectorRegister>(src), 0);
+  }
+}
+
 void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
   TemplateLoadConst32(this, rd, value);
 }
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index c92cf4c..9b40645 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -774,6 +774,9 @@
   void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
+  // Helper for replicating floating point value in all destination elements.
+  void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);
+
   // Higher level composite instructions.
   int InstrCountForLoadReplicatedConst32(int64_t);
   void LoadConst32(GpuRegister rd, int32_t value);
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index a1eb08e..4f6c915 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -23,7 +23,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 53e73c3..dcdf3bc 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -74,6 +74,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_writer.h"
 #include "os.h"
@@ -477,6 +478,16 @@
                                        android::base::LogId::DEFAULT,
                                        LogSeverity::FATAL,
                                        message.c_str());
+    // If we're on the host, try to dump all threads to get a sense of what's going on. This is
+    // restricted to the host as the dump may itself go bad.
+    // TODO: Use a double watchdog timeout, so we can enable this on-device.
+    if (!kIsTargetBuild && Runtime::Current() != nullptr) {
+      Runtime::Current()->AttachCurrentThread("Watchdog thread attached for dumping",
+                                              true,
+                                              nullptr,
+                                              false);
+      Runtime::Current()->DumpForSigQuit(std::cerr);
+    }
     exit(1);
   }
 
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 6420aa8..b604e8b 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -28,6 +28,7 @@
 
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/mutex-inl.h"
 #include "dex_file-inl.h"
 #include "dex2oat_environment_test.h"
 #include "dex2oat_return_codes.h"
@@ -38,6 +39,8 @@
 
 namespace art {
 
+static constexpr size_t kMaxMethodIds = 65535;
+
 using android::base::StringPrintf;
 
 class Dex2oatTest : public Dex2oatEnvironmentTest {
@@ -612,7 +615,7 @@
     ProfileCompilationInfo info;
     std::string profile_key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
     for (size_t i = 0; i < num_classes; ++i) {
-      info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1 + i));
+      info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1 + i), kMaxMethodIds);
     }
     bool result = info.Save(profile_test_fd);
     close(profile_test_fd);
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 205c0d1..db22767 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1557,7 +1557,7 @@
             (method->GetAccessFlags() & kAccConstructor) != 0 &&
             (method->GetAccessFlags() & kAccStatic) != 0;
         const bool method_executed = is_clinit ||
-            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+            info_->IsStartupOrHotMethod(MethodReference(dex_file, method_id->GetIndex()));
         if (!method_executed) {
           continue;
         }
@@ -1699,7 +1699,7 @@
             (method->GetAccessFlags() & kAccConstructor) != 0 &&
             (method->GetAccessFlags() & kAccStatic) != 0;
         const bool is_method_executed = is_clinit ||
-            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+            info_->IsStartupOrHotMethod(MethodReference(dex_file, method_id->GetIndex()));
         code_items[is_method_executed
                        ? CodeItemKind::kMethodExecuted
                        : CodeItemKind::kMethodNotExecuted]
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 1d09a7f..6fe8eeb 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -341,18 +341,30 @@
         if ((i & 3) != 0) {
           pfi.AddMethodIndex(dex_location,
                              dex_file->GetLocationChecksum(),
-                             i);
+                             i,
+                             dex_file->NumMethodIds());
+          ++profile_methods;
+        } else if ((i & 2) != 0) {
+          pfi.AddSampledMethod(/*startup*/true,
+                               dex_location,
+                               dex_file->GetLocationChecksum(),
+                               i,
+                               dex_file->NumMethodIds());
           ++profile_methods;
         }
       }
       DexCacheResolvedClasses cur_classes(dex_location,
                                           dex_location,
-                                          dex_file->GetLocationChecksum());
+                                          dex_file->GetLocationChecksum(),
+                                          dex_file->NumMethodIds());
       // Add every even class too.
       for (uint32_t i = 0; i < dex_file->NumClassDefs(); i += 1) {
-        cur_classes.AddClass(dex_file->GetClassDef(i).class_idx_);
-        ++profile_classes;
+        if ((i & 2) == 0) {
+          cur_classes.AddClass(dex_file->GetClassDef(i).class_idx_);
+          ++profile_classes;
+        }
       }
+      classes.insert(cur_classes);
     }
     pfi.AddMethodsAndClasses(pmis, classes);
     // Write to provided file.
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d46b2e..c948d3c 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -28,6 +28,7 @@
 #include "runtime/utils.h"
 #include "runtime/gc/space/image_space.h"
 #include "runtime/gc/heap.h"
+#include "runtime/runtime.h"
 
 #include <sys/types.h>
 #include <unistd.h>
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index f07e0f9..a79b408 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -63,6 +63,7 @@
 #include "safe_map.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
+#include "stack.h"
 #include "stack_map.h"
 #include "string_reference.h"
 #include "thread_list.h"
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 1c32898..ccf9ac6 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -30,6 +30,8 @@
 
 namespace art {
 
+static constexpr size_t kMaxMethodIds = 65535;
+
 class ProfileAssistantTest : public CommonRuntimeTest {
  public:
   void PostRuntimeCreate() OVERRIDE {
@@ -56,15 +58,18 @@
           GetOfflineProfileMethodInfo(dex_location1, dex_location_checksum1,
                                       dex_location2, dex_location_checksum2);
       if (reverse_dex_write_order) {
-        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
-        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, kMaxMethodIds, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, kMaxMethodIds, pmi));
       } else {
-        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
-        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, kMaxMethodIds, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, kMaxMethodIds, pmi));
       }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
-      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, dex::TypeIndex(i)));
+      ASSERT_TRUE(info->AddClassIndex(dex_location1,
+                                      dex_location_checksum1,
+                                      dex::TypeIndex(i),
+                                      kMaxMethodIds));
     }
 
     ASSERT_TRUE(info->Save(GetFd(profile)));
@@ -72,6 +77,29 @@
     ASSERT_TRUE(profile.GetFile()->ResetOffset());
   }
 
+  // Records hot, startup and post-startup methods in `info`, saves it to `profile` and rewinds.
+  void SetupBasicProfile(const std::string& id,
+                         uint32_t checksum,
+                         uint16_t number_of_methods,
+                         const std::vector<uint32_t>& hot_methods,
+                         const std::vector<uint32_t>& startup_methods,
+                         const std::vector<uint32_t>& post_startup_methods,
+                         const ScratchFile& profile, ProfileCompilationInfo* info) {
+    const std::string dex_location = "location1" + id;
+    for (uint32_t idx : hot_methods) {
+      info->AddMethodIndex(dex_location, checksum, idx, number_of_methods);
+    }
+    for (uint32_t idx : startup_methods) {
+      info->AddSampledMethod(/*startup*/true, dex_location, checksum, idx, number_of_methods);
+    }
+    for (uint32_t idx : post_startup_methods) {
+      info->AddSampledMethod(/*startup*/false, dex_location, checksum, idx, number_of_methods);
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
   // Creates an inline cache which will be destructed at the end of the test.
   ProfileCompilationInfo::InlineCacheMap* CreateInlineCacheMap() {
     used_inline_caches.emplace_back(new ProfileCompilationInfo::InlineCacheMap(
@@ -84,8 +112,8 @@
         const std::string& dex_location2, uint32_t dex_checksum2) {
     ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
     ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
-    pmi.dex_references.emplace_back(dex_location1, dex_checksum1);
-    pmi.dex_references.emplace_back(dex_location2, dex_checksum2);
+    pmi.dex_references.emplace_back(dex_location1, dex_checksum1, kMaxMethodIds);
+    pmi.dex_references.emplace_back(dex_location2, dex_checksum2, kMaxMethodIds);
 
     // Monomorphic
     for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
@@ -193,28 +221,42 @@
     return true;
   }
 
-  bool DumpClassesAndMethods(const std::string& filename, std::string* file_contents) {
-    ScratchFile class_names_file;
+  bool RunProfman(const std::string& filename,
+                  std::vector<std::string>& extra_args,
+                  std::string* output) {
+    ScratchFile output_file;
     std::string profman_cmd = GetProfmanCmd();
     std::vector<std::string> argv_str;
     argv_str.push_back(profman_cmd);
-    argv_str.push_back("--dump-classes-and-methods");
+    argv_str.insert(argv_str.end(), extra_args.begin(), extra_args.end());
     argv_str.push_back("--profile-file=" + filename);
     argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
     argv_str.push_back("--dex-location=" + GetLibCoreDexFileNames()[0]);
-    argv_str.push_back("--dump-output-to-fd=" + std::to_string(GetFd(class_names_file)));
+    argv_str.push_back("--dump-output-to-fd=" + std::to_string(GetFd(output_file)));
     std::string error;
     EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0);
-    File* file = class_names_file.GetFile();
+    File* file = output_file.GetFile();
     EXPECT_EQ(0, file->Flush());
     EXPECT_TRUE(file->ResetOffset());
     int64_t length = file->GetLength();
     std::unique_ptr<char[]> buf(new char[length]);
     EXPECT_EQ(file->Read(buf.get(), length, 0), length);
-    *file_contents = std::string(buf.get(), length);
+    *output = std::string(buf.get(), length);
     return true;
   }
 
+  // Runs profman with --dump-classes-and-methods on `filename`; the dump goes to `file_contents`.
+  bool DumpClassesAndMethods(const std::string& filename, std::string* file_contents) {
+    std::vector<std::string> extra_args = { "--dump-classes-and-methods" };
+    return RunProfman(filename, extra_args, file_contents);
+  }
+
+  // Runs profman with --dump-only on `filename`; the dump goes to `file_contents`.
+  bool DumpOnly(const std::string& filename, std::string* file_contents) {
+    std::vector<std::string> extra_args = { "--dump-only" };
+    return RunProfman(filename, extra_args, file_contents);
+  }
+
   bool CreateAndDump(const std::string& input_file_contents,
                      std::string* output_file_contents) {
     ScratchFile profile_file;
@@ -520,10 +562,11 @@
 TEST_F(ProfileAssistantTest, TestProfileCreationAllMatch) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
+    "HLjava/lang/Object;-><init>()V",
     "Ljava/lang/Comparable;",
     "Ljava/lang/Math;",
     "Ljava/lang/Object;",
-    "Ljava/lang/Object;-><init>()V"
+    "SPLjava/lang/Comparable;->compareTo(Ljava/lang/Object;)I",
   };
   std::string file_contents;
   for (std::string& class_name : class_names) {
@@ -807,15 +850,80 @@
 
   // Verify that the start-up classes contain the invalid class.
   std::set<dex::TypeIndex> classes;
-  std::set<uint16_t> methods;
-  ASSERT_TRUE(info.GetClassesAndMethods(*dex_file, &classes, &methods));
+  std::set<uint16_t> hot_methods;
+  std::set<uint16_t> startup_methods;
+  std::set<uint16_t> post_start_methods;
+  ASSERT_TRUE(info.GetClassesAndMethods(*dex_file,
+                                        &classes,
+                                        &hot_methods,
+                                        &startup_methods,
+                                        &post_start_methods));
   ASSERT_EQ(1u, classes.size());
   ASSERT_TRUE(classes.find(invalid_class_index) != classes.end());
 
   // Verify that the invalid method is in the profile.
-  ASSERT_EQ(2u, methods.size());
+  ASSERT_EQ(2u, hot_methods.size());
   uint16_t invalid_method_index = std::numeric_limits<uint16_t>::max() - 1;
-  ASSERT_TRUE(methods.find(invalid_method_index) != methods.end());
+  ASSERT_TRUE(hot_methods.find(invalid_method_index) != hot_methods.end());
+}
+
+TEST_F(ProfileAssistantTest, DumpOnly) {
+  ScratchFile profile;
+  const uint32_t kNumberOfMethods = 64;
+  std::vector<uint32_t> hot_methods;
+  std::vector<uint32_t> startup_methods;
+  std::vector<uint32_t> post_startup_methods;
+  for (size_t i = 0; i < kNumberOfMethods; ++i) {
+    if (i % 2 == 0) {
+      hot_methods.push_back(i);
+    }
+    if (i % 3 == 1) {
+      startup_methods.push_back(i);
+    }
+    if (i % 4 == 2) {
+      post_startup_methods.push_back(i);
+    }
+  }
+  EXPECT_GT(hot_methods.size(), 0u);
+  EXPECT_GT(startup_methods.size(), 0u);
+  EXPECT_GT(post_startup_methods.size(), 0u);
+  ProfileCompilationInfo info1;
+  SetupBasicProfile("p1",
+                    1,
+                    kNumberOfMethods,
+                    hot_methods,
+                    startup_methods,
+                    post_startup_methods,
+                    profile,
+                    &info1);
+  std::string output;
+  ASSERT_TRUE(DumpOnly(profile.GetFilename(), &output));
+  const size_t hot_offset = output.find("hot methods:");
+  const size_t startup_offset = output.find("startup methods:");
+  const size_t post_startup_offset = output.find("post startup methods:");
+  const size_t classes_offset = output.find("classes:");
+  ASSERT_NE(hot_offset, std::string::npos);
+  ASSERT_NE(startup_offset, std::string::npos);
+  ASSERT_NE(post_startup_offset, std::string::npos);
+  ASSERT_NE(classes_offset, std::string::npos);  // Upper bound for the last loop below.
+  ASSERT_LT(hot_offset, startup_offset);
+  ASSERT_LT(startup_offset, post_startup_offset);
+  // Every method index must appear after its own section header and before the next header.
+  for (uint32_t m : hot_methods) {
+    const size_t pos = output.find(std::to_string(m) + "[],", hot_offset);
+    ASSERT_NE(pos, std::string::npos);
+    EXPECT_LT(pos, startup_offset);
+  }
+  for (uint32_t m : startup_methods) {
+    const size_t pos = output.find(std::to_string(m) + ",", startup_offset);
+    ASSERT_NE(pos, std::string::npos);
+    EXPECT_LT(pos, post_startup_offset);
+  }
+  for (uint32_t m : post_startup_methods) {
+    const size_t pos = output.find(std::to_string(m) + ",", post_startup_offset);
+    ASSERT_NE(pos, std::string::npos);
+    EXPECT_LT(pos, classes_offset);
+  }
+}
 
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index afc2105..adef0d0 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -43,6 +43,7 @@
 #include "runtime.h"
 #include "type_reference.h"
 #include "utils.h"
+#include "type_reference.h"
 #include "zip_archive.h"
 
 namespace art {
@@ -150,6 +151,9 @@
 static constexpr char kProfileParsingInlineChacheSep = '+';
 static constexpr char kProfileParsingTypeSep = ',';
 static constexpr char kProfileParsingFirstCharInSignature = '(';
+static constexpr char kMethodFlagStringHot = 'H';
+static constexpr char kMethodFlagStringStartup = 'S';
+static constexpr char kMethodFlagStringPostStartup = 'P';
 
 // TODO(calin): This class has grown too much from its initial design. Split the functionality
 // into smaller, more contained pieces.
@@ -426,18 +430,42 @@
     }
     for (const std::unique_ptr<const DexFile>& dex_file : *dex_files) {
       std::set<dex::TypeIndex> class_types;
-      std::set<uint16_t> methods;
-      if (profile_info.GetClassesAndMethods(*dex_file.get(), &class_types, &methods)) {
+      std::set<uint16_t> hot_methods;
+      std::set<uint16_t> startup_methods;
+      std::set<uint16_t> post_startup_methods;
+      std::set<uint16_t> combined_methods;
+      if (profile_info.GetClassesAndMethods(*dex_file.get(),
+                                            &class_types,
+                                            &hot_methods,
+                                            &startup_methods,
+                                            &post_startup_methods)) {
         for (const dex::TypeIndex& type_index : class_types) {
           const DexFile::TypeId& type_id = dex_file->GetTypeId(type_index);
           out_lines->insert(std::string(dex_file->GetTypeDescriptor(type_id)));
         }
-        for (uint16_t dex_method_idx : methods) {
+        combined_methods = hot_methods;
+        combined_methods.insert(startup_methods.begin(), startup_methods.end());
+        combined_methods.insert(post_startup_methods.begin(), post_startup_methods.end());
+        for (uint16_t dex_method_idx : combined_methods) {
           const DexFile::MethodId& id = dex_file->GetMethodId(dex_method_idx);
           std::string signature_string(dex_file->GetMethodSignature(id).ToString());
           std::string type_string(dex_file->GetTypeDescriptor(dex_file->GetTypeId(id.class_idx_)));
           std::string method_name(dex_file->GetMethodName(id));
-          out_lines->insert(type_string + kMethodSep + method_name + signature_string);
+          std::string flags_string;
+          if (hot_methods.find(dex_method_idx) != hot_methods.end()) {
+            flags_string += kMethodFlagStringHot;
+          }
+          if (startup_methods.find(dex_method_idx) != startup_methods.end()) {
+            flags_string += kMethodFlagStringStartup;
+          }
+          if (post_startup_methods.find(dex_method_idx) != post_startup_methods.end()) {
+            flags_string += kMethodFlagStringPostStartup;
+          }
+          out_lines->insert(flags_string +
+                            type_string +
+                            kMethodSep +
+                            method_name +
+                            signature_string);
         }
       }
     }
@@ -461,7 +489,7 @@
     return true;
   }
 
-  int DumpClasses() {
+  int DumpClassesAndMethods() {
     // Validate that at least one profile file or reference was specified.
     if (profile_files_.empty() && profile_files_fd_.empty() &&
         reference_profile_file_.empty() && !FdIsValid(reference_profile_file_fd_)) {
@@ -694,11 +722,30 @@
                    /*out*/ProfileCompilationInfo* profile) {
     std::string klass;
     std::string method_str;
-    size_t method_sep_index = line.find(kMethodSep);
+    bool is_hot = false;
+    bool is_startup = false;
+    bool is_post_startup = false;
+    const size_t method_sep_index = line.find(kMethodSep, 0);
     if (method_sep_index == std::string::npos) {
-      klass = line;
+      klass = line.substr(0);
     } else {
-      klass = line.substr(0, method_sep_index);
+      // The method prefix flags are only valid for method strings.
+      size_t start_index = 0;
+      while (start_index < line.size() && line[start_index] != 'L') {
+        const char c = line[start_index];
+        if (c == kMethodFlagStringHot) {
+          is_hot = true;
+        } else if (c == kMethodFlagStringStartup) {
+          is_startup = true;
+        } else if (c == kMethodFlagStringPostStartup) {
+          is_post_startup = true;
+        } else {
+          LOG(WARNING) << "Invalid flag " << c;
+          return false;
+        }
+        ++start_index;
+      }
+      klass = line.substr(start_index, method_sep_index - start_index);
       method_str = line.substr(method_sep_index + kMethodSep.size());
     }
 
@@ -715,7 +762,8 @@
       const auto& dex_resolved_classes = resolved_class_set.emplace(
             dex_file->GetLocation(),
             dex_file->GetBaseLocation(),
-            dex_file->GetLocationChecksum());
+            dex_file->GetLocationChecksum(),
+            dex_file->NumMethodIds());
       dex_resolved_classes.first->AddClass(class_ref.type_index);
       std::vector<ProfileMethodInfo> methods;
       if (method_str == kClassAllMethods) {
@@ -745,6 +793,9 @@
     std::string method_spec;
     std::vector<std::string> inline_cache_elems;
 
+    // If none of the flags are set, default to hot.
+    is_hot = is_hot || (!is_hot && !is_startup && !is_post_startup);
+
     std::vector<std::string> method_elems;
     bool is_missing_types = false;
     Split(method_str, kProfileParsingInlineChacheSep, &method_elems);
@@ -766,7 +817,6 @@
       return false;
     }
 
-    std::vector<ProfileMethodInfo> pmi;
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
     if (is_missing_types || !inline_cache_elems.empty()) {
       uint32_t dex_pc;
@@ -783,8 +833,29 @@
       }
       inline_caches.emplace_back(dex_pc, is_missing_types, classes);
     }
-    pmi.emplace_back(class_ref.dex_file, method_index, inline_caches);
-    profile->AddMethodsAndClasses(pmi, std::set<DexCacheResolvedClasses>());
+    ProfileMethodInfo pmi(class_ref.dex_file, method_index, inline_caches);
+    if (is_hot) {
+      profile->AddMethod(pmi);
+    }
+    if (is_startup) {
+      if (!profile->AddSampledMethod(/*is_startup*/ true,
+                                     pmi.dex_file->GetLocation(),
+                                     pmi.dex_file->GetLocationChecksum(),
+                                     method_index,
+                                     pmi.dex_file->NumMethodIds())) {
+        return false;
+      }
+      DCHECK(profile->IsStartupOrHotMethod(MethodReference(pmi.dex_file, method_index)));
+    }
+    if (is_post_startup) {
+      if (!profile->AddSampledMethod(/*is_startup*/ false,
+                                     pmi.dex_file->GetLocation(),
+                                     pmi.dex_file->GetLocationChecksum(),
+                                     method_index,
+                                     pmi.dex_file->NumMethodIds())) {
+        return false;
+      }
+    }
     return true;
   }
 
@@ -959,7 +1030,7 @@
     return profman.DumpProfileInfo();
   }
   if (profman.ShouldOnlyDumpClassesAndMethods()) {
-    return profman.DumpClasses();
+    return profman.DumpClassesAndMethods();
   }
   if (profman.ShouldCreateProfile()) {
     return profman.CreateProfile();
diff --git a/runtime/Android.bp b/runtime/Android.bp
index aa7dc65..c5508e3 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -54,6 +54,7 @@
         "compiler_filter.cc",
         "debugger.cc",
         "dex_file.cc",
+        "dex_file_tracking_registrar.cc",
         "dex_file_annotations.cc",
         "dex_file_verifier.cc",
         "dex_instruction.cc",
@@ -123,6 +124,7 @@
         "jni_internal.cc",
         "jobject_comparator.cc",
         "linear_alloc.cc",
+        "managed_stack.cc",
         "mem_map.cc",
         "memory_region.cc",
         "method_handles.cc",
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 9cbec1e..817dcf5 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -18,7 +18,7 @@
 
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace arm {
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 4c15450..b4bca01 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -25,7 +25,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 //
 // ARM specific fault handler functions.
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index d5d1ec7..a8f034e 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -20,7 +20,7 @@
 
 #include "base/bit_utils.h"
 #include "quick/quick_method_frame_info.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace arm64 {
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index dc4e8f3..0ead732 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -26,7 +26,7 @@
 #include "base/macros.h"
 #include "globals.h"
 #include "registers_arm64.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 7072a8a..25e442c 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -24,7 +24,7 @@
 #include "globals.h"
 #include "quick_method_frame_info_mips.h"
 #include "registers_mips.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 3c5afc2..6540b44 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -47,7 +47,7 @@
 static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kBase;
 #endif
 
-static void GetFlagsFromCppDefined(bool* mips_isa_gte2, bool* r6, bool* fpu_32bit) {
+static void GetFlagsFromCppDefined(bool* mips_isa_gte2, bool* r6, bool* fpu_32bit, bool* msa) {
   // Override defaults based on compiler flags.
   if (kRuntimeMipsLevel >= MipsLevel::kR2) {
     *mips_isa_gte2 = true;
@@ -57,8 +57,10 @@
 
   if (kRuntimeMipsLevel >= MipsLevel::kR5) {
     *fpu_32bit = false;
+    *msa = true;
   } else {
     *fpu_32bit = true;
+    *msa = false;
   }
 
   if (kRuntimeMipsLevel >= MipsLevel::kR6) {
@@ -76,7 +78,8 @@
   bool fpu_32bit;
   bool mips_isa_gte2;
   bool r6;
-  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
+  bool msa;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit, &msa);
 
   // Override defaults based on variant string.
   // Only care if it is R1, R2, R5 or R6 and we assume all CPUs will have a FP unit.
@@ -87,6 +90,7 @@
     r6 = (variant[kPrefixLength] >= '6');
     fpu_32bit = (variant[kPrefixLength] < '5');
     mips_isa_gte2 = (variant[kPrefixLength] >= '2');
+    msa = (variant[kPrefixLength] >= '5');
   } else if (variant == "default") {
     // Default variant has FPU, is gte2. This is the traditional setting.
     //
@@ -100,32 +104,57 @@
     LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
   }
 
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
   bool fpu_32bit = (bitmap & kFpu32Bitfield) != 0;
   bool mips_isa_gte2 = (bitmap & kIsaRevGte2Bitfield) != 0;
   bool r6 = (bitmap & kR6) != 0;
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  bool msa = (bitmap & kMsaBitfield) != 0;
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromCppDefines() {
   bool fpu_32bit;
   bool mips_isa_gte2;
   bool r6;
-  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
+  bool msa;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit, &msa);
 
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromCpuInfo() {
   bool fpu_32bit;
   bool mips_isa_gte2;
   bool r6;
-  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
+  bool msa;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit, &msa);
 
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  // Discard the compile-time MSA default: MSA is reported only if /proc/cpuinfo lists it below.
+  msa = false;
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    // Let getline() drive the loop so a final line without a trailing newline is still
+    // examined and a stream error ends the loop instead of spinning on !eof().
+    std::string line;
+    while (std::getline(in, line)) {
+      LOG(INFO) << "cpuinfo line: " << line;
+      if (line.find("ASEs") != std::string::npos) {
+        LOG(INFO) << "found Application Specific Extensions";
+        if (line.find("msa") != std::string::npos) {
+          msa = true;
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
 
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromHwcap() {
@@ -145,13 +174,15 @@
   const MipsInstructionSetFeatures* other_as_mips = other->AsMipsInstructionSetFeatures();
   return (fpu_32bit_ == other_as_mips->fpu_32bit_) &&
       (mips_isa_gte2_ == other_as_mips->mips_isa_gte2_) &&
-      (r6_ == other_as_mips->r6_);
+      (r6_ == other_as_mips->r6_) &&
+      (msa_ == other_as_mips->msa_);
 }
 
 uint32_t MipsInstructionSetFeatures::AsBitmap() const {
   return (fpu_32bit_ ? kFpu32Bitfield : 0) |
       (mips_isa_gte2_ ? kIsaRevGte2Bitfield : 0) |
-      (r6_ ? kR6 : 0);
+      (r6_ ? kR6 : 0) |
+      (msa_ ? kMsaBitfield : 0);
 }
 
 std::string MipsInstructionSetFeatures::GetFeatureString() const {
@@ -169,6 +200,11 @@
   if (r6_) {
     result += ",r6";
   }  // Suppress non-r6.
+  if (msa_) {
+    result += ",msa";
+  } else {
+    result += ",-msa";
+  }
   return result;
 }
 
@@ -178,6 +214,7 @@
   bool fpu_32bit = fpu_32bit_;
   bool mips_isa_gte2 = mips_isa_gte2_;
   bool r6 = r6_;
+  bool msa = msa_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = android::base::Trim(*i);
     if (feature == "fpu32") {
@@ -192,13 +229,17 @@
       r6 = true;
     } else if (feature == "-r6") {
       r6 = false;
+    } else if (feature == "msa") {
+      msa = true;
+    } else if (feature == "-msa") {
+      msa = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
     }
   }
   return std::unique_ptr<const InstructionSetFeatures>(
-      new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+      new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index 1aec99f..1cb852e 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -75,6 +75,11 @@
     return r6_;
   }
 
+  // Does it have MSA (MIPS SIMD Architecture) support.
+  bool HasMsa() const {
+    return msa_;
+  }
+
   virtual ~MipsInstructionSetFeatures() {}
 
  protected:
@@ -84,11 +89,12 @@
                                  std::string* error_msg) const OVERRIDE;
 
  private:
-  MipsInstructionSetFeatures(bool fpu_32bit, bool mips_isa_gte2, bool r6)
+  MipsInstructionSetFeatures(bool fpu_32bit, bool mips_isa_gte2, bool r6, bool msa)
       : InstructionSetFeatures(),
         fpu_32bit_(fpu_32bit),
         mips_isa_gte2_(mips_isa_gte2),
-        r6_(r6) {
+        r6_(r6),
+        msa_(msa) {
     // Sanity checks.
     if (r6) {
       CHECK(mips_isa_gte2);
@@ -104,11 +110,13 @@
     kFpu32Bitfield = 1 << 0,
     kIsaRevGte2Bitfield = 1 << 1,
     kR6 = 1 << 2,
+    kMsaBitfield = 1 << 3,
   };
 
   const bool fpu_32bit_;
   const bool mips_isa_gte2_;
   const bool r6_;
+  const bool msa_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsInstructionSetFeatures);
 };
diff --git a/runtime/arch/mips/instruction_set_features_mips_test.cc b/runtime/arch/mips/instruction_set_features_mips_test.cc
index 6613b84..54fd2c9 100644
--- a/runtime/arch/mips/instruction_set_features_mips_test.cc
+++ b/runtime/arch/mips/instruction_set_features_mips_test.cc
@@ -20,15 +20,109 @@
 
 namespace art {
 
-TEST(MipsInstructionSetFeaturesTest, MipsFeatures) {
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromDefaultVariant) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> mips_features(
       InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
   ASSERT_TRUE(mips_features.get() != nullptr) << error_msg;
   EXPECT_EQ(mips_features->GetInstructionSet(), kMips);
   EXPECT_TRUE(mips_features->Equals(mips_features.get()));
-  EXPECT_STREQ("fpu32,mips2", mips_features->GetFeatureString().c_str());
+  EXPECT_STREQ("fpu32,mips2,-msa", mips_features->GetFeatureString().c_str());
   EXPECT_EQ(mips_features->AsBitmap(), 3U);
 }
 
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR1Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r1_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r1_features->Equals(mips32r1_features.get()));
+  EXPECT_STREQ("fpu32,-mips2,-msa", mips32r1_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r1_features->AsBitmap(), 1U);
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r1_features->Equals(mips_default_features.get()));
+}
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR2Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r2_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r2", &error_msg));
+  ASSERT_TRUE(mips32r2_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r2_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r2_features->Equals(mips32r2_features.get()));
+  EXPECT_STREQ("fpu32,mips2,-msa", mips32r2_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r2_features->AsBitmap(), 3U);
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_TRUE(mips32r2_features->Equals(mips_default_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r2_features->Equals(mips32r1_features.get()));
+}
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR5Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r5_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r5", &error_msg));
+  ASSERT_TRUE(mips32r5_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r5_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r5_features->Equals(mips32r5_features.get()));
+  EXPECT_STREQ("-fpu32,mips2,msa", mips32r5_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r5_features->AsBitmap(), 10U);
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r5_features->Equals(mips_default_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r5_features->Equals(mips32r1_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r2_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r2", &error_msg));
+  ASSERT_TRUE(mips32r2_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r5_features->Equals(mips32r2_features.get()));
+}
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR6Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r6_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r6", &error_msg));
+  ASSERT_TRUE(mips32r6_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r6_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r6_features->Equals(mips32r6_features.get()));
+  EXPECT_STREQ("-fpu32,mips2,r6,msa", mips32r6_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r6_features->AsBitmap(), 14U);
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips_default_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips32r1_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r2_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r2", &error_msg));
+  ASSERT_TRUE(mips32r2_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips32r2_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r5_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r5", &error_msg));
+  ASSERT_TRUE(mips32r5_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips32r5_features.get()));
+}
+
 }  // namespace art
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index f9a92c8..69d73b0 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -25,7 +25,7 @@
 #include "globals.h"
 #include "quick_method_frame_info_mips64.h"
 #include "registers_mips64.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 7d8abb8..798c500 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -26,7 +26,7 @@
 #include "base/macros.h"
 #include "base/safe_copy.h"
 #include "globals.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 #if defined(__APPLE__)
 #define ucontext __darwin_ucontext
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 241650e..cc8f1fa 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -22,7 +22,7 @@
 #include "asm_support_x86.h"
 #include "base/enums.h"
 #include "base/macros.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 #if defined(__APPLE__)
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
index 553b656..19d25f6 100644
--- a/runtime/arch/x86_64/thread_x86_64.cc
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -18,7 +18,7 @@
 
 #include "asm_support_x86_64.h"
 #include "base/macros.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 #if defined(__linux__)
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 98002ae..a8a58e1 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -28,7 +28,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "primitive.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 9d0bfde..d1afcb8 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -27,6 +27,7 @@
 #include "dex_file_annotations.h"
 #include "dex_file-inl.h"
 #include "gc_root-inl.h"
+#include "invoke_type.h"
 #include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -38,7 +39,7 @@
 #include "quick/quick_method_frame_info.h"
 #include "runtime-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 856bfd2..d8dfdd7 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -24,19 +24,16 @@
 #include "base/enums.h"
 #include "dex_file.h"
 #include "gc_root.h"
-#include "invoke_type.h"
-#include "method_reference.h"
 #include "modifiers.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object.h"
 #include "obj_ptr.h"
+#include "offsets.h"
 #include "read_barrier_option.h"
-#include "utils.h"
 
 namespace art {
 
 template<class T> class Handle;
 class ImtConflictTable;
+enum InvokeType : uint32_t;
 union JValue;
 class OatQuickMethodHeader;
 class ProfilingInfo;
@@ -47,8 +44,13 @@
 namespace mirror {
 class Array;
 class Class;
+class ClassLoader;
+class DexCache;
 class IfTable;
+class Object;
+template <typename MirrorType> class ObjectArray;
 class PointerArray;
+class String;
 }  // namespace mirror
 
 class ArtMethod FINAL {
@@ -318,11 +320,11 @@
   }
 
   static MemberOffset DexMethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_);
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, dex_method_index_));
   }
 
   static MemberOffset MethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, method_index_));
   }
 
   uint32_t GetCodeItemOffset() {
@@ -524,10 +526,6 @@
 
   bool IsImtUnimplementedMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  MethodReference ToMethodReference() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return MethodReference(GetDexFile(), GetDexMethodIndex());
-  }
-
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index fce6da4..1ce7fd3 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -30,6 +30,7 @@
 #include "mirror/string.h"
 #include "utils/dex_cache_arrays_layout.h"
 #include "runtime.h"
+#include "stack.h"
 #include "thread.h"
 #endif
 
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index d5ae570..07aceb7 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -17,7 +17,7 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 45c3165..25dd1a3 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -257,6 +257,13 @@
     return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
   }
 
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeStrongRelease(T expected_value, T desired_value) {
+    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_release);
+  }
+
   // The same, except it may fail spuriously.
   bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index f68a5d4..25b6925 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -22,7 +22,7 @@
 #include "common_runtime_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 class CheckWaitTask : public Task {
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index 99c310a..54b40f2 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -26,7 +26,7 @@
 #include "logging.h"
 #include "mem_map.h"
 #include "mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "systrace.h"
 
 namespace art {
diff --git a/runtime/base/dumpable-inl.h b/runtime/base/dumpable-inl.h
index 2cdf083..9d7fc39 100644
--- a/runtime/base/dumpable-inl.h
+++ b/runtime/base/dumpable-inl.h
@@ -19,7 +19,7 @@
 
 #include "base/dumpable.h"
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index b28eb72..be20920 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -198,7 +198,7 @@
                                                         kFractionalDigits)
      << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " "
      << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: "
-     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << "\n";
+     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << std::endl;
 }
 
 template <class Value>
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 553928d..adfd7d3 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -21,7 +21,7 @@
 #include <sstream>
 
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 08b370e..0ac2399 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -194,6 +194,16 @@
   return exclusive_owner_;
 }
 
+inline void Mutex::AssertExclusiveHeld(const Thread* self) const {
+  if (kDebugLocking && (gAborting == 0)) {
+    CHECK(IsExclusiveHeld(self)) << *this;
+  }
+}
+
+inline void Mutex::AssertHeld(const Thread* self) const {
+  AssertExclusiveHeld(self);
+}
+
 inline bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
   DCHECK(self == nullptr || self == Thread::Current());
   bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
@@ -221,6 +231,16 @@
 #endif
 }
 
+inline void ReaderWriterMutex::AssertExclusiveHeld(const Thread* self) const {
+  if (kDebugLocking && (gAborting == 0)) {
+    CHECK(IsExclusiveHeld(self)) << *this;
+  }
+}
+
+inline void ReaderWriterMutex::AssertWriterHeld(const Thread* self) const {
+  AssertExclusiveHeld(self);
+}
+
 inline void MutatorMutex::TransitionFromRunnableToSuspended(Thread* self) {
   AssertSharedHeld(self);
   RegisterAsUnlocked(self);
@@ -231,6 +251,19 @@
   AssertSharedHeld(self);
 }
 
+inline ReaderMutexLock::ReaderMutexLock(Thread* self, ReaderWriterMutex& mu)
+    : self_(self), mu_(mu) {
+  mu_.SharedLock(self_);
+}
+
+inline ReaderMutexLock::~ReaderMutexLock() {
+  mu_.SharedUnlock(self_);
+}
+
+// Catch bug where variable name is omitted. "ReaderMutexLock (lock);" instead of
+// "ReaderMutexLock mu(lock)".
+#define ReaderMutexLock(x) static_assert(0, "ReaderMutexLock declaration missing variable name")
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_MUTEX_INL_H_
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 03ae63a..e77d8d7 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -244,15 +244,11 @@
   void Unlock(Thread* self) RELEASE() {  ExclusiveUnlock(self); }
 
   // Is the current thread the exclusive holder of the Mutex.
-  bool IsExclusiveHeld(const Thread* self) const;
+  ALWAYS_INLINE bool IsExclusiveHeld(const Thread* self) const;
 
   // Assert that the Mutex is exclusively held by the current thread.
-  void AssertExclusiveHeld(const Thread* self) ASSERT_CAPABILITY(this) {
-    if (kDebugLocking && (gAborting == 0)) {
-      CHECK(IsExclusiveHeld(self)) << *this;
-    }
-  }
-  void AssertHeld(const Thread* self) ASSERT_CAPABILITY(this) { AssertExclusiveHeld(self); }
+  ALWAYS_INLINE void AssertExclusiveHeld(const Thread* self) const ASSERT_CAPABILITY(this);
+  ALWAYS_INLINE void AssertHeld(const Thread* self) const ASSERT_CAPABILITY(this);
 
   // Assert that the Mutex is not held by the current thread.
   void AssertNotHeldExclusive(const Thread* self) ASSERT_CAPABILITY(!*this) {
@@ -349,15 +345,11 @@
   void ReaderUnlock(Thread* self) RELEASE_SHARED() { SharedUnlock(self); }
 
   // Is the current thread the exclusive holder of the ReaderWriterMutex.
-  bool IsExclusiveHeld(const Thread* self) const;
+  ALWAYS_INLINE bool IsExclusiveHeld(const Thread* self) const;
 
   // Assert the current thread has exclusive access to the ReaderWriterMutex.
-  void AssertExclusiveHeld(const Thread* self) ASSERT_CAPABILITY(this) {
-    if (kDebugLocking && (gAborting == 0)) {
-      CHECK(IsExclusiveHeld(self)) << *this;
-    }
-  }
-  void AssertWriterHeld(const Thread* self) ASSERT_CAPABILITY(this) { AssertExclusiveHeld(self); }
+  ALWAYS_INLINE void AssertExclusiveHeld(const Thread* self) const ASSERT_CAPABILITY(this);
+  ALWAYS_INLINE void AssertWriterHeld(const Thread* self) const ASSERT_CAPABILITY(this);
 
   // Assert the current thread doesn't have exclusive access to the ReaderWriterMutex.
   void AssertNotExclusiveHeld(const Thread* self) ASSERT_CAPABILITY(!this) {
@@ -517,23 +509,15 @@
 // construction and releases it upon destruction.
 class SCOPED_CAPABILITY ReaderMutexLock {
  public:
-  ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) ALWAYS_INLINE :
-      self_(self), mu_(mu) {
-    mu_.SharedLock(self_);
-  }
+  ALWAYS_INLINE ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu);
 
-  ~ReaderMutexLock() RELEASE() ALWAYS_INLINE {
-    mu_.SharedUnlock(self_);
-  }
+  ALWAYS_INLINE ~ReaderMutexLock() RELEASE();
 
  private:
   Thread* const self_;
   ReaderWriterMutex& mu_;
   DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock);
 };
-// Catch bug where variable name is omitted. "ReaderMutexLock (lock);" instead of
-// "ReaderMutexLock mu(lock)".
-#define ReaderMutexLock(x) static_assert(0, "ReaderMutexLock declaration missing variable name")
 
 // Scoped locker/unlocker for a ReaderWriterMutex that acquires write access to mu upon
 // construction and releases it upon destruction.
diff --git a/runtime/base/mutex_test.cc b/runtime/base/mutex_test.cc
index 340550f..752e77a 100644
--- a/runtime/base/mutex_test.cc
+++ b/runtime/base/mutex_test.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "mutex.h"
+#include "mutex-inl.h"
 
 #include "common_runtime_test.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index 06e2526..aaa2431 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -25,7 +25,8 @@
 #include "base/systrace.h"
 #include "base/time_utils.h"
 #include "gc/heap.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 
 #include <cmath>
 #include <iomanip>
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index a955cb5..f6c8fa9 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -20,6 +20,7 @@
 #include "art_method-inl.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "stack_map.h"
 
 namespace art {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index c169ac0..0921bd6 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -55,7 +55,6 @@
 #include "gc_root-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
-#include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
@@ -89,7 +88,6 @@
 #include "mirror/method_handles_lookup.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
-#include "mirror/object-refvisitor-inl.h"
 #include "mirror/proxy.h"
 #include "mirror/reference-inl.h"
 #include "mirror/stack_trace_element.h"
@@ -109,6 +107,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "trace.h"
+#include "utf.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
@@ -1194,63 +1193,6 @@
   gc::accounting::HeapBitmap* const live_bitmap_;
 };
 
-class FixupInternVisitor {
- public:
-  ALWAYS_INLINE ObjPtr<mirror::Object> TryInsertIntern(mirror::Object* obj) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (obj != nullptr && obj->IsString()) {
-      const auto intern = Runtime::Current()->GetInternTable()->InternStrong(obj->AsString());
-      return intern;
-    }
-    return obj;
-  }
-
-  ALWAYS_INLINE void VisitRootIfNonNull(
-      mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!root->IsNull()) {
-      VisitRoot(root);
-    }
-  }
-
-  ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    root->Assign(TryInsertIntern(root->AsMirrorPtr()));
-  }
-
-  // Visit Class Fields
-  ALWAYS_INLINE void operator()(ObjPtr<mirror::Object> obj,
-                                MemberOffset offset,
-                                bool is_static ATTRIBUTE_UNUSED) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    // There could be overlap between ranges, we must avoid visiting the same reference twice.
-    // Avoid the class field since we already fixed it up in FixupClassVisitor.
-    if (offset.Uint32Value() != mirror::Object::ClassOffset().Uint32Value()) {
-      // Updating images, don't do a read barrier.
-      // Only string fields are fixed, don't do a verify.
-      mirror::Object* ref = obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(
-          offset);
-      obj->SetFieldObject<false, false>(offset, TryInsertIntern(ref));
-    }
-  }
-
-  void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
-                  ObjPtr<mirror::Reference> ref) const
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
-    this->operator()(ref, mirror::Reference::ReferentOffset(), false);
-  }
-
-  void operator()(mirror::Object* obj) const
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (obj->IsDexCache()) {
-      obj->VisitReferences<true, kVerifyNone, kWithoutReadBarrier>(*this, *this);
-    } else {
-      // Don't visit native roots for non-dex-cache
-      obj->VisitReferences<false, kVerifyNone, kWithoutReadBarrier>(*this, *this);
-    }
-  }
-};
-
 // Copies data from one array to another array at the same position
 // if pred returns false. If there is a page of continuous data in
 // the src array for which pred consistently returns true then
@@ -1343,7 +1285,6 @@
         return false;
       }
     }
-
     // Only add the classes to the class loader after the points where we can return false.
     for (size_t i = 0; i < num_dex_caches; i++) {
       ObjPtr<mirror::DexCache> dex_cache = dex_caches->Get(i);
@@ -1507,21 +1448,6 @@
       }
     }
   }
-  {
-    // Fixup all the literal strings happens at app images which are supposed to be interned.
-    ScopedTrace timing("Fixup String Intern in image and dex_cache");
-    const auto& image_header = space->GetImageHeader();
-    const auto bitmap = space->GetMarkBitmap();  // bitmap of objects
-    const uint8_t* target_base = space->GetMemMap()->Begin();
-    const ImageSection& objects_section =
-        image_header.GetImageSection(ImageHeader::kSectionObjects);
-
-    uintptr_t objects_begin = reinterpret_cast<uintptr_t>(target_base + objects_section.Offset());
-    uintptr_t objects_end = reinterpret_cast<uintptr_t>(target_base + objects_section.End());
-
-    FixupInternVisitor fixup_intern_visitor;
-    bitmap->VisitMarkedRange(objects_begin, objects_end, fixup_intern_visitor);
-  }
   if (*out_forward_dex_cache_array) {
     ScopedTrace timing("Fixup ArtMethod dex cache arrays");
     FixupArtMethodArrayVisitor visitor(header);
@@ -3535,6 +3461,39 @@
   return dex_cache;
 }
 
+void ClassLinker::RegisterExistingDexCache(ObjPtr<mirror::DexCache> dex_cache,
+                                           ObjPtr<mirror::ClassLoader> class_loader) {
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(dex_cache));
+  Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
+  const DexFile* dex_file = dex_cache->GetDexFile();
+  DCHECK(dex_file != nullptr) << "Attempt to register uninitialized dex_cache object!";
+  if (kIsDebugBuild) {
+    DexCacheData old_data;
+    {
+      ReaderMutexLock mu(self, *Locks::dex_lock_);
+      old_data = FindDexCacheDataLocked(*dex_file);
+    }
+    ObjPtr<mirror::DexCache> old_dex_cache = DecodeDexCache(self, old_data);
+    DCHECK(old_dex_cache.IsNull()) << "Attempt to manually register a dex cache thats already "
+                                   << "been registered on dex file " << dex_file->GetLocation();
+  }
+  ClassTable* table;
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    table = InsertClassTableForClassLoader(h_class_loader.Get());
+  }
+  WriterMutexLock mu(self, *Locks::dex_lock_);
+  RegisterDexFileLocked(*dex_file, h_dex_cache.Get(), h_class_loader.Get());
+  table->InsertStrongRoot(h_dex_cache.Get());
+  if (h_class_loader.Get() != nullptr) {
+    // Since we added a strong root to the class table, do the write barrier as required for
+    // remembered sets and generational GCs.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(h_class_loader.Get());
+  }
+}
+
 ObjPtr<mirror::DexCache> ClassLinker::RegisterDexFile(const DexFile& dex_file,
                                                       ObjPtr<mirror::ClassLoader> class_loader) {
   Thread* self = Thread::Current();
@@ -3949,6 +3908,12 @@
 }
 
 mirror::Class* ClassLinker::LookupClass(Thread* self,
+                           const char* descriptor,
+                           ObjPtr<mirror::ClassLoader> class_loader) {
+  return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
+}
+
+mirror::Class* ClassLinker::LookupClass(Thread* self,
                                         const char* descriptor,
                                         size_t hash,
                                         ObjPtr<mirror::ClassLoader> class_loader) {
@@ -4571,7 +4536,10 @@
   DCHECK(out != nullptr);
   out->CopyFrom(proxy_constructor, image_pointer_size_);
   // Make this constructor public and fix the class to be our Proxy version
-  out->SetAccessFlags((out->GetAccessFlags() & ~kAccProtected) | kAccPublic);
+  // Mark kAccCompileDontBother so that we don't take JIT samples for the method. b/62349349
+  out->SetAccessFlags((out->GetAccessFlags() & ~kAccProtected) |
+                      kAccPublic |
+                      kAccCompileDontBother);
   out->SetDeclaringClass(klass.Get());
 }
 
@@ -4605,7 +4573,8 @@
   // preference to the invocation handler.
   const uint32_t kRemoveFlags = kAccAbstract | kAccDefault | kAccDefaultConflict;
   // Make the method final.
-  const uint32_t kAddFlags = kAccFinal;
+  // Mark kAccCompileDontBother so that we don't take JIT samples for the method. b/62349349
+  const uint32_t kAddFlags = kAccFinal | kAccCompileDontBother;
   out->SetAccessFlags((out->GetAccessFlags() & ~kRemoveFlags) | kAddFlags);
 
   // Clear the dex_code_item_offset_. It needs to be 0 since proxy methods have no CodeItems but the
@@ -8949,7 +8918,8 @@
         last_dex_file_ = &dex_file;
         DexCacheResolvedClasses resolved_classes(dex_file.GetLocation(),
                                                  dex_file.GetBaseLocation(),
-                                                 dex_file.GetLocationChecksum());
+                                                 dex_file.GetLocationChecksum(),
+                                                 dex_file.NumMethodIds());
         last_resolved_classes_ = result_->find(resolved_classes);
         if (last_resolved_classes_ == result_->end()) {
           last_resolved_classes_ = result_->insert(resolved_classes).first;
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 205ea1e..1e8125e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -212,9 +212,7 @@
                              const char* descriptor,
                              ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Finds all the classes with the given descriptor, regardless of ClassLoader.
   void LookupClasses(const char* descriptor, std::vector<ObjPtr<mirror::Class>>& classes)
@@ -385,6 +383,13 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
+  // Directly register an already existing dex cache. RegisterDexFile should be preferred since that
+  // deduplicates DexCaches when possible. The DexCache given to this function must already be fully
+  // initialized and not already registered.
+  void RegisterExistingDexCache(ObjPtr<mirror::DexCache> cache,
+                                ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES(!Locks::dex_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
   ObjPtr<mirror::DexCache> RegisterDexFile(const DexFile& dex_file,
                                            ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::dex_lock_)
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index b421810..684a261 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -50,7 +50,7 @@
 #include "mirror/string-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index dfe8949..b15d82f 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -18,6 +18,8 @@
 #define ART_RUNTIME_CLASS_TABLE_INL_H_
 
 #include "class_table.h"
+
+#include "gc_root-inl.h"
 #include "oat_file.h"
 
 namespace art {
@@ -93,7 +95,7 @@
   if (kReadBarrierOption != kWithoutReadBarrier && before_ptr != after_ptr) {
     // If another thread raced and updated the reference, do not store the read barrier updated
     // one.
-    data_.CompareExchangeStrongRelaxed(before, Encode(after_ptr, MaskHash(before)));
+    data_.CompareExchangeStrongRelease(before, Encode(after_ptr, MaskHash(before)));
   }
   return after_ptr.Ptr();
 }
@@ -108,7 +110,7 @@
   if (before_ptr != after_ptr) {
     // If another thread raced and updated the reference, do not store the read barrier updated
     // one.
-    data_.CompareExchangeStrongRelaxed(before, Encode(after_ptr, MaskHash(before)));
+    data_.CompareExchangeStrongRelease(before, Encode(after_ptr, MaskHash(before)));
   }
 }
 
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 0891d3f..b71610a 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "class_table.h"
+#include "class_table-inl.h"
 
 #include "mirror/class-inl.h"
 #include "oat_file.h"
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index d0b50fe..cfa56a5 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -38,7 +38,7 @@
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "handle_scope.h"
+#include "handle_scope-inl.h"
 #include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
 #include "jni_internal.h"
@@ -56,7 +56,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
-#include "handle_scope-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 #include "utf.h"
 #include "well_known_classes.h"
diff --git a/runtime/dex_cache_resolved_classes.h b/runtime/dex_cache_resolved_classes.h
index bebdf0d..2278b05 100644
--- a/runtime/dex_cache_resolved_classes.h
+++ b/runtime/dex_cache_resolved_classes.h
@@ -30,10 +30,12 @@
  public:
   DexCacheResolvedClasses(const std::string& dex_location,
                           const std::string& base_location,
-                          uint32_t location_checksum)
+                          uint32_t location_checksum,
+                          uint32_t num_method_ids)
       : dex_location_(dex_location),
         base_location_(base_location),
-        location_checksum_(location_checksum) {}
+        location_checksum_(location_checksum),
+        num_method_ids_(num_method_ids) {}
 
   // Only compare the key elements, ignore the resolved classes.
   int Compare(const DexCacheResolvedClasses& other) const {
@@ -69,10 +71,15 @@
     return classes_;
   }
 
+  size_t NumMethodIds() const {
+    return num_method_ids_;
+  }
+
  private:
   const std::string dex_location_;
   const std::string base_location_;
   const uint32_t location_checksum_;
+  const uint32_t num_method_ids_;
   // Array of resolved class def indexes.
   mutable std::unordered_set<dex::TypeIndex> classes_;
 };
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 591ba42..3de78ed 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -28,7 +28,6 @@
 #include "invoke_type.h"
 #include "jni.h"
 #include "modifiers.h"
-#include "utf.h"
 
 namespace art {
 
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 1d4507a..78d5c5f 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -27,7 +27,7 @@
 #include "mem_map.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/dex_file_tracking_registrar.cc b/runtime/dex_file_tracking_registrar.cc
new file mode 100644
index 0000000..cfbca3d
--- /dev/null
+++ b/runtime/dex_file_tracking_registrar.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file_tracking_registrar.h"
+
+// For dex tracking through poisoning. Note: Requires forcing sanitization. This is the reason for
+// the ifdefs and early include.
+#ifdef ART_DEX_FILE_ACCESS_TRACKING
+#ifndef ART_ENABLE_ADDRESS_SANITIZER
+#define ART_ENABLE_ADDRESS_SANITIZER
+#endif
+#endif
+#include "base/memory_tool.h"
+
+#include "base/logging.h"
+
+namespace art {
+namespace dex {
+namespace tracking {
+
+// If true, poison dex files to track accesses.
+static constexpr bool kDexFileAccessTracking =
+#ifdef ART_DEX_FILE_ACCESS_TRACKING
+    true;
+#else
+    false;
+#endif
+
+void RegisterDexFile(const DexFile* const dex_file) {
+  if (kDexFileAccessTracking && dex_file != nullptr) {
+    LOG(ERROR) << dex_file->GetLocation() + " @ " << std::hex
+               << reinterpret_cast<uintptr_t>(dex_file->Begin());
+    MEMORY_TOOL_MAKE_NOACCESS(dex_file->Begin(), dex_file->Size());
+  }
+}
+
+}  // namespace tracking
+}  // namespace dex
+}  // namespace art
diff --git a/runtime/dex_file_tracking_registrar.h b/runtime/dex_file_tracking_registrar.h
new file mode 100644
index 0000000..7d5d78d
--- /dev/null
+++ b/runtime/dex_file_tracking_registrar.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_FILE_TRACKING_REGISTRAR_H_
+#define ART_RUNTIME_DEX_FILE_TRACKING_REGISTRAR_H_
+
+#include "dex_file.h"
+
+namespace art {
+namespace dex {
+namespace tracking {
+
+void RegisterDexFile(const DexFile* const dex_file);
+
+}  // namespace tracking
+}  // namespace dex
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_FILE_TRACKING_REGISTRAR_H_
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 068e122..0e58e6d 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -29,7 +29,7 @@
 #include "dex_file_types.h"
 #include "leb128.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/dex_method_iterator_test.cc b/runtime/dex_method_iterator_test.cc
index cd8c390..e83829b 100644
--- a/runtime/dex_method_iterator_test.cc
+++ b/runtime/dex_method_iterator_test.cc
@@ -20,7 +20,7 @@
 #include "common_runtime_test.h"
 #include "oat_file.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index eeb138b..dd0819e 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -42,12 +42,11 @@
   // otherwise we return the address of the method we found.
   void* native_code = soa.Vm()->FindCodeForNativeMethod(method);
   if (native_code == nullptr) {
-    DCHECK(self->IsExceptionPending());
+    self->AssertPendingException();
     return nullptr;
-  } else {
-    // Register so that future calls don't come here
-    return method->RegisterNative(native_code, false);
   }
+  // Register so that future calls don't come here
+  return method->RegisterNative(native_code, false);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index df37f95..c94bf4a 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -21,7 +21,7 @@
 #include "base/enums.h"
 #include "base/mutex.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread.h"
 
 // Specific frame size code is in architecture-specific files. We include this to compile-time
 // specialize the code.
@@ -46,13 +46,6 @@
     }
   }
 
-  ScopedQuickEntrypointChecks() REQUIRES_SHARED(Locks::mutator_lock_)
-      : self_(kIsDebugBuild ? Thread::Current() : nullptr), exit_check_(kIsDebugBuild) {
-    if (kIsDebugBuild) {
-      TestsOnEntry();
-    }
-  }
-
   ~ScopedQuickEntrypointChecks() REQUIRES_SHARED(Locks::mutator_lock_) {
     if (exit_check_) {
       TestsOnExit();
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index 81560cc..aa1ebb7 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -21,7 +21,7 @@
 #include "instrumentation.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 2b349e3..90231e2 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -18,6 +18,7 @@
 #include "base/enums.h"
 #include "callee_save_frame.h"
 #include "common_throws.h"
+#include "debugger.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
@@ -40,7 +41,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
-#include "debugger.h"
+#include "thread-inl.h"
 #include "well_known_classes.h"
 
 namespace art {
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5594f4d..fd0cd5f 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -27,7 +27,7 @@
 #include "mirror/object_reference.h"
 #include "oat_quick_method_header.h"
 #include "sigchain.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "verify_object-inl.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h
index eb00472..d039d88 100644
--- a/runtime/gc/accounting/bitmap.h
+++ b/runtime/gc/accounting/bitmap.h
@@ -25,7 +25,6 @@
 
 #include "base/mutex.h"
 #include "globals.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 76247bc..7097f87 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -19,7 +19,6 @@
 
 #include "base/allocator.h"
 #include "base/logging.h"
-#include "object_callbacks.h"
 #include "space_bitmap.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index c416b9c..57c290e 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -28,7 +28,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object-refvisitor-inl.h"
 #include "space_bitmap-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index 48a8742..e5b8ea5 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -21,7 +21,7 @@
 #include "gc/space/space-inl.h"
 #include "mirror/array-inl.h"
 #include "space_bitmap-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index 5594781..c332f96 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -19,7 +19,6 @@
 
 #include "base/allocator.h"
 #include "globals.h"
-#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <set>
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index b136488..889f57b 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -25,7 +25,6 @@
 
 #include "base/mutex.h"
 #include "globals.h"
-#include "object_callbacks.h"
 
 namespace art {
 
@@ -35,6 +34,9 @@
 }  // namespace mirror
 class MemMap;
 
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace gc {
 namespace accounting {
 
diff --git a/runtime/gc/allocation_listener.h b/runtime/gc/allocation_listener.h
index d694a68..21fa214 100644
--- a/runtime/gc/allocation_listener.h
+++ b/runtime/gc/allocation_listener.h
@@ -23,7 +23,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "gc_root.h"
 
 namespace art {
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 122f779..2257b81 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "base/stl_util.h"
 #include "obj_ptr-inl.h"
+#include "object_callbacks.h"
 #include "stack.h"
 
 #ifdef ART_TARGET_ANDROID
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 227c7ad..d31e442 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -22,12 +22,12 @@
 
 #include "base/mutex.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "gc_root.h"
 
 namespace art {
 
 class ArtMethod;
+class IsMarkedVisitor;
 class Thread;
 
 namespace mirror {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 35a251f..d5d3540 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -30,7 +30,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index ef843c6..c0d6481 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -359,7 +359,7 @@
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
     // only.
-    thread->VisitRoots(this);
+    thread->VisitRoots(this, kVisitRootFlagAllRoots);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
@@ -2086,8 +2086,11 @@
       // It was updated by the mutator.
       break;
     }
-  } while (!obj->CasFieldWeakRelaxedObjectWithoutWriteBarrier<
-      false, false, kVerifyNone>(offset, expected_ref, new_ref));
+    // Use release cas to make sure threads reading the reference see contents of copied objects.
+  } while (!obj->CasFieldWeakReleaseObjectWithoutWriteBarrier<false, false, kVerifyNone>(
+      offset,
+      expected_ref,
+      new_ref));
 }
 
 // Process some roots.
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index f8ca8db..7b4340e 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -21,9 +21,7 @@
 #include "garbage_collector.h"
 #include "immune_spaces.h"
 #include "jni.h"
-#include "object_callbacks.h"
 #include "offsets.h"
-#include "mirror/object.h"
 #include "mirror/object_reference.h"
 #include "safe_map.h"
 
@@ -34,6 +32,10 @@
 class Closure;
 class RootInfo;
 
+namespace mirror {
+class Object;
+}  // namespace mirror
+
 namespace gc {
 
 namespace accounting {
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 1e4196b..c5a341f 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -31,7 +31,8 @@
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "utils.h"
 
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index acb4f57..9823708 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -21,7 +21,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "oat_file.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 9d3d950..aef98de 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -32,7 +32,7 @@
 #include "mirror/object-refvisitor-inl.h"
 #include "runtime.h"
 #include "stack.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 85727c2..0bf4095 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -28,7 +28,6 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
 #include "lock_word.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index f591cf0..fb82b4d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -42,7 +42,7 @@
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
@@ -1141,7 +1141,7 @@
     Thread* const self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->VisitRoots(this);
+    thread->VisitRoots(this, kVisitRootFlagAllRoots);
     if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
       ScopedTrace trace2("RevokeRosAllocThreadLocalBuffers");
       mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread);
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 5a9b9f8..b9e06f9 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -27,7 +27,6 @@
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 9847794..f6ca867 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -19,7 +19,7 @@
 #include "gc/heap.h"
 #include "gc/space/space.h"
 #include "partial_mark_sweep.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 9d6e74d..d3858ba 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -27,7 +27,6 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
 #include "mirror/object_reference.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index 428e387..98fdfac 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -21,7 +21,8 @@
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 24f4ce2..0289250 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -34,7 +34,6 @@
 #include "globals.h"
 #include "handle.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "process_state.h"
 #include "safe_map.h"
@@ -43,6 +42,7 @@
 namespace art {
 
 class ConditionVariable;
+class IsMarkedVisitor;
 class Mutex;
 class RootVisitor;
 class StackVisitor;
@@ -51,6 +51,9 @@
 class TimingLogger;
 class VariableSizedHandleScope;
 
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace mirror {
   class Class;
   class Object;
diff --git a/runtime/gc/heap_verification_test.cc b/runtime/gc/heap_verification_test.cc
index 2cdfc16..a307c51 100644
--- a/runtime/gc/heap_verification_test.cc
+++ b/runtime/gc/heap_verification_test.cc
@@ -17,7 +17,7 @@
 #include "common_runtime_test.h"
 
 #include "base/memory_tool.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "handle_scope-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
diff --git a/runtime/gc/reference_processor-inl.h b/runtime/gc/reference_processor-inl.h
index f619a15..0f47d3d 100644
--- a/runtime/gc/reference_processor-inl.h
+++ b/runtime/gc/reference_processor-inl.h
@@ -19,6 +19,8 @@
 
 #include "reference_processor.h"
 
+#include "mirror/reference-inl.h"
+
 namespace art {
 namespace gc {
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 886c950..52da763 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -22,6 +22,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "object_callbacks.h"
 #include "reference_processor-inl.h"
 #include "reflection.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 38b68cb..a8135d9 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -20,11 +20,11 @@
 #include "base/mutex.h"
 #include "globals.h"
 #include "jni.h"
-#include "object_callbacks.h"
 #include "reference_queue.h"
 
 namespace art {
 
+class IsMarkedVisitor;
 class TimingLogger;
 
 namespace mirror {
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index fd5dcf9..321d22a 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -22,6 +22,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "object_callbacks.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index b73a880..c48d48c 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -27,7 +27,6 @@
 #include "globals.h"
 #include "jni.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "thread_pool.h"
 
@@ -36,6 +35,9 @@
 class Reference;
 }  // namespace mirror
 
+class IsMarkedVisitor;
+class MarkObjectVisitor;
+
 namespace gc {
 
 namespace collector {
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index f937d2c..2976dd0 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -19,7 +19,7 @@
 #include "gc/collector_type.h"
 #include "gc/heap.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index e9982e9..566dc5d 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -17,10 +17,17 @@
 #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 
-#include "object_callbacks.h"
 #include "space.h"
 
 namespace art {
+
+namespace mirror {
+class Object;
+}
+
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace gc {
 
 namespace collector {
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 9282ec7..7ec54f5 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -26,6 +26,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 0f856b8..4597a96 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -31,7 +31,7 @@
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
 #include "space-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 6116160..fc24fc2 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_
 
 #include "region_space.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 27f30e0..8d8c488 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -255,13 +255,28 @@
   MutexLock mu(Thread::Current(), region_lock_);
   VerifyNonFreeRegionLimit();
   size_t new_non_free_region_index_limit = 0;
+
+  // Combine zeroing and releasing pages to reduce how often madvise is called. This helps
+  // reduce contention on the mmap semaphore. b/62194020
+  // clear_region adds a region to the current block. If the region is not adjacent, the
+  // clear block is zeroed, released, and a new block begins.
+  uint8_t* clear_block_begin = nullptr;
+  uint8_t* clear_block_end = nullptr;
+  auto clear_region = [&clear_block_begin, &clear_block_end](Region* r) {
+    r->Clear(/*zero_and_release_pages*/false);
+    if (clear_block_end != r->Begin()) {
+      ZeroAndReleasePages(clear_block_begin, clear_block_end - clear_block_begin);
+      clear_block_begin = r->Begin();
+    }
+    clear_block_end = r->End();
+  };
   for (size_t i = 0; i < std::min(num_regions_, non_free_region_index_limit_); ++i) {
     Region* r = &regions_[i];
     if (r->IsInFromSpace()) {
       *cleared_bytes += r->BytesAllocated();
       *cleared_objects += r->ObjectsAllocated();
       --num_non_free_regions_;
-      r->Clear();
+      clear_region(r);
     } else if (r->IsInUnevacFromSpace()) {
       if (r->LiveBytes() == 0) {
         // Special case for 0 live bytes, this means all of the objects in the region are dead and
@@ -274,13 +289,13 @@
         // Also release RAM for large tails.
         while (i + free_regions < num_regions_ && regions_[i + free_regions].IsLargeTail()) {
           DCHECK(r->IsLarge());
-          regions_[i + free_regions].Clear();
+          clear_region(&regions_[i + free_regions]);
           ++free_regions;
         }
         *cleared_bytes += r->BytesAllocated();
         *cleared_objects += r->ObjectsAllocated();
         num_non_free_regions_ -= free_regions;
-        r->Clear();
+        clear_region(r);
         GetLiveBitmap()->ClearRange(
             reinterpret_cast<mirror::Object*>(r->Begin()),
             reinterpret_cast<mirror::Object*>(r->Begin() + free_regions * kRegionSize));
@@ -317,6 +332,8 @@
                                                  last_checked_region->Idx() + 1);
     }
   }
+  // Clear pages for the last block since clearing happens when a new block opens.
+  ZeroAndReleasePages(clear_block_begin, clear_block_end - clear_block_begin);
   // Update non_free_region_index_limit_.
   SetNonFreeRegionLimit(new_non_free_region_index_limit);
   evac_region_ = nullptr;
@@ -369,7 +386,7 @@
     if (!r->IsFree()) {
       --num_non_free_regions_;
     }
-    r->Clear();
+    r->Clear(/*zero_and_release_pages*/true);
   }
   SetNonFreeRegionLimit(0);
   current_region_ = &full_region_;
@@ -395,7 +412,7 @@
     } else {
       DCHECK(reg->IsLargeTail());
     }
-    reg->Clear();
+    reg->Clear(/*zero_and_release_pages*/true);
     --num_non_free_regions_;
   }
   if (end_addr < Limit()) {
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 1d1d27e..323ccdb 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -284,14 +284,16 @@
       return type_;
     }
 
-    void Clear() {
+    void Clear(bool zero_and_release_pages) {
       top_.StoreRelaxed(begin_);
       state_ = RegionState::kRegionStateFree;
       type_ = RegionType::kRegionTypeNone;
       objects_allocated_.StoreRelaxed(0);
       alloc_time_ = 0;
       live_bytes_ = static_cast<size_t>(-1);
-      ZeroAndReleasePages(begin_, end_ - begin_);
+      if (zero_and_release_pages) {
+        ZeroAndReleasePages(begin_, end_ - begin_);
+      }
       is_newly_allocated_ = false;
       is_a_tlab_ = false;
       thread_ = nullptr;
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 8d8b745..9e900e4 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -24,6 +24,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index 4a078b8..74ce273 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -21,7 +21,7 @@
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index bbfcb31..fddb3f2 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -16,10 +16,12 @@
 
 #include "zygote_space.h"
 
+#include "base/mutex-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc
index f1d26d9..5a75b37 100644
--- a/runtime/gc/task_processor_test.cc
+++ b/runtime/gc/task_processor_test.cc
@@ -18,7 +18,7 @@
 #include "common_runtime_test.h"
 #include "task_processor.h"
 #include "thread_pool.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index 492d4b4..d091e7f 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -22,7 +22,7 @@
 #include "base/mutex.h"
 #include "handle.h"
 #include "obj_ptr-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "verify_object.h"
 
 namespace art {
diff --git a/runtime/imtable_test.cc b/runtime/imtable_test.cc
index 17149df..d482183 100644
--- a/runtime/imtable_test.cc
+++ b/runtime/imtable_test.cc
@@ -29,7 +29,7 @@
 #include "mirror/class_loader.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index c852d5a..cff3ea7 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -34,6 +34,9 @@
 static constexpr bool kDumpStackOnNonLocalReference = false;
 static constexpr bool kDebugIRT = false;
 
+// Maximum table size we allow.
+static constexpr size_t kMaxTableSizeInBytes = 128 * MB;
+
 const char* GetIndirectRefKindString(const IndirectRefKind& kind) {
   switch (kind) {
     case kHandleScopeOrInvalid:
@@ -71,6 +74,9 @@
   CHECK(error_msg != nullptr);
   CHECK_NE(desired_kind, kHandleScopeOrInvalid);
 
+  // Overflow and maximum check.
+  CHECK_LE(max_count, kMaxTableSizeInBytes / sizeof(IrtEntry));
+
   const size_t table_bytes = max_count * sizeof(IrtEntry);
   table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
                                             PROT_READ | PROT_WRITE, false, false, error_msg));
@@ -203,6 +209,13 @@
 bool IndirectReferenceTable::Resize(size_t new_size, std::string* error_msg) {
   CHECK_GT(new_size, max_entries_);
 
+  constexpr size_t kMaxEntries = kMaxTableSizeInBytes / sizeof(IrtEntry);
+  if (new_size > kMaxEntries) {
+    *error_msg = android::base::StringPrintf("Requested size exceeds maximum: %zu", new_size);
+    return false;
+  }
+  // Note: the above check also ensures that there is no overflow below.
+
   const size_t table_bytes = new_size * sizeof(IrtEntry);
   std::unique_ptr<MemMap> new_map(MemMap::MapAnonymous("indirect ref table",
                                                        nullptr,
@@ -247,6 +260,14 @@
     }
 
     // Try to double space.
+    if (std::numeric_limits<size_t>::max() / 2 < max_entries_) {
+      LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
+                 << "(max=" << max_entries_ << ")" << std::endl
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this)
+                << " Resizing failed: exceeds size_t";
+      UNREACHABLE();
+    }
+
     std::string error_msg;
     if (!Resize(max_entries_ * 2, &error_msg)) {
       LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
@@ -453,4 +474,38 @@
   segment_state_ = new_state;
 }
 
+// Best-effort: ensures free_capacity entries are available above the current top index; holes
+// below it are not reclaimed. Returns false with *error_msg set if the table cannot be grown.
+bool IndirectReferenceTable::EnsureFreeCapacity(size_t free_capacity, std::string* error_msg) {
+  size_t top_index = segment_state_.top_index;
+  // Compare against the remaining room so that a huge free_capacity cannot wrap the sum.
+  if (top_index <= max_entries_ && free_capacity <= max_entries_ - top_index) {
+    return true;
+  }
+
+  if (resizable_ == ResizableCapacity::kNo) {
+    *error_msg = "Table is not resizable";
+    return false;
+  }
+
+  // Try to increase the table size. Would the new size overflow size_t?
+  if (std::numeric_limits<size_t>::max() - free_capacity < top_index) {
+    *error_msg = "Cannot resize table, overflow.";
+    return false;
+  }
+
+  if (!Resize(top_index + free_capacity, error_msg)) {
+    LOG(WARNING) << "JNI ERROR: Unable to reserve space in EnsureFreeCapacity (" << free_capacity
+                 << "): " << std::endl
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this)
+                 << " Resizing failed: " << *error_msg;
+    return false;
+  }
+  return true;
+}
+
+size_t IndirectReferenceTable::FreeCapacity() {
+  return max_entries_ - segment_state_.top_index;  // Room above the top only; holes are ignored.
+}
+
 }  // namespace art
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 7e452a2..6d52d95 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -28,7 +28,6 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "read_barrier_option.h"
 
@@ -285,6 +284,13 @@
     return segment_state_.top_index;
   }
 
+  // Ensure that at least free_capacity elements are available, or return false.
+  bool EnsureFreeCapacity(size_t free_capacity, std::string* error_msg)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // See implementation of EnsureFreeCapacity. We'll only state here how much is trivially free,
+  // without recovering holes. Thus this is a conservative estimate.
+  size_t FreeCapacity() REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Note IrtIterator does not have a read barrier as it's used to visit roots.
   IrtIterator begin() {
     return IrtIterator(table_, 0, Capacity());
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index 7f9f04f..9926ee7 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -27,7 +27,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread_list.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace instrumentation {
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 3e19146..2bac231 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -27,6 +27,8 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string-inl.h"
+#include "object_callbacks.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utf.h"
 
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 68454fb..2ec03be 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -25,10 +25,11 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "gc/weak_root_state.h"
-#include "object_callbacks.h"
 
 namespace art {
 
+class IsMarkedVisitor;
+
 namespace gc {
 namespace space {
 class ImageSpace;
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 311515c..bb27b34 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -23,6 +23,7 @@
 #include "handle_scope-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
+#include "utf.h"
 
 namespace art {
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d2f5232..4bc0f2f 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -22,15 +22,16 @@
 #include "interpreter_common.h"
 #include "interpreter_mterp_impl.h"
 #include "interpreter_switch_impl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "jvalue-inl.h"
 #include "mirror/string-inl.h"
+#include "mterp/mterp.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
+#include "thread-inl.h"
 #include "unstarted_runtime.h"
-#include "mterp/mterp.h"
-#include "jit/jit.h"
-#include "jit/jit_code_cache.h"
 
 namespace art {
 namespace interpreter {
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index 084cb42..d06ac23 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -32,6 +32,7 @@
 #include "reflection.h"
 #include "reflection-inl.h"
 #include "stack.h"
+#include "thread-inl.h"
 #include "well_known_classes.h"
 
 namespace art {
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 96934bc..152cce4 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -50,7 +50,7 @@
 #include "mirror/string-inl.h"
 #include "nth_caller_visitor.h"
 #include "reflection.h"
-#include "thread.h"
+#include "thread-inl.h"
 #include "transaction.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
diff --git a/runtime/invoke_type.h b/runtime/invoke_type.h
index de07c72..a003f7f 100644
--- a/runtime/invoke_type.h
+++ b/runtime/invoke_type.h
@@ -21,7 +21,7 @@
 
 namespace art {
 
-enum InvokeType {
+enum InvokeType : uint32_t {
   kStatic,     // <<static>>
   kDirect,     // <<direct>>
   kVirtual,    // <<virtual>>
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index bd0f842..2ad3b29 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "jni_internal.h"
+#include "java_vm_ext.h"
 
 #include <dlfcn.h>
 
@@ -22,7 +22,7 @@
 
 #include "art_method-inl.h"
 #include "base/dumpable.h"
-#include "base/mutex.h"
+#include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "check_jni.h"
@@ -30,11 +30,12 @@
 #include "fault_handler.h"
 #include "gc_root-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
 #include "nativeloader/native_loader.h"
-#include "java_vm_ext.h"
+#include "object_callbacks.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
 #include "runtime_options.h"
@@ -145,19 +146,24 @@
     return needs_native_bridge_;
   }
 
-  void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr) {
+  // No mutator lock since dlsym may block for a while if another thread is doing dlopen.
+  void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr)
+      REQUIRES(!Locks::mutator_lock_) {
     return NeedsNativeBridge()
         ? FindSymbolWithNativeBridge(symbol_name.c_str(), shorty)
         : FindSymbolWithoutNativeBridge(symbol_name.c_str());
   }
 
-  void* FindSymbolWithoutNativeBridge(const std::string& symbol_name) {
+  // No mutator lock since dlsym may block for a while if another thread is doing dlopen.
+  void* FindSymbolWithoutNativeBridge(const std::string& symbol_name)
+      REQUIRES(!Locks::mutator_lock_) {
     CHECK(!NeedsNativeBridge());
 
     return dlsym(handle_, symbol_name.c_str());
   }
 
-  void* FindSymbolWithNativeBridge(const std::string& symbol_name, const char* shorty) {
+  void* FindSymbolWithNativeBridge(const std::string& symbol_name, const char* shorty)
+      REQUIRES(!Locks::mutator_lock_) {
     CHECK(NeedsNativeBridge());
 
     uint32_t len = 0;
@@ -236,8 +242,8 @@
   }
 
   // See section 11.3 "Linking Native Methods" of the JNI spec.
-  void* FindNativeMethod(ArtMethod* m, std::string& detail)
-      REQUIRES(Locks::jni_libraries_lock_)
+  void* FindNativeMethod(Thread* self, ArtMethod* m, std::string& detail)
+      REQUIRES(!Locks::jni_libraries_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     std::string jni_short_name(m->JniShortName());
     std::string jni_long_name(m->JniLongName());
@@ -246,25 +252,18 @@
     void* const declaring_class_loader_allocator =
         Runtime::Current()->GetClassLinker()->GetAllocatorForClassLoader(declaring_class_loader);
     CHECK(declaring_class_loader_allocator != nullptr);
-    for (const auto& lib : libraries_) {
-      SharedLibrary* const library = lib.second;
-      // Use the allocator address for class loader equality to avoid unnecessary weak root decode.
-      if (library->GetClassLoaderAllocator() != declaring_class_loader_allocator) {
-        // We only search libraries loaded by the appropriate ClassLoader.
-        continue;
-      }
-      // Try the short name then the long name...
-      const char* shorty = library->NeedsNativeBridge()
-          ? m->GetShorty()
-          : nullptr;
-      void* fn = library->FindSymbol(jni_short_name, shorty);
-      if (fn == nullptr) {
-        fn = library->FindSymbol(jni_long_name, shorty);
-      }
-      if (fn != nullptr) {
-        VLOG(jni) << "[Found native code for " << m->PrettyMethod()
-                  << " in \"" << library->GetPath() << "\"]";
-        return fn;
+    // TODO: Avoid calling GetShorty here to prevent dirtying dex pages?
+    const char* shorty = m->GetShorty();
+    {
+      // Go to suspended since dlsym may block for a long time if other threads are using dlopen.
+      ScopedThreadSuspension sts(self, kNative);
+      void* native_code = FindNativeMethodInternal(self,
+                                                   declaring_class_loader_allocator,
+                                                   shorty,
+                                                   jni_short_name,
+                                                   jni_long_name);
+      if (native_code != nullptr) {
+        return native_code;
       }
     }
     detail += "No implementation found for ";
@@ -273,22 +272,51 @@
     return nullptr;
   }
 
+  // Looks up a JNI symbol (short name first, then long) in the libraries of one class loader.
+  void* FindNativeMethodInternal(Thread* self,
+                                 void* declaring_class_loader_allocator,
+                                 const char* shorty,
+                                 const std::string& jni_short_name,
+                                 const std::string& jni_long_name)
+      REQUIRES(!Locks::jni_libraries_lock_)
+      REQUIRES(!Locks::mutator_lock_) {
+    MutexLock mu(self, *Locks::jni_libraries_lock_);
+    for (const auto& entry : libraries_) {
+      SharedLibrary* const candidate = entry.second;
+      // Compare allocator addresses instead of decoding the class loader weak root; libraries
+      // that belong to a different class loader are never searched.
+      if (candidate->GetClassLoaderAllocator() == declaring_class_loader_allocator) {
+        // The shorty is only handed to FindSymbol when the library needs the native bridge.
+        const char* const bridge_shorty = candidate->NeedsNativeBridge() ? shorty : nullptr;
+        void* symbol = candidate->FindSymbol(jni_short_name, bridge_shorty);
+        if (symbol == nullptr) {
+          symbol = candidate->FindSymbol(jni_long_name, bridge_shorty);
+        }
+        if (symbol != nullptr) {
+          VLOG(jni) << "[Found native code for " << jni_long_name
+                    << " in \"" << candidate->GetPath() << "\"]";
+          return symbol;
+        }
+      }
+    }
+    return nullptr;
+  }
+
   // Unload native libraries with cleared class loaders.
   void UnloadNativeLibraries()
       REQUIRES(!Locks::jni_libraries_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
+    Thread* const self = Thread::Current();
     std::vector<SharedLibrary*> unload_libraries;
     {
-      MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
+      MutexLock mu(self, *Locks::jni_libraries_lock_);
       for (auto it = libraries_.begin(); it != libraries_.end(); ) {
         SharedLibrary* const library = it->second;
         // If class loader is null then it was unloaded, call JNI_OnUnload.
         const jweak class_loader = library->GetClassLoader();
         // If class_loader is a null jobject then it is the boot class loader. We should not unload
         // the native libraries of the boot class loader.
-        if (class_loader != nullptr &&
-            soa.Self()->IsJWeakCleared(class_loader)) {
+        if (class_loader != nullptr && self->IsJWeakCleared(class_loader)) {
           unload_libraries.push_back(library);
           it = libraries_.erase(it);
         } else {
@@ -296,6 +324,7 @@
         }
       }
     }
+    ScopedThreadSuspension sts(self, kNative);
     // Do this without holding the jni libraries lock to prevent possible deadlocks.
     typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
     for (auto library : unload_libraries) {
@@ -305,7 +334,7 @@
       } else {
         VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]: Calling...";
         JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
-        jni_on_unload(soa.Vm(), nullptr);
+        jni_on_unload(self->GetJniEnv()->vm, nullptr);
       }
       delete library;
     }
@@ -956,12 +985,8 @@
   // If this is a static method, it could be called before the class has been initialized.
   CHECK(c->IsInitializing()) << c->GetStatus() << " " << m->PrettyMethod();
   std::string detail;
-  void* native_method;
-  Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::jni_libraries_lock_);
-    native_method = libraries_->FindNativeMethod(m, detail);
-  }
+  Thread* const self = Thread::Current();
+  void* native_method = libraries_->FindNativeMethod(self, m, detail);
   if (native_method == nullptr) {
     // Lookup JNI native methods from native TI Agent libraries. See runtime/ti/agent.h for more
     // information. Agent libraries are searched for native methods after all jni libraries.
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 7374920..50aabdc 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -32,6 +32,7 @@
 }  // namespace mirror
 
 class ArtMethod;
+class IsMarkedVisitor;
 class Libraries;
 class ParsedOptions;
 class Runtime;
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index 0aa04c1..ede4f9e 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -24,7 +24,7 @@
 
 #include "base/logging.h"
 #include "jdwp/jdwp_priv.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 #ifdef ART_TARGET_ANDROID
 #include "cutils/sockets.h"
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index e8a9904..618332b 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -33,7 +33,7 @@
 #include "jdwp/jdwp_priv.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index ae00044..135d9b1 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -18,7 +18,7 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread.h"
 
 #include <unordered_map>
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ae474da..969a570 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -32,7 +32,9 @@
 #include "profile_saver.h"
 #include "runtime.h"
 #include "runtime_options.h"
+#include "stack.h"
 #include "stack_map.h"
+#include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 75f9b0a..f898d41 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -23,7 +23,6 @@
 #include "base/timing_logger.h"
 #include "jit/profile_saver_options.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "profile_compilation_info.h"
 #include "thread_pool.h"
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index fdac24e..0cafac7 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -34,7 +34,10 @@
 #include "linear_alloc.h"
 #include "mem_map.h"
 #include "oat_file-inl.h"
+#include "oat_quick_method_header.h"
+#include "object_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 612d06b..9ecc876 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -29,7 +29,6 @@
 #include "jni.h"
 #include "method_reference.h"
 #include "oat_file.h"
-#include "object_callbacks.h"
 #include "profile_compilation_info.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -39,6 +38,8 @@
 class ArtMethod;
 class LinearAlloc;
 class InlineCache;
+class IsMarkedVisitor;
+class OatQuickMethodHeader;
 class ProfilingInfo;
 
 namespace jit {
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 86c15e6..a292a6e 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -49,7 +49,7 @@
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
 // Last profile version: Instead of method index, put the difference with the last
 // method's index.
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '7', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '8', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -132,6 +132,33 @@
   }
 }
 
+bool ProfileCompilationInfo::AddSampledMethod(bool startup,
+                                              const std::string& dex_location,
+                                              uint32_t checksum,
+                                              uint16_t method_idx,
+                                              uint32_t num_method_ids) {
+  // Resolve (or create) the per-dex entry; a null entry is reported to the caller as failure.
+  const std::string profile_key = GetProfileDexFileKey(dex_location);
+  DexFileData* const dex_data = GetOrAddDexFileData(profile_key, checksum, num_method_ids);
+  if (dex_data != nullptr) {
+    dex_data->AddSampledMethod(startup, method_idx);
+    return true;
+  }
+  return false;
+}
+
+bool ProfileCompilationInfo::AddSampledMethods(bool startup,
+                                               std::vector<MethodReference>& methods) {
+  for (const MethodReference& method : methods) {
+    if (DexFileData* const dex_data = GetOrAddDexFileData(method.dex_file)) {
+      dex_data->AddSampledMethod(startup, method.dex_method_index);
+    } else {
+      return false;  // No dex data for this method; the ones handled so far stay recorded.
+    }
+  }
+  return true;
+}
+
 bool ProfileCompilationInfo::AddMethodsAndClasses(
     const std::vector<ProfileMethodInfo>& methods,
     const std::set<DexCacheResolvedClasses>& resolved_classes) {
@@ -252,7 +279,7 @@
 
 static constexpr size_t kLineHeaderSize =
     2 * sizeof(uint16_t) +  // class_set.size + dex_location.size
-    2 * sizeof(uint32_t);   // method_map.size + checksum
+    3 * sizeof(uint32_t);   // method_map.size + checksum + num_method_ids
 
 /**
  * Serialization format:
@@ -297,7 +324,8 @@
     required_capacity += kLineHeaderSize +
         dex_data.profile_key.size() +
         sizeof(uint16_t) * dex_data.class_set.size() +
-        methods_region_size;
+        methods_region_size +
+        dex_data.bitmap_storage.size();
   }
   if (required_capacity > kProfileSizeErrorThresholdInBytes) {
     LOG(ERROR) << "Profile data size exceeds "
@@ -335,10 +363,12 @@
 
     DCHECK_LE(dex_data.profile_key.size(), std::numeric_limits<uint16_t>::max());
     DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
+    // Write profile line header.
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.profile_key.size()));
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
     AddUintToBuffer(&buffer, methods_region_size);  // uint32_t
     AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
+    AddUintToBuffer(&buffer, dex_data.num_method_ids);  // uint32_t
 
     AddStringToBuffer(&buffer, dex_data.profile_key);
 
@@ -362,6 +392,10 @@
       last_class_index = class_id.index_;
       AddUintToBuffer(&buffer, diff_with_last_class_index);
     }
+
+    buffer.insert(buffer.end(),
+                  dex_data.bitmap_storage.begin(),
+                  dex_data.bitmap_storage.end());
   }
 
   uint32_t output_size = 0;
@@ -476,7 +510,8 @@
 
 ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
     const std::string& profile_key,
-    uint32_t checksum) {
+    uint32_t checksum,
+    uint32_t num_method_ids) {
   const auto profile_index_it = profile_key_map_.FindOrAdd(profile_key, profile_key_map_.size());
   if (profile_key_map_.size() > std::numeric_limits<uint8_t>::max()) {
     // Allow only 255 dex files to be profiled. This allows us to save bytes
@@ -492,7 +527,11 @@
   if (info_.size() <= profile_index) {
     // This is a new addition. Add it to the info_ array.
     DexFileData* dex_file_data = new (&arena_) DexFileData(
-        &arena_, profile_key, checksum, profile_index);
+        &arena_,
+        profile_key,
+        checksum,
+        profile_index,
+        num_method_ids);
     info_.push_back(dex_file_data);
   }
   DexFileData* result = info_[profile_index];
@@ -500,6 +539,7 @@
   // This should always be the case since since the cache map is managed by ProfileCompilationInfo.
   DCHECK_EQ(profile_key, result->profile_key);
   DCHECK_EQ(profile_index, result->profile_index);
+  DCHECK_EQ(num_method_ids, result->num_method_ids);
 
   // Check that the checksum matches.
   // This may different if for example the dex file was updated and
@@ -528,7 +568,7 @@
 bool ProfileCompilationInfo::AddResolvedClasses(const DexCacheResolvedClasses& classes) {
   const std::string dex_location = GetProfileDexFileKey(classes.GetDexLocation());
   const uint32_t checksum = classes.GetLocationChecksum();
-  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum, classes.NumMethodIds());
   if (data == nullptr) {
     return false;
   }
@@ -538,15 +578,23 @@
 
 bool ProfileCompilationInfo::AddMethodIndex(const std::string& dex_location,
                                             uint32_t dex_checksum,
-                                            uint16_t method_index) {
-  return AddMethod(dex_location, dex_checksum, method_index, OfflineProfileMethodInfo(nullptr));
+                                            uint16_t method_index,
+                                            uint32_t num_method_ids) {
+  return AddMethod(dex_location,
+                   dex_checksum,
+                   method_index,
+                   num_method_ids,
+                   OfflineProfileMethodInfo(nullptr));
 }
 
 bool ProfileCompilationInfo::AddMethod(const std::string& dex_location,
                                        uint32_t dex_checksum,
                                        uint16_t method_index,
+                                       uint32_t num_method_ids,
                                        const OfflineProfileMethodInfo& pmi) {
-  DexFileData* const data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location), dex_checksum);
+  DexFileData* const data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location),
+                                                dex_checksum,
+                                                num_method_ids);
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
@@ -579,7 +627,8 @@
       const DexReference& dex_ref = pmi.dex_references[class_ref.dex_profile_index];
       DexFileData* class_dex_data = GetOrAddDexFileData(
           GetProfileDexFileKey(dex_ref.dex_location),
-          dex_ref.dex_checksum);
+          dex_ref.dex_checksum,
+          dex_ref.num_method_ids);
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
@@ -590,9 +639,7 @@
 }
 
 bool ProfileCompilationInfo::AddMethod(const ProfileMethodInfo& pmi) {
-  DexFileData* const data = GetOrAddDexFileData(
-      GetProfileDexFileKey(pmi.dex_file->GetLocation()),
-      pmi.dex_file->GetLocationChecksum());
+  DexFileData* const data = GetOrAddDexFileData(pmi.dex_file);
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
@@ -604,9 +651,7 @@
       continue;
     }
     for (const TypeReference& class_ref : cache.classes) {
-      DexFileData* class_dex_data = GetOrAddDexFileData(
-          GetProfileDexFileKey(class_ref.dex_file->GetLocation()),
-          class_ref.dex_file->GetLocationChecksum());
+      DexFileData* class_dex_data = GetOrAddDexFileData(class_ref.dex_file);
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
@@ -623,8 +668,9 @@
 
 bool ProfileCompilationInfo::AddClassIndex(const std::string& dex_location,
                                            uint32_t checksum,
-                                           dex::TypeIndex type_idx) {
-  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+                                           dex::TypeIndex type_idx,
+                                           uint32_t num_method_ids) {
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum, num_method_ids);
   if (data == nullptr) {
     return false;
   }
@@ -694,7 +740,9 @@
       - line_header.method_region_size_bytes;
   uint16_t last_method_index = 0;
   while (buffer.CountUnreadBytes() > expected_unread_bytes_after_operation) {
-    DexFileData* const data = GetOrAddDexFileData(line_header.dex_location, line_header.checksum);
+    DexFileData* const data = GetOrAddDexFileData(line_header.dex_location,
+                                                  line_header.checksum,
+                                                  line_header.num_method_ids);
     uint16_t diff_with_last_method_index;
     READ_UINT(uint16_t, buffer, diff_with_last_method_index, error);
     uint16_t method_index = last_method_index + diff_with_last_method_index;
@@ -729,7 +777,8 @@
     last_class_index = type_index;
     if (!AddClassIndex(line_header.dex_location,
                        line_header.checksum,
-                       dex::TypeIndex(type_index))) {
+                       dex::TypeIndex(type_index),
+                       line_header.num_method_ids)) {
       return false;
     }
   }
@@ -863,6 +912,7 @@
   READ_UINT(uint16_t, buffer, line_header->class_set_size, error);
   READ_UINT(uint32_t, buffer, line_header->method_region_size_bytes, error);
   READ_UINT(uint32_t, buffer, line_header->checksum, error);
+  READ_UINT(uint32_t, buffer, line_header->num_method_ids, error);
   return true;
 }
 
@@ -902,7 +952,10 @@
       uint8_t number_of_dex_files,
       const ProfileLineHeader& line_header,
       /*out*/std::string* error) {
-  if (GetOrAddDexFileData(line_header.dex_location, line_header.checksum) == nullptr) {
+  DexFileData* data = GetOrAddDexFileData(line_header.dex_location,
+                                          line_header.checksum,
+                                          line_header.num_method_ids);
+  if (data == nullptr) {
     *error = "Error when reading profile file line header: checksum mismatch for "
         + line_header.dex_location;
     return kProfileLoadBadData;
@@ -915,6 +968,16 @@
   if (!ReadClasses(buffer, line_header, error)) {
     return kProfileLoadBadData;
   }
+
+  // Read method bitmap.
+  const size_t bytes = data->bitmap_storage.size();
+  if (buffer.CountUnreadBytes() < bytes) {
+    *error += "Profile EOF reached prematurely for ReadProfileHeaderDexLocation";
+    return kProfileLoadBadData;
+  }
+  // data() instead of &storage[0]: the storage is empty when the dex file has no method ids.
+  std::copy_n(buffer.GetCurrentPtr(), bytes, data->bitmap_storage.data());
+  buffer.Advance(bytes);
   return kProfileLoadSuccess;
 }
 
@@ -932,6 +995,15 @@
   }
 }
 
+void ProfileCompilationInfo::DexFileData::CreateBitmap() {
+  const size_t bit_count = num_method_ids * kMethodBitCount;  // kMethodBitCount bits per method.
+  bitmap_storage.resize(RoundUp(bit_count, kBitsPerByte) / kBitsPerByte);
+  if (!bitmap_storage.empty()) {  // Without storage, method_bitmap is left untouched.
+    MemoryRegion backing(&bitmap_storage[0], bitmap_storage.size());
+    method_bitmap = BitMemoryRegion(backing, 0, bit_count);
+  }
+}
+
 // TODO(calin): fail fast if the dex checksums don't match.
 ProfileCompilationInfo::ProfileLoadSatus ProfileCompilationInfo::LoadInternal(
       int fd, std::string* error) {
@@ -1110,7 +1182,8 @@
   SafeMap<uint8_t, uint8_t> dex_profile_index_remap;
   for (const DexFileData* other_dex_data : other.info_) {
     const DexFileData* dex_data = GetOrAddDexFileData(other_dex_data->profile_key,
-                                                      other_dex_data->checksum);
+                                                      other_dex_data->checksum,
+                                                      other_dex_data->num_method_ids);
     if (dex_data == nullptr) {
       return false;  // Could happen if we exceed the number of allowed dex files.
     }
@@ -1147,6 +1220,9 @@
         }
       }
     }
+
+    // Merge the bitmaps.
+    dex_data->MergeBitmap(*other_dex_data);
   }
   return true;
 }
@@ -1159,6 +1235,27 @@
   return ChecksumMatch(dex_file.GetLocationChecksum(), checksum);
 }
 
+bool ProfileCompilationInfo::IsStartupOrHotMethod(const MethodReference& method_ref) const {
+  const DexFile* const dex_file = method_ref.dex_file;  // Supplies the location and checksum.
+  return IsStartupOrHotMethod(
+      dex_file->GetLocation(), dex_file->GetLocationChecksum(), method_ref.dex_method_index);
+}
+
+bool ProfileCompilationInfo::IsStartupOrHotMethod(const std::string& dex_location,
+                                                  uint32_t dex_checksum,
+                                                  uint16_t dex_method_index) const {
+  // A method qualifies when its startup bit is set or it has an entry in the hot method map.
+  const DexFileData* const dex_data = FindDexData(GetProfileDexFileKey(dex_location));
+  if (dex_data == nullptr) {
+    return false;
+  }
+  if (!ChecksumMatch(dex_checksum, dex_data->checksum)) {
+    return false;
+  }
+  return dex_data->HasSampledMethod(/*startup*/ true, dex_method_index) ||
+      dex_data->method_map.find(dex_method_index) != dex_data->method_map.end();
+}
+
 bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
   return FindMethod(method_ref.dex_file->GetLocation(),
                     method_ref.dex_file->GetLocationChecksum(),
@@ -1196,6 +1293,7 @@
   for (const DexFileData* dex_data : info_) {
     pmi->dex_references[dex_data->profile_index].dex_location = dex_data->profile_key;
     pmi->dex_references[dex_data->profile_index].dex_checksum = dex_data->checksum;
+    pmi->dex_references[dex_data->profile_index].num_method_ids = dex_data->num_method_ids;
   }
 
   return pmi;
@@ -1277,7 +1375,7 @@
         }
       }
     }
-    os << "\n\tmethods: ";
+    os << "\n\thot methods: ";
     for (const auto& method_it : dex_data->method_map) {
       if (dex_file != nullptr) {
         os << "\n\t\t" << dex_file->PrettyMethod(method_it.first, true);
@@ -1302,6 +1400,19 @@
       }
       os << "], ";
     }
+    bool startup = true;
+    while (true) {
+      os << "\n\t" << (startup ? "startup methods: " : "post startup methods: ");
+      for (uint32_t method_idx = 0; method_idx < dex_data->num_method_ids; ++method_idx) {
+        if (dex_data->HasSampledMethod(startup, method_idx)) {
+          os << method_idx << ", ";
+        }
+      }
+      if (startup == false) {
+        break;
+      }
+      startup = false;
+    }
     os << "\n\tclasses: ";
     for (const auto class_it : dex_data->class_set) {
       if (dex_file != nullptr) {
@@ -1314,9 +1425,12 @@
   return os.str();
 }
 
-bool ProfileCompilationInfo::GetClassesAndMethods(const DexFile& dex_file,
-                                                  std::set<dex::TypeIndex>* class_set,
-                                                  std::set<uint16_t>* method_set) const {
+bool ProfileCompilationInfo::GetClassesAndMethods(
+    const DexFile& dex_file,
+    /*out*/std::set<dex::TypeIndex>* class_set,
+    /*out*/std::set<uint16_t>* hot_method_set,
+    /*out*/std::set<uint16_t>* startup_method_set,
+    /*out*/std::set<uint16_t>* post_startup_method_method_set) const {
   std::set<std::string> ret;
   std::string profile_key = GetProfileDexFileKey(dex_file.GetLocation());
   const DexFileData* dex_data = FindDexData(profile_key);
@@ -1324,7 +1438,15 @@
     return false;
   }
   for (const auto& it : dex_data->method_map) {
-    method_set->insert(it.first);
+    hot_method_set->insert(it.first);
+  }
+  for (uint32_t method_idx = 0; method_idx < dex_data->num_method_ids; ++method_idx) {
+    if (dex_data->HasSampledMethod(/*startup*/ true, method_idx)) {
+      startup_method_set->insert(method_idx);
+    }
+    if (dex_data->HasSampledMethod(/*startup*/ false, method_idx)) {
+      post_startup_method_method_set->insert(method_idx);
+    }
   }
   for (const dex::TypeIndex& type_index : dex_data->class_set) {
     class_set->insert(type_index);
@@ -1366,7 +1488,10 @@
             << ", profile checksum=" << dex_data->checksum;
         return std::set<DexCacheResolvedClasses>();
       }
-      DexCacheResolvedClasses classes(dex_location, dex_location, dex_data->checksum);
+      DexCacheResolvedClasses classes(dex_location,
+                                      dex_location,
+                                      dex_data->checksum,
+                                      dex_data->num_method_ids);
       classes.AddClasses(dex_data->class_set.begin(), dex_data->class_set.end());
       ret.insert(classes);
     }
@@ -1383,8 +1508,8 @@
   const std::string base_dex_location = "base.apk";
   ProfileCompilationInfo info;
   // The limits are defined by the dex specification.
-  uint16_t max_method = std::numeric_limits<uint16_t>::max();
-  uint16_t max_classes = std::numeric_limits<uint16_t>::max();
+  const uint16_t max_method = std::numeric_limits<uint16_t>::max();
+  const uint16_t max_classes = std::numeric_limits<uint16_t>::max();
   uint16_t number_of_methods = max_method * method_ratio / 100;
   uint16_t number_of_classes = max_classes * class_ratio / 100;
 
@@ -1404,7 +1529,7 @@
       if (m < (number_of_methods / kFavorSplit)) {
         method_idx %= kFavorFirstN;
       }
-      info.AddMethodIndex(profile_key, 0, method_idx);
+      info.AddMethodIndex(profile_key, 0, method_idx, max_method);
     }
 
     for (uint16_t c = 0; c < number_of_classes; c++) {
@@ -1412,7 +1537,7 @@
       if (c < (number_of_classes / kFavorSplit)) {
         type_idx %= kFavorFirstN;
       }
-      info.AddClassIndex(profile_key, 0, dex::TypeIndex(type_idx));
+      info.AddClassIndex(profile_key, 0, dex::TypeIndex(type_idx), max_method);
     }
   }
   return info.Save(fd);
@@ -1431,13 +1556,16 @@
     for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
       // Randomly add a class from the dex file (with 50% chance).
       if (std::rand() % 2 != 0) {
-        info.AddClassIndex(location, checksum, dex::TypeIndex(dex_file->GetClassDef(i).class_idx_));
+        info.AddClassIndex(location,
+                           checksum,
+                           dex::TypeIndex(dex_file->GetClassDef(i).class_idx_),
+                           dex_file->NumMethodIds());
       }
     }
     for (uint32_t i = 0; i < dex_file->NumMethodIds(); ++i) {
       // Randomly add a method from the dex file (with 50% chance).
       if (std::rand() % 2 != 0) {
-        info.AddMethodIndex(location, checksum, i);
+        info.AddMethodIndex(location, checksum, i, dex_file->NumMethodIds());
       }
     }
   }
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index ca5b28a..2b89a41 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -23,6 +23,7 @@
 #include "atomic.h"
 #include "base/arena_object.h"
 #include "base/arena_containers.h"
+#include "bit_memory_region.h"
 #include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -54,7 +55,9 @@
   ProfileMethodInfo(const DexFile* dex,
                     uint32_t method_index,
                     const std::vector<ProfileInlineCache>& caches)
-      : dex_file(dex), dex_method_index(method_index), inline_caches(caches) {}
+      : dex_file(dex),
+        dex_method_index(method_index),
+        inline_caches(caches) {}
 
   const DexFile* dex_file;
   const uint32_t dex_method_index;
@@ -79,13 +82,15 @@
 
   // A dex location together with its checksum.
   struct DexReference {
-    DexReference() : dex_checksum(0) {}
+    DexReference() : dex_checksum(0), num_method_ids(0) {}
 
-    DexReference(const std::string& location, uint32_t checksum)
-        : dex_location(location), dex_checksum(checksum) {}
+    DexReference(const std::string& location, uint32_t checksum, uint32_t num_methods)
+        : dex_location(location), dex_checksum(checksum), num_method_ids(num_methods) {}
 
     bool operator==(const DexReference& other) const {
-      return dex_checksum == other.dex_checksum && dex_location == other.dex_location;
+      return dex_checksum == other.dex_checksum &&
+          dex_location == other.dex_location &&
+          num_method_ids == other.num_method_ids;
     }
 
     bool MatchesDex(const DexFile* dex_file) const {
@@ -95,6 +100,7 @@
 
     std::string dex_location;
     uint32_t dex_checksum;
+    uint32_t num_method_ids;
   };
 
   // Encodes a class reference in the profile.
@@ -191,6 +197,24 @@
   bool AddMethodsAndClasses(const std::vector<ProfileMethodInfo>& methods,
                             const std::set<DexCacheResolvedClasses>& resolved_classes);
 
+  // Add a method index to the profile (without inline caches).
+  bool AddMethodIndex(const std::string& dex_location,
+                      uint32_t checksum,
+                      uint16_t method_idx,
+                      uint32_t num_method_ids);
+
+  // Add a method to the profile using its online representation (containing runtime structures).
+  bool AddMethod(const ProfileMethodInfo& pmi);
+
+  // Add methods that have samples but are not necessarily hot. These are partitioned into two
+  // possibly intersecting sets: startup and post startup.
+  bool AddSampledMethods(bool startup, std::vector<MethodReference>& methods);
+  bool AddSampledMethod(bool startup,
+                        const std::string& dex_location,
+                        uint32_t checksum,
+                        uint16_t method_idx,
+                        uint32_t num_method_ids);
+
   // Load profile information from the given file descriptor.
   // If the current profile is non-empty the load will fail.
   bool Load(int fd);
@@ -216,6 +240,12 @@
   // Return the number of resolved classes that were profiled.
   uint32_t GetNumberOfResolvedClasses() const;
 
+  // Return true if the method reference is a hot or startup method in the profiling info.
+  bool IsStartupOrHotMethod(const MethodReference& method_ref) const;
+  bool IsStartupOrHotMethod(const std::string& dex_location,
+                            uint32_t dex_checksum,
+                            uint16_t dex_method_index) const;
+
   // Return true if the method reference is present in the profiling info.
   bool ContainsMethod(const MethodReference& method_ref) const;
 
@@ -244,7 +274,9 @@
   // file is register and has a matching checksum, false otherwise.
   bool GetClassesAndMethods(const DexFile& dex_file,
                             /*out*/std::set<dex::TypeIndex>* class_set,
-                            /*out*/std::set<uint16_t>* method_set) const;
+                            /*out*/std::set<uint16_t>* hot_method_set,
+                            /*out*/std::set<uint16_t>* startup_method_set,
+                            /*out*/std::set<uint16_t>* post_startup_method_method_set) const;
 
   // Perform an equality test with the `other` profile information.
   bool Equals(const ProfileCompilationInfo& other);
@@ -301,13 +333,31 @@
     DexFileData(ArenaAllocator* arena,
                 const std::string& key,
                 uint32_t location_checksum,
-                uint16_t index)
+                uint16_t index,
+                uint32_t num_methods)
         : arena_(arena),
           profile_key(key),
           profile_index(index),
           checksum(location_checksum),
           method_map(std::less<uint16_t>(), arena->Adapter(kArenaAllocProfile)),
-          class_set(std::less<dex::TypeIndex>(), arena->Adapter(kArenaAllocProfile)) {}
+          class_set(std::less<dex::TypeIndex>(), arena->Adapter(kArenaAllocProfile)),
+          num_method_ids(num_methods),
+          bitmap_storage(arena->Adapter(kArenaAllocProfile)) {
+      CreateBitmap();
+    }
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_map == other.method_map;
+    }
+
+    // Mark a method as executed at least once.
+    void AddSampledMethod(bool startup, size_t index) {
+      method_bitmap.StoreBit(MethodBitIndex(startup, index), true);
+    }
+
+    bool HasSampledMethod(bool startup, size_t index) const {
+      return method_bitmap.LoadBit(MethodBitIndex(startup, index));
+    }
 
     // The arena used to allocate new inline cache maps.
     ArenaAllocator* arena_;
@@ -322,32 +372,64 @@
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
     ArenaSet<dex::TypeIndex> class_set;
-
-    bool operator==(const DexFileData& other) const {
-      return checksum == other.checksum && method_map == other.method_map;
-    }
-
     // Find the inline caches of the the given method index. Add an empty entry if
     // no previous data is found.
     InlineCacheMap* FindOrAddMethod(uint16_t method_index);
+    // Number of method ids in the dex file; non-startup bits start at this offset.
+    uint32_t num_method_ids;
+    ArenaVector<uint8_t> bitmap_storage;
+    BitMemoryRegion method_bitmap;
+
+    void CreateBitmap();
+
+    void MergeBitmap(const DexFileData& other) {
+      DCHECK_EQ(bitmap_storage.size(), other.bitmap_storage.size());
+      for (size_t i = 0; i < bitmap_storage.size(); ++i) {
+        bitmap_storage[i] |= other.bitmap_storage[i];
+      }
+    }
+
+   private:
+    enum Bits {
+      kMethodBitStartup,
+      kMethodBitAfterStartup,
+      kMethodBitCount,
+    };
+
+    size_t MethodBitIndex(bool startup, size_t index) const {
+      DCHECK_LT(index, num_method_ids);
+      if (!startup) {
+        index += num_method_ids;
+      }
+      return index;
+    }
   };
 
   // Return the profile data for the given profile key or null if the dex location
   // already exists but has a different checksum
-  DexFileData* GetOrAddDexFileData(const std::string& profile_key, uint32_t checksum);
+  DexFileData* GetOrAddDexFileData(const std::string& profile_key,
+                                   uint32_t checksum,
+                                   uint32_t num_method_ids);
 
-  // Add a method to the profile using its online representation (containing runtime structures).
-  bool AddMethod(const ProfileMethodInfo& pmi);
+  DexFileData* GetOrAddDexFileData(const DexFile* dex_file) {
+    return GetOrAddDexFileData(GetProfileDexFileKey(dex_file->GetLocation()),
+                               dex_file->GetLocationChecksum(),
+                               dex_file->NumMethodIds());
+  }
 
   // Add a method to the profile using its offline representation.
   // This is mostly used to facilitate testing.
   bool AddMethod(const std::string& dex_location,
                  uint32_t dex_checksum,
                  uint16_t method_index,
+                 uint32_t num_method_ids,
                  const OfflineProfileMethodInfo& pmi);
 
   // Add a class index to the profile.
-  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, dex::TypeIndex type_idx);
+  bool AddClassIndex(const std::string& dex_location,
+                     uint32_t checksum,
+                     dex::TypeIndex type_idx,
+                     uint32_t num_method_ids);
 
   // Add all classes from the given dex cache to the the profile.
   bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
@@ -392,6 +474,7 @@
     uint16_t class_set_size;
     uint32_t method_region_size_bytes;
     uint32_t checksum;
+    uint32_t num_method_ids;
   };
 
   // A helper structure to make sure we don't read past our buffers in the loops.
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 1cfa355..615149f 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -32,6 +32,8 @@
 
 namespace art {
 
+static constexpr size_t kMaxMethodIds = 65535;
+
 class ProfileCompilationInfoTest : public CommonRuntimeTest {
  public:
   void PostRuntimeCreate() OVERRIDE {
@@ -61,7 +63,7 @@
                  uint32_t checksum,
                  uint16_t method_index,
                  ProfileCompilationInfo* info) {
-    return info->AddMethodIndex(dex_location, checksum, method_index);
+    return info->AddMethodIndex(dex_location, checksum, method_index, kMaxMethodIds);
   }
 
   bool AddMethod(const std::string& dex_location,
@@ -69,14 +71,14 @@
                  uint16_t method_index,
                  const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi,
                  ProfileCompilationInfo* info) {
-    return info->AddMethod(dex_location, checksum, method_index, pmi);
+    return info->AddMethod(dex_location, checksum, method_index, kMaxMethodIds, pmi);
   }
 
   bool AddClass(const std::string& dex_location,
                 uint32_t checksum,
                 uint16_t class_index,
                 ProfileCompilationInfo* info) {
-    return info->AddMethodIndex(dex_location, checksum, class_index);
+    return info->AddMethodIndex(dex_location, checksum, class_index, kMaxMethodIds);
   }
 
   uint32_t GetFd(const ScratchFile& file) {
@@ -149,7 +151,9 @@
         std::vector<TypeReference> classes;
         caches.emplace_back(dex_pc, /*is_missing_types*/true, classes);
       }
-      ProfileMethodInfo pmi(method->GetDexFile(), method->GetDexMethodIndex(), caches);
+      ProfileMethodInfo pmi(method->GetDexFile(),
+                            method->GetDexMethodIndex(),
+                            caches);
       profile_methods.push_back(pmi);
       profile_methods_map->Put(method, pmi);
     }
@@ -191,7 +195,8 @@
           const std::string& dex_key = ProfileCompilationInfo::GetProfileDexFileKey(
               class_ref.dex_file->GetLocation());
           offline_pmi.dex_references.emplace_back(dex_key,
-                                                  class_ref.dex_file->GetLocationChecksum());
+                                                  class_ref.dex_file->GetLocationChecksum(),
+                                                  class_ref.dex_file->NumMethodIds());
         }
       }
     }
@@ -201,6 +206,7 @@
   // Creates an offline profile used for testing inline caches.
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo() {
     ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
+
     // Monomorphic
     for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
@@ -231,9 +237,9 @@
 
     ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
 
-    pmi.dex_references.emplace_back("dex_location1", /* checksum */1);
-    pmi.dex_references.emplace_back("dex_location2", /* checksum */2);
-    pmi.dex_references.emplace_back("dex_location3", /* checksum */3);
+    pmi.dex_references.emplace_back("dex_location1", /* checksum */1, kMaxMethodIds);
+    pmi.dex_references.emplace_back("dex_location2", /* checksum */2, kMaxMethodIds);
+    pmi.dex_references.emplace_back("dex_location3", /* checksum */3, kMaxMethodIds);
 
     return pmi;
   }
@@ -694,8 +700,8 @@
 
   ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
   ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
-  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-  pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2);
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
+  pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2, kMaxMethodIds);
   for (uint16_t dex_pc = 1; dex_pc < 5; dex_pc++) {
     ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
     dex_pc_data.AddClass(0, dex::TypeIndex(0));
@@ -705,8 +711,8 @@
 
   ProfileCompilationInfo::InlineCacheMap* ic_map_reindexed = CreateInlineCacheMap();
   ProfileCompilationInfo::OfflineProfileMethodInfo pmi_reindexed(ic_map_reindexed);
-  pmi_reindexed.dex_references.emplace_back("dex_location2", /* checksum */ 2);
-  pmi_reindexed.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  pmi_reindexed.dex_references.emplace_back("dex_location2", /* checksum */ 2, kMaxMethodIds);
+  pmi_reindexed.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
   for (uint16_t dex_pc = 1; dex_pc < 5; dex_pc++) {
     ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
     dex_pc_data.AddClass(1, dex::TypeIndex(0));
@@ -761,7 +767,7 @@
   // Create a megamorphic inline cache.
   ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
   ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
-  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
   ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
   dex_pc_data.SetIsMegamorphic();
   ic_map->Put(/*dex_pc*/ 0, dex_pc_data);
@@ -791,7 +797,7 @@
   // Create an inline cache with missing types
   ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
   ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
-  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
   ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
   dex_pc_data.SetIsMissingTypes();
   ic_map->Put(/*dex_pc*/ 0, dex_pc_data);
@@ -839,4 +845,48 @@
   // This should fail since the test_info already contains data and the load would overwrite it.
   ASSERT_FALSE(test_info.Load(GetFd(profile)));
 }
+
+TEST_F(ProfileCompilationInfoTest, SampledMethodsTest) {
+  ProfileCompilationInfo test_info;
+  static constexpr size_t kNumMethods = 1000;
+  static constexpr size_t kChecksum1 = 1234;
+  static constexpr size_t kChecksum2 = 4321;
+  static const std::string kDex1 = "dex1";
+  static const std::string kDex2 = "dex2";
+  test_info.AddSampledMethod(true, kDex1, kChecksum1, 1, kNumMethods);
+  test_info.AddSampledMethod(true, kDex1, kChecksum1, 5, kNumMethods);
+  test_info.AddSampledMethod(false, kDex2, kChecksum2, 1, kNumMethods);
+  test_info.AddSampledMethod(false, kDex2, kChecksum2, 5, kNumMethods);
+  auto run_test = [](const ProfileCompilationInfo& info) {
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 0));
+    EXPECT_TRUE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 1));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 3));
+    EXPECT_TRUE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 5));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 6));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex2, kChecksum2, 1));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex2, kChecksum2, 5));
+  };
+  run_test(test_info);
+
+  // Save the profile.
+  ScratchFile profile;
+  ASSERT_TRUE(test_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+
+  // Load the profile and make sure we can read the data and it matches what we expect.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  run_test(loaded_info);
+
+  // Test that the bitmap gets merged properly.
+  EXPECT_FALSE(test_info.IsStartupOrHotMethod(kDex1, kChecksum1, 11));
+  {
+    ProfileCompilationInfo merge_info;
+    merge_info.AddSampledMethod(true, kDex1, kChecksum1, 11, kNumMethods);
+    test_info.MergeWith(merge_info);
+  }
+  EXPECT_TRUE(test_info.IsStartupOrHotMethod(kDex1, kChecksum1, 11));
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index bc829cf..c96ca88 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -183,8 +183,11 @@
 // Excludes native methods and classes in the boot image.
 class GetMethodsVisitor : public ClassVisitor {
  public:
-  GetMethodsVisitor(std::vector<MethodReference>* methods, uint32_t startup_method_samples)
-    : methods_(methods),
+  GetMethodsVisitor(std::vector<MethodReference>* hot_methods,
+                    std::vector<MethodReference>* startup_methods,
+                    uint32_t startup_method_samples)
+    : hot_methods_(hot_methods),
+      startup_methods_(startup_methods),
       startup_method_samples_(startup_method_samples) {}
 
   virtual bool operator()(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -192,22 +195,26 @@
       return true;
     }
     for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
-      if (!method.IsNative()) {
-        if (method.GetCounter() >= startup_method_samples_ ||
-            method.GetProfilingInfo(kRuntimePointerSize) != nullptr ||
+      if (!method.IsNative() && !method.IsProxyMethod()) {
+        const uint16_t counter = method.GetCounter();
+        MethodReference ref(method.GetDexFile(), method.GetDexMethodIndex());
+        if (method.GetProfilingInfo(kRuntimePointerSize) != nullptr ||
             (method.GetAccessFlags() & kAccPreviouslyWarm) != 0) {
-          // Have samples, add to profile.
-          const DexFile* dex_file =
-              method.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetDexFile();
-          methods_->push_back(MethodReference(dex_file, method.GetDexMethodIndex()));
+          hot_methods_->push_back(ref);
+          startup_methods_->push_back(ref);
+        } else if (counter >= startup_method_samples_) {
+          startup_methods_->push_back(ref);
         }
+      } else {
+        CHECK_EQ(method.GetCounter(), 0u);
       }
     }
     return true;
   }
 
  private:
-  std::vector<MethodReference>* const methods_;
+  std::vector<MethodReference>* const hot_methods_;
+  std::vector<MethodReference>* const startup_methods_;
   uint32_t startup_method_samples_;
 };
 
@@ -218,7 +225,8 @@
   ResolveTrackedLocations();
 
   Thread* const self = Thread::Current();
-  std::vector<MethodReference> methods;
+  std::vector<MethodReference> hot_methods;
+  std::vector<MethodReference> startup_methods;
   std::set<DexCacheResolvedClasses> resolved_classes;
   {
     ScopedObjectAccess soa(self);
@@ -231,10 +239,13 @@
 
     {
       ScopedTrace trace2("Get hot methods");
-      GetMethodsVisitor visitor(&methods, options_.GetStartupMethodSamples());
+      GetMethodsVisitor visitor(&hot_methods,
+                                &startup_methods,
+                                options_.GetStartupMethodSamples());
       class_linker->VisitClasses(&visitor);
-      VLOG(profiler) << "Methods with samples greater than "
-                     << options_.GetStartupMethodSamples() << " = " << methods.size();
+      VLOG(profiler) << "Profile saver recorded " << hot_methods.size() << " hot methods and "
+                     << startup_methods.size() << " startup methods with threshold "
+                     << options_.GetStartupMethodSamples();
     }
   }
   MutexLock mu(self, *Locks::profiler_lock_);
@@ -245,11 +256,18 @@
     const std::string& filename = it.first;
     const std::set<std::string>& locations = it.second;
     std::vector<ProfileMethodInfo> profile_methods_for_location;
-    for (const MethodReference& ref : methods) {
+    for (const MethodReference& ref : hot_methods) {
       if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
         profile_methods_for_location.emplace_back(ref.dex_file, ref.dex_method_index);
       }
     }
+    std::vector<MethodReference> startup_methods_for_locations;
+    for (const MethodReference& ref : startup_methods) {
+      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
+        startup_methods_for_locations.push_back(ref);
+      }
+    }
+
     for (const DexCacheResolvedClasses& classes : resolved_classes) {
       if (locations.find(classes.GetBaseLocation()) != locations.end()) {
         VLOG(profiler) << "Added " << classes.GetClasses().size() << " classes for location "
@@ -265,8 +283,8 @@
         new ProfileCompilationInfo(Runtime::Current()->GetArenaPool()));
 
     ProfileCompilationInfo* cached_info = info_it->second;
-    cached_info->AddMethodsAndClasses(profile_methods_for_location,
-                                      resolved_classes_for_location);
+    cached_info->AddMethodsAndClasses(profile_methods_for_location, resolved_classes_for_location);
+    cached_info->AddSampledMethods(/*startup*/ true, startup_methods_for_locations);
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
@@ -317,8 +335,7 @@
       uint64_t last_save_number_of_methods = info.GetNumberOfMethods();
       uint64_t last_save_number_of_classes = info.GetNumberOfResolvedClasses();
 
-      info.AddMethodsAndClasses(profile_methods,
-                                std::set<DexCacheResolvedClasses>());
+      info.AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
       auto profile_cache_it = profile_cache_.find(filename);
       if (profile_cache_it != profile_cache_.end()) {
         info.MergeWith(*(profile_cache_it->second));
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index 0148a1c..3ff94f9 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -28,7 +28,7 @@
 #include "lock_word.h"
 #include "mirror/object-inl.h"
 #include "nth_caller_visitor.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
@@ -123,8 +123,8 @@
   monitors.Dump(os);
 }
 
-void JNIEnvExt::PushFrame(int capacity ATTRIBUTE_UNUSED) {
-  // TODO: take 'capacity' into account.
+void JNIEnvExt::PushFrame(int capacity) {
+  DCHECK_GE(locals.FreeCapacity(), static_cast<size_t>(capacity));
   stacked_local_ref_cookies.push_back(local_ref_cookie);
   local_ref_cookie = locals.GetSegmentState();
 }
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 60e4295..af933ae 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -22,7 +22,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
-#include "object_callbacks.h"
 #include "obj_ptr.h"
 #include "reference_table.h"
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 0fde41b..6be0953 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -106,10 +106,9 @@
 static void ReportInvalidJNINativeMethod(const ScopedObjectAccess& soa,
                                          ObjPtr<mirror::Class> c,
                                          const char* kind,
-                                         jint idx,
-                                         bool return_errors)
+                                         jint idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
+  LOG(ERROR)
       << "Failed to register native method in " << c->PrettyDescriptor()
       << " in " << c->GetDexCache()->GetLocation()->ToModifiedUtf8()
       << ": " << kind << " is null at index " << idx;
@@ -2145,13 +2144,10 @@
                                                                      buf);
   }
 
-  static jint RegisterNatives(JNIEnv* env, jclass java_class, const JNINativeMethod* methods,
+  static jint RegisterNatives(JNIEnv* env,
+                              jclass java_class,
+                              const JNINativeMethod* methods,
                               jint method_count) {
-    return RegisterNativeMethods(env, java_class, methods, method_count, true);
-  }
-
-  static jint RegisterNativeMethods(JNIEnv* env, jclass java_class, const JNINativeMethod* methods,
-                                    jint method_count, bool return_errors) {
     if (UNLIKELY(method_count < 0)) {
       JavaVmExtFromEnv(env)->JniAbortF("RegisterNatives", "negative method count: %d",
                                        method_count);
@@ -2172,13 +2168,13 @@
       const char* sig = methods[i].signature;
       const void* fnPtr = methods[i].fnPtr;
       if (UNLIKELY(name == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c.Get(), "method name", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "method name", i);
         return JNI_ERR;
       } else if (UNLIKELY(sig == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c.Get(), "method signature", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "method signature", i);
         return JNI_ERR;
       } else if (UNLIKELY(fnPtr == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c.Get(), "native function", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "native function", i);
         return JNI_ERR;
       }
       bool is_fast = false;
@@ -2244,19 +2240,15 @@
       }
 
       if (m == nullptr) {
-        c->DumpClass(
-            LOG_STREAM(return_errors
-                           ? ::android::base::ERROR
-                           : ::android::base::FATAL_WITHOUT_ABORT),
-            mirror::Class::kDumpClassFullDetail);
-        LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
+        c->DumpClass(LOG_STREAM(ERROR), mirror::Class::kDumpClassFullDetail);
+        LOG(ERROR)
             << "Failed to register native method "
             << c->PrettyDescriptor() << "." << name << sig << " in "
             << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
         ThrowNoSuchMethodError(soa, c.Get(), name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
-        LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
+        LOG(ERROR)
             << "Failed to register non-native method "
             << c->PrettyDescriptor() << "." << name << sig
             << " as native";
@@ -2407,18 +2399,18 @@
   static jint EnsureLocalCapacityInternal(ScopedObjectAccess& soa, jint desired_capacity,
                                           const char* caller)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    // TODO: we should try to expand the table if necessary.
-    if (desired_capacity < 0 || desired_capacity > static_cast<jint>(kLocalsInitial)) {
+    if (desired_capacity < 0) {
       LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
       return JNI_ERR;
     }
-    // TODO: this isn't quite right, since "capacity" includes holes.
-    const size_t capacity = soa.Env()->locals.Capacity();
-    bool okay = (static_cast<jint>(kLocalsInitial - capacity) >= desired_capacity);
-    if (!okay) {
-      soa.Self()->ThrowOutOfMemoryError(caller);
+
+    std::string error_msg;
+    if (!soa.Env()->locals.EnsureFreeCapacity(static_cast<size_t>(desired_capacity), &error_msg)) {
+      std::string caller_error = android::base::StringPrintf("%s: %s", caller, error_msg.c_str());
+      soa.Self()->ThrowOutOfMemoryError(caller_error.c_str());
+      return JNI_ERR;
     }
-    return okay ? JNI_OK : JNI_ERR;
+    return JNI_OK;
   }
 
   template<typename JniT, typename ArtT>
@@ -3051,16 +3043,6 @@
   return reinterpret_cast<JNINativeInterface*>(&gJniSleepForeverStub);
 }
 
-void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
-                           jint method_count) {
-  ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
-  if (c.get() == nullptr) {
-    LOG(FATAL) << "Couldn't find class: " << jni_class_name;
-  }
-  jint jni_result = env->RegisterNatives(c.get(), methods, method_count);
-  CHECK_EQ(JNI_OK, jni_result);
-}
-
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs) {
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 24bee6f..2c90b3b 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -22,9 +22,6 @@
 
 #include "base/macros.h"
 
-#define REGISTER_NATIVE_METHODS(jni_class_name) \
-  RegisterNativeMethods(env, jni_class_name, gMethods, arraysize(gMethods))
-
 namespace art {
 
 class ArtField;
@@ -33,11 +30,6 @@
 const JNINativeInterface* GetJniNativeInterface();
 const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
-// Similar to RegisterNatives except its passed a descriptor for a class name and failures are
-// fatal.
-void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
-                           jint method_count);
-
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
 
 namespace jni {
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 08d1eeb..e1e4f9c 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1908,9 +1908,6 @@
 
   // Negative capacities are not allowed.
   ASSERT_EQ(JNI_ERR, env_->PushLocalFrame(-1));
-
-  // And it's okay to have an upper limit. Ours is currently 512.
-  ASSERT_EQ(JNI_ERR, env_->PushLocalFrame(8192));
 }
 
 TEST_F(JniInternalTest, PushLocalFrame_PopLocalFrame) {
@@ -1962,6 +1959,28 @@
   check_jni_abort_catcher.Check("use of deleted local reference");
 }
 
+TEST_F(JniInternalTest, PushLocalFrame_LimitAndOverflow) {
+  // Try a very large value that should fail.
+  ASSERT_NE(JNI_OK, env_->PushLocalFrame(std::numeric_limits<jint>::max()));
+  ASSERT_TRUE(env_->ExceptionCheck());
+  env_->ExceptionClear();
+
+  // On 32-bit, also check for some overflow conditions.
+#ifndef __LP64__
+  ASSERT_EQ(JNI_OK, env_->PushLocalFrame(10));
+  ASSERT_NE(JNI_OK, env_->PushLocalFrame(std::numeric_limits<jint>::max() - 10));
+  ASSERT_TRUE(env_->ExceptionCheck());
+  env_->ExceptionClear();
+  EXPECT_EQ(env_->PopLocalFrame(nullptr), nullptr);
+#endif
+}
+
+TEST_F(JniInternalTest, PushLocalFrame_b62223672) {
+  // The 512 entry limit has been lifted, try a larger value.
+  ASSERT_EQ(JNI_OK, env_->PushLocalFrame(1024));
+  EXPECT_EQ(env_->PopLocalFrame(nullptr), nullptr);
+}
+
 TEST_F(JniInternalTest, NewGlobalRef_nullptr) {
   EXPECT_EQ(env_->NewGlobalRef(nullptr), nullptr);
 }
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
index e9db9b8..3f01fc3 100644
--- a/runtime/linear_alloc.cc
+++ b/runtime/linear_alloc.cc
@@ -16,7 +16,7 @@
 
 #include "linear_alloc.h"
 
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/managed_stack-inl.h b/runtime/managed_stack-inl.h
new file mode 100644
index 0000000..f3f31cf
--- /dev/null
+++ b/runtime/managed_stack-inl.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MANAGED_STACK_INL_H_
+#define ART_RUNTIME_MANAGED_STACK_INL_H_
+
+#include "managed_stack.h"
+
+#include <cstring>
+#include <stdint.h>
+#include <string>
+
+#include "stack.h"
+
+namespace art {
+
+inline ShadowFrame* ManagedStack::PushShadowFrame(ShadowFrame* new_top_frame) {
+  DCHECK(top_quick_frame_ == nullptr);
+  ShadowFrame* old_frame = top_shadow_frame_;
+  top_shadow_frame_ = new_top_frame;
+  new_top_frame->SetLink(old_frame);
+  return old_frame;
+}
+
+inline ShadowFrame* ManagedStack::PopShadowFrame() {
+  DCHECK(top_quick_frame_ == nullptr);
+  CHECK(top_shadow_frame_ != nullptr);
+  ShadowFrame* frame = top_shadow_frame_;
+  top_shadow_frame_ = frame->GetLink();
+  return frame;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_MANAGED_STACK_INL_H_
diff --git a/runtime/managed_stack.cc b/runtime/managed_stack.cc
new file mode 100644
index 0000000..be609c3
--- /dev/null
+++ b/runtime/managed_stack.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_stack-inl.h"
+
+#include "android-base/stringprintf.h"
+
+#include "art_method.h"
+#include "mirror/object.h"
+#include "stack_reference.h"
+
+namespace art {
+
+size_t ManagedStack::NumJniShadowFrameReferences() const {
+  size_t count = 0;
+  for (const ManagedStack* current_fragment = this; current_fragment != nullptr;
+       current_fragment = current_fragment->GetLink()) {
+    for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_;
+         current_frame != nullptr;
+         current_frame = current_frame->GetLink()) {
+      if (current_frame->GetMethod()->IsNative()) {
+        // The JNI ShadowFrame only contains references. (For indirect reference.)
+        count += current_frame->NumberOfVRegs();
+      }
+    }
+  }
+  return count;
+}
+
+bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const {
+  for (const ManagedStack* current_fragment = this; current_fragment != nullptr;
+       current_fragment = current_fragment->GetLink()) {
+    for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_;
+         current_frame != nullptr;
+         current_frame = current_frame->GetLink()) {
+      if (current_frame->Contains(shadow_frame_entry)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+}  // namespace art
diff --git a/runtime/managed_stack.h b/runtime/managed_stack.h
new file mode 100644
index 0000000..8337f96
--- /dev/null
+++ b/runtime/managed_stack.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MANAGED_STACK_H_
+#define ART_RUNTIME_MANAGED_STACK_H_
+
+#include <cstring>
+#include <stdint.h>
+#include <string>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+
+namespace art {
+
+namespace mirror {
+class Object;
+}  // namespace mirror
+
+class ArtMethod;
+class ShadowFrame;
+template <typename T> class StackReference;
+
+// The managed stack is used to record fragments of managed code stacks. Managed code stacks
+// may either be shadow frames or lists of frames using fixed frame sizes. Transition records are
+// necessary for transitions between code using different frame layouts and transitions into native
+// code.
+class PACKED(4) ManagedStack {
+ public:
+  ManagedStack()
+      : top_quick_frame_(nullptr), link_(nullptr), top_shadow_frame_(nullptr) {}
+
+  void PushManagedStackFragment(ManagedStack* fragment) {
+    // Copy this top fragment into given fragment.
+    memcpy(fragment, this, sizeof(ManagedStack));
+    // Clear this fragment, which has become the top.
+    memset(this, 0, sizeof(ManagedStack));
+    // Link our top fragment onto the given fragment.
+    link_ = fragment;
+  }
+
+  void PopManagedStackFragment(const ManagedStack& fragment) {
+    DCHECK(&fragment == link_);
+    // Copy the given fragment back to the top.
+    memcpy(this, &fragment, sizeof(ManagedStack));
+  }
+
+  ManagedStack* GetLink() const {
+    return link_;
+  }
+
+  ArtMethod** GetTopQuickFrame() const {
+    return top_quick_frame_;
+  }
+
+  void SetTopQuickFrame(ArtMethod** top) {
+    DCHECK(top_shadow_frame_ == nullptr);
+    top_quick_frame_ = top;
+  }
+
+  static size_t TopQuickFrameOffset() {
+    return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_);
+  }
+
+  ALWAYS_INLINE ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame);
+  ALWAYS_INLINE ShadowFrame* PopShadowFrame();
+
+  ShadowFrame* GetTopShadowFrame() const {
+    return top_shadow_frame_;
+  }
+
+  void SetTopShadowFrame(ShadowFrame* top) {
+    DCHECK(top_quick_frame_ == nullptr);
+    top_shadow_frame_ = top;
+  }
+
+  static size_t TopShadowFrameOffset() {
+    return OFFSETOF_MEMBER(ManagedStack, top_shadow_frame_);
+  }
+
+  size_t NumJniShadowFrameReferences() const REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
+
+ private:
+  ArtMethod** top_quick_frame_;
+  ManagedStack* link_;
+  ShadowFrame* top_shadow_frame_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_MANAGED_STACK_H_
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6c39361..12793e4 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -952,6 +952,9 @@
 }
 
 void ZeroAndReleasePages(void* address, size_t length) {
+  if (length == 0) {
+    return;
+  }
   uint8_t* const mem_begin = reinterpret_cast<uint8_t*>(address);
   uint8_t* const mem_end = mem_begin + length;
   uint8_t* const page_begin = AlignUp(mem_begin, kPageSize);
diff --git a/runtime/method_handles.h b/runtime/method_handles.h
index e8a2dce..e02e620 100644
--- a/runtime/method_handles.h
+++ b/runtime/method_handles.h
@@ -23,6 +23,7 @@
 #include "handle.h"
 #include "jvalue.h"
 #include "mirror/class.h"
+#include "stack.h"
 
 namespace art {
 
diff --git a/runtime/mirror/accessible_object.h b/runtime/mirror/accessible_object.h
index 2581ac2..a217193 100644
--- a/runtime/mirror/accessible_object.h
+++ b/runtime/mirror/accessible_object.h
@@ -20,7 +20,6 @@
 #include "class.h"
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread.h"
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 51d9d24..7287a92 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -22,7 +22,6 @@
 #include "gc/allocator_type.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5122b37..c8d4557 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -23,13 +23,14 @@
 #include "art_method.h"
 #include "base/array_slice.h"
 #include "base/length_prefixed_array.h"
-#include "class_linker-inl.h"
+#include "class_linker.h"
 #include "class_loader.h"
 #include "common_throws.h"
+#include "dex_cache.h"
 #include "dex_file-inl.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "object_array-inl.h"
+#include "object_array.h"
 #include "object-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index dfb2788..dfdd162 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -29,7 +29,6 @@
 #include "modifiers.h"
 #include "object.h"
 #include "object_array.h"
-#include "object_callbacks.h"
 #include "primitive.h"
 #include "read_barrier_option.h"
 #include "stride_iterator.h"
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index 708665d..75a3800 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -23,7 +23,6 @@
 #include "gc_root.h"
 #include "object.h"
 #include "object_array.h"
-#include "object_callbacks.h"
 #include "string.h"
 
 namespace art {
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index f5ecdae..39c8ee0 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -19,9 +19,7 @@
 
 #include "class_loader.h"
 
-#include "base/mutex-inl.h"
 #include "class_table-inl.h"
-#include "obj_ptr-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 6e1f44b..381d96b 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -17,7 +17,10 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_LOADER_H_
 #define ART_RUNTIME_MIRROR_CLASS_LOADER_H_
 
+#include "base/mutex.h"
 #include "object.h"
+#include "object_reference.h"
+#include "obj_ptr.h"
 
 namespace art {
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index c95d92e..96e3475 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -23,6 +23,7 @@
 #include "gc/heap.h"
 #include "globals.h"
 #include "linear_alloc.h"
+#include "oat_file.h"
 #include "object.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
diff --git a/runtime/mirror/executable.h b/runtime/mirror/executable.h
index 6c465f6..8a28f66 100644
--- a/runtime/mirror/executable.h
+++ b/runtime/mirror/executable.h
@@ -20,7 +20,6 @@
 #include "accessible_object.h"
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 
 namespace art {
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index 222d709..40186a6 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -22,7 +22,6 @@
 #include "gc_root.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 
 namespace art {
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index baed5f1..95f829d 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -26,8 +26,7 @@
 #include "class-inl.h"
 #include "class_flags.h"
 #include "class_linker.h"
-#include "class_loader-inl.h"
-#include "dex_cache-inl.h"
+#include "dex_cache.h"
 #include "lock_word-inl.h"
 #include "monitor.h"
 #include "object_array-inl.h"
@@ -899,6 +898,36 @@
   return success;
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakReleaseObjectWithoutWriteBarrier(
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
+  if (kCheckTransaction) {  // Debug-check that kTransactionActive matches the runtime state.
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);  // The value that would be written.
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);  // The value expected to be read.
+  }
+  if (kTransactionActive) {  // Record the expected old value with the runtime before the CAS.
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+  // Swap the raw 32-bit reference in place; this function issues no write barrier itself.
+  bool success = atomic_addr->CompareExchangeWeakRelease(old_ref.reference_,
+                                                         new_ref.reference_);
+  return success;
+}
+
 template<bool kIsStatic,
          VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption,
diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h
index 58e7c20..69365af 100644
--- a/runtime/mirror/object-readbarrier-inl.h
+++ b/runtime/mirror/object-readbarrier-inl.h
@@ -221,6 +221,36 @@
   return success;
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongReleaseObjectWithoutWriteBarrier(
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
+  if (kCheckTransaction) {  // Debug-check that kTransactionActive matches the runtime state.
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {
+    VerifyObject(new_value);  // The value that would be written.
+  }
+  if (kVerifyFlags & kVerifyReads) {
+    VerifyObject(old_value);  // The value expected to be read.
+  }
+  if (kTransactionActive) {  // Record the expected old value with the runtime before the CAS.
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+  // Swap the raw 32-bit reference in place; this function issues no write barrier itself.
+  bool success = atomic_addr->CompareExchangeStrongRelease(old_ref.reference_,
+                                                           new_ref.reference_);
+  return success;
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/object-refvisitor-inl.h b/runtime/mirror/object-refvisitor-inl.h
index 49ab7c2..f5ab4dd 100644
--- a/runtime/mirror/object-refvisitor-inl.h
+++ b/runtime/mirror/object-refvisitor-inl.h
@@ -19,7 +19,9 @@
 
 #include "object-inl.h"
 
+#include "class_loader-inl.h"
 #include "class-refvisitor-inl.h"
+#include "dex_cache-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 35a1b73..9cf4252 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -350,10 +350,25 @@
   template<bool kTransactionActive,
            bool kCheckTransaction = true,
            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakReleaseObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                    ObjPtr<Object> old_value,
+                                                    ObjPtr<Object> new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
                                                       ObjPtr<Object> old_value,
                                                       ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongReleaseObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                      ObjPtr<Object> old_value,
+                                                      ObjPtr<Object> new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset);
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index a449b41..84e5494 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -19,7 +19,9 @@
 
 #include "reference.h"
 
+#include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
+#include "runtime.h"
 
 namespace art {
 namespace mirror {
@@ -47,6 +49,12 @@
   return SetFieldObjectVolatile<kTransactionActive>(ZombieOffset(), zombie);
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+inline Class* Reference::GetJavaLangRefReference() {
+  DCHECK(!java_lang_ref_Reference_.IsNull());  // The static class reference must be set.
+  return java_lang_ref_Reference_.Read<kReadBarrierOption>();
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index cfcbd5a..b10c294 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -18,6 +18,8 @@
 #define ART_RUNTIME_MIRROR_REFERENCE_H_
 
 #include "base/enums.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "class.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
@@ -97,10 +99,7 @@
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  static Class* GetJavaLangRefReference() REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(!java_lang_ref_Reference_.IsNull());
-    return java_lang_ref_Reference_.Read<kReadBarrierOption>();
-  }
+  static ALWAYS_INLINE Class* GetJavaLangRefReference() REQUIRES_SHARED(Locks::mutator_lock_);
   static void SetClass(ObjPtr<Class> klass);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index c00cf91..53de821 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -19,6 +19,7 @@
 #include "class.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "object-inl.h"
 #include "handle_scope-inl.h"
 #include "string.h"
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index d32d8dc..87e8a1f 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -19,7 +19,6 @@
 
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index de0e75b..80745d2 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -18,8 +18,10 @@
 
 #include "arch/memcmp16.h"
 #include "array.h"
+#include "base/array_ref.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "handle_scope-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index b59bbfb..7fbe8bd 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -20,7 +20,6 @@
 #include "gc_root.h"
 #include "gc/allocator_type.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e50409f..7027410 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -26,7 +26,9 @@
 #include "object-inl.h"
 #include "object_array.h"
 #include "object_array-inl.h"
+#include "object_callbacks.h"
 #include "stack_trace_element.h"
+#include "string.h"
 #include "utils.h"
 #include "well_known_classes.h"
 
@@ -169,5 +171,17 @@
   java_lang_Throwable_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+Object* Throwable::GetStackState() {  // Volatile read of the backtrace_ field.
+  return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
+}
+
+Object* Throwable::GetStackTrace() {  // NOTE(review): same field as GetStackState; confirm.
+  return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
+}
+
+String* Throwable::GetDetailMessage() {  // Non-volatile read of the detail_message_ field.
+  return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 0a4ab6f..fb45228 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -19,23 +19,22 @@
 
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
-#include "string.h"
 
 namespace art {
 
+class RootVisitor;
 struct ThrowableOffsets;
 
 namespace mirror {
 
+class String;
+
 // C++ mirror of java.lang.Throwable
 class MANAGED Throwable : public Object {
  public:
   void SetDetailMessage(ObjPtr<String> new_detail_message) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
-  }
+  String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_);
 
   std::string Dump() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -59,12 +58,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  Object* GetStackState() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
-  }
-  Object* GetStackTrace() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
-  }
+  Object* GetStackState() REQUIRES_SHARED(Locks::mutator_lock_);
+  Object* GetStackTrace() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Object> backtrace_;  // Note this is Java volatile:
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bb33047..a617818 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -31,7 +31,9 @@
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "object_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "verifier/method_verifier.h"
@@ -437,17 +439,11 @@
                     << " in " << ArtMethod::PrettyMethod(m) << " for "
                     << PrettyDuration(MsToNs(wait_ms));
               }
-              const char* owners_filename;
-              int32_t owners_line_number;
-              TranslateLocation(owners_method,
-                                owners_dex_pc,
-                                &owners_filename,
-                                &owners_line_number);
               LogContentionEvent(self,
                                  wait_ms,
                                  sample_percent,
-                                 owners_filename,
-                                 owners_line_number);
+                                 owners_method,
+                                 owners_dex_pc);
             }
           }
         }
diff --git a/runtime/monitor.h b/runtime/monitor.h
index e80d31c..96c5a5b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -30,13 +30,13 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "lock_word.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread_state.h"
 
 namespace art {
 
 class ArtMethod;
+class IsMarkedVisitor;
 class LockWord;
 template<class T> class Handle;
 class StackVisitor;
@@ -181,8 +181,11 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       NO_THREAD_SAFETY_ANALYSIS;  // For m->Install(self)
 
-  void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
-                          const char* owner_filename, int32_t owner_line_number)
+  void LogContentionEvent(Thread* self,
+                          uint32_t wait_ms,
+                          uint32_t sample_percent,
+                          ArtMethod* owner_method,
+                          uint32_t owner_dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static void FailedUnlock(mirror::Object* obj,
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 1dd60f8..74623da 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -15,96 +15,94 @@
  */
 
 #include "monitor.h"
-#include "thread.h"
 
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 
 #include <log/log.h>
+#include <log/log_event_list.h>
+
+#include "art_method.h"
+#include "thread.h"
 
 #define EVENT_LOG_TAG_dvm_lock_sample 20003
 
 namespace art {
 
-static void Set4LE(uint8_t* buf, uint32_t val) {
-  *buf++ = (uint8_t)(val);
-  *buf++ = (uint8_t)(val >> 8);
-  *buf++ = (uint8_t)(val >> 16);
-  *buf = (uint8_t)(val >> 24);
-}
+void Monitor::LogContentionEvent(Thread* self,
+                                 uint32_t wait_ms,
+                                 uint32_t sample_percent,
+                                 ArtMethod* owner_method,
+                                 uint32_t owner_dex_pc) {
+  android_log_event_list ctx(EVENT_LOG_TAG_dvm_lock_sample);
 
-static char* EventLogWriteInt(char* dst, int value) {
-  *dst++ = EVENT_TYPE_INT;
-  Set4LE(reinterpret_cast<uint8_t*>(dst), value);
-  return dst + 4;
-}
-
-static char* EventLogWriteString(char* dst, const char* value, size_t len) {
-  *dst++ = EVENT_TYPE_STRING;
-  len = len < 32 ? len : 32;
-  Set4LE(reinterpret_cast<uint8_t*>(dst), len);
-  dst += 4;
-  memcpy(dst, value, len);
-  return dst + len;
-}
-
-void Monitor::LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
-                                 const char* owner_filename, int32_t owner_line_number) {
-  // Emit the event list length, 1 byte.
-  char eventBuffer[174];
-  char* cp = eventBuffer;
-  *cp++ = 9;
+  const char* owner_filename;  // Filled in by TranslateLocation below.
+  int32_t owner_line_number;  // Filled in by TranslateLocation below.
+  TranslateLocation(owner_method, owner_dex_pc, &owner_filename, &owner_line_number);
 
   // Emit the process name, <= 37 bytes.
-  int fd = open("/proc/self/cmdline", O_RDONLY);
-  char procName[33];
-  memset(procName, 0, sizeof(procName));
-  read(fd, procName, sizeof(procName) - 1);
-  close(fd);
-  size_t len = strlen(procName);
-  cp = EventLogWriteString(cp, procName, len);
+  {
+    int fd = open("/proc/self/cmdline", O_RDONLY);
+    char procName[33];
+    memset(procName, 0, sizeof(procName));  // Zero-fill so the buffer stays NUL-terminated.
+    read(fd, procName, sizeof(procName) - 1);  // NOTE(review): open/read results unchecked.
+    close(fd);
+    ctx << procName;
+  }
 
-  // Emit the sensitive thread ("main thread") status, 5 bytes.
-  cp = EventLogWriteInt(cp, Thread::IsSensitiveThread());
+  // Emit the sensitive thread ("main thread") status. We follow tradition that this corresponds
+  // to a C++ bool's value, but be explicit.
+  constexpr uint32_t kIsSensitive = 1u;
+  constexpr uint32_t kIsNotSensitive = 0u;
+  ctx << (Thread::IsSensitiveThread() ? kIsSensitive : kIsNotSensitive);
 
-  // Emit self thread name string, <= 37 bytes.
-  std::string thread_name;
-  self->GetThreadName(thread_name);
-  cp = EventLogWriteString(cp, thread_name.c_str(), thread_name.size());
+  // Emit self thread name string.
+  {
+    std::string thread_name;
+    self->GetThreadName(thread_name);
+    ctx << thread_name;
+  }
 
-  // Emit the wait time, 5 bytes.
-  cp = EventLogWriteInt(cp, wait_ms);
+  // Emit the wait time.
+  ctx << wait_ms;
 
-  // Emit the source code file name, <= 37 bytes.
-  uint32_t pc;
-  ArtMethod* m = self->GetCurrentMethod(&pc);
-  const char* filename;
-  int32_t line_number;
-  TranslateLocation(m, pc, &filename, &line_number);
-  cp = EventLogWriteString(cp, filename, strlen(filename));
+  const char* filename = nullptr;  // Declared here: compared with owner_filename further down.
+  {
+    uint32_t pc;
+    ArtMethod* m = self->GetCurrentMethod(&pc);
+    int32_t line_number;
+    TranslateLocation(m, pc, &filename, &line_number);
 
-  // Emit the source code line number, 5 bytes.
-  cp = EventLogWriteInt(cp, line_number);
+    // Emit the source code file name of self's current method.
+    ctx << filename;
 
-  // Emit the lock owner source code file name, <= 37 bytes.
+    // Emit the source code line number of self's current method.
+    ctx << line_number;
+
+    // Emit the name of self's current method.
+    ctx << ArtMethod::PrettyMethod(m);
+  }
+
+  // Emit the lock owner source code file name.
   if (owner_filename == nullptr) {
     owner_filename = "";
   } else if (strcmp(filename, owner_filename) == 0) {
     // Common case, so save on log space.
     owner_filename = "-";
   }
-  cp = EventLogWriteString(cp, owner_filename, strlen(owner_filename));
+  ctx << owner_filename;
 
-  // Emit the source code line number, 5 bytes.
-  cp = EventLogWriteInt(cp, owner_line_number);
+  // Emit the lock owner source code line number.
+  ctx << owner_line_number;
 
-  // Emit the sample percentage, 5 bytes.
-  cp = EventLogWriteInt(cp, sample_percent);
+  // Emit the owner method name.
+  ctx << ArtMethod::PrettyMethod(owner_method);
 
-  CHECK_LE((size_t)(cp - eventBuffer), sizeof(eventBuffer));
-  android_btWriteLog(EVENT_LOG_TAG_dvm_lock_sample, EVENT_TYPE_LIST, eventBuffer,
-                     (size_t)(cp - eventBuffer));
+  // Emit the sample percentage.
+  ctx << sample_percent;
+
+  ctx << LOG_ID_EVENTS;  // Presumably writes the list to the events log - confirm in liblog.
 }
 
 }  // namespace art
diff --git a/runtime/monitor_linux.cc b/runtime/monitor_linux.cc
index 1c77ac0..6678661 100644
--- a/runtime/monitor_linux.cc
+++ b/runtime/monitor_linux.cc
@@ -18,7 +18,7 @@
 
 namespace art {
 
-void Monitor::LogContentionEvent(Thread*, uint32_t, uint32_t, const char*, int32_t) {
+void Monitor::LogContentionEvent(Thread*, uint32_t, uint32_t, ArtMethod*, uint32_t) {  // No-op.
 }
 
 }  // namespace art
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 0f4e238..48e9a6b 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -18,7 +18,7 @@
 
 #include "base/logging.h"
 #include "base/mutex-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "monitor.h"
 
 namespace art {
diff --git a/runtime/monitor_pool_test.cc b/runtime/monitor_pool_test.cc
index a111c6c..5463877 100644
--- a/runtime/monitor_pool_test.cc
+++ b/runtime/monitor_pool_test.cc
@@ -18,7 +18,7 @@
 
 #include "common_runtime_test.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 870402d..ad00966 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -31,6 +31,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "native_util.h"
 #include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index bb8233b..e1eae21 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -39,6 +39,7 @@
 #include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "scoped_fast_native_object_access-inl.h"
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index 18b871c..fed9c1c 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -52,6 +52,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "runtime.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 6c41d51..e86e64e 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -25,6 +25,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread_list.h"
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 4c6f530..31aeba0 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -28,10 +28,12 @@
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
+#include "native_util.h"
 #include "non_debuggable_classes.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
-#include "thread-inl.h"
+#include "stack.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "trace.h"
 
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 9e07a5c..d3377be 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -36,6 +36,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "native_util.h"
 #include "obj_ptr-inl.h"
 #include "reflection.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/native/java_lang_Object.cc b/runtime/native/java_lang_Object.cc
index c9841d1..d52bf04 100644
--- a/runtime/native/java_lang_Object.cc
+++ b/runtime/native/java_lang_Object.cc
@@ -20,6 +20,7 @@
 
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index 4928c01..ac0d633 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -24,6 +24,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "mirror/string-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
index c1292ef..9c2e918 100644
--- a/runtime/native/java_lang_StringFactory.cc
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -22,6 +22,7 @@
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 264b427..0e5d740 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -26,6 +26,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index f3254c4..e4d1705 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -22,6 +22,7 @@
 #include "jni_internal.h"
 #include "monitor.h"
 #include "mirror/object.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
diff --git a/runtime/native/java_lang_Throwable.cc b/runtime/native/java_lang_Throwable.cc
index b69fbef..03b7f9d 100644
--- a/runtime/native/java_lang_Throwable.cc
+++ b/runtime/native/java_lang_Throwable.cc
@@ -19,6 +19,7 @@
 #include "nativehelper/jni_macros.h"
 
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "thread.h"
 
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index 55955e7..fc50d55 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -22,6 +22,7 @@
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "obj_ptr.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/native/java_lang_Void.cc b/runtime/native/java_lang_Void.cc
index b0d63ef..af83dd1 100644
--- a/runtime/native/java_lang_Void.cc
+++ b/runtime/native/java_lang_Void.cc
@@ -20,6 +20,7 @@
 
 #include "class_linker-inl.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "runtime.h"
 #include "scoped_fast_native_object_access-inl.h"
 
diff --git a/runtime/native/java_lang_invoke_MethodHandleImpl.cc b/runtime/native/java_lang_invoke_MethodHandleImpl.cc
index 63168ce..2e3b4d4 100644
--- a/runtime/native/java_lang_invoke_MethodHandleImpl.cc
+++ b/runtime/native/java_lang_invoke_MethodHandleImpl.cc
@@ -24,6 +24,7 @@
 #include "mirror/field.h"
 #include "mirror/method.h"
 #include "mirror/method_handle_impl.h"
+#include "native_util.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 
diff --git a/runtime/native/java_lang_ref_FinalizerReference.cc b/runtime/native/java_lang_ref_FinalizerReference.cc
index c75837a..72af5f7 100644
--- a/runtime/native/java_lang_ref_FinalizerReference.cc
+++ b/runtime/native/java_lang_ref_FinalizerReference.cc
@@ -23,6 +23,7 @@
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_ref_Reference.cc b/runtime/native/java_lang_ref_Reference.cc
index 606e656..524a18c 100644
--- a/runtime/native/java_lang_ref_Reference.cc
+++ b/runtime/native/java_lang_ref_Reference.cc
@@ -23,6 +23,7 @@
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 9662395..5be3171 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -21,11 +21,12 @@
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
+#include "handle_scope-inl.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
-#include "handle_scope-inl.h"
 
 namespace art {
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index d1953ad..242e87a 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -27,6 +27,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index 256a3d0..2aad12d 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -27,6 +27,7 @@
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index e38bcd6..f19004d 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -28,6 +28,7 @@
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/field.h"
+#include "native_util.h"
 #include "reflection-inl.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "utils.h"
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index c9e8dba..cbbb6a8 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -27,6 +27,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Parameter.cc b/runtime/native/java_lang_reflect_Parameter.cc
index 92a7ac9..c4ab5d6 100644
--- a/runtime/native/java_lang_reflect_Parameter.cc
+++ b/runtime/native/java_lang_reflect_Parameter.cc
@@ -24,6 +24,7 @@
 #include "dex_file-inl.h"
 #include "dex_file_annotations.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "utils.h"
 
diff --git a/runtime/native/java_lang_reflect_Proxy.cc b/runtime/native/java_lang_reflect_Proxy.cc
index 518aaa7..691ed28 100644
--- a/runtime/native/java_lang_reflect_Proxy.cc
+++ b/runtime/native/java_lang_reflect_Proxy.cc
@@ -23,6 +23,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object_array.h"
 #include "mirror/string.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "verify_object.h"
 
diff --git a/runtime/native/java_util_concurrent_atomic_AtomicLong.cc b/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
index 101f386..bd4b0fe 100644
--- a/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
+++ b/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
@@ -21,6 +21,7 @@
 #include "arch/instruction_set.h"
 #include "atomic.h"
 #include "jni_internal.h"
+#include "native_util.h"
 
 namespace art {
 
diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc
index c388ea1..38634e6 100644
--- a/runtime/native/libcore_util_CharsetUtils.cc
+++ b/runtime/native/libcore_util_CharsetUtils.cc
@@ -23,6 +23,7 @@
 #include "jni_internal.h"
 #include "mirror/string.h"
 #include "mirror/string-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedPrimitiveArray.h"
 #include "unicode/utf16.h"
diff --git a/runtime/native/native_util.h b/runtime/native/native_util.h
new file mode 100644
index 0000000..98384e0
--- /dev/null
+++ b/runtime/native/native_util.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_NATIVE_UTIL_H_
+#define ART_RUNTIME_NATIVE_NATIVE_UTIL_H_
+
+#include <jni.h>
+
+#include "android-base/logging.h"
+#include "base/macros.h"
+#include "ScopedLocalRef.h"
+
+namespace art {
+
+ALWAYS_INLINE inline void RegisterNativeMethodsInternal(JNIEnv* env,
+                                                        const char* jni_class_name,
+                                                        const JNINativeMethod* methods,
+                                                        jint method_count) {
+  ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));  // Look up class by name.
+  if (c.get() == nullptr) {  // FindClass() found nothing.
+    LOG(FATAL) << "Couldn't find class: " << jni_class_name;  // Logged at FATAL severity.
+  }
+  jint jni_result = env->RegisterNatives(c.get(), methods, method_count);  // Bind natives.
+  CHECK_EQ(JNI_OK, jni_result);  // Anything other than JNI_OK fails the check.
+}
+
+#define REGISTER_NATIVE_METHODS(jni_class_name)  /* Uses the caller's env and gMethods. */ \
+  RegisterNativeMethodsInternal(env, (jni_class_name), gMethods, arraysize(gMethods))
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_NATIVE_UTIL_H_
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
index a860977..925b909 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
@@ -21,6 +21,7 @@
 #include "base/logging.h"
 #include "debugger.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedPrimitiveArray.h"
 
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 71c5b50..0a254ac 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -23,6 +23,7 @@
 #include "debugger.h"
 #include "gc/heap.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index b42cedf..e78c9da 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -29,9 +29,9 @@
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
-
 namespace art {
 
 static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index cbc5024..cbff0bb 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -45,7 +45,7 @@
 #include "base/unix_file/fd_file.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 #endif
diff --git a/runtime/non_debuggable_classes.cc b/runtime/non_debuggable_classes.cc
index 829ea65..9cc7e60 100644
--- a/runtime/non_debuggable_classes.cc
+++ b/runtime/non_debuggable_classes.cc
@@ -21,7 +21,7 @@
 #include "mirror/class-inl.h"
 #include "obj_ptr-inl.h"
 #include "ScopedLocalRef.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 06c76b5..a6d2eba 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -39,9 +39,10 @@
 class ElfFile;
 template <class MirrorType> class GcRoot;
 class MemMap;
-class OatMethodOffsets;
-class OatHeader;
 class OatDexFile;
+class OatHeader;
+class OatMethodOffsets;
+class OatQuickMethodHeader;
 class VdexFile;
 
 namespace gc {
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index b2b86ee..c202916 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -28,7 +28,7 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index c1cf800..630945a 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -29,6 +29,7 @@
 #include "base/systrace.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
+#include "dex_file_tracking_registrar.h"
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
@@ -38,7 +39,7 @@
 #include "oat_file_assistant.h"
 #include "obj_ptr-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
@@ -737,6 +738,11 @@
             // Successfully added image space to heap, release the map so that it does not get
             // freed.
             image_space.release();
+
+            // Register for tracking.
+            for (const auto& dex_file : dex_files) {
+              dex::tracking::RegisterDexFile(dex_file.get());
+            }
           } else {
             LOG(INFO) << "Failed to add image file " << temp_error_msg;
             dex_files.clear();
@@ -756,6 +762,11 @@
     if (!added_image_space) {
       DCHECK(dex_files.empty());
       dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+
+      // Register for tracking.
+      for (const auto& dex_file : dex_files) {
+        dex::tracking::RegisterDexFile(dex_file.get());
+      }
     }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
diff --git a/runtime/obj_ptr-inl.h b/runtime/obj_ptr-inl.h
index f2921da..3d9b3c6 100644
--- a/runtime/obj_ptr-inl.h
+++ b/runtime/obj_ptr-inl.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_OBJ_PTR_INL_H_
 
 #include "obj_ptr.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 9be486e..45773fd 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -46,7 +46,7 @@
 #include "object_tagging.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_class.h"
 #include "ti_dump.h"
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 0ec92b7..320c59c 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -44,7 +44,7 @@
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
index be6edef..01c24b1 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -41,7 +41,7 @@
 #include "globals.h"
 #include "jvmti.h"
 #include "mirror/object.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index dd90a71..0aa93df 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -63,7 +63,7 @@
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_class_loader.h"
 #include "ti_phase.h"
diff --git a/runtime/openjdkjvmti/ti_dump.cc b/runtime/openjdkjvmti/ti_dump.cc
index d9e3ef1..7a1e53f 100644
--- a/runtime/openjdkjvmti/ti_dump.cc
+++ b/runtime/openjdkjvmti/ti_dump.cc
@@ -39,7 +39,7 @@
 #include "events-inl.h"
 #include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace openjdkjvmti {
diff --git a/runtime/openjdkjvmti/ti_field.cc b/runtime/openjdkjvmti/ti_field.cc
index 1e5fbda..342d8be 100644
--- a/runtime/openjdkjvmti/ti_field.cc
+++ b/runtime/openjdkjvmti/ti_field.cc
@@ -39,7 +39,7 @@
 #include "mirror/object_array-inl.h"
 #include "modifiers.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index 99774c6..319b1c2 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -35,6 +35,7 @@
 #include "primitive.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
diff --git a/runtime/openjdkjvmti/ti_jni.cc b/runtime/openjdkjvmti/ti_jni.cc
index 88f0395..dd2dda1 100644
--- a/runtime/openjdkjvmti/ti_jni.cc
+++ b/runtime/openjdkjvmti/ti_jni.cc
@@ -38,7 +38,7 @@
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
index f7e5347..beb639e 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -42,7 +42,7 @@
 #include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_phase.h"
 
diff --git a/runtime/openjdkjvmti/ti_monitor.cc b/runtime/openjdkjvmti/ti_monitor.cc
index 645faea..61bf533 100644
--- a/runtime/openjdkjvmti/ti_monitor.cc
+++ b/runtime/openjdkjvmti/ti_monitor.cc
@@ -39,7 +39,7 @@
 #include "art_jvmti.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_object.cc b/runtime/openjdkjvmti/ti_object.cc
index bf84499..2506aca 100644
--- a/runtime/openjdkjvmti/ti_object.cc
+++ b/runtime/openjdkjvmti/ti_object.cc
@@ -34,7 +34,7 @@
 #include "art_jvmti.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index 941cf7b..3c8bdc6 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -38,7 +38,7 @@
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_thread.h"
 
diff --git a/runtime/openjdkjvmti/ti_properties.cc b/runtime/openjdkjvmti/ti_properties.cc
index 8ee5366..e399b48 100644
--- a/runtime/openjdkjvmti/ti_properties.cc
+++ b/runtime/openjdkjvmti/ti_properties.cc
@@ -40,7 +40,7 @@
 
 #include "art_jvmti.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "ti_phase.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index ca3a0e6..b382a3e 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -48,6 +48,7 @@
 #include "gc/allocation_listener.h"
 #include "gc/heap.h"
 #include "instrumentation.h"
+#include "intern_table.h"
 #include "jdwp/jdwp.h"
 #include "jdwp/jdwp_constants.h"
 #include "jdwp/jdwp_event.h"
@@ -452,7 +453,30 @@
 
 art::mirror::DexCache* Redefiner::ClassRedefinition::CreateNewDexCache(
     art::Handle<art::mirror::ClassLoader> loader) {
-  return driver_->runtime_->GetClassLinker()->RegisterDexFile(*dex_file_, loader.Get()).Ptr();
+  art::StackHandleScope<2> hs(driver_->self_);  // One slot each for cache and location.
+  art::ClassLinker* cl = driver_->runtime_->GetClassLinker();
+  art::Handle<art::mirror::DexCache> cache(hs.NewHandle(
+      art::ObjPtr<art::mirror::DexCache>::DownCast(
+          cl->GetClassRoot(art::ClassLinker::kJavaLangDexCache)->AllocObject(driver_->self_))));
+  if (cache.IsNull()) {  // DexCache allocation failed.
+    driver_->self_->AssertPendingOOMException();
+    return nullptr;
+  }
+  art::Handle<art::mirror::String> location(hs.NewHandle(
+      cl->GetInternTable()->InternStrong(dex_file_->GetLocation().c_str())));
+  if (location.IsNull()) {  // Interning the location string failed.
+    driver_->self_->AssertPendingOOMException();
+    return nullptr;
+  }
+  art::WriterMutexLock mu(driver_->self_, *art::Locks::dex_lock_);  // Write-lock dex_lock_.
+  art::mirror::DexCache::InitializeDexCache(driver_->self_,
+                                            cache.Get(),
+                                            location.Get(),
+                                            dex_file_.get(),
+                                            loader.IsNull() ? driver_->runtime_->GetLinearAlloc()
+                                                            : loader->GetAllocator(),
+                                            art::kRuntimePointerSize);
+  return cache.Get();  // Not registered here; see RegisterExistingDexCache().
 }
 
 void Redefiner::RecordFailure(jvmtiError result,
@@ -1293,8 +1317,10 @@
 
   // At this point we can no longer fail without corrupting the runtime state.
   for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
+    art::ClassLinker* cl = runtime_->GetClassLinker();
+    cl->RegisterExistingDexCache(data.GetNewDexCache(), data.GetSourceClassLoader());
     if (data.GetSourceClassLoader() == nullptr) {
-      runtime_->GetClassLinker()->AppendToBootClassPath(self_, data.GetRedefinition().GetDexFile());
+      cl->AppendToBootClassPath(self_, data.GetRedefinition().GetDexFile());
     }
   }
   UnregisterAllBreakpoints();
diff --git a/runtime/openjdkjvmti/ti_search.cc b/runtime/openjdkjvmti/ti_search.cc
index ec139f2..6e0196e 100644
--- a/runtime/openjdkjvmti/ti_search.cc
+++ b/runtime/openjdkjvmti/ti_search.cc
@@ -49,7 +49,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "ti_phase.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 1ddf04f..22da2d2 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -52,7 +52,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "thread_pool.h"
 #include "well_known_classes.h"
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index 3dfa633..2cc2a26 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -49,7 +49,7 @@
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/openjdkjvmti/ti_threadgroup.cc b/runtime/openjdkjvmti/ti_threadgroup.cc
index dd7be11..c0597ad 100644
--- a/runtime/openjdkjvmti/ti_threadgroup.cc
+++ b/runtime/openjdkjvmti/ti_threadgroup.cc
@@ -45,7 +45,7 @@
 #include "object_lock.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index db77490..2d06e54 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -63,7 +63,7 @@
         // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
         // updates before us, but it's OK.
         if (kAlwaysUpdateField && ref != old_ref) {
-          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+          obj->CasFieldStrongReleaseObjectWithoutWriteBarrier<false, false>(
               offset, old_ref, ref);
         }
       }
@@ -81,7 +81,7 @@
         ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
         // Update the field atomically. This may fail if mutator updates before us, but it's ok.
         if (ref != old_ref) {
-          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+          obj->CasFieldStrongReleaseObjectWithoutWriteBarrier<false, false>(
               offset, old_ref, ref);
         }
       }
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 8423e04..010c6f8 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -26,7 +26,6 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index e809ecf..260be8f 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -29,7 +29,7 @@
 #include "primitive.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/runtime_common.cc b/runtime/runtime_common.cc
index 5511fb7..940e461 100644
--- a/runtime/runtime_common.cc
+++ b/runtime/runtime_common.cc
@@ -29,7 +29,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "native_stack_dump.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index e638fdb..b54f587 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -46,6 +46,7 @@
 
   SafeMap() = default;
   SafeMap(const SafeMap&) = default;
+  SafeMap(SafeMap&&) = default;
   explicit SafeMap(const key_compare& cmp, const allocator_type& allocator = allocator_type())
     : map_(cmp, allocator) {
   }
@@ -151,6 +152,11 @@
     return map_ == rhs.map_;
   }
 
+  template <class... Args>
+  std::pair<iterator, bool> emplace(Args&&... args) {
+    return map_.emplace(std::forward<Args>(args)...);  // Forwards to the wrapped std::map.
+  }
+
  private:
   ::std::map<K, V, Comparator, Allocator> map_;
 };
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index ed6e349..aa96871 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -22,6 +22,7 @@
 #include "base/casts.h"
 #include "jni_env_ext-inl.h"
 #include "obj_ptr-inl.h"
+#include "runtime.h"
 #include "thread-inl.h"
 
 namespace art {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5c6eead..aedcc1e 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -29,6 +29,7 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "linear_alloc.h"
+#include "managed_stack.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -68,34 +69,6 @@
   }
 }
 
-size_t ManagedStack::NumJniShadowFrameReferences() const {
-  size_t count = 0;
-  for (const ManagedStack* current_fragment = this; current_fragment != nullptr;
-       current_fragment = current_fragment->GetLink()) {
-    for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_; current_frame != nullptr;
-         current_frame = current_frame->GetLink()) {
-      if (current_frame->GetMethod()->IsNative()) {
-        // The JNI ShadowFrame only contains references. (For indirect reference.)
-        count += current_frame->NumberOfVRegs();
-      }
-    }
-  }
-  return count;
-}
-
-bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const {
-  for (const ManagedStack* current_fragment = this; current_fragment != nullptr;
-       current_fragment = current_fragment->GetLink()) {
-    for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_; current_frame != nullptr;
-         current_frame = current_frame->GetLink()) {
-      if (current_frame->Contains(shadow_frame_entry)) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
 StackVisitor::StackVisitor(Thread* thread,
                            Context* context,
                            StackWalkKind walk_kind,
@@ -648,6 +621,12 @@
     return;
   }
 
+  Runtime* runtime = Runtime::Current();
+  if (runtime->UseJitCompilation() &&
+      runtime->GetJit()->GetCodeCache()->ContainsPc(reinterpret_cast<const void*>(pc))) {
+    return;
+  }
+
   const void* code = method->GetEntryPointFromQuickCompiledCode();
   if (code == GetQuickInstrumentationEntryPoint() || code == GetInvokeObsoleteMethodStub()) {
     return;
@@ -659,9 +638,6 @@
     return;
   }
 
-  // If we are the JIT then we may have just compiled the method after the
-  // IsQuickToInterpreterBridge check.
-  Runtime* runtime = Runtime::Current();
   if (runtime->UseJitCompilation() && runtime->GetJit()->GetCodeCache()->ContainsPc(code)) {
     return;
   }
diff --git a/runtime/stack.h b/runtime/stack.h
index bdaa4c3..8c74a8c 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -512,86 +512,6 @@
   const size_t vreg_;
 };
 
-// The managed stack is used to record fragments of managed code stacks. Managed code stacks
-// may either be shadow frames or lists of frames using fixed frame sizes. Transition records are
-// necessary for transitions between code using different frame layouts and transitions into native
-// code.
-class PACKED(4) ManagedStack {
- public:
-  ManagedStack()
-      : top_quick_frame_(nullptr), link_(nullptr), top_shadow_frame_(nullptr) {}
-
-  void PushManagedStackFragment(ManagedStack* fragment) {
-    // Copy this top fragment into given fragment.
-    memcpy(fragment, this, sizeof(ManagedStack));
-    // Clear this fragment, which has become the top.
-    memset(this, 0, sizeof(ManagedStack));
-    // Link our top fragment onto the given fragment.
-    link_ = fragment;
-  }
-
-  void PopManagedStackFragment(const ManagedStack& fragment) {
-    DCHECK(&fragment == link_);
-    // Copy this given fragment back to the top.
-    memcpy(this, &fragment, sizeof(ManagedStack));
-  }
-
-  ManagedStack* GetLink() const {
-    return link_;
-  }
-
-  ArtMethod** GetTopQuickFrame() const {
-    return top_quick_frame_;
-  }
-
-  void SetTopQuickFrame(ArtMethod** top) {
-    DCHECK(top_shadow_frame_ == nullptr);
-    top_quick_frame_ = top;
-  }
-
-  static size_t TopQuickFrameOffset() {
-    return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_);
-  }
-
-  ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame) {
-    DCHECK(top_quick_frame_ == nullptr);
-    ShadowFrame* old_frame = top_shadow_frame_;
-    top_shadow_frame_ = new_top_frame;
-    new_top_frame->SetLink(old_frame);
-    return old_frame;
-  }
-
-  ShadowFrame* PopShadowFrame() {
-    DCHECK(top_quick_frame_ == nullptr);
-    CHECK(top_shadow_frame_ != nullptr);
-    ShadowFrame* frame = top_shadow_frame_;
-    top_shadow_frame_ = frame->GetLink();
-    return frame;
-  }
-
-  ShadowFrame* GetTopShadowFrame() const {
-    return top_shadow_frame_;
-  }
-
-  void SetTopShadowFrame(ShadowFrame* top) {
-    DCHECK(top_quick_frame_ == nullptr);
-    top_shadow_frame_ = top;
-  }
-
-  static size_t TopShadowFrameOffset() {
-    return OFFSETOF_MEMBER(ManagedStack, top_shadow_frame_);
-  }
-
-  size_t NumJniShadowFrameReferences() const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
-
- private:
-  ArtMethod** top_quick_frame_;
-  ManagedStack* link_;
-  ShadowFrame* top_shadow_frame_;
-};
-
 class StackVisitor {
  public:
   // This enum defines a flag to control whether inlined frames are included
diff --git a/runtime/thread-current-inl.h b/runtime/thread-current-inl.h
new file mode 100644
index 0000000..9241b1f
--- /dev/null
+++ b/runtime/thread-current-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_THREAD_CURRENT_INL_H_
+#define ART_RUNTIME_THREAD_CURRENT_INL_H_
+
+#include "thread.h"
+
+#ifdef ART_TARGET_ANDROID
+#include <bionic_tls.h>  // Access to our own TLS slot.
+#endif
+
+#include <pthread.h>
+
+namespace art {
+
+inline Thread* Thread::Current() {
+  // We rely on Thread::Current returning null for a detached thread, so it's not obvious
+  // that we can replace this with a direct %fs access on x86.
+  if (!is_started_) {  // Nothing is looked up while is_started_ is false.
+    return nullptr;
+  } else {
+#ifdef ART_TARGET_ANDROID
+    void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];  // Read our bionic TLS slot.
+#else
+    void* thread = pthread_getspecific(Thread::pthread_key_self_);  // Read the pthread key.
+#endif
+    return reinterpret_cast<Thread*>(thread);  // Returns whatever the slot/key holds.
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_THREAD_CURRENT_INL_H_
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 5c65da6..7da15d9 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,18 +19,13 @@
 
 #include "thread.h"
 
-#ifdef ART_TARGET_ANDROID
-#include <bionic_tls.h>  // Access to our own TLS slot.
-#endif
-
-#include <pthread.h>
-
 #include "base/casts.h"
 #include "base/mutex-inl.h"
 #include "base/time_utils.h"
 #include "jni_env_ext.h"
+#include "managed_stack-inl.h"
 #include "obj_ptr.h"
-#include "runtime.h"
+#include "thread-current-inl.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -41,21 +36,6 @@
   return full_env->self;
 }
 
-inline Thread* Thread::Current() {
-  // We rely on Thread::Current returning null for a detached thread, so it's not obvious
-  // that we can replace this with a direct %fs access on x86.
-  if (!is_started_) {
-    return nullptr;
-  } else {
-#ifdef ART_TARGET_ANDROID
-    void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];
-#else
-    void* thread = pthread_getspecific(Thread::pthread_key_self_);
-#endif
-    return reinterpret_cast<Thread*>(thread);
-  }
-}
-
 inline void Thread::AllowThreadSuspension() {
   DCHECK_EQ(Thread::Current(), this);
   if (UNLIKELY(TestAllFlags())) {
@@ -295,12 +275,6 @@
   return static_cast<ThreadState>(old_state);
 }
 
-inline void Thread::VerifyStack() {
-  if (kVerifyStack) {
-    VerifyStackImpl();
-  }
-}
-
 inline mirror::Object* Thread::AllocTlab(size_t bytes) {
   DCHECK_GE(TlabSize(), bytes);
   ++tlsPtr_.thread_local_objects;
@@ -384,6 +358,14 @@
   }
 }
 
+inline ShadowFrame* Thread::PushShadowFrame(ShadowFrame* new_top_frame) {
+  return tlsPtr_.managed_stack.PushShadowFrame(new_top_frame);  // Delegates to managed_stack.
+}
+
+inline ShadowFrame* Thread::PopShadowFrame() {
+  return tlsPtr_.managed_stack.PopShadowFrame();  // Delegates to managed_stack.
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_THREAD_INL_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index c849a12..789f571 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "gc/heap.h"
 #include "gc/space/space-inl.h"
+#include "gc_root.h"
 #include "handle_scope-inl.h"
 #include "indirect_reference_table-inl.h"
 #include "java_vm_ext.h"
@@ -2160,7 +2161,7 @@
   TearDownAlternateSignalStack();
 }
 
-void Thread::HandleUncaughtExceptions(ScopedObjectAccess& soa) {
+void Thread::HandleUncaughtExceptions(ScopedObjectAccessAlreadyRunnable& soa) {
   if (!IsExceptionPending()) {
     return;
   }
@@ -2180,7 +2181,7 @@
   tlsPtr_.jni_env->ExceptionClear();
 }
 
-void Thread::RemoveFromThreadGroup(ScopedObjectAccess& soa) {
+void Thread::RemoveFromThreadGroup(ScopedObjectAccessAlreadyRunnable& soa) {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
   ObjPtr<mirror::Object> ogroup = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
diff --git a/runtime/thread.h b/runtime/thread.h
index a60fd58..e85ee0d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,15 +33,13 @@
 #include "base/mutex.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
-#include "gc_root.h"
 #include "globals.h"
 #include "handle_scope.h"
 #include "instrumentation.h"
 #include "jvalue.h"
-#include "object_callbacks.h"
+#include "managed_stack.h"
 #include "offsets.h"
 #include "runtime_stats.h"
-#include "stack.h"
 #include "thread_state.h"
 
 class BacktraceMap;
@@ -87,12 +85,14 @@
 class JavaVMExt;
 struct JNIEnvExt;
 class Monitor;
+class RootVisitor;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class SingleStepControl;
 class StackedShadowFrameRecord;
 class Thread;
 class ThreadList;
+enum VisitRootFlags : uint8_t;
 
 // Thread priorities. These must match the Thread.MIN_PRIORITY,
 // Thread.NORM_PRIORITY, and Thread.MAX_PRIORITY constants.
@@ -149,6 +149,7 @@
 class Thread {
  public:
   static const size_t kStackOverflowImplicitCheckSize;
+  static constexpr bool kVerifyStack = kIsDebugBuild;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -560,10 +561,14 @@
     return tlsPtr_.frame_id_to_shadow_frame != nullptr;
   }
 
-  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void VerifyStack() REQUIRES_SHARED(Locks::mutator_lock_);
+  void VerifyStack() REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kVerifyStack) {  // Thread::kVerifyStack, defined as kIsDebugBuild in this class.
+      VerifyStackImpl();
+    }
+  }
 
   //
   // Offsets of various members of native Thread class, used by compiled code.
@@ -793,13 +798,8 @@
     tlsPtr_.managed_stack.PopManagedStackFragment(fragment);
   }
 
-  ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame) {
-    return tlsPtr_.managed_stack.PushShadowFrame(new_top_frame);
-  }
-
-  ShadowFrame* PopShadowFrame() {
-    return tlsPtr_.managed_stack.PopShadowFrame();
-  }
+  ALWAYS_INLINE ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame);
+  ALWAYS_INLINE ShadowFrame* PopShadowFrame();
 
   template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopShadowFrameOffset() {
@@ -1250,9 +1250,10 @@
 
   static void* CreateCallback(void* arg);
 
-  void HandleUncaughtExceptions(ScopedObjectAccess& soa)
+  void HandleUncaughtExceptions(ScopedObjectAccessAlreadyRunnable& soa)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void RemoveFromThreadGroup(ScopedObjectAccess& soa) REQUIRES_SHARED(Locks::mutator_lock_);
+  void RemoveFromThreadGroup(ScopedObjectAccessAlreadyRunnable& soa)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Initialize a thread.
   //
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index ca8f7b6..95aba79 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -37,6 +37,7 @@
 #include "gc/gc_pause_listener.h"
 #include "gc/heap.h"
 #include "gc/reference_processor.h"
+#include "gc_root.h"
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
@@ -164,7 +165,7 @@
   if (dump_native_stack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
-  os << "\n";
+  os << std::endl;
 }
 
 void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
@@ -215,11 +216,10 @@
       ScopedObjectAccess soa(self);
       thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
     }
-    local_os << "\n";
     {
       // Use the logging lock to ensure serialization when writing to the common ostream.
       MutexLock mu(self, *Locks::logging_lock_);
-      *os_ << local_os.str();
+      *os_ << local_os.str() << std::endl;
     }
     barrier_.Pass(self);
   }
@@ -757,7 +757,7 @@
         // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
         if ((errno != EAGAIN) && (errno != EINTR)) {
           if (errno == ETIMEDOUT) {
-            LOG(::android::base::FATAL)
+            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
                 << "Timed out waiting for threads to suspend, waited for "
                 << PrettyDuration(NanoTime() - start_time);
           } else {
@@ -1509,7 +1509,7 @@
   // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
   // order violations.
   for (Thread* thread : threads_to_visit) {
-    thread->VisitRoots(visitor);
+    thread->VisitRoots(visitor, kVisitRootFlagAllRoots);
   }
 
   // Restore suspend counts.
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 0ce1d78..92702c6 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -22,9 +22,7 @@
 #include "base/mutex.h"
 #include "base/time_utils.h"
 #include "base/value_object.h"
-#include "gc_root.h"
 #include "jni.h"
-#include "object_callbacks.h"
 
 #include <bitset>
 #include <list>
@@ -38,8 +36,10 @@
   class GcPauseListener;
 }  // namespace gc
 class Closure;
+class RootVisitor;
 class Thread;
 class TimingLogger;
+enum VisitRootFlags : uint8_t;
 
 class ThreadList {
  public:
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index e051e76..8349f33 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -30,7 +30,7 @@
 #include "base/stl_util.h"
 #include "base/time_utils.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 3a9975a..3550d56 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -41,6 +41,7 @@
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
+#include "stack.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 56ff0a1..907d37e 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -19,8 +19,10 @@
 #include "base/stl_util.h"
 #include "base/logging.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 921de03..747c2d0 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -22,7 +22,6 @@
 #include "base/value_object.h"
 #include "dex_file_types.h"
 #include "gc_root.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "primitive.h"
 #include "safe_map.h"
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 7490611..12f791c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -50,6 +50,7 @@
 #include "register_line-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "utils.h"
 #include "verifier_deps.h"
 #include "verifier_compiler_binding.h"
@@ -883,10 +884,13 @@
                             InstructionFlags());
   // Run through the instructions and see if the width checks out.
   bool result = ComputeWidthsAndCountOps();
+  bool allow_runtime_only_instructions = !Runtime::Current()->IsAotCompiler() || verify_to_dump_;
   // Flag instructions guarded by a "try" block and check exception handlers.
   result = result && ScanTryCatchBlocks();
   // Perform static instruction verification.
-  result = result && VerifyInstructions();
+  result = result && (allow_runtime_only_instructions
+                          ? VerifyInstructions<true>()
+                          : VerifyInstructions<false>());
   // Perform code-flow analysis and return.
   result = result && VerifyCodeFlow();
 
@@ -1102,6 +1106,7 @@
   return true;
 }
 
+template <bool kAllowRuntimeOnlyInstructions>
 bool MethodVerifier::VerifyInstructions() {
   const Instruction* inst = Instruction::At(code_item_->insns_);
 
@@ -1110,9 +1115,8 @@
   GetInstructionFlags(0).SetCompileTimeInfoPoint();
 
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
-  bool allow_runtime_only_instructions = !Runtime::Current()->IsAotCompiler() || verify_to_dump_;
   for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
-    if (!VerifyInstruction(inst, dex_pc, allow_runtime_only_instructions)) {
+    if (!VerifyInstruction<kAllowRuntimeOnlyInstructions>(inst, dex_pc)) {
       DCHECK_NE(failures_.size(), 0U);
       return false;
     }
@@ -1139,9 +1143,8 @@
   return true;
 }
 
-bool MethodVerifier::VerifyInstruction(const Instruction* inst,
-                                       uint32_t code_offset,
-                                       bool allow_runtime_only_instructions) {
+template <bool kAllowRuntimeOnlyInstructions>
+bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_offset) {
   if (Instruction::kHaveExperimentalInstructions && UNLIKELY(inst->IsExperimental())) {
     // Experimental instructions don't yet have verifier support implementation.
     // While it is possible to use them by themselves, when we try to use stable instructions
@@ -1250,7 +1253,7 @@
       result = false;
       break;
   }
-  if (!allow_runtime_only_instructions && inst->GetVerifyIsRuntimeOnly()) {
+  if (!kAllowRuntimeOnlyInstructions && inst->GetVerifyIsRuntimeOnly()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "opcode only expected at runtime " << inst->Name();
     result = false;
   }
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index 9ef98f7..cb208f4 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -360,6 +360,7 @@
    *
    * Walks through instructions in a method calling VerifyInstruction on each.
    */
+  template <bool kAllowRuntimeOnlyInstructions>
   bool VerifyInstructions();
 
   /*
@@ -395,9 +396,8 @@
    * - (earlier) for each exception handler, the handler must start at a valid
    *   instruction
    */
-  bool VerifyInstruction(const Instruction* inst,
-                         uint32_t code_offset,
-                         bool allow_runtime_only_instructions);
+  template <bool kAllowRuntimeOnlyInstructions>
+  bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
 
   /* Ensure that the register index is valid for this code item. */
   bool CheckRegisterIndex(uint32_t idx);
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 25baac5..6c01a79 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -30,7 +30,6 @@
 #include "gc_root.h"
 #include "handle_scope.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "primitive.h"
 
 namespace art {
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 49dac26..b0ea6c8 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -25,7 +25,7 @@
 #include "reg_type_cache-inl.h"
 #include "reg_type-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index 70ce0c4..43eb948 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -23,6 +23,7 @@
 
 #include "base/array_ref.h"
 #include "base/mutex.h"
+#include "dex_file_types.h"
 #include "handle.h"
 #include "method_resolution_kind.h"
 #include "obj_ptr.h"
diff --git a/runtime/verify_object.h b/runtime/verify_object.h
index 519f7f5..e4c01d0 100644
--- a/runtime/verify_object.h
+++ b/runtime/verify_object.h
@@ -48,7 +48,6 @@
   kVerifyAll = kVerifyThis | kVerifyReads | kVerifyWrites,
 };
 
-static constexpr bool kVerifyStack = kIsDebugBuild;
 static constexpr VerifyObjectFlags kDefaultVerifyFlags = kVerifyNone;
 static constexpr VerifyObjectMode kVerifyObjectSupport =
     kDefaultVerifyFlags != 0 ? kVerifyObjectModeFast : kVerifyObjectModeDisabled;
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 5aef062..24f194b 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -30,7 +30,7 @@
 #include "obj_ptr-inl.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/test/003-omnibus-opcodes/src/Main.java b/test/003-omnibus-opcodes/src/Main.java
index a30ec15..4e1ffe2 100644
--- a/test/003-omnibus-opcodes/src/Main.java
+++ b/test/003-omnibus-opcodes/src/Main.java
@@ -67,7 +67,7 @@
         } catch (Throwable th) {
             // We and the RI throw ClassNotFoundException, but that isn't declared so javac
             // won't let us try to catch it.
-            th.printStackTrace();
+            th.printStackTrace(System.out);
         }
         InternedString.run();
         GenSelect.run();
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 74af00c..89fe016 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -155,7 +155,7 @@
         } catch (BadError e) {
             System.out.println(e);
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         try {
             // Before splitting mirror::Class::kStatusError into
@@ -171,11 +171,11 @@
             throw new IllegalStateException("Should not reach here.");
         } catch (NoClassDefFoundError ncdfe) {
             if (!(ncdfe.getCause() instanceof BadError)) {
-                ncdfe.getCause().printStackTrace();
+                ncdfe.getCause().printStackTrace(System.out);
             }
         } catch (VerifyError e) {
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
@@ -186,7 +186,7 @@
         } catch (Error e) {
             System.out.println(e);
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         // Before splitting mirror::Class::kStatusError into
         // kStatusErrorUnresolved and kStatusErrorResolved,
@@ -200,7 +200,7 @@
             System.out.println(ncdfe);
             System.out.println("  cause: " + ncdfe.getCause());
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         try {
             MultiDexBadInitWrapper2.setDummy(1);
@@ -209,7 +209,7 @@
             System.out.println(ncdfe);
             System.out.println("  cause: " + ncdfe.getCause());
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/023-many-interfaces/src/ManyInterfaces.java b/test/023-many-interfaces/src/ManyInterfaces.java
index d69a490..8ec4566 100644
--- a/test/023-many-interfaces/src/ManyInterfaces.java
+++ b/test/023-many-interfaces/src/ManyInterfaces.java
@@ -355,7 +355,7 @@
 
     static void testInstance001(Object obj, int count) {
         if (!(obj instanceof Interface001))
-            System.err.println("BAD");
+            System.out.println("BAD");
         while (count-- != 0) {
             boolean is;
             is = obj instanceof Interface001;
@@ -379,7 +379,7 @@
 
     static void testInstance049(Object obj, int count) {
         if (!(obj instanceof Interface049))
-            System.err.println("BAD");
+            System.out.println("BAD");
         while (count-- != 0) {
             boolean is;
             is = obj instanceof Interface049;
@@ -403,7 +403,7 @@
 
     static void testInstance099(Object obj, int count) {
         if (!(obj instanceof Interface099))
-            System.err.println("BAD");
+            System.out.println("BAD");
         while (count-- != 0) {
             boolean is;
             is = obj instanceof Interface099;
diff --git a/test/024-illegal-access/src/Main.java b/test/024-illegal-access/src/Main.java
index 84c7114..de9ad5b 100644
--- a/test/024-illegal-access/src/Main.java
+++ b/test/024-illegal-access/src/Main.java
@@ -18,7 +18,7 @@
     static public void main(String[] args) {
         try {
             PublicAccess.accessStaticField();
-            System.err.println("ERROR: call 1 not expected to succeed");
+            System.out.println("ERROR: call 1 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
             System.out.println("Got expected failure 1");
@@ -29,7 +29,7 @@
 
         try {
             PublicAccess.accessStaticMethod();
-            System.err.println("ERROR: call 2 not expected to succeed");
+            System.out.println("ERROR: call 2 not expected to succeed");
         } catch (IllegalAccessError iae) {
             // reference
             System.out.println("Got expected failure 2");
@@ -37,7 +37,7 @@
 
         try {
             PublicAccess.accessInstanceField();
-            System.err.println("ERROR: call 3 not expected to succeed");
+            System.out.println("ERROR: call 3 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
             System.out.println("Got expected failure 3");
@@ -48,7 +48,7 @@
 
         try {
             PublicAccess.accessInstanceMethod();
-            System.err.println("ERROR: call 4 not expected to succeed");
+            System.out.println("ERROR: call 4 not expected to succeed");
         } catch (IllegalAccessError iae) {
             // reference
             System.out.println("Got expected failure 4");
@@ -56,7 +56,7 @@
 
         try {
             CheckInstanceof.main(new Object());
-            System.err.println("ERROR: call 5 not expected to succeed");
+            System.out.println("ERROR: call 5 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
             System.out.println("Got expected failure 5");
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index 39e69a3..8489a2c 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -133,12 +133,12 @@
             System.out.println("field signature: "
                     + getSignatureAttribute(field));
         } catch (NoSuchMethodException nsme) {
-            System.err.println("FAILED: " + nsme);
+            System.out.println("FAILED: " + nsme);
         } catch (NoSuchFieldException nsfe) {
-            System.err.println("FAILED: " + nsfe);
+            System.out.println("FAILED: " + nsfe);
         } catch (RuntimeException re) {
-            System.err.println("FAILED: " + re);
-            re.printStackTrace();
+            System.out.println("FAILED: " + re);
+            re.printStackTrace(System.out);
         }
 
         test_isAssignableFrom();
@@ -228,7 +228,7 @@
             method = c.getDeclaredMethod("getSignatureAttribute");
             method.setAccessible(true);
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
             return "<unknown>";
         }
 
diff --git a/test/032-concrete-sub/src/ConcreteSub.java b/test/032-concrete-sub/src/ConcreteSub.java
index 95adf63..61d1602 100644
--- a/test/032-concrete-sub/src/ConcreteSub.java
+++ b/test/032-concrete-sub/src/ConcreteSub.java
@@ -45,7 +45,7 @@
         try {
             meth = absClass.getMethod("redefineMe");
         } catch (NoSuchMethodException nsme) {
-            nsme.printStackTrace();
+            nsme.printStackTrace(System.out);
             return;
         }
         System.out.println("meth modifiers=" + meth.getModifiers());
diff --git a/test/032-concrete-sub/src/Main.java b/test/032-concrete-sub/src/Main.java
index 4a5193d..7d3be15 100644
--- a/test/032-concrete-sub/src/Main.java
+++ b/test/032-concrete-sub/src/Main.java
@@ -26,7 +26,7 @@
             ConcreteSub2 blah = new ConcreteSub2();
             // other VMs fail here (AbstractMethodError)
             blah.doStuff();
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (VerifyError ve) {
             System.out.println("Got expected failure");
         } catch (AbstractMethodError ame) {
diff --git a/test/036-finalizer/src/Main.java b/test/036-finalizer/src/Main.java
index 0de56f9..734830f 100644
--- a/test/036-finalizer/src/Main.java
+++ b/test/036-finalizer/src/Main.java
@@ -120,7 +120,7 @@
       static void printNonFinalized() {
         for (int i = 0; i < maxCount; ++i) {
           if (!FinalizeCounter.finalized[i]) {
-            System.err.println("Element " + i + " was not finalized");
+            System.out.println("Element " + i + " was not finalized");
           }
         }
       }
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index 755d62e..34d1f5a 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -37,31 +37,31 @@
             Object obj = c.newInstance();
             System.out.println("LocalClass succeeded");
         } catch (Exception ex) {
-            System.err.println("LocalClass failed");
-            ex.printStackTrace();
+            System.out.println("LocalClass failed");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
         try {
             Class<?> c = Class.forName("otherpackage.PackageAccess");
             Object obj = c.newInstance();
-            System.err.println("ERROR: PackageAccess succeeded unexpectedly");
+            System.out.println("ERROR: PackageAccess succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
             System.out.println("Got expected PackageAccess complaint");
         } catch (Exception ex) {
-            System.err.println("Got unexpected PackageAccess failure");
-            ex.printStackTrace();
+            System.out.println("Got unexpected PackageAccess failure");
+            ex.printStackTrace(System.out);
         }
 
         LocalClass3.main();
 
         try {
             MaybeAbstract ma = new MaybeAbstract();
-            System.err.println("ERROR: MaybeAbstract succeeded unexpectedly");
+            System.out.println("ERROR: MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationError ie) {
             System.out.println("Got expected InstantationError");
         } catch (Exception ex) {
-            System.err.println("Got unexpected MaybeAbstract failure");
+            System.out.println("Got unexpected MaybeAbstract failure");
         }
     }
 
@@ -73,12 +73,12 @@
         try {
             Class<?> c = Class.forName("LocalClass");
             Constructor<?> cons = c.getConstructor();
-            System.err.println("Cons LocalClass succeeded unexpectedly");
+            System.out.println("Cons LocalClass succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             System.out.println("Cons LocalClass failed as expected");
         } catch (Exception ex) {
-            System.err.println("Cons LocalClass failed strangely");
-            ex.printStackTrace();
+            System.out.println("Cons LocalClass failed strangely");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -88,8 +88,8 @@
             Object obj = cons.newInstance();
             System.out.println("Cons LocalClass2 succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons LocalClass2 failed");
-            ex.printStackTrace();
+            System.out.println("Cons LocalClass2 failed");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -99,8 +99,8 @@
             Object obj = cons.newInstance(new Main());
             System.out.println("Cons InnerClass succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons InnerClass failed");
-            ex.printStackTrace();
+            System.out.println("Cons InnerClass failed");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -110,21 +110,21 @@
             Object obj = cons.newInstance();
             System.out.println("Cons StaticInnerClass succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons StaticInnerClass failed");
-            ex.printStackTrace();
+            System.out.println("Cons StaticInnerClass failed");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
         try {
             Class<?> c = Class.forName("otherpackage.PackageAccess");
             Constructor<?> cons = c.getConstructor();
-            System.err.println("ERROR: Cons PackageAccess succeeded unexpectedly");
+            System.out.println("ERROR: Cons PackageAccess succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             // constructor isn't public
             System.out.println("Cons got expected PackageAccess complaint");
         } catch (Exception ex) {
-            System.err.println("Cons got unexpected PackageAccess failure");
-            ex.printStackTrace();
+            System.out.println("Cons got unexpected PackageAccess failure");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
@@ -132,13 +132,13 @@
             Class<?> c = Class.forName("MaybeAbstract");
             Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
-            System.err.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
+            System.out.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationException ie) {
             // note InstantiationException vs. InstantiationError
             System.out.println("Cons got expected InstantationException");
         } catch (Exception ex) {
-            System.err.println("Cons got unexpected MaybeAbstract failure");
-            ex.printStackTrace();
+            System.out.println("Cons got unexpected MaybeAbstract failure");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
@@ -147,13 +147,13 @@
             Constructor<?> cons = c.getConstructor();
             if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
             Object obj = cons.newInstance();
-            System.err.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
+            System.out.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
             // constructor is public, but class has package scope
             System.out.println("Cons got expected PackageAccess2 complaint");
         } catch (Exception ex) {
-            System.err.println("Cons got unexpected PackageAccess2 failure");
-            ex.printStackTrace();
+            System.out.println("Cons got unexpected PackageAccess2 failure");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -161,8 +161,8 @@
             otherpackage.ConstructorAccess.newConstructorInstance();
             System.out.println("Cons ConstructorAccess succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons ConstructorAccess failed");
-            ex.printStackTrace();
+            System.out.println("Cons ConstructorAccess failed");
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -187,8 +187,8 @@
             CC.newInstance();
             System.out.println("LocalClass3 succeeded");
         } catch (Exception ex) {
-            System.err.println("Got unexpected LocalClass3 failure");
-            ex.printStackTrace();
+            System.out.println("Got unexpected LocalClass3 failure");
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -200,7 +200,7 @@
                 Class<?> c = CC.class;
                 return c.newInstance();
             } catch (Exception ex) {
-                ex.printStackTrace();
+                ex.printStackTrace(System.out);
                 return null;
             }
         }
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 5f04b93..7f301f6 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -34,9 +34,9 @@
         Object proxy = createProxy(proxyMe);
 
         if (!Proxy.isProxyClass(proxy.getClass()))
-            System.err.println("not a proxy class?");
+            System.out.println("not a proxy class?");
         if (Proxy.getInvocationHandler(proxy) == null)
-            System.err.println("ERROR: Proxy.getInvocationHandler is null");
+            System.out.println("ERROR: Proxy.getInvocationHandler is null");
 
         /* take it for a spin; verifies instanceof constraint */
         Shapes shapes = (Shapes) proxy;
@@ -110,13 +110,13 @@
             //System.out.println("Constructor is " + cons);
             proxy = cons.newInstance(handler);
         } catch (NoSuchMethodException nsme) {
-            System.err.println("failed: " + nsme);
+            System.out.println("failed: " + nsme);
         } catch (InstantiationException ie) {
-            System.err.println("failed: " + ie);
+            System.out.println("failed: " + ie);
         } catch (IllegalAccessException ie) {
-            System.err.println("failed: " + ie);
+            System.out.println("failed: " + ie);
         } catch (InvocationTargetException ite) {
-            System.err.println("failed: " + ite);
+            System.out.println("failed: " + ite);
         }
 
         return proxy;
diff --git a/test/044-proxy/src/Clash.java b/test/044-proxy/src/Clash.java
index d000112..7dabe92 100644
--- a/test/044-proxy/src/Clash.java
+++ b/test/044-proxy/src/Clash.java
@@ -32,7 +32,7 @@
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
                 new Class<?>[] { Interface1A.class, Interface1A.class },
                 handler);
-            System.err.println("Dupe did not throw expected exception");
+            System.out.println("Dupe did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Dupe threw expected exception");
         }
@@ -41,7 +41,7 @@
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
                 new Class<?>[] { Interface1A.class, Interface1B.class },
                 handler);
-            System.err.println("Clash did not throw expected exception");
+            System.out.println("Clash did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash threw expected exception");
         }
diff --git a/test/044-proxy/src/Clash2.java b/test/044-proxy/src/Clash2.java
index e405cfe..51221f2 100644
--- a/test/044-proxy/src/Clash2.java
+++ b/test/044-proxy/src/Clash2.java
@@ -31,7 +31,7 @@
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
                 new Class<?>[] { Interface2A.class, Interface2B.class },
                 handler);
-            System.err.println("Clash2 did not throw expected exception");
+            System.out.println("Clash2 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash2 threw expected exception");
         }
diff --git a/test/044-proxy/src/Clash3.java b/test/044-proxy/src/Clash3.java
index 44806ce..9d23059 100644
--- a/test/044-proxy/src/Clash3.java
+++ b/test/044-proxy/src/Clash3.java
@@ -35,7 +35,7 @@
                     Interface3aa.class,
                     Interface3b.class },
                 handler);
-            System.err.println("Clash3 did not throw expected exception");
+            System.out.println("Clash3 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash3 threw expected exception");
         }
diff --git a/test/044-proxy/src/Clash4.java b/test/044-proxy/src/Clash4.java
index ca5c3ab..45d4820 100644
--- a/test/044-proxy/src/Clash4.java
+++ b/test/044-proxy/src/Clash4.java
@@ -36,7 +36,7 @@
                     Interface4b.class,
                     Interface4bb.class },
                 handler);
-            System.err.println("Clash4 did not throw expected exception");
+            System.out.println("Clash4 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash4 threw expected exception");
             //System.out.println(iae);
diff --git a/test/044-proxy/src/ConstructorProxy.java b/test/044-proxy/src/ConstructorProxy.java
index 95d150c..dfafbd8 100644
--- a/test/044-proxy/src/ConstructorProxy.java
+++ b/test/044-proxy/src/ConstructorProxy.java
@@ -28,7 +28,7 @@
       new ConstructorProxy().runTest();
     } catch (Exception e) {
       System.out.println("Unexpected failure occured");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/044-proxy/src/WrappedThrow.java b/test/044-proxy/src/WrappedThrow.java
index 643ba05..afea26d 100644
--- a/test/044-proxy/src/WrappedThrow.java
+++ b/test/044-proxy/src/WrappedThrow.java
@@ -43,29 +43,29 @@
         InterfaceW2 if2 = (InterfaceW2) proxy;
         try {
             if1.throwFunky();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (UndeclaredThrowableException ute) {
             System.out.println("Got expected UTE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if1.throwFunky2();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (IOException ioe) {
             System.out.println("Got expected IOE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if2.throwFunky2();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (IOException ioe) {
             System.out.println("Got expected IOE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         /*
@@ -73,38 +73,38 @@
          */
         try {
             if1.throwException();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (UndeclaredThrowableException ute) {
             System.out.println("Got expected UTE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if1.throwBase();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (UndeclaredThrowableException ute) {
             System.out.println("Got expected UTE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if2.throwSub();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (SubException se) {
             System.out.println("Got expected exception");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if2.throwSubSub();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (SubException se) {
             System.out.println("Got expected exception");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         /*
@@ -113,11 +113,11 @@
          */
         try {
             if1.bothThrowBase();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (BaseException se) {
             System.out.println("Got expected exception");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
     }
 }
diff --git a/test/045-reflect-array/src/Main.java b/test/045-reflect-array/src/Main.java
index 7418eed8..4c321b3 100644
--- a/test/045-reflect-array/src/Main.java
+++ b/test/045-reflect-array/src/Main.java
@@ -102,7 +102,7 @@
                 throw new RuntimeException("load should have worked");
             }
         } catch (IllegalArgumentException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
         }
         try {
             Array.getByte(charArray, 2);
@@ -116,7 +116,7 @@
                     + Array.getInt(charArray, 3));
             }
         } catch (IllegalArgumentException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
         }
 
         System.out.println("ReflectArrayTest.testSingleChar passed");
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 10dad8d..b8a48ea 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -89,7 +89,7 @@
 
             try {
                 meth = target.getMethod("packageMethod");
-                System.err.println("succeeded on package-scope method");
+                System.out.println("succeeded on package-scope method");
             } catch (NoSuchMethodException nsme) {
                 // good
             }
@@ -101,7 +101,7 @@
             try {
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
                 meth.invoke(instance);
-                System.err.println("inner-method invoke unexpectedly worked");
+                System.out.println("inner-method invoke unexpectedly worked");
             } catch (IllegalAccessException iae) {
                 // good
             }
@@ -110,13 +110,13 @@
             try {
                 int x = field.getInt(instance);
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
-                System.err.println("field get unexpectedly worked: " + x);
+                System.out.println("field get unexpectedly worked: " + x);
             } catch (IllegalAccessException iae) {
                 // good
             }
         } catch (Exception ex) {
             System.out.println("----- unexpected exception -----");
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -171,7 +171,7 @@
             }
             catch (Exception ex) {
                 System.out.println("GLITCH: invoke got wrong exception:");
-                ex.printStackTrace();
+                ex.printStackTrace(System.out);
             }
             System.out.println("");
 
@@ -400,7 +400,7 @@
 
         } catch (Exception ex) {
             System.out.println("----- unexpected exception -----");
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         System.out.println("ReflectTest done!");
@@ -414,7 +414,7 @@
             m = Collections.class.getDeclaredMethod("swap",
                             Object[].class, int.class, int.class);
         } catch (NoSuchMethodException nsme) {
-            nsme.printStackTrace();
+            nsme.printStackTrace(System.out);
             return;
         }
         System.out.println(m + " accessible=" + m.isAccessible());
@@ -423,10 +423,10 @@
         try {
             m.invoke(null, objects, 0, 1);
         } catch (IllegalAccessException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
             return;
         } catch (InvocationTargetException ite) {
-            ite.printStackTrace();
+            ite.printStackTrace(System.out);
             return;
         }
 
@@ -434,10 +434,10 @@
             String s = "Should be ignored";
             m.invoke(s, objects, 0, 1);
         } catch (IllegalAccessException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
             return;
         } catch (InvocationTargetException ite) {
-            ite.printStackTrace();
+            ite.printStackTrace(System.out);
             return;
         }
 
@@ -449,7 +449,7 @@
         } catch (InvocationTargetException ite) {
             System.out.println("checkType got expected exception");
         } catch (IllegalAccessException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
             return;
         }
     }
@@ -826,7 +826,7 @@
     static {
         System.out.println("FieldNoisyInit is initializing");
         //Throwable th = new Throwable();
-        //th.printStackTrace();
+        //th.printStackTrace(System.out);
     }
 }
 
@@ -842,7 +842,7 @@
     static {
         System.out.println("MethodNoisyInit is initializing");
         //Throwable th = new Throwable();
-        //th.printStackTrace();
+        //th.printStackTrace(System.out);
     }
 }
 
diff --git a/test/048-reflect-v8/src/DefaultDeclared.java b/test/048-reflect-v8/src/DefaultDeclared.java
index 16e8a24..d49bdc9 100644
--- a/test/048-reflect-v8/src/DefaultDeclared.java
+++ b/test/048-reflect-v8/src/DefaultDeclared.java
@@ -52,7 +52,7 @@
       System.out.println("NoSuchMethodException thrown for class " + klass.toString());
     } catch (Throwable t) {
       System.out.println("Unknown error thrown for class " + klass.toString());
-      t.printStackTrace();
+      t.printStackTrace(System.out);
     }
   }
 
diff --git a/test/050-sync-test/src/Main.java b/test/050-sync-test/src/Main.java
index 5364e2a..734b51e 100644
--- a/test/050-sync-test/src/Main.java
+++ b/test/050-sync-test/src/Main.java
@@ -39,7 +39,7 @@
             Thread.sleep(1000);
         } catch (InterruptedException ie) {
             System.out.println("INTERRUPT!");
-            ie.printStackTrace();
+            ie.printStackTrace(System.out);
         }
         System.out.println("GONE");
     }
@@ -56,7 +56,7 @@
                 one.wait();
             } catch (InterruptedException ie) {
                 System.out.println("INTERRUPT!");
-                ie.printStackTrace();
+                ie.printStackTrace(System.out);
             }
         }
 
@@ -69,7 +69,7 @@
             two.join();
         } catch (InterruptedException ie) {
             System.out.println("INTERRUPT!");
-            ie.printStackTrace();
+            ie.printStackTrace(System.out);
         }
         System.out.println("main: all done");
     }
@@ -167,7 +167,7 @@
                         " interrupted, flag=" + Thread.interrupted());
                 intr = true;
             } catch (Exception ex) {
-                ex.printStackTrace();
+                ex.printStackTrace(System.out);
             }
 
             if (!intr)
diff --git a/test/050-sync-test/src/ThreadDeathHandler.java b/test/050-sync-test/src/ThreadDeathHandler.java
index 0a7437d..58061f8 100644
--- a/test/050-sync-test/src/ThreadDeathHandler.java
+++ b/test/050-sync-test/src/ThreadDeathHandler.java
@@ -27,7 +27,7 @@
     }
 
     public void uncaughtException(Thread t, Throwable e) {
-        System.err.println("Uncaught exception " + mMyMessage + "!");
-        e.printStackTrace();
+        System.out.println("Uncaught exception " + mMyMessage + "!");
+        e.printStackTrace(System.out);
     }
 }
diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java
index 08cb5de..fe1cafe 100644
--- a/test/051-thread/src/Main.java
+++ b/test/051-thread/src/Main.java
@@ -79,7 +79,7 @@
         try {
             t.join();
         } catch (InterruptedException ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         System.out.print("testThreadDaemons finished\n");
diff --git a/test/053-wait-some/src/Main.java b/test/053-wait-some/src/Main.java
index 377a578..b8e6dfe 100644
--- a/test/053-wait-some/src/Main.java
+++ b/test/053-wait-some/src/Main.java
@@ -39,7 +39,7 @@
             } catch (IllegalArgumentException iae) {
                 System.out.println("Caught expected exception on neg arg");
             } catch (InterruptedException ie) {
-                ie.printStackTrace();
+                ie.printStackTrace(System.out);
             }
 
             for (long delay : DELAYS) {
@@ -49,7 +49,7 @@
                 try {
                     sleepy.wait(delay);
                 } catch (InterruptedException ie) {
-                    ie.printStackTrace();
+                    ie.printStackTrace(System.out);
                 }
                 end = System.currentTimeMillis();
 
diff --git a/test/054-uncaught/src/Main.java b/test/054-uncaught/src/Main.java
index 688a2a4..43de7ae 100644
--- a/test/054-uncaught/src/Main.java
+++ b/test/054-uncaught/src/Main.java
@@ -33,7 +33,7 @@
         try {
             t.join();
         } catch (InterruptedException ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -41,7 +41,7 @@
         ThreadDeathHandler defHandler = new ThreadDeathHandler("DEFAULT");
         ThreadDeathHandler threadHandler = new ThreadDeathHandler("THREAD");
 
-        System.err.println("Test " + which);
+        System.out.println("Test " + which);
         switch (which) {
             case 1: {
                 Thread.setDefaultUncaughtExceptionHandler(defHandler);
diff --git a/test/054-uncaught/src/ThreadDeathHandler.java b/test/054-uncaught/src/ThreadDeathHandler.java
index 0a7437d..58061f8 100644
--- a/test/054-uncaught/src/ThreadDeathHandler.java
+++ b/test/054-uncaught/src/ThreadDeathHandler.java
@@ -27,7 +27,7 @@
     }
 
     public void uncaughtException(Thread t, Throwable e) {
-        System.err.println("Uncaught exception " + mMyMessage + "!");
-        e.printStackTrace();
+        System.out.println("Uncaught exception " + mMyMessage + "!");
+        e.printStackTrace(System.out);
     }
 }
diff --git a/test/059-finalizer-throw/src/Main.java b/test/059-finalizer-throw/src/Main.java
index fa80fe3..3bfbc2d 100644
--- a/test/059-finalizer-throw/src/Main.java
+++ b/test/059-finalizer-throw/src/Main.java
@@ -46,7 +46,7 @@
             try {
                 Thread.sleep(500);
             } catch (InterruptedException ie) {
-                System.err.println(ie);
+                System.out.println(ie);
             }
         }
 
@@ -54,7 +54,7 @@
         try {
             Thread.sleep(750);
         } catch (InterruptedException ie) {
-            System.err.println(ie);
+            System.out.println(ie);
         }
 
         System.out.println("done");
diff --git a/test/064-field-access/src/Main.java b/test/064-field-access/src/Main.java
index 50ad5b9..b08f3ae 100644
--- a/test/064-field-access/src/Main.java
+++ b/test/064-field-access/src/Main.java
@@ -28,7 +28,7 @@
 
     try {
       GetNonexistent.main(null);
-      System.err.println("Not expected to succeed");
+      System.out.println("Not expected to succeed");
     } catch (VerifyError fe) {
       // dalvik
       System.out.println("Got expected failure");
@@ -101,22 +101,22 @@
 
       /* success; expected? */
       if (expectedException != null) {
-        System.err.println("ERROR: call succeeded for field " + field +
+        System.out.println("ERROR: call succeeded for field " + field +
             " with a read of type '" + type +
             "', was expecting " + expectedException);
         Thread.dumpStack();
       }
     } catch (Exception ex) {
       if (expectedException == null) {
-        System.err.println("ERROR: call failed unexpectedly: "
+        System.out.println("ERROR: call failed unexpectedly: "
             + ex.getClass());
-        ex.printStackTrace();
+        ex.printStackTrace(System.out);
       } else {
         if (!expectedException.equals(ex.getClass())) {
-          System.err.println("ERROR: incorrect exception: wanted "
+          System.out.println("ERROR: incorrect exception: wanted "
               + expectedException.getName() + ", got "
               + ex.getClass());
-          ex.printStackTrace();
+          ex.printStackTrace(System.out);
         }
       }
     }
@@ -675,22 +675,22 @@
 
       /* success; expected? */
       if (expectedException != null) {
-        System.err.println("ERROR: call succeeded for field " + field +
+        System.out.println("ERROR: call succeeded for field " + field +
             " with a read of type '" + type +
             "', was expecting " + expectedException);
         Thread.dumpStack();
       }
     } catch (Exception ex) {
       if (expectedException == null) {
-        System.err.println("ERROR: call failed unexpectedly: "
+        System.out.println("ERROR: call failed unexpectedly: "
             + ex.getClass());
-        ex.printStackTrace();
+        ex.printStackTrace(System.out);
       } else {
         if (!expectedException.equals(ex.getClass())) {
-          System.err.println("ERROR: incorrect exception: wanted "
+          System.out.println("ERROR: incorrect exception: wanted "
               + expectedException.getName() + ", got "
               + ex.getClass());
-          ex.printStackTrace();
+          ex.printStackTrace(System.out);
         }
       }
     }
@@ -704,19 +704,19 @@
       result = method.invoke(obj);
       /* success; expected? */
       if (expectedException != null) {
-        System.err.println("ERROR: call succeeded for method " + method + "', was expecting " +
+        System.out.println("ERROR: call succeeded for method " + method + "', was expecting " +
                            expectedException);
         Thread.dumpStack();
       }
     } catch (Exception ex) {
       if (expectedException == null) {
-        System.err.println("ERROR: call failed unexpectedly: " + ex.getClass());
-        ex.printStackTrace();
+        System.out.println("ERROR: call failed unexpectedly: " + ex.getClass());
+        ex.printStackTrace(System.out);
       } else {
         if (!expectedException.equals(ex.getClass())) {
-          System.err.println("ERROR: incorrect exception: wanted " + expectedException.getName() +
+          System.out.println("ERROR: incorrect exception: wanted " + expectedException.getName() +
                              ", got " + ex.getClass());
-          ex.printStackTrace();
+          ex.printStackTrace(System.out);
         }
       }
     }
diff --git a/test/065-mismatched-implements/src/Main.java b/test/065-mismatched-implements/src/Main.java
index 5975b99..55d0bab 100644
--- a/test/065-mismatched-implements/src/Main.java
+++ b/test/065-mismatched-implements/src/Main.java
@@ -21,7 +21,7 @@
     public static void main(String[] args) {
         try {
             Indirect.main();
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected ICCE");
         }
diff --git a/test/066-mismatched-super/src/Main.java b/test/066-mismatched-super/src/Main.java
index 5975b99..55d0bab 100644
--- a/test/066-mismatched-super/src/Main.java
+++ b/test/066-mismatched-super/src/Main.java
@@ -21,7 +21,7 @@
     public static void main(String[] args) {
         try {
             Indirect.main();
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected ICCE");
         }
diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java
index 01539b7..0aaa152 100644
--- a/test/068-classloader/src/Main.java
+++ b/test/068-classloader/src/Main.java
@@ -129,7 +129,7 @@
                 throw new RuntimeException("target 2 has unexpected value " + value);
             }
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -153,8 +153,8 @@
         try {
             altClass = loader.loadClass("Inaccessible1");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed");
-            cnfe.printStackTrace();
+            System.out.println("loadClass failed");
+            cnfe.printStackTrace(System.out);
             return;
         }
 
@@ -162,9 +162,9 @@
         Object obj;
         try {
             obj = altClass.newInstance();
-            System.err.println("ERROR: Inaccessible1 was accessible");
+            System.out.println("ERROR: Inaccessible1 was accessible");
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
             System.out.println("Got expected access exception #1");
@@ -182,14 +182,14 @@
 
         try {
             altClass = loader.loadClass("Inaccessible2");
-            System.err.println("ERROR: Inaccessible2 was accessible: " + altClass);
+            System.out.println("ERROR: Inaccessible2 was accessible: " + altClass);
         } catch (ClassNotFoundException cnfe) {
             Throwable cause = cnfe.getCause();
             if (cause instanceof IllegalAccessError) {
                 System.out.println("Got expected CNFE/IAE #2");
             } else {
-                System.err.println("Got unexpected CNFE/IAE #2");
-                cnfe.printStackTrace();
+                System.out.println("Got unexpected CNFE/IAE #2");
+                cnfe.printStackTrace(System.out);
             }
         }
     }
@@ -202,14 +202,14 @@
 
         try {
             altClass = loader.loadClass("Inaccessible3");
-            System.err.println("ERROR: Inaccessible3 was accessible: " + altClass);
+            System.out.println("ERROR: Inaccessible3 was accessible: " + altClass);
         } catch (ClassNotFoundException cnfe) {
             Throwable cause = cnfe.getCause();
             if (cause instanceof IllegalAccessError) {
                 System.out.println("Got expected CNFE/IAE #3");
             } else {
-                System.err.println("Got unexpected CNFE/IAE #3");
-                cnfe.printStackTrace();
+                System.out.println("Got unexpected CNFE/IAE #3");
+                cnfe.printStackTrace(System.out);
             }
         }
     }
@@ -227,7 +227,7 @@
             //System.out.println("+++ DoubledExtend is " + doubledExtendClass
             //    + " in " + doubledExtendClass.getClassLoader());
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -235,10 +235,10 @@
         try {
             obj = doubledExtendClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got expected LinkageError on DE");
@@ -254,8 +254,8 @@
             String result;
 
             result = Base.doStuff(de);
-            System.err.println("ERROR: did not get LinkageError on DE");
-            System.err.println("(result=" + result + ")");
+            System.out.println("ERROR: did not get LinkageError on DE");
+            System.out.println("(result=" + result + ")");
         } catch (LinkageError le) {
             System.out.println("Got expected LinkageError on DE");
             return;
@@ -274,7 +274,7 @@
         try {
             doubledExtendOkayClass = loader.loadClass("DoubledExtendOkay");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -282,14 +282,14 @@
         try {
             obj = doubledExtendOkayClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
-            System.err.println("Got unexpected LinkageError on DEO");
-            le.printStackTrace();
+            System.out.println("Got unexpected LinkageError on DEO");
+            le.printStackTrace(System.out);
             return;
         }
 
@@ -304,8 +304,8 @@
             result = BaseOkay.doStuff(de);
             System.out.println("Got DEO result " + result);
         } catch (LinkageError le) {
-            System.err.println("Got unexpected LinkageError on DEO");
-            le.printStackTrace();
+            System.out.println("Got unexpected LinkageError on DEO");
+            le.printStackTrace(System.out);
             return;
         }
     }
@@ -322,7 +322,7 @@
         try {
             getDoubledClass = loader.loadClass("GetDoubled");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -330,10 +330,10 @@
         try {
             obj = getDoubledClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             // Dalvik bails here
@@ -354,7 +354,7 @@
             System.out.println("Got LinkageError on GD");
             return;
         }
-        System.err.println("Should have failed by now on GetDoubled");
+        System.out.println("Should have failed by now on GetDoubled");
     }
 
     /**
@@ -368,7 +368,7 @@
         try {
             abstractGetClass = loader.loadClass("AbstractGet");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass ta failed: " + cnfe);
+            System.out.println("loadClass ta failed: " + cnfe);
             return;
         }
 
@@ -376,10 +376,10 @@
         try {
             obj = abstractGetClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got LinkageError on TA");
@@ -399,7 +399,7 @@
             System.out.println("Got LinkageError on TA");
             return;
         }
-        System.err.println("Should have failed by now in testAbstract");
+        System.out.println("Should have failed by now in testAbstract");
     }
 
     /**
@@ -415,7 +415,7 @@
         try {
             doubledImplementClass = loader.loadClass("DoubledImplement");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -423,10 +423,10 @@
         try {
             obj = doubledImplementClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got LinkageError on DI (early)");
@@ -447,7 +447,7 @@
         try {
             di.one();
             if (!isOne) {
-                System.err.println("ERROR: did not get LinkageError on DI");
+                System.out.println("ERROR: did not get LinkageError on DI");
             }
         } catch (LinkageError le) {
             if (!isOne) {
@@ -476,7 +476,7 @@
             ifaceImplClass = loader.loadClass("IfaceImpl");
             ifaceImplClass = loader.loadClass("DoubledImplement2");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -484,10 +484,10 @@
         try {
             obj = ifaceImplClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got LinkageError on IDI (early)");
diff --git a/test/069-field-type/src/Main.java b/test/069-field-type/src/Main.java
index f9885e6..d9aa9e1 100644
--- a/test/069-field-type/src/Main.java
+++ b/test/069-field-type/src/Main.java
@@ -19,7 +19,7 @@
         /* try to use the reference; should fail */
         try {
             holder.mValue.run();
-            System.err.println("ERROR: did not get expected ICCE");
+            System.out.println("ERROR: did not get expected ICCE");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected IncompatibleClassChangeError");
         }
diff --git a/test/070-nio-buffer/src/Main.java b/test/070-nio-buffer/src/Main.java
index a7433b8..a3eeb3f 100644
--- a/test/070-nio-buffer/src/Main.java
+++ b/test/070-nio-buffer/src/Main.java
@@ -58,7 +58,7 @@
 
         try {
             shortBuf.put(myShorts, 0, 1);     // should fail
-            System.err.println("ERROR: out-of-bounds put succeeded\n");
+            System.out.println("ERROR: out-of-bounds put succeeded\n");
         } catch (BufferOverflowException boe) {
             System.out.println("Got expected buffer overflow exception");
         }
@@ -66,7 +66,7 @@
         try {
             shortBuf.position(0);
             shortBuf.put(myShorts, 0, 33);     // should fail
-            System.err.println("ERROR: out-of-bounds put succeeded\n");
+            System.out.println("ERROR: out-of-bounds put succeeded\n");
         } catch (IndexOutOfBoundsException ioobe) {
             System.out.println("Got expected out-of-bounds exception");
         }
@@ -74,7 +74,7 @@
         try {
             shortBuf.position(16);
             shortBuf.put(myShorts, 0, 17);     // should fail
-            System.err.println("ERROR: out-of-bounds put succeeded\n");
+            System.out.println("ERROR: out-of-bounds put succeeded\n");
         } catch (BufferOverflowException boe) {
             System.out.println("Got expected buffer overflow exception");
         }
diff --git a/test/073-mismatched-field/src/Main.java b/test/073-mismatched-field/src/Main.java
index 70709c0..2d6b9eb 100644
--- a/test/073-mismatched-field/src/Main.java
+++ b/test/073-mismatched-field/src/Main.java
@@ -23,7 +23,7 @@
     void doit() {
         try {
             System.out.println("value=" + this.f);
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected failure");
         }
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index df04793..5165df7 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -52,9 +52,9 @@
             try {
                 dumpHprofDataMethod.invoke(null, dumpFile);
             } catch (IllegalAccessException iae) {
-                System.err.println(iae);
+                System.out.println(iae);
             } catch (InvocationTargetException ite) {
-                System.err.println(ite);
+                System.out.println(ite);
             }
         }
 
@@ -80,7 +80,7 @@
         try {
             meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
-            System.err.println("Found VMDebug but not dumpHprofData method");
+            System.out.println("Found VMDebug but not dumpHprofData method");
             return null;
         }
 
@@ -126,7 +126,7 @@
             deep.join();
             large.join();
         } catch (InterruptedException ie) {
-            System.err.println("join was interrupted");
+            System.out.println("join was interrupted");
         }
     }
 
@@ -137,7 +137,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException ie) {
-            System.err.println("sleep was interrupted");
+            System.out.println("sleep was interrupted");
         }
     }
 
@@ -213,7 +213,7 @@
         }
 
         if (!once) {
-            System.err.println("not even once?");
+            System.out.println("not even once?");
             return;
         }
 
@@ -229,7 +229,7 @@
 
         for (int i = 0; i < MAX_DEPTH; i++) {
             if (weak[i].get() != null) {
-                System.err.println("Deep: weak still has " + i);
+                System.out.println("Deep: weak still has " + i);
             }
         }
 
@@ -251,7 +251,7 @@
     private static void checkStringReferences() {
       for (int i = 0; i < MAX_DEPTH; i++) {
           if (strong[i] != weak[i].get()) {
-              System.err.println("Deep: " + i + " strong=" + strong[i] +
+              System.out.println("Deep: " + i + " strong=" + strong[i] +
                   ", weak=" + weak[i].get());
           }
       }
diff --git a/test/075-verification-error/src/Main.java b/test/075-verification-error/src/Main.java
index 9b66a8d..3f2881e 100644
--- a/test/075-verification-error/src/Main.java
+++ b/test/075-verification-error/src/Main.java
@@ -36,12 +36,12 @@
     static void testClassNewInstance() {
         try {
             MaybeAbstract ma = new MaybeAbstract();
-            System.err.println("ERROR: MaybeAbstract succeeded unexpectedly");
+            System.out.println("ERROR: MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationError ie) {
             System.out.println("Got expected InstantationError");
             if (VERBOSE) System.out.println("--- " + ie);
         } catch (Exception ex) {
-            System.err.println("Got unexpected MaybeAbstract failure");
+            System.out.println("Got unexpected MaybeAbstract failure");
         }
     }
 
@@ -88,7 +88,7 @@
 
         try {
             int x = mutant.inaccessibleField;
-            System.err.println("ERROR: bad access succeeded (ifield)");
+            System.out.println("ERROR: bad access succeeded (ifield)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (ifield)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -96,7 +96,7 @@
 
         try {
             int y = Mutant.inaccessibleStaticField;
-            System.err.println("ERROR: bad access succeeded (sfield)");
+            System.out.println("ERROR: bad access succeeded (sfield)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (sfield)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -104,7 +104,7 @@
 
         try {
             mutant.inaccessibleMethod();
-            System.err.println("ERROR: bad access succeeded (method)");
+            System.out.println("ERROR: bad access succeeded (method)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (method)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -112,7 +112,7 @@
 
         try {
             Mutant.inaccessibleStaticMethod();
-            System.err.println("ERROR: bad access succeeded (smethod)");
+            System.out.println("ERROR: bad access succeeded (smethod)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (smethod)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -121,7 +121,7 @@
         try {
             /* accessible static method in an inaccessible class */
             InaccessibleClass.test();
-            System.err.println("ERROR: bad meth-class access succeeded (meth-class)");
+            System.out.println("ERROR: bad meth-class access succeeded (meth-class)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (meth-class)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -130,7 +130,7 @@
         try {
             /* accessible static field in an inaccessible class */
             int blah = InaccessibleClass.blah;
-            System.err.println("ERROR: bad field-class access succeeded (field-class)");
+            System.out.println("ERROR: bad field-class access succeeded (field-class)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (field-class)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -139,7 +139,7 @@
         try {
             /* inaccessible static method in an accessible class */
             InaccessibleMethod.test();
-            System.err.println("ERROR: bad access succeeded (meth-meth)");
+            System.out.println("ERROR: bad access succeeded (meth-meth)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (meth-meth)");
             if (VERBOSE) System.out.println("--- " + iae);
diff --git a/test/077-method-override/src/Main.java b/test/077-method-override/src/Main.java
index 84bdf35..3a3c528 100644
--- a/test/077-method-override/src/Main.java
+++ b/test/077-method-override/src/Main.java
@@ -37,8 +37,8 @@
             ((Base)derived).overrideVirtualWithStatic();
         } catch (NoSuchMethodError nsme) {
             /* NSME is subclass of ICCE, so check it explicitly */
-            System.err.println("Got NSME - ovws");
-            nsme.printStackTrace(System.err);
+            System.out.println("Got NSME - ovws");
+            nsme.printStackTrace(System.out);
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected exception - ovws");
         }
@@ -46,8 +46,8 @@
         try {
             ((Base)derived).overrideStaticWithVirtual();
         } catch (NoSuchMethodError nsme) {
-            System.err.println("Got NSME - oswv");
-            nsme.printStackTrace(System.err);
+            System.out.println("Got NSME - oswv");
+            nsme.printStackTrace(System.out);
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected exception - oswv");
         }
diff --git a/test/079-phantom/src/Main.java b/test/079-phantom/src/Main.java
index c54bc0b..daead2e 100644
--- a/test/079-phantom/src/Main.java
+++ b/test/079-phantom/src/Main.java
@@ -21,7 +21,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException ie) {
-            System.err.println("sleep interrupted");
+            System.out.println("sleep interrupted");
         }
     }
 
diff --git a/test/084-class-init/src/Main.java b/test/084-class-init/src/Main.java
index 28eb3e9..a60fbac 100644
--- a/test/084-class-init/src/Main.java
+++ b/test/084-class-init/src/Main.java
@@ -24,7 +24,7 @@
         // that is currently a resolution stub because it's running on behalf of <clinit>.
         try {
             throwDuringClinit();
-            System.err.println("didn't throw!");
+            System.out.println("didn't throw!");
         } catch (NullPointerException ex) {
             System.out.println("caught exception thrown during clinit");
         }
@@ -44,34 +44,34 @@
         try {
             Thread.sleep(msec);
         } catch (InterruptedException ie) {
-            System.err.println("sleep interrupted");
+            System.out.println("sleep interrupted");
         }
     }
 
     static void checkExceptions() {
         try {
             System.out.println(PartialInit.FIELD0);
-            System.err.println("Construction of PartialInit succeeded unexpectedly");
+            System.out.println("Construction of PartialInit succeeded unexpectedly");
         } catch (ExceptionInInitializerError eiie) {
             System.out.println("Got expected EIIE for FIELD0");
         }
 
         try {
             System.out.println(PartialInit.FIELD0);
-            System.err.println("Load of FIELD0 succeeded unexpectedly");
+            System.out.println("Load of FIELD0 succeeded unexpectedly");
         } catch (NoClassDefFoundError ncdfe) {
             System.out.println("Got expected NCDFE for FIELD0");
         }
         try {
             System.out.println(PartialInit.FIELD1);
-            System.err.println("Load of FIELD1 succeeded unexpectedly");
+            System.out.println("Load of FIELD1 succeeded unexpectedly");
         } catch (NoClassDefFoundError ncdfe) {
             System.out.println("Got expected NCDFE for FIELD1");
         }
 
         try {
             System.out.println(Exploder.FIELD);
-            System.err.println("Load of FIELD succeeded unexpectedly");
+            System.out.println("Load of FIELD succeeded unexpectedly");
         } catch (AssertionError expected) {
             System.out.println("Got expected '" + expected.getMessage() + "' from Exploder");
         }
@@ -92,7 +92,7 @@
             fieldThread.join();
             methodThread.join();
         } catch (InterruptedException ie) {
-            System.err.println(ie);
+            System.out.println(ie);
         }
 
         /* print all values */
diff --git a/test/086-null-super/src/Main.java b/test/086-null-super/src/Main.java
index 8bd1786..039a959 100644
--- a/test/086-null-super/src/Main.java
+++ b/test/086-null-super/src/Main.java
@@ -149,14 +149,14 @@
 
             loader = new BrokenDexLoader(ClassLoader.getSystemClassLoader());
             loader.findBrokenClass();
-            System.err.println("ERROR: Inaccessible was accessible");
+            System.out.println("ERROR: Inaccessible was accessible");
         } catch (InvocationTargetException ite) {
             Throwable cause = ite.getCause();
             if (cause instanceof NullPointerException) {
-                System.err.println("Got expected ITE/NPE");
+                System.out.println("Got expected ITE/NPE");
             } else {
-                System.err.println("Got unexpected ITE");
-                ite.printStackTrace();
+                System.out.println("Got unexpected ITE");
+                ite.printStackTrace(System.out);
             }
         }
     }
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index a6f0e64..bca3df6 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -41,7 +41,7 @@
         m.nestedMayThrow(false);
         try {
             m.nestedMayThrow(true);
-            System.err.println("nestedThrow(true) did not throw");
+            System.out.println("nestedThrow(true) did not throw");
         } catch (MyException me) {}
         System.out.println("nestedMayThrow ok");
 
diff --git a/test/092-locale/src/Main.java b/test/092-locale/src/Main.java
index 8916a29..60c0551 100644
--- a/test/092-locale/src/Main.java
+++ b/test/092-locale/src/Main.java
@@ -34,31 +34,31 @@
         try {
             testCalendar();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testDateFormatSymbols();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testCurrency();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testNormalizer();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testIso3();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -125,13 +125,13 @@
 
         res = Normalizer.normalize(composed, Normalizer.Form.NFD);
         if (!decomposed.equals(res)) {
-            System.err.println("Bad decompose: '" + composed + "' --> '"
+            System.out.println("Bad decompose: '" + composed + "' --> '"
                 + res + "'");
         }
 
         res = Normalizer.normalize(decomposed, Normalizer.Form.NFC);
         if (!composed.equals(res)) {
-            System.err.println("Bad compose: '" + decomposed + "' --> '"
+            System.out.println("Bad compose: '" + decomposed + "' --> '"
                 + res + "'");
         }
 
@@ -153,7 +153,7 @@
         try {
             System.out.println(" iso3=" + loc.getISO3Language());
         } catch (MissingResourceException mre) {
-            System.err.println("couldn't get iso3 language");
+            System.out.println("couldn't get iso3 language");
         }
     }
 }
diff --git a/test/095-switch-MAX_INT/src/Main.java b/test/095-switch-MAX_INT/src/Main.java
index d1171ea..a004a1a 100644
--- a/test/095-switch-MAX_INT/src/Main.java
+++ b/test/095-switch-MAX_INT/src/Main.java
@@ -2,7 +2,7 @@
   static public void main(String[] args) throws Exception {
     switch (0x7fffffff) {
     case 0x7fffffff:
-      System.err.println("good");
+      System.out.println("good");
       break;
     default:
       throw new AssertionError();
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index 91ba307..5f6ffa8 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -292,7 +292,7 @@
       // Expected.
     } catch (Exception e) {
       // Error.
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
@@ -304,7 +304,7 @@
       cons.newInstance();
     } catch (Exception e) {
       // Error.
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/101-fibonacci/src/Main.java b/test/101-fibonacci/src/Main.java
index c594edb..9c57ba7 100644
--- a/test/101-fibonacci/src/Main.java
+++ b/test/101-fibonacci/src/Main.java
@@ -51,7 +51,7 @@
             y = fibonacci(x + 1);
             System.out.printf("fibonacci(%d)=%d\n", x + 1, y);
         } catch (NumberFormatException ex) {
-            System.err.println(ex);
+            System.out.println(ex);
             System.exit(1);
         }
     }
diff --git a/test/109-suspend-check/src/Main.java b/test/109-suspend-check/src/Main.java
index 3c3353b..e140a59 100644
--- a/test/109-suspend-check/src/Main.java
+++ b/test/109-suspend-check/src/Main.java
@@ -55,7 +55,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException ie) {
-            System.err.println("sleep was interrupted");
+            System.out.println("sleep was interrupted");
         }
     }
 }
diff --git a/test/114-ParallelGC/src/Main.java b/test/114-ParallelGC/src/Main.java
index 159dd5c..2199872 100644
--- a/test/114-ParallelGC/src/Main.java
+++ b/test/114-ParallelGC/src/Main.java
@@ -82,7 +82,7 @@
             // Any exception or error getting here is bad.
             try {
                 // May need allocations...
-                t.printStackTrace(System.err);
+                t.printStackTrace(System.out);
             } catch (Throwable tInner) {
             }
             System.exit(1);
diff --git a/test/115-native-bridge/check b/test/115-native-bridge/check
new file mode 100755
index 0000000..1ecf334
--- /dev/null
+++ b/test/115-native-bridge/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ASan prints a warning here; drop that line and the two after it from "$2" before diffing against "$1".
+
+sed -e '/WARNING: ASan is ignoring requested __asan_handle_no_return/,+2d' "$2" | \
+    diff --strip-trailing-cr -q "$1" - >/dev/null
diff --git a/test/120-hashcode/src/Main.java b/test/120-hashcode/src/Main.java
index d2435ce..0955f50 100644
--- a/test/120-hashcode/src/Main.java
+++ b/test/120-hashcode/src/Main.java
@@ -30,7 +30,7 @@
         // Make sure that all the hashes agree.
         if (hashOrig != hashInflated || hashOrig != hashSystemOrig ||
             hashSystemOrig != hashSystemInflated) {
-            System.err.println("hash codes dont match: " + hashOrig + " " + hashInflated + " " +
+            System.out.println("hash codes dont match: " + hashOrig + " " + hashInflated + " " +
             hashSystemOrig + " " + hashSystemInflated);
         }
         System.out.println("Done.");
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 5899dd1..a8597f1 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -140,7 +140,7 @@
             allocator.join();
             dumper.join();
         } catch (InterruptedException e) {
-            System.err.println("join interrupted");
+            System.out.println("join interrupted");
         }
     }
 
@@ -178,7 +178,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException e) {
-            System.err.println("sleep interrupted");
+            System.out.println("sleep interrupted");
         }
     }
 
@@ -223,7 +223,7 @@
         try {
             meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
-            System.err.println("Found VMDebug but not dumpHprofData method");
+            System.out.println("Found VMDebug but not dumpHprofData method");
             return null;
         }
 
diff --git a/test/1337-gc-coverage/gc_coverage.cc b/test/1337-gc-coverage/gc_coverage.cc
index 1cb2fb0..ac959f6 100644
--- a/test/1337-gc-coverage/gc_coverage.cc
+++ b/test/1337-gc-coverage/gc_coverage.cc
@@ -18,7 +18,7 @@
 #include "jni.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/135-MirandaDispatch/src/Main.java b/test/135-MirandaDispatch/src/Main.java
index ada8cef..ab2a90b 100644
--- a/test/135-MirandaDispatch/src/Main.java
+++ b/test/135-MirandaDispatch/src/Main.java
@@ -53,7 +53,7 @@
         } catch (VerifyError expected) {
             System.out.println("b/21646347");
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         System.out.println("Finishing");
     }
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
index b729301..7d40f57 100644
--- a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -21,7 +21,7 @@
 #include "base/macros.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
index 5ffceb9..b32f0bc 100644
--- a/test/138-duplicate-classes-check/src/Main.java
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -42,7 +42,7 @@
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
-            exc.printStackTrace();
+            exc.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
index a0d6977..faf8b5d 100644
--- a/test/138-duplicate-classes-check2/src/Main.java
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -37,7 +37,7 @@
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
-            exc.printStackTrace();
+            exc.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
index 9b7e171..355457d 100644
--- a/test/141-class-unload/jni_unload.cc
+++ b/test/141-class-unload/jni_unload.cc
@@ -20,7 +20,7 @@
 
 #include "jit/jit.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 7e8431f..9072c8b 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -50,7 +50,7 @@
             // Test that objects keep class loader live for sticky GC.
             testStickyUnload(constructor);
         } catch (Exception e) {
-            e.printStackTrace();
+            e.printStackTrace(System.out);
         }
     }
 
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
index a0c7764..193fd5d 100644
--- a/test/142-classloader2/src/Main.java
+++ b/test/142-classloader2/src/Main.java
@@ -91,7 +91,7 @@
           if (e.getCause() instanceof VerifyError) {
             System.out.println("Caught wrapped VerifyError.");
           } else {
-            e.printStackTrace();
+            e.printStackTrace(System.out);
           }
         }
 
diff --git a/test/146-bad-interface/src/Main.java b/test/146-bad-interface/src/Main.java
index 5534bb4..958ec7c 100644
--- a/test/146-bad-interface/src/Main.java
+++ b/test/146-bad-interface/src/Main.java
@@ -37,7 +37,7 @@
     } catch (Throwable t) {
       System.out.println("Error occurred");
       System.out.println(t);
-      t.printStackTrace();
+      t.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/148-multithread-gc-annotations/gc_coverage.cc b/test/148-multithread-gc-annotations/gc_coverage.cc
index 4862b87..f48493c 100644
--- a/test/148-multithread-gc-annotations/gc_coverage.cc
+++ b/test/148-multithread-gc-annotations/gc_coverage.cc
@@ -18,7 +18,7 @@
 #include "jni.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/155-java-set-resolved-type/src/Main.java b/test/155-java-set-resolved-type/src/Main.java
index 8f79bd7..44278a1 100644
--- a/test/155-java-set-resolved-type/src/Main.java
+++ b/test/155-java-set-resolved-type/src/Main.java
@@ -61,7 +61,7 @@
             // to be resolved and found through simple lookup.
             timpl.newInstance();
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
diff --git a/test/156-register-dex-file-multi-loader/src/Main.java b/test/156-register-dex-file-multi-loader/src/Main.java
index ff5a2bd..6aa1d78 100644
--- a/test/156-register-dex-file-multi-loader/src/Main.java
+++ b/test/156-register-dex-file-multi-loader/src/Main.java
@@ -81,7 +81,7 @@
                      !message.endsWith(" with multiple class loaders");
       }
       if (unexpected) {
-        cnfe.getCause().printStackTrace();
+        cnfe.getCause().printStackTrace(System.out);
       }
     }
   }
diff --git a/test/158-app-image-class-table/src/Main.java b/test/158-app-image-class-table/src/Main.java
index 804468f..97aa14d 100644
--- a/test/158-app-image-class-table/src/Main.java
+++ b/test/158-app-image-class-table/src/Main.java
@@ -39,7 +39,7 @@
             // to be resolved and found through simple lookup.
             timpl.newInstance();
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
diff --git a/test/159-app-image-fields/src/Main.java b/test/159-app-image-fields/src/Main.java
index d06a502..47d0116 100644
--- a/test/159-app-image-fields/src/Main.java
+++ b/test/159-app-image-fields/src/Main.java
@@ -57,7 +57,7 @@
                 System.out.println("another_value: " + another_value);
             }
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
diff --git a/test/301-abstract-protected/src/Main.java b/test/301-abstract-protected/src/Main.java
index 9b19a9d..f120267 100644
--- a/test/301-abstract-protected/src/Main.java
+++ b/test/301-abstract-protected/src/Main.java
@@ -16,7 +16,7 @@
 
 public class Main {
   public static void main(String args[]) throws Exception {
-    System.err.println(new C().m());
+    System.out.println(new C().m());
   }
 }
 
diff --git a/test/487-checker-inline-calls/src/Main.java b/test/487-checker-inline-calls/src/Main.java
index 70384d5..00694f3 100644
--- a/test/487-checker-inline-calls/src/Main.java
+++ b/test/487-checker-inline-calls/src/Main.java
@@ -20,7 +20,7 @@
     try {
       doTopCall();
     } catch (Error e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index 441dbbf..1137837 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -20,7 +20,7 @@
     try {
       doTopCall(true);
     } catch (Error e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index a919690..785c0db 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -21,7 +21,7 @@
 class ForceStatic {
   static {
     System.out.println("Hello from clinit");
-    new Exception().printStackTrace();
+    new Exception().printStackTrace(System.out);
   }
   static int field;
 }
diff --git a/test/493-checker-inline-invoke-interface/src/Main.java b/test/493-checker-inline-invoke-interface/src/Main.java
index 171405c..0570b20 100644
--- a/test/493-checker-inline-invoke-interface/src/Main.java
+++ b/test/493-checker-inline-invoke-interface/src/Main.java
@@ -21,7 +21,7 @@
 class ForceStatic {
   static {
     System.out.println("Hello from clinit");
-    new Exception().printStackTrace();
+    new Exception().printStackTrace(System.out);
   }
   static int field;
 }
diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java
index 1e27e77..01b4bcd 100644
--- a/test/497-inlining-and-class-loader/src/Main.java
+++ b/test/497-inlining-and-class-loader/src/Main.java
@@ -121,7 +121,7 @@
     // Because we cleared dex cache entries, we will have to find
     // classes again, which require to use the correct class loader
     // in the presence of inlining.
-    new Exception().printStackTrace();
+    new Exception().printStackTrace(System.out);
   }
   static Object savedResolvedMethods;
 
diff --git a/test/522-checker-regression-monitor-exit/src/Main.java b/test/522-checker-regression-monitor-exit/src/Main.java
index c4f80fc..5c26f36 100644
--- a/test/522-checker-regression-monitor-exit/src/Main.java
+++ b/test/522-checker-regression-monitor-exit/src/Main.java
@@ -43,8 +43,8 @@
         Method m = c.getMethod("synchronizedHashCode", Object.class);
         result = (Integer) m.invoke(null, m_obj);
       } catch (Exception e) {
-        System.err.println("Hash code query exception");
-        e.printStackTrace();
+        System.out.println("Hash code query exception");
+        e.printStackTrace(System.out);
         result = -1;
       }
       return result;
@@ -77,7 +77,7 @@
       }
       pool.shutdown();
     } catch (CancellationException ex) {
-      System.err.println("Job timeout");
+      System.out.println("Job timeout");
       System.exit(1);
     }
   }
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index e1795de..4bb179e 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -212,7 +212,7 @@
   }
 
   /// CHECK-START: java.lang.String Main.$noinline$getBootImageString() sharpening (before)
-  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+  /// CHECK:                LoadString load_kind:RuntimeCall
 
   /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
@@ -246,7 +246,7 @@
   }
 
   /// CHECK-START: java.lang.String Main.$noinline$getNonBootImageString() sharpening (before)
-  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+  /// CHECK:                LoadString load_kind:RuntimeCall
 
   /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:BssEntry
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 8eca6b2..45ead6b 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -21,6 +21,7 @@
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
+#include "stack.h"
 #include "stack_map.h"
 
 namespace art {
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
index 907d133..e272607 100644
--- a/test/570-checker-osr/src/DeoptimizationController.java
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -53,7 +53,7 @@
         throw new IllegalStateException("Not tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     } finally {
       if (tempFile != null) {
         tempFile.delete();
@@ -68,7 +68,7 @@
         throw new IllegalStateException("Still tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     }
   }
 
diff --git a/test/595-profile-saving/profile-saving.cc b/test/595-profile-saving/profile-saving.cc
index 0f8dd57..019ddad 100644
--- a/test/595-profile-saving/profile-saving.cc
+++ b/test/595-profile-saving/profile-saving.cc
@@ -26,6 +26,7 @@
 #include "oat_file_manager.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
+#include "stack.h"
 #include "thread.h"
 
 namespace art {
diff --git a/test/596-app-images/app_images.cc b/test/596-app-images/app_images.cc
index fa9c902..42211f7 100644
--- a/test/596-app-images/app_images.cc
+++ b/test/596-app-images/app_images.cc
@@ -63,12 +63,6 @@
   return JNI_FALSE;
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkInitialized(JNIEnv*, jclass, jclass c) {
-  ScopedObjectAccess soa(Thread::Current());
-  ObjPtr<mirror::Class> klass_ptr = soa.Decode<mirror::Class>(c);
-  return klass_ptr->IsInitialized();
-}
-
 }  // namespace
 
 }  // namespace art
diff --git a/test/596-app-images/src/Main.java b/test/596-app-images/src/Main.java
index 674ba4d..75b31b8 100644
--- a/test/596-app-images/src/Main.java
+++ b/test/596-app-images/src/Main.java
@@ -14,17 +14,9 @@
  * limitations under the License.
  */
 
-import java.lang.reflect.Field;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.Method;
-
 class Main {
   static class Inner {
-    final public static int abc = 10;
-  }
-
-  static class Nested {
-
+    public static int abc = 0;
   }
 
   public static void main(String[] args) {
@@ -34,122 +26,8 @@
     } else if (!checkAppImageContains(Inner.class)) {
       System.out.println("App image does not contain Inner!");
     }
-
-    if (!checkInitialized(Inner.class))
-      System.out.println("Inner class is not initialized!");
-
-    if (!checkInitialized(Nested.class))
-      System.out.println("Nested class is not initialized!");
-
-    if (!checkInitialized(StaticFields.class))
-      System.out.println("StaticFields class is not initialized!");
-
-    if (!checkInitialized(StaticFieldsInitSub.class))
-      System.out.println("StaticFieldsInitSub class is not initialized!");
-
-    if (!checkInitialized(StaticFieldsInit.class))
-      System.out.println("StaticFieldsInit class is not initialized!");
-
-    if (!checkInitialized(StaticInternString.class))
-      System.out.println("StaticInternString class is not initialized!");
-
-    StringBuffer sb = new StringBuffer();
-    sb.append("java.");
-    sb.append("abc.");
-    sb.append("Action");
-
-    String tmp = sb.toString();
-    String intern = tmp.intern();
-
-    assertNotEqual(tmp, intern, "Dynamically constructed String, not interned.");
-    assertEqual(intern, StaticInternString.intent, "Static encoded literal String not interned.");
-    assertEqual(BootInternedString.boot, BootInternedString.boot.intern(),
-        "Static encoded literal String not moved back to runtime intern table.");
-
-    try {
-      Field f = StaticInternString.class.getDeclaredField("intent");
-      assertEqual(intern, f.get(null), "String Literals are not interned properly.");
-
-    } catch (Exception e) {
-      System.out.println("Exception");
-    }
-
-    assertEqual(StaticInternString.getIntent(), StaticInternString2.getIntent(),
-        "String Literals are not intenred properly, App image static strings duplicated.");
-
-    // reload the class StaticInternString, check whether static strings interned properly
-    final String DEX_FILE = System.getenv("DEX_LOCATION") + "/596-app-images.jar";
-    final String LIBRARY_SEARCH_PATH = System.getProperty("java.library.path");
-
-    try {
-      Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
-      if (pathClassLoader == null) {
-        throw new AssertionError("Counldn't find path class loader class");
-      }
-      Constructor<?> ctor =
-          pathClassLoader.getDeclaredConstructor(String.class, String.class, ClassLoader.class);
-      ClassLoader loader = (ClassLoader) ctor.newInstance(
-          DEX_FILE, LIBRARY_SEARCH_PATH, null);
-
-      Class<?> staticInternString = loader.loadClass("StaticInternString");
-
-      if (!checkAppImageContains(staticInternString)) {
-        System.out.println("Not loaded again.");
-      }
-      Method getIntent = staticInternString.getDeclaredMethod("getIntent");
-
-      assertEqual(StaticInternString.getIntent(), getIntent.invoke(staticInternString),
-          "Dynamically loaded app image's literal strings not interned properly.");
-    } catch (Exception e) {
-      e.printStackTrace();
-    }
-
   }
 
   public static native boolean checkAppImageLoaded();
   public static native boolean checkAppImageContains(Class<?> klass);
-  public static native boolean checkInitialized(Class<?> klass);
-
-  public static void assertEqual(Object a, Object b, String msg) {
-    if (a != b)
-      System.out.println(msg);
-  }
-
-  public static void assertNotEqual(Object a, Object b, String msg) {
-    if (a == b)
-      System.out.println(msg);
-  }
-
 }
-
-class StaticFields{
-  public static int abc;
-}
-
-class StaticFieldsInitSub extends StaticFieldsInit {
-  final public static int def = 10;
-}
-
-class StaticFieldsInit{
-  final public static int abc = 10;
-}
-
-class StaticInternString {
-  final public static String intent = "java.abc.Action";
-  static public String getIntent() {
-    return intent;
-  }
-}
-
-class BootInternedString {
-  final public static String boot = "double";
-}
-
-class StaticInternString2 {
-  final public static String intent = "java.abc.Action";
-
-  static String getIntent() {
-    return intent;
-  }
-}
-
diff --git a/test/596-monitor-inflation/monitor_inflation.cc b/test/596-monitor-inflation/monitor_inflation.cc
index fb4275b..07d1ddb 100644
--- a/test/596-monitor-inflation/monitor_inflation.cc
+++ b/test/596-monitor-inflation/monitor_inflation.cc
@@ -18,7 +18,7 @@
 #include "jni.h"
 #include "monitor.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/602-deoptimizeable/src/Main.java b/test/602-deoptimizeable/src/Main.java
index 743a579..d995923 100644
--- a/test/602-deoptimizeable/src/Main.java
+++ b/test/602-deoptimizeable/src/Main.java
@@ -99,7 +99,7 @@
                         System.exit(0);
                     }
                 } catch (Exception e) {
-                    e.printStackTrace();
+                    e.printStackTrace(System.out);
                 }
             }
         });
@@ -127,7 +127,7 @@
                     map.put(new DummyObject(10), Long.valueOf(100));
                     assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
                 } catch (Exception e) {
-                    e.printStackTrace();
+                    e.printStackTrace(System.out);
                 }
             }
         });
diff --git a/test/617-clinit-oome/src/Main.java b/test/617-clinit-oome/src/Main.java
index 749a232..94cb7ce 100644
--- a/test/617-clinit-oome/src/Main.java
+++ b/test/617-clinit-oome/src/Main.java
@@ -37,7 +37,7 @@
         Other.print();
     } catch (OutOfMemoryError e) {
     } catch (Exception e) {
-        System.err.println(e);
+        System.out.println(e);
     }
   }
 }
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 3a2145bf..af205b0 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -291,6 +291,9 @@
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
   // NOTE: should correctly deal with compressed and uncompressed cases.
+  //
+  /// CHECK-START-MIPS64: void Main.string2Bytes(char[], java.lang.String) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
   private static void string2Bytes(char[] a, String b) {
     int min = Math.min(a.length, b.length());
     for (int i = 0; i < min; i++) {
@@ -333,6 +336,13 @@
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.oneBoth(short[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
+  //
   // Bug b/37764324: integral same-length packed types can be mixed freely.
   private static void oneBoth(short[] a, char[] b) {
     for (int i = 0; i < Math.min(a.length, b.length); i++) {
@@ -372,6 +382,19 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]              loop:<<Loop2>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>]  loop:<<Loop2>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.typeConv(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]          loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi                                   loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>]           loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Vadd:d\d+>> VecAdd [<<Load>>,<<Repl>>]            loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi1>>,<<Vadd>>] loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi                                   loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi2>>]          loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]                 loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]              loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>]  loop:<<Loop2>>      outer_loop:none
+  //
   // Scalar code in cleanup loop uses correct byte type on array get and type conversion.
   private static void typeConv(byte[] a, byte[] b) {
     int len = Math.min(a.length, b.length);
diff --git a/test/626-const-class-linking/src/RacyMisbehavingHelper.java b/test/626-const-class-linking/src/RacyMisbehavingHelper.java
index 4525278..9acd3c3 100644
--- a/test/626-const-class-linking/src/RacyMisbehavingHelper.java
+++ b/test/626-const-class-linking/src/RacyMisbehavingHelper.java
@@ -26,7 +26,7 @@
             Method reportAfterLoading = loader.getClass().getDeclaredMethod("reportAfterLoading");
             reportAfterLoading.invoke(loader);
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         return new ClassPair(helper1_class, test_class);
     }
diff --git a/test/638-no-line-number/src/Main.java b/test/638-no-line-number/src/Main.java
index 7fe0404..851f049 100644
--- a/test/638-no-line-number/src/Main.java
+++ b/test/638-no-line-number/src/Main.java
@@ -19,12 +19,12 @@
     try {
       doThrow(new Error());
     } catch (Error e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
     try {
       doThrow(null);
     } catch (Throwable t) {
-      t.printStackTrace();
+      t.printStackTrace(System.out);
     }
   }
 
diff --git a/test/640-checker-boolean-simd/src/Main.java b/test/640-checker-boolean-simd/src/Main.java
index f8239fa..64b76f8 100644
--- a/test/640-checker-boolean-simd/src/Main.java
+++ b/test/640-checker-boolean-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.and(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void and(boolean x) {
     for (int i = 0; i < 128; i++)
       a[i] &= x;  // NOTE: bitwise and, not the common &&
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.or(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void or(boolean x) {
     for (int i = 0; i < 128; i++)
       a[i] |= x;  // NOTE: bitwise or, not the common ||
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.xor(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void xor(boolean x) {
     for (int i = 0; i < 128; i++)
       a[i] ^= x;  // NOTE: bitwise xor
@@ -80,6 +98,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = !a[i];
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 21d71e8..283c2c9 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -94,6 +112,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = (byte) -a[i];
@@ -109,6 +133,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = (byte) ~a[i];
@@ -124,6 +154,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -139,6 +175,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java
index 89d4b6b..dd879b4 100644
--- a/test/640-checker-char-simd/src/Main.java
+++ b/test/640-checker-char-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -94,6 +112,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = (char) -a[i];
@@ -109,6 +133,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = (char) ~a[i];
@@ -124,6 +154,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -152,6 +188,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java
index 5709b5d..f7492d5 100644
--- a/test/640-checker-double-simd/src/Main.java
+++ b/test/640-checker-double-simd/src/Main.java
@@ -36,6 +36,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(double x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -51,6 +57,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(double x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -66,6 +78,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(double x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -81,6 +99,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.div(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void div(double x) {
     for (int i = 0; i < 128; i++)
       a[i] /= x;
@@ -96,6 +120,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -111,6 +141,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.abs() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void abs() {
     for (int i = 0; i < 128; i++)
       a[i] = Math.abs(a[i]);
@@ -125,6 +161,10 @@
   /// CHECK-NOT: VecLoad
   /// CHECK-NOT: VecStore
   //
+  /// CHECK-START-MIPS64: void Main.conv(long[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecStore
+  //
   // TODO: fill in when long2double is supported
   static void conv(long[] b) {
     for (int i = 0; i < 128; i++)
diff --git a/test/640-checker-float-simd/src/Main.java b/test/640-checker-float-simd/src/Main.java
index 4bcb7e2..4fe9675 100644
--- a/test/640-checker-float-simd/src/Main.java
+++ b/test/640-checker-float-simd/src/Main.java
@@ -36,6 +36,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(float x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -51,6 +57,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(float x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -66,6 +78,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(float x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -81,6 +99,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.div(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void div(float x) {
     for (int i = 0; i < 128; i++)
       a[i] /= x;
@@ -96,6 +120,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -106,6 +136,12 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.abs() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
   /// CHECK-START-ARM64: void Main.abs() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
@@ -126,6 +162,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.conv(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void conv(int[] b) {
     for (int i = 0; i < 128; i++)
       a[i] = b[i];
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
index 9ee553c..9abf60d 100644
--- a/test/640-checker-int-simd/src/Main.java
+++ b/test/640-checker-int-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -95,6 +113,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -110,6 +134,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = ~a[i];
@@ -125,6 +155,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -134,12 +170,18 @@
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -155,6 +197,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
@@ -185,6 +233,11 @@
   /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr32() loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr32() {
     // TODO: remove a[i] = a[i] altogether?
     for (int i = 0; i < 128; i++)
@@ -211,6 +264,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr33() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shr33() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstant33();  // 1, since & 31
@@ -236,6 +296,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shrMinus254() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstantMinus254();  // 2, since & 31
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
index 8f6af9d..05dcae6 100644
--- a/test/640-checker-long-simd/src/Main.java
+++ b/test/640-checker-long-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(long) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(long x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(long) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(long x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -60,6 +72,12 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.mul(long) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
   //  Not supported for longs.
   /// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after)
   /// CHECK-NOT: VecMul
@@ -93,6 +111,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -108,6 +132,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = ~a[i];
@@ -123,6 +153,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -138,6 +174,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -153,6 +195,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
@@ -183,6 +231,11 @@
   /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr64() loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr64() {
     // TODO: remove a[i] = a[i] altogether?
     for (int i = 0; i < 128; i++)
@@ -209,6 +262,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr65() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shr65() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstant65();  // 1, since & 63
@@ -234,6 +294,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shrMinus254() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstantMinus254();  // 2, since & 63
diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java
index f62c726..4cca837 100644
--- a/test/640-checker-short-simd/src/Main.java
+++ b/test/640-checker-short-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -94,6 +112,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = (short) -a[i];
@@ -109,6 +133,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = (short) ~a[i];
@@ -124,6 +154,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -139,6 +175,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index 5a63d9f..9714a46 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -39,6 +39,18 @@
   /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitByte(byte[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitByte(byte[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = (byte) Math.abs(x[i]);
@@ -77,6 +89,18 @@
   /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitShort(short[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitShort(short[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = (short) Math.abs(x[i]);
@@ -100,6 +124,18 @@
   /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitInt(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitInt(int[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -123,6 +159,18 @@
   /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitLong(long[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                        loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                        loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                   loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitLong(long[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -146,6 +194,18 @@
   /// CHECK-DAG: ArraySet                                    loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitFloat(float[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                         loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                         loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                    loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsFloat loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                    loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitFloat(float[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -169,6 +229,18 @@
   /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitDouble(double[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                          loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                          loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                     loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitDouble(double[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
diff --git a/test/646-checker-hadd-alt-byte/src/Main.java b/test/646-checker-hadd-alt-byte/src/Main.java
index d1b33ea..9cc6828 100644
--- a/test/646-checker-hadd-alt-byte/src/Main.java
+++ b/test/646-checker-hadd-alt-byte/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -71,6 +78,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -95,6 +109,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -122,6 +143,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -146,6 +174,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -171,6 +207,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/646-checker-hadd-alt-char/src/Main.java b/test/646-checker-hadd-alt-char/src/Main.java
index 1ea8d3f..3f81299 100644
--- a/test/646-checker-hadd-alt-char/src/Main.java
+++ b/test/646-checker-hadd-alt-char/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -72,6 +79,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -98,6 +112,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -126,6 +147,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void rounding_halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -152,6 +180,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -178,6 +214,14 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned_constant(char[] b1, char[] bo) {
diff --git a/test/646-checker-hadd-alt-short/src/Main.java b/test/646-checker-hadd-alt-short/src/Main.java
index 269e618..150626c 100644
--- a/test/646-checker-hadd-alt-short/src/Main.java
+++ b/test/646-checker-hadd-alt-short/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -71,6 +78,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -95,6 +109,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -122,6 +143,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -146,6 +174,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -171,6 +207,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/646-checker-hadd-byte/src/Main.java b/test/646-checker-hadd-byte/src/Main.java
index 7e29a7e..5a615a4 100644
--- a/test/646-checker-hadd-byte/src/Main.java
+++ b/test/646-checker-hadd-byte/src/Main.java
@@ -42,6 +42,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -68,6 +75,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -92,6 +106,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -119,6 +140,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -143,6 +171,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -168,6 +204,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/646-checker-hadd-char/src/Main.java b/test/646-checker-hadd-char/src/Main.java
index d24608f..bb8a01f 100644
--- a/test/646-checker-hadd-char/src/Main.java
+++ b/test/646-checker-hadd-char/src/Main.java
@@ -42,6 +42,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -69,6 +76,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -95,6 +109,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -123,6 +144,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void rounding_halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -149,6 +177,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -175,6 +211,14 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned_constant(char[] b1, char[] bo) {
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index 4e6b4bd..07845a6 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -42,6 +42,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -69,6 +76,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -96,6 +110,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -120,6 +141,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -144,6 +172,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -172,6 +207,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -200,6 +242,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -227,6 +276,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -252,6 +308,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -277,6 +341,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/654-checker-periodic/expected.txt b/test/654-checker-periodic/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/654-checker-periodic/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/654-checker-periodic/info.txt b/test/654-checker-periodic/info.txt
new file mode 100644
index 0000000..7c8a777
--- /dev/null
+++ b/test/654-checker-periodic/info.txt
@@ -0,0 +1 @@
+Tests last values of periodic sequences on integer and floating-point values.
diff --git a/test/654-checker-periodic/src/Main.java b/test/654-checker-periodic/src/Main.java
new file mode 100644
index 0000000..7a0c98c
--- /dev/null
+++ b/test/654-checker-periodic/src/Main.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests last-value computation for a few period-two sequences over int and
+ * float, in up- and down-counting loops (cases found by fuzz testing).
+ */
+public class Main {
+
+  /// CHECK-START: int Main.doitUpInt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doitUpInt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static int doitUpInt(int n) {
+    // Alternates 1, 1486662020; the whole loop is expected to be replaced by its last value.
+    int lI = 1;
+    for (int i1 = 0; i1  < n; i1++) {
+      lI = (1486662021 - lI);
+    }
+    return lI;
+  }
+
+  /// CHECK-START: int Main.doitDownInt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doitDownInt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static int doitDownInt(int n) {
+    // Same int sequence, counting down; the whole loop is replaced by its last value.
+    int lI = 1;
+    for (int i1 = n - 1; i1 >= 0; i1--) {
+      lI = (1486662021 - lI);
+    }
+    return lI;
+  }
+
+  /// CHECK-START: float Main.doitUpFloat(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitUpFloat(int) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  static float doitUpFloat(int n) {
+    // Float subtraction rounds, so the sequence is not exactly periodic
+    // (main expects 1.0f only for n == 0); both Phis remain after the pass.
+    float lF = 1.0f;
+    for (int i1 = 0; i1  < n; i1++) {
+      lF = (1486662021.0f - lF);
+    }
+    return lF;
+  }
+
+  /// CHECK-START: float Main.doitDownFloat(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitDownFloat(int) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  static float doitDownFloat(int n) {
+    // Down-counting variant of the rounding float sequence: not exactly
+    // periodic, so both Phis are expected to remain after the pass.
+    float lF = 1.0f;
+    for (int i1 = n - 1; i1 >= 0; i1--) {
+      lF = (1486662021.0f - lF);
+    }
+    return lF;
+  }
+
+  /// CHECK-START: float Main.doitUpFloatAlt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitUpFloatAlt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static float doitUpFloatAlt(int n) {
+    // Swapping two floats involves no arithmetic, so the values stay exact
+    // and the whole loop is expected to be replaced by its last value.
+    float lF = 1.0f;
+    float l2 = 1486662020.0f;
+    for (int i1 = 0; i1  < n; i1++) {
+      float old = lF;
+      lF = l2;
+      l2 = old;
+    }
+    return lF;
+  }
+
+  /// CHECK-START: float Main.doitDownFloatAlt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitDownFloatAlt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static float doitDownFloatAlt(int n) {
+    // Down-counting variant of the float swap: values stay exact, so the
+    // whole loop is expected to be replaced by its last value.
+    float lF = 1.0f;
+    float l2 = 1486662020.0f;
+    for (int i1 = n - 1; i1 >= 0; i1--) {
+      float old = lF;
+      lF = l2;
+      l2 = old;
+    }
+    return lF;
+  }
+
+  // Main driver: checks every method above for n = 0..9, then prints "passed".
+  public static void main(String[] args) {
+    for (int i = 0; i < 10; i++) {
+      int ei = (i & 1) == 0 ? 1 : 1486662020;
+      int ci = doitUpInt(i);
+      expectEquals(ei, ci);
+    }
+    for (int i = 0; i < 10; i++) {
+      int ei = (i & 1) == 0 ? 1 : 1486662020;
+      int ci = doitDownInt(i);
+      expectEquals(ei, ci);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = i == 0 ? 1.0f : ((i & 1) == 0 ? 0.0f : 1486662021.0f);
+      float cf = doitUpFloat(i);
+      expectEquals(ef, cf);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = i == 0 ? 1.0f : ((i & 1) == 0 ? 0.0f : 1486662021.0f);
+      float cf = doitDownFloat(i);
+      expectEquals(ef, cf);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = (i & 1) == 0 ? 1.0f : 1486662020.0f;
+      float cf = doitUpFloatAlt(i);
+      expectEquals(ef, cf);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = (i & 1) == 0 ? 1.0f : 1486662020.0f;
+      float cf = doitDownFloatAlt(i);
+      expectEquals(ef, cf);
+    }
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
+
+
diff --git a/test/802-deoptimization/src/DeoptimizationController.java b/test/802-deoptimization/src/DeoptimizationController.java
index d6e662d..88579de 100644
--- a/test/802-deoptimization/src/DeoptimizationController.java
+++ b/test/802-deoptimization/src/DeoptimizationController.java
@@ -50,7 +50,7 @@
         throw new IllegalStateException("Not tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     } finally {
       if (tempFile != null) {
         tempFile.delete();
@@ -65,7 +65,7 @@
         throw new IllegalStateException("Still tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     }
   }
 
diff --git a/test/906-iterate-heap/expected.txt b/test/906-iterate-heap/expected.txt
index 73b7129..b6af843 100644
--- a/test/906-iterate-heap/expected.txt
+++ b/test/906-iterate-heap/expected.txt
@@ -18,14 +18,14 @@
 2
 1@0 (32, 2xD '0000000000000000000000000000f03f')
 2
-doTestPrimitiveFieldsClasses
 10000@0 (static, int, index=3) 0000000000000000
 10001
 10000@0 (static, int, index=11) 0000000000000000
 10001
+10000@0 (static, int, index=0) 0000000000000000
 10001
+10000@0 (static, int, index=1) 0000000000000000
 10001
-doTestPrimitiveFieldsIntegral
 10000@0 (instance, int, index=2) 0000000000000000
 10001@0 (instance, byte, index=4) 0000000000000001
 10002@0 (instance, char, index=5) 0000000000000061
@@ -33,7 +33,6 @@
 10004@0 (instance, long, index=7) 0000000000000004
 10005@0 (instance, short, index=9) 0000000000000002
 10006
-doTestPrimitiveFieldsFloat
 10000@0 (instance, int, index=3) 0000000000000000
 10001@0 (instance, byte, index=5) 0000000000000001
 10002@0 (instance, char, index=6) 0000000000000061
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
index 02ac699..6534b4c 100644
--- a/test/906-iterate-heap/iterate_heap.cc
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -408,15 +408,5 @@
   return env->NewStringUTF(ffc.data.c_str());
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_art_Test906_checkInitialized(
-    JNIEnv* env, jclass, jclass c) {
-  jint status;
-  jvmtiError error = jvmti_env->GetClassStatus(c, &status);
-  if (JvmtiErrorToException(env, jvmti_env, error)) {
-    return false;
-  }
-  return (status & JVMTI_CLASS_STATUS_INITIALIZED) != 0;
-}
-
 }  // namespace Test906IterateHeap
 }  // namespace art
diff --git a/test/906-iterate-heap/src/art/Test906.java b/test/906-iterate-heap/src/art/Test906.java
index 65c2c8c..fe18e38 100644
--- a/test/906-iterate-heap/src/art/Test906.java
+++ b/test/906-iterate-heap/src/art/Test906.java
@@ -142,7 +142,6 @@
   }
 
   private static void doTestPrimitiveFieldsClasses() {
-    System.out.println("doTestPrimitiveFieldsClasses");
     setTag(IntObject.class, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
     System.out.println(getTag(IntObject.class));
@@ -153,40 +152,18 @@
     System.out.println(getTag(FloatObject.class));
     setTag(FloatObject.class, 0);
 
-    boolean correctHeapValue = false;
     setTag(Inf1.class, 10000);
-    String heapTrace = iterateThroughHeapPrimitiveFields(10000);
-
-    if (!checkInitialized(Inf1.class)) {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000000");
-    } else {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000001");
-    }
-
-    if (!correctHeapValue)
-      System.out.println("Heap Trace for Inf1 is not as expected:\n" + heapTrace);
-
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
     System.out.println(getTag(Inf1.class));
     setTag(Inf1.class, 0);
 
     setTag(Inf2.class, 10000);
-    heapTrace = iterateThroughHeapPrimitiveFields(10000);
-
-    if (!checkInitialized(Inf2.class)) {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000000");
-    } else {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000001");
-    }
-
-    if (!correctHeapValue)
-      System.out.println("Heap Trace for Inf2 is not as expected:\n" + heapTrace);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
     System.out.println(getTag(Inf2.class));
-
     setTag(Inf2.class, 0);
   }
 
   private static void doTestPrimitiveFieldsIntegral() {
-    System.out.println("doTestPrimitiveFieldsIntegral");
     IntObject intObject = new IntObject();
     setTag(intObject, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
@@ -194,7 +171,6 @@
   }
 
   private static void doTestPrimitiveFieldsFloat() {
-    System.out.println("doTestPrimitiveFieldsFloat");
     FloatObject floatObject = new FloatObject();
     setTag(floatObject, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
@@ -289,7 +265,6 @@
     return Main.getTag(o);
   }
 
-  private static native boolean checkInitialized(Class<?> klass);
   private static native int iterateThroughHeapCount(int heapFilter,
       Class<?> klassFilter, int stopAfter);
   private static native int iterateThroughHeapData(int heapFilter,
diff --git a/test/909-attach-agent/src/Main.java b/test/909-attach-agent/src/Main.java
index 569b89a..25ebd57 100644
--- a/test/909-attach-agent/src/Main.java
+++ b/test/909-attach-agent/src/Main.java
@@ -19,17 +19,17 @@
 
 public class Main {
   public static void main(String[] args) {
-    System.err.println("Hello, world!");
+    System.out.println("Hello, world!");
     for(String a : args) {
       if(a.startsWith("agent:")) {
         String agent = a.substring(6);
         try {
           VMDebug.attachAgent(agent);
         } catch(IOException e) {
-          e.printStackTrace();
+          e.printStackTrace(System.out);
         }
       }
     }
-    System.err.println("Goodbye!");
+    System.out.println("Goodbye!");
   }
 }
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 80f8b9e..b128d1c 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -140,7 +140,9 @@
 10001
 10000@0 (static, int, index=11) 0000000000000000
 10001
+10000@0 (static, int, index=0) 0000000000000000
 10001
+10000@0 (static, int, index=1) 0000000000000000
 10001
 10000@0 (instance, int, index=2) 0000000000000000
 10001@0 (instance, byte, index=4) 0000000000000001
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index bf3f7b6..ec36ceb 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -1078,14 +1078,5 @@
   CHECK(gFoundExt);
 }
 
-extern "C" JNIEXPORT jboolean JNICALL Java_art_Test913_checkInitialized(JNIEnv* env, jclass, jclass c) {
-  jint status;
-  jvmtiError error = jvmti_env->GetClassStatus(c, &status);
-  if (JvmtiErrorToException(env, jvmti_env, error)) {
-    return false;
-  }
-  return (status & JVMTI_CLASS_STATUS_INITIALIZED) != 0;
-}
-
 }  // namespace Test913Heaps
 }  // namespace art
diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java
index b999001..97f48ee 100644
--- a/test/913-heaps/src/art/Test913.java
+++ b/test/913-heaps/src/art/Test913.java
@@ -195,33 +195,13 @@
     System.out.println(getTag(FloatObject.class));
     setTag(FloatObject.class, 0);
 
-    boolean correctHeapValue = false;
     setTag(Inf1.class, 10000);
-    String heapTrace = followReferencesPrimitiveFields(Inf1.class);
-
-    if (!checkInitialized(Inf1.class)) {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000000");
-    } else {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000001");
-    }
-
-    if (!correctHeapValue)
-      System.out.println("Heap Trace for Inf1 is not as expected:\n" + heapTrace);
-
+    System.out.println(followReferencesPrimitiveFields(Inf1.class));
     System.out.println(getTag(Inf1.class));
     setTag(Inf1.class, 0);
 
     setTag(Inf2.class, 10000);
-    heapTrace = followReferencesPrimitiveFields(Inf2.class);
-
-    if (!checkInitialized(Inf2.class)) {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000000");
-    } else {
-      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000001");
-    }
-
-    if (!correctHeapValue)
-      System.out.println("Heap Trace for Inf2 is not as expected:\n" + heapTrace);
+    System.out.println(followReferencesPrimitiveFields(Inf2.class));
     System.out.println(getTag(Inf2.class));
     setTag(Inf2.class, 0);
   }
@@ -732,7 +712,6 @@
     return Main.getTag(o);
   }
 
-  private static native boolean checkInitialized(Class<?> klass);
   private static native void setupGcCallback();
   private static native void enableGcTracking(boolean enable);
   private static native int getGcStarts();
diff --git a/test/916-obsolete-jit/src/Main.java b/test/916-obsolete-jit/src/Main.java
index 17a7a86..d7b32ba 100644
--- a/test/916-obsolete-jit/src/Main.java
+++ b/test/916-obsolete-jit/src/Main.java
@@ -132,7 +132,7 @@
           "sayHi", Runnable.class, Consumer.class);
     } catch (Exception e) {
       System.out.println("Unable to find methods!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     // Makes sure the stack is the way we want it for the test and does the redefinition. It will
diff --git a/test/934-load-transform/src/Main.java b/test/934-load-transform/src/Main.java
index 1401b7d..2d0c297 100644
--- a/test/934-load-transform/src/Main.java
+++ b/test/934-load-transform/src/Main.java
@@ -86,7 +86,7 @@
       run_test.invoke(null);
     } catch (Exception e) {
       System.out.println(e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/935-non-retransformable/src/Main.java b/test/935-non-retransformable/src/Main.java
index f240224..5098712 100644
--- a/test/935-non-retransformable/src/Main.java
+++ b/test/935-non-retransformable/src/Main.java
@@ -97,7 +97,7 @@
       run_test.invoke(null);
     } catch (Exception e) {
       System.out.println(e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/938-load-transform-bcp/src-ex/TestMain.java b/test/938-load-transform-bcp/src-ex/TestMain.java
index 3757a0f..b60fe36 100644
--- a/test/938-load-transform-bcp/src-ex/TestMain.java
+++ b/test/938-load-transform-bcp/src-ex/TestMain.java
@@ -29,7 +29,7 @@
       System.out.println(
           "Exception occured (did something load OptionalLong before this test method!: "
           + e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/938-load-transform-bcp/src/Main.java b/test/938-load-transform-bcp/src/Main.java
index 69658c0..939bdbe 100644
--- a/test/938-load-transform-bcp/src/Main.java
+++ b/test/938-load-transform-bcp/src/Main.java
@@ -111,7 +111,7 @@
       run_test.invoke(null);
     } catch (Exception e) {
       System.out.println(e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/941-recurive-obsolete-jit/src/Main.java b/test/941-recurive-obsolete-jit/src/Main.java
index 89d593b..e3065a7 100644
--- a/test/941-recurive-obsolete-jit/src/Main.java
+++ b/test/941-recurive-obsolete-jit/src/Main.java
@@ -116,7 +116,7 @@
           "sayHi", int.class, Consumer.class, Runnable.class);
     } catch (Exception e) {
       System.out.println("Unable to find methods!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     // Makes sure the stack is the way we want it for the test and does the redefinition. It will
diff --git a/test/943-private-recursive-jit/src/Main.java b/test/943-private-recursive-jit/src/Main.java
index 871c636..09337ba 100644
--- a/test/943-private-recursive-jit/src/Main.java
+++ b/test/943-private-recursive-jit/src/Main.java
@@ -129,7 +129,7 @@
           "privateSayHi", int.class, Consumer.class, Runnable.class);
     } catch (Exception e) {
       System.out.println("Unable to find methods!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     // Makes sure the stack is the way we want it for the test and does the redefinition. It will
diff --git a/test/947-reflect-method/src/art/Test947.java b/test/947-reflect-method/src/art/Test947.java
index 8cb515e..90e0f81 100644
--- a/test/947-reflect-method/src/art/Test947.java
+++ b/test/947-reflect-method/src/art/Test947.java
@@ -76,7 +76,7 @@
       Redefinition.doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
       say_hi_method.invoke(t);
     } catch (Exception e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/953-invoke-polymorphic-compiler/src/Main.java b/test/953-invoke-polymorphic-compiler/src/Main.java
index 20a8fec..ce3f4db 100644
--- a/test/953-invoke-polymorphic-compiler/src/Main.java
+++ b/test/953-invoke-polymorphic-compiler/src/Main.java
@@ -70,30 +70,30 @@
   }
 
   public static void fail() {
-    System.err.println("fail");
+    System.out.println("fail");
     Thread.dumpStack();
   }
 
   public static void fail(String message) {
-    System.err.println("fail: " + message);
+    System.out.println("fail: " + message);
     Thread.dumpStack();
   }
 
   public static int Min2Print2(int a, int b) {
     int[] values = new int[] { a, b };
-    System.err.println("Running Main.Min2Print2(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print2(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
   public static int Min2Print3(int a, int b, int c) {
     int[] values = new int[] { a, b, c };
-    System.err.println("Running Main.Min2Print3(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print3(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
   public static int Min2Print6(int a, int b, int c, int d, int e, int f) {
     int[] values = new int[] { a, b, c, d, e, f };
-    System.err.println("Running Main.Min2Print6(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print6(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
@@ -106,7 +106,7 @@
                                 int y, int z) {
     int[] values = new int[] { a, b, c, d, e, f, g, h, i, j, k, l, m,
                                n, o, p, q, r, s, t, u, v, w, x, y, z };
-    System.err.println("Running Main.Min2Print26(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print26(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
@@ -176,7 +176,7 @@
         fail("No NPE for you");
     } catch (NullPointerException npe) {}
 
-    System.err.println("BasicTest done.");
+    System.out.println("BasicTest done.");
   }
 
   private static boolean And(boolean lhs, boolean rhs) {
@@ -248,7 +248,7 @@
     assertEquals(true, (boolean) mh.invoke(false, true));
     assertEquals(false, (boolean) mh.invoke(false, false));
 
-    System.err.println("$opt$ReturnBooleanTest done.");
+    System.out.println("$opt$ReturnBooleanTest done.");
   }
 
   public static void $opt$ReturnCharTest() throws Throwable {
@@ -257,7 +257,7 @@
                            MethodType.methodType(char.class, char.class));
     assertEquals('B', (char) mh.invokeExact('A'));
     assertEquals((char) -55, (char) mh.invokeExact((char) -56));
-    System.err.println("$opt$ReturnCharTest done.");
+    System.out.println("$opt$ReturnCharTest done.");
   }
 
   public static void $opt$ReturnByteTest() throws Throwable {
@@ -266,7 +266,7 @@
                                          MethodType.methodType(byte.class, byte.class, byte.class));
     assertEquals((byte) 30, (byte) mh.invokeExact((byte) 10, (byte) 3));
     assertEquals((byte) -90, (byte) mh.invoke((byte) -10, (byte) 9));
-    System.err.println("$opt$ReturnByteTest done.");
+    System.out.println("$opt$ReturnByteTest done.");
   }
 
   public static void $opt$ReturnShortTest() throws Throwable {
@@ -275,7 +275,7 @@
                            MethodType.methodType(short.class, short.class, short.class));
     assertEquals((short) 3000, (short) mh.invokeExact((short) 1000, (short) 3));
     assertEquals((short) -3000, (short) mh.invoke((short) -1000, (short) 3));
-    System.err.println("$opt$ReturnShortTest done.");
+    System.out.println("$opt$ReturnShortTest done.");
   }
 
   public static void $opt$ReturnIntTest() throws Throwable {
@@ -284,7 +284,7 @@
                            MethodType.methodType(int.class, int.class, int.class));
     assertEquals(3_000_000, (int) mh.invokeExact(1_000_000, 3));
     assertEquals(-3_000_000, (int) mh.invoke(-1_000, 3_000));
-    System.err.println("$opt$ReturnIntTest done.");
+    System.out.println("$opt$ReturnIntTest done.");
   }
 
   public static void $opt$ReturnLongTest() throws Throwable {
@@ -293,7 +293,7 @@
                            MethodType.methodType(long.class, long.class, long.class));
     assertEquals(4_294_967_295_000L, (long) mh.invokeExact(1000L, 4_294_967_295L));
     assertEquals(-4_294_967_295_000L, (long) mh.invoke(-1000L, 4_294_967_295L));
-    System.err.println("$opt$ReturnLongTest done.");
+    System.out.println("$opt$ReturnLongTest done.");
   }
 
   public static void $opt$ReturnFloatTest() throws Throwable {
@@ -302,7 +302,7 @@
                            MethodType.methodType(float.class, float.class, float.class));
     assertEquals(3.0F, (float) mh.invokeExact(1000.0F, 3e-3F));
     assertEquals(-3.0F, (float) mh.invoke(-1000.0F, 3e-3F));
-    System.err.println("$opt$ReturnFloatTest done.");
+    System.out.println("$opt$ReturnFloatTest done.");
   }
 
   public static void $opt$ReturnDoubleTest() throws Throwable {
@@ -311,7 +311,7 @@
                            MethodType.methodType(double.class, double.class, double.class));
     assertEquals(3033000.0, (double) mh.invokeExact(1000.0, 3.033e3));
     assertEquals(-3033000.0, (double) mh.invoke(-1000.0, 3.033e3));
-    System.err.println("$opt$ReturnDoubleTest done.");
+    System.out.println("$opt$ReturnDoubleTest done.");
   }
 
   public static void $opt$ReturnStringTest() throws Throwable {
@@ -320,7 +320,7 @@
                            MethodType.methodType(String.class, String.class, int.class));
     assertEquals("100010001000", (String) mh.invokeExact("1000", 3));
     assertEquals("100010001000", (String) mh.invoke("1000", 3));
-    System.err.println("$opt$ReturnStringTest done.");
+    System.out.println("$opt$ReturnStringTest done.");
   }
 
   public static void ReturnValuesTest() throws Throwable {
@@ -333,7 +333,7 @@
     $opt$ReturnFloatTest();
     $opt$ReturnDoubleTest();
     $opt$ReturnStringTest();
-    System.err.println("ReturnValuesTest done.");
+    System.out.println("ReturnValuesTest done.");
   }
 
   static class ValueHolder {
diff --git a/test/972-default-imt-collision/src/Main.java b/test/972-default-imt-collision/src/Main.java
index 6819e43..043cef1 100644
--- a/test/972-default-imt-collision/src/Main.java
+++ b/test/972-default-imt-collision/src/Main.java
@@ -24,7 +24,7 @@
       Method test = c.getMethod("testMe", iface);
       test.invoke(null, o);
     } catch (Exception e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       System.out.println("FAILED: could not run testMe!");
     }
   }
diff --git a/test/972-iface-super-multidex/src/Main.java b/test/972-iface-super-multidex/src/Main.java
index 3fb3f45..dea5f1d 100644
--- a/test/972-iface-super-multidex/src/Main.java
+++ b/test/972-iface-super-multidex/src/Main.java
@@ -22,7 +22,7 @@
       c = Class.forName("ConcreteClass");
     } catch (Exception e) {
       System.out.println("Could not load class");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     try {
@@ -30,7 +30,7 @@
       System.out.println((String)m.invoke(c.newInstance(), new Object[0]));
     } catch (Exception e) {
       System.out.println("Unknown exception occurred");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
     try {
       Method m = c.getMethod("runConflict");
@@ -41,15 +41,15 @@
       }
     } catch (AbstractMethodError e) {
       System.out.println("Unexpected AME caught");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     } catch (NoSuchMethodError e) {
       System.out.println("Unexpected NSME caught");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     } catch (IncompatibleClassChangeError e) {
       System.out.println("Expected ICCE caught");
     } catch (Throwable e) {
       System.out.println("Unknown exception caught!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/973-default-multidex/src/Main.java b/test/973-default-multidex/src/Main.java
index b93265a..c7dd6dc 100644
--- a/test/973-default-multidex/src/Main.java
+++ b/test/973-default-multidex/src/Main.java
@@ -23,7 +23,7 @@
       Method m = c.getMethod("callMethod");
       System.out.println(m.invoke(c.newInstance(), new Object[0]));
     } catch (Exception e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       System.out.println("FAILED: Could not call method");
       return;
     }
diff --git a/test/983-source-transform-verify/source_transform.cc b/test/983-source-transform-verify/source_transform.cc
index 3ef3c7c..a433dc9 100644
--- a/test/983-source-transform-verify/source_transform.cc
+++ b/test/983-source-transform-verify/source_transform.cc
@@ -34,7 +34,7 @@
 #include "jvmti.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 // Test infrastructure
diff --git a/test/988-redefine-use-after-free/expected.txt b/test/988-redefine-use-after-free/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/988-redefine-use-after-free/expected.txt
diff --git a/test/988-redefine-use-after-free/info.txt b/test/988-redefine-use-after-free/info.txt
new file mode 100644
index 0000000..2b683dd
--- /dev/null
+++ b/test/988-redefine-use-after-free/info.txt
@@ -0,0 +1,13 @@
+Regression test for b/62237378
+
+It was possible for the JVMTI class redefinition to encounter a use-after-free
+bug if there had been an attempted redefinition that failed due to a
+verification error in the same class loader. Actually encountering the bug
+required that a later redefinition happen to get the same native pointer for its
+dex-file as the failed redefinition.
+
+Hitting this use-after-free can cause many strange outcomes, from CHECK failures
+to segfaults to incorrect redefinition failures. For example, on buggy builds
+this test will fail a DCHECK on debug builds, segfault on x86_64 hosts, and, on
+32-bit hosts, have the redefinition of "LDexCacheSmash$Transform;" erroneously
+fail with JVMTI_ERROR_FAILS_VERIFICATION.
diff --git a/test/988-redefine-use-after-free/run b/test/988-redefine-use-after-free/run
new file mode 100755
index 0000000..c6e62ae
--- /dev/null
+++ b/test/988-redefine-use-after-free/run
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-run "$@" --jvmti
diff --git a/test/988-redefine-use-after-free/src-ex/DexCacheSmash.java b/test/988-redefine-use-after-free/src-ex/DexCacheSmash.java
new file mode 100644
index 0000000..2193a63
--- /dev/null
+++ b/test/988-redefine-use-after-free/src-ex/DexCacheSmash.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import art.Redefinition;
+import java.util.Base64;
+
+public class DexCacheSmash {
+  static class Transform {
+    public void foo() {}
+    public void bar() {}
+    public String getId() {
+      return "TRANSFORM_INITIAL";
+    }
+  }
+
+  static class Transform2 {
+    public String getId() {
+      return "TRANSFORM2_INITIAL";
+    }
+  }
+
+  /**
+   * A base64 encoding of the dex/class file of the Transform class above.
+   */
+  static final  Redefinition.CommonClassDefinition TRANSFORM_INITIAL =
+      new Redefinition.CommonClassDefinition(Transform.class,
+          Base64.getDecoder().decode(
+            "yv66vgAAADQAFwoABAAPCAAQBwASBwAVAQAGPGluaXQ+AQADKClWAQAEQ29kZQEAD0xpbmVOdW1i" +
+            "ZXJUYWJsZQEAA2ZvbwEAA2JhcgEABWdldElkAQAUKClMamF2YS9sYW5nL1N0cmluZzsBAApTb3Vy" +
+            "Y2VGaWxlAQASRGV4Q2FjaGVTbWFzaC5qYXZhDAAFAAYBABFUUkFOU0ZPUk1fSU5JVElBTAcAFgEA" +
+            "F0RleENhY2hlU21hc2gkVHJhbnNmb3JtAQAJVHJhbnNmb3JtAQAMSW5uZXJDbGFzc2VzAQAQamF2" +
+            "YS9sYW5nL09iamVjdAEADURleENhY2hlU21hc2gAIAADAAQAAAAAAAQAAAAFAAYAAQAHAAAAHQAB" +
+            "AAEAAAAFKrcAAbEAAAABAAgAAAAGAAEAAAATAAEACQAGAAEABwAAABkAAAABAAAAAbEAAAABAAgA" +
+            "AAAGAAEAAAAUAAEACgAGAAEABwAAABkAAAABAAAAAbEAAAABAAgAAAAGAAEAAAAVAAEACwAMAAEA" +
+            "BwAAABsAAQABAAAAAxICsAAAAAEACAAAAAYAAQAAABcAAgANAAAAAgAOABQAAAAKAAEAAwARABMA" +
+            "CA=="),
+          Base64.getDecoder().decode(
+            "ZGV4CjAzNQDhg9CfghG1SRlLClguRuFYsqihr4F7NsGQAwAAcAAAAHhWNBIAAAAAAAAAAOQCAAAS" +
+            "AAAAcAAAAAcAAAC4AAAAAgAAANQAAAAAAAAAAAAAAAUAAADsAAAAAQAAABQBAABcAgAANAEAAKgB" +
+            "AACwAQAAxAEAAMcBAADiAQAA8wEAABcCAAA3AgAASwIAAF8CAAByAgAAfQIAAIACAACNAgAAkgIA" +
+            "AJcCAACeAgAApAIAAAMAAAAEAAAABQAAAAYAAAAHAAAACAAAAAsAAAACAAAABQAAAAAAAAALAAAA" +
+            "BgAAAAAAAAAAAAEAAAAAAAAAAQANAAAAAAABAA4AAAAAAAAADwAAAAQAAQAAAAAAAAAAAAAAAAAE" +
+            "AAAAAAAAAAEAAACYAQAAzgIAAAAAAAACAAAAvwIAAMUCAAABAAEAAQAAAKsCAAAEAAAAcBAEAAAA" +
+            "DgABAAEAAAAAALACAAABAAAADgAAAAEAAQAAAAAAtQIAAAEAAAAOAAAAAgABAAAAAAC6AgAAAwAA" +
+            "ABoACQARAAAANAEAAAAAAAAAAAAAAAAAAAY8aW5pdD4AEkRleENhY2hlU21hc2guamF2YQABTAAZ" +
+            "TERleENhY2hlU21hc2gkVHJhbnNmb3JtOwAPTERleENhY2hlU21hc2g7ACJMZGFsdmlrL2Fubm90" +
+            "YXRpb24vRW5jbG9zaW5nQ2xhc3M7AB5MZGFsdmlrL2Fubm90YXRpb24vSW5uZXJDbGFzczsAEkxq" +
+            "YXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABFUUkFOU0ZPUk1fSU5JVElBTAAJ" +
+            "VHJhbnNmb3JtAAFWAAthY2Nlc3NGbGFncwADYmFyAANmb28ABWdldElkAARuYW1lAAV2YWx1ZQAT" +
+            "AAcOABUABw4AFAAHDgAXAAcOAAICAREYAQIDAgwECBAXCgAAAQMAgIAEwAIBAdgCAQHsAgEBgAMO" +
+            "AAAAAAAAAAEAAAAAAAAAAQAAABIAAABwAAAAAgAAAAcAAAC4AAAAAwAAAAIAAADUAAAABQAAAAUA" +
+            "AADsAAAABgAAAAEAAAAUAQAAAxAAAAEAAAA0AQAAASAAAAQAAABAAQAABiAAAAEAAACYAQAAAiAA" +
+            "ABIAAACoAQAAAyAAAAQAAACrAgAABCAAAAIAAAC/AgAAACAAAAEAAADOAgAAABAAAAEAAADkAgAA"));
+
+  /**
+   * A base64 encoding of the following (invalid) class.
+   *
+   *  .class LDexCacheSmash$Transform2;
+   *  .super Ljava/lang/Object;
+   *  .source "DexCacheSmash.java"
+   *
+   *  # annotations
+   *  .annotation system Ldalvik/annotation/EnclosingClass;
+   *      value = LDexCacheSmash;
+   *  .end annotation
+   *
+   *  .annotation system Ldalvik/annotation/InnerClass;
+   *      accessFlags = 0x8
+   *      name = "Transform2"
+   *  .end annotation
+   *
+   *
+   *  # direct methods
+   *  .method constructor <init>()V
+   *      .registers 1
+   *
+   *      .prologue
+   *      .line 26
+   *      invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+   *
+   *      return-void
+   *  .end method
+   *
+   *
+   *  # virtual methods
+   *  .method public getId()Ljava/lang/String;
+   *      .registers 2
+   *
+   *      .prologue
+   *      .line 28
+   *      # NB Fails verification due to this function not returning a String.
+   *      return-void
+   *  .end method
+   */
+  static final  Redefinition.CommonClassDefinition TRANSFORM2_INVALID =
+      new Redefinition.CommonClassDefinition(Transform2.class,
+          Base64.getDecoder().decode(
+            "yv66vgAAADQAEwcAEgcAEQEABjxpbml0PgEAAygpVgEABENvZGUKAAIAEAEAD0xpbmVOdW1iZXJU" +
+            "YWJsZQEABWdldElkAQAUKClMamF2YS9sYW5nL1N0cmluZzsBAApTb3VyY2VGaWxlAQASRGV4Q2Fj" +
+            "aGVTbWFzaC5qYXZhAQAMSW5uZXJDbGFzc2VzBwAPAQAKVHJhbnNmb3JtMgEADURleENhY2hlU21h" +
+            "c2gMAAMABAEAEGphdmEvbGFuZy9PYmplY3QBABhEZXhDYWNoZVNtYXNoJFRyYW5zZm9ybTIAIAAB" +
+            "AAIAAAAAAAIAAAADAAQAAQAFAAAAHQABAAEAAAAFKrcABrEAAAABAAcAAAAGAAEAAAAaAAEACAAJ" +
+            "AAEABQAAABkAAQABAAAAAbEAAAABAAcAAAAGAAEAAAAcAAIACgAAAAIACwAMAAAACgABAAEADQAO" +
+            "AAg="),
+          Base64.getDecoder().decode(
+            "ZGV4CjAzNQCFcegr6Ns+I7iEF4uLRkUX4yGrLhP6soEgAwAAcAAAAHhWNBIAAAAAAAAAAHQCAAAP" +
+            "AAAAcAAAAAcAAACsAAAAAgAAAMgAAAAAAAAAAAAAAAMAAADgAAAAAQAAAPgAAAAIAgAAGAEAABgB" +
+            "AAAgAQAANAEAADcBAABTAQAAZAEAAIgBAACoAQAAvAEAANABAADcAQAA3wEAAOwBAADzAQAA+QEA" +
+            "AAMAAAAEAAAABQAAAAYAAAAHAAAACAAAAAoAAAACAAAABQAAAAAAAAAKAAAABgAAAAAAAAAAAAEA" +
+            "AAAAAAAAAAAMAAAABAABAAAAAAAAAAAAAAAAAAQAAAAAAAAAAQAAACACAABmAgAAAAAAAAY8aW5p" +
+            "dD4AEkRleENhY2hlU21hc2guamF2YQABTAAaTERleENhY2hlU21hc2gkVHJhbnNmb3JtMjsAD0xE" +
+            "ZXhDYWNoZVNtYXNoOwAiTGRhbHZpay9hbm5vdGF0aW9uL0VuY2xvc2luZ0NsYXNzOwAeTGRhbHZp" +
+            "ay9hbm5vdGF0aW9uL0lubmVyQ2xhc3M7ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xhbmcv" +
+            "U3RyaW5nOwAKVHJhbnNmb3JtMgABVgALYWNjZXNzRmxhZ3MABWdldElkAARuYW1lAAV2YWx1ZQAC" +
+            "AwILBAgNFwkCAgEOGAEAAAAAAAIAAAAJAgAAAAIAABQCAAAAAAAAAAAAAAAAAAAaAAcOABwABw4A" +
+            "AAABAAEAAQAAADACAAAEAAAAcBACAAAADgACAAEAAAAAADUCAAABAAAADgAAAAEBAICABLwEAQHU" +
+            "BA4AAAAAAAAAAQAAAAAAAAABAAAADwAAAHAAAAACAAAABwAAAKwAAAADAAAAAgAAAMgAAAAFAAAA" +
+            "AwAAAOAAAAAGAAAAAQAAAPgAAAACIAAADwAAABgBAAAEIAAAAgAAAAACAAADEAAAAgAAABACAAAG" +
+            "IAAAAQAAACACAAADIAAAAgAAADACAAABIAAAAgAAADwCAAAAIAAAAQAAAGYCAAAAEAAAAQAAAHQC" +
+            "AAA="));
+
+  public static void run() throws Exception {
+    try {
+      Redefinition.doMultiClassRedefinition(TRANSFORM2_INVALID);
+    } catch (Exception e) {
+      if (!e.getMessage().endsWith("JVMTI_ERROR_FAILS_VERIFICATION")) {
+        throw new Error(
+            "Unexpected error: Expected failure due to JVMTI_ERROR_FAILS_VERIFICATION", e);
+      }
+    }
+    // Doing this redefinition after a redefinition that failed due to FAILS_VERIFICATION could
+    // cause a use-after-free of the Transform2's DexCache by the redefinition code if it happens
+    // that the native pointer of the art::DexFile created for the Transform redefinition aliases
+    // the one created for Transform2's failed redefinition.
+    //
+    // Due to the order of checks performed by the redefinition code, FAILS_VERIFICATION is the
+    // only failure mode that can cause use-after-frees in this way.
+    //
+    // This should never throw any exceptions (except perhaps OOME in very strange circumstances).
+    Redefinition.doMultiClassRedefinition(TRANSFORM_INITIAL);
+  }
+}
diff --git a/test/988-redefine-use-after-free/src-ex/art/Redefinition.java b/test/988-redefine-use-after-free/src-ex/art/Redefinition.java
new file mode 100644
index 0000000..56d2938
--- /dev/null
+++ b/test/988-redefine-use-after-free/src-ex/art/Redefinition.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.ArrayList;
+// Common Redefinition functions. Placed here for use by CTS
+public class Redefinition {
+  public static final class CommonClassDefinition {
+    public final Class<?> target;
+    public final byte[] class_file_bytes;
+    public final byte[] dex_file_bytes;
+
+    public CommonClassDefinition(Class<?> target, byte[] class_file_bytes, byte[] dex_file_bytes) {
+      this.target = target;
+      this.class_file_bytes = class_file_bytes;
+      this.dex_file_bytes = dex_file_bytes;
+    }
+  }
+
+  // A set of possible test configurations. Tests should set this if they need to.
+  // This must be kept in sync with the defines in ti-agent/common_helper.cc
+  public static enum Config {
+    COMMON_REDEFINE(0),
+    COMMON_RETRANSFORM(1),
+    COMMON_TRANSFORM(2);
+
+    private final int val;
+    private Config(int val) {
+      this.val = val;
+    }
+  }
+
+  public static void setTestConfiguration(Config type) {
+    nativeSetTestConfiguration(type.val);
+  }
+
+  private static native void nativeSetTestConfiguration(int type);
+
+  // Transforms the class
+  public static native void doCommonClassRedefinition(Class<?> target,
+                                                      byte[] classfile,
+                                                      byte[] dexfile);
+
+  public static void doMultiClassRedefinition(CommonClassDefinition... defs) {
+    ArrayList<Class<?>> classes = new ArrayList<>();
+    ArrayList<byte[]> class_files = new ArrayList<>();
+    ArrayList<byte[]> dex_files = new ArrayList<>();
+
+    for (CommonClassDefinition d : defs) {
+      classes.add(d.target);
+      class_files.add(d.class_file_bytes);
+      dex_files.add(d.dex_file_bytes);
+    }
+    doCommonMultiClassRedefinition(classes.toArray(new Class<?>[0]),
+                                   class_files.toArray(new byte[0][]),
+                                   dex_files.toArray(new byte[0][]));
+  }
+
+  public static void addMultiTransformationResults(CommonClassDefinition... defs) {
+    for (CommonClassDefinition d : defs) {
+      addCommonTransformationResult(d.target.getCanonicalName(),
+                                    d.class_file_bytes,
+                                    d.dex_file_bytes);
+    }
+  }
+
+  public static native void doCommonMultiClassRedefinition(Class<?>[] targets,
+                                                           byte[][] classfiles,
+                                                           byte[][] dexfiles);
+  public static native void doCommonClassRetransformation(Class<?>... target);
+  public static native void setPopRetransformations(boolean pop);
+  public static native void popTransformationFor(String name);
+  public static native void enableCommonRetransformation(boolean enable);
+  public static native void addCommonTransformationResult(String target_name,
+                                                          byte[] class_bytes,
+                                                          byte[] dex_bytes);
+}
diff --git a/test/988-redefine-use-after-free/src/Main.java b/test/988-redefine-use-after-free/src/Main.java
new file mode 100644
index 0000000..d88c471
--- /dev/null
+++ b/test/988-redefine-use-after-free/src/Main.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+  public static final String TEST_NAME = "988-redefine-use-after-free";
+  public static final int REPS = 1000;
+  public static final int STEP = 100;
+
+  public static void main(String[] args) throws Exception {
+    for (int i = 0; i < REPS; i += STEP) {
+      runSeveralTimes(STEP);
+    }
+  }
+
+  public static ClassLoader getClassLoaderFor(String location) throws Exception {
+    try {
+      Class<?> class_loader_class = Class.forName("dalvik.system.PathClassLoader");
+      Constructor<?> ctor = class_loader_class.getConstructor(String.class, ClassLoader.class);
+      return (ClassLoader)ctor.newInstance(location + "/" + TEST_NAME + "-ex.jar",
+                                           Main.class.getClassLoader());
+    } catch (ClassNotFoundException e) {
+      // Running on RI. Use URLClassLoader.
+      return new java.net.URLClassLoader(
+          new java.net.URL[] { new java.net.URL("file://" + location + "/classes-ex/") });
+    }
+  }
+
+  // Run the redefinition several times on a single class-loader to try to trigger the
+  // use-after-free bug b/62237378.
+  public static void runSeveralTimes(int times) throws Exception {
+    ClassLoader c = getClassLoaderFor(System.getenv("DEX_LOCATION"));
+
+    Class<?> klass = (Class<?>)c.loadClass("DexCacheSmash");
+    Method m = klass.getDeclaredMethod("run");
+    for (int i = 0 ; i < times; i++) {
+      m.invoke(null);
+    }
+  }
+}
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index d2cfbff..7ac019e 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -29,7 +29,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index ceb4ba2..80a2780 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -25,7 +25,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index ca52a99..7677025 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -610,7 +610,7 @@
   if [ "$HOST" != "n" ]; then
     # Use SIGRTMIN+2 to try to dump threads.
     # Use -k 1m to SIGKILL it a minute later if it hasn't ended.
-    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}"
+    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 90s ${dex2oat_cmdline} --watchdog-timeout=60000"
   fi
   if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
     vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 36ac307..214b827 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -680,5 +680,31 @@
             "The java.lang.Integer.valueOf intrinsic is not supported in PIC mode."
         ],
         "variant": "optimizing & pictest | speed-profile & pictest"
+    },
+    {
+        "tests": "202-thread-oome",
+        "description": "ASAN aborts when large thread stacks are requested.",
+        "variant": "host",
+        "env_vars": {"SANITIZE_HOST": "address"}
+    },
+    {
+        "tests": "202-thread-oome",
+        "description": "ASAN aborts when large thread stacks are requested.",
+        "variant": "target",
+        "env_vars": {"SANITIZE_TARGET": "address"}
+    },
+    {
+        "tests": "071-dexfile-map-clean",
+        "description": [ "We use prebuilt zipalign on master-art-host to avoid pulling in a lot",
+                         "of the framework. But a non-sanitized zipalign binary does not work with",
+                         "a sanitized libc++."],
+        "env_vars": {"SANITIZE_HOST": "address"}
+    },
+    {
+        "tests": "137-cfi",
+        "description": [ "ASan is reporting out-of-bounds reads in libunwind."],
+        "variant": "host",
+        "env_vars": {"SANITIZE_HOST": "address"},
+        "bug": "b/62350406"
     }
 ]
diff --git a/tools/add_package_property.sh b/tools/add_package_property.sh
new file mode 100644
index 0000000..e9294a9
--- /dev/null
+++ b/tools/add_package_property.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Sets the wrap.<package> property of an Android package.
+#
+# Pushes the given script into /data/data/<package>/ on the device, marks it
+# executable, runs restorecon on it, and then points the wrap.<package>
+# property at the pushed copy.
+#
+# Arguments:
+#   $1  PACKAGE_NAME          name of the target package
+#   $2  PROPERTY_SCRIPT_PATH  host path of the script to push
+
+if [ "$#" -ne 2 ] ; then
+  echo "USAGE: sh add_package_property.sh [PACKAGE_NAME] [PROPERTY_SCRIPT_PATH]"
+  exit 1
+fi
+PACKAGE_NAME=$1
+PROPERTY_SCRIPT_PATH=$2
+# Expansions are quoted so a host path containing spaces or glob characters is
+# passed to basename and adb as a single argument.
+PROPERTY_SCRIPT_NAME=$(basename "$PROPERTY_SCRIPT_PATH")
+# Abort if the push fails so the property is never pointed at a script that
+# was not copied to the device.
+adb push "$PROPERTY_SCRIPT_PATH" "/data/data/$PACKAGE_NAME/" || exit 1
+adb shell chmod o+x "/data/data/$PACKAGE_NAME/$PROPERTY_SCRIPT_NAME"
+adb shell restorecon "/data/data/$PACKAGE_NAME/$PROPERTY_SCRIPT_NAME"
+adb shell setprop "wrap.$PACKAGE_NAME" "/data/data/$PACKAGE_NAME/$PROPERTY_SCRIPT_NAME"
diff --git a/tools/asan.sh b/tools/asan.sh
new file mode 100644
index 0000000..b749545
--- /dev/null
+++ b/tools/asan.sh
@@ -0,0 +1,26 @@
+#!/system/bin/sh
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# NOTE: This script is installed by add_package_property.sh and is not meant to
+# be executed directly.
+#
+# Runs the given command line under asanwrapper with ASAN_OPTIONS configured to
+# keep running after an error (halt_on_error=0) and to keep reports terse, so
+# poisoned memory accesses are logged (found in logcat).
+#
+# "$@" is quoted so each argument is forwarded to asanwrapper unchanged, even
+# if it contains whitespace or glob characters.
+ASAN_OPTIONS=halt_on_error=0:verbosity=0:print_legend=0:print_full_thread_history=0:print_stats=0:print_summary=0:suppress_equal_pcs=0:fast_unwind_on_fatal=1 asanwrapper "$@"