Class Hierarchy Analysis (CHA)

The class linker now tracks whether a method has a single implementation;
if so, the JIT compiler tries to devirtualize virtual calls to that method
into direct calls. If the single-implementation assumption is later violated
by additional class linking, compiled code that makes the assumption is
invalidated, and deoptimization is triggered for such compiled code that is
live on the stack. Instead of patching return pc's on the stack, a CHA guard
is added which checks a hidden should_deoptimize flag and deoptimizes when it
is set. This approach limits the number of deoptimization points.

This CL does not devirtualize abstract/interface method invocation.
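
Illustration (not part of this change; names are made up): with only Base
loaded below, Base.foo() has a single implementation, so the JIT can compile
b.foo() as a direct call guarded by the hidden should_deoptimize flag.
Linking Sub later violates the assumption; dependent compiled code is then
invalidated and frames still executing it are deoptimized via the guard.

  class Base {
    void foo() { System.out.println("Base.foo"); }
  }

  // Linking this subclass invalidates Base.foo()'s single-implementation
  // status.
  class Sub extends Base {
    @Override
    void foo() { System.out.println("Sub.foo"); }
  }

  class Caller {
    static void call(Base b) {
      b.foo();  // Devirtualized while the assumption holds.
    }
  }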

Slides on CHA:
https://docs.google.com/a/google.com/presentation/d/1Ax6cabP1vM44aLOaJU3B26n5fTE9w5YU-1CRevIDsBc/edit?usp=sharing

Change-Id: I18bf716a601b6413b46312e925a6ad9e4008efa4
Test: ART_TEST_JIT=true m test-art-host/target-run-test test-art-host-gtest
diff --git a/runtime/Android.bp b/runtime/Android.bp
index c6f479f..08be5b2 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -45,6 +45,7 @@
         "base/timing_logger.cc",
         "base/unix_file/fd_file.cc",
         "base/unix_file/random_access_file_utils.cc",
+        "cha.cc",
         "check_jni.cc",
         "class_linker.cc",
         "class_table.cc",
@@ -514,6 +515,7 @@
         "base/transform_iterator_test.cc",
         "base/variant_map_test.cc",
         "base/unix_file/fd_file_test.cc",
+        "cha_test.cc",
         "class_linker_test.cc",
         "compiler_filter_test.cc",
         "dex_file_test.cc",
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 730a9c3..ef03bb3 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -105,7 +105,7 @@
       DoGetAccessFlagsHelper<kReadBarrierOption>(this);
     }
   }
-  return access_flags_;
+  return access_flags_.load(std::memory_order_relaxed);
 }
 
 inline uint16_t ArtMethod::GetMethodIndex() {
@@ -452,6 +452,53 @@
   return type;
 }
 
+inline bool ArtMethod::HasSingleImplementation() {
+  if (IsFinal() || GetDeclaringClass()->IsFinal()) {
+    // We don't set kAccSingleImplementation for these cases since intrinsics
+    // can also use the same flag bit.
+    return true;
+  }
+  return (GetAccessFlags() & kAccSingleImplementation) != 0;
+}
+
+inline void ArtMethod::SetIntrinsic(uint32_t intrinsic) {
+  DCHECK(IsUint<8>(intrinsic));
+  // Currently we only do intrinsics for static/final methods or methods of final
+  // classes. We don't set kAccSingleImplementation for those methods.
+  DCHECK(IsStatic() || IsFinal() || GetDeclaringClass()->IsFinal()) <<
+      "Potential conflict with kAccSingleImplementation";
+  uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
+      kAccIntrinsic |
+      (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
+  if (kIsDebugBuild) {
+    uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
+    bool is_constructor = IsConstructor();
+    bool is_synchronized = IsSynchronized();
+    bool skip_access_checks = SkipAccessChecks();
+    bool is_fast_native = IsFastNative();
+    bool is_copied = IsCopied();
+    bool is_miranda = IsMiranda();
+    bool is_default = IsDefault();
+    bool is_default_conflict = IsDefaultConflicting();
+    bool is_compilable = IsCompilable();
+    bool must_count_locks = MustCountLocks();
+    SetAccessFlags(new_value);
+    DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
+    DCHECK_EQ(is_constructor, IsConstructor());
+    DCHECK_EQ(is_synchronized, IsSynchronized());
+    DCHECK_EQ(skip_access_checks, SkipAccessChecks());
+    DCHECK_EQ(is_fast_native, IsFastNative());
+    DCHECK_EQ(is_copied, IsCopied());
+    DCHECK_EQ(is_miranda, IsMiranda());
+    DCHECK_EQ(is_default, IsDefault());
+    DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
+    DCHECK_EQ(is_compilable, IsCompilable());
+    DCHECK_EQ(must_count_locks, MustCountLocks());
+  } else {
+    SetAccessFlags(new_value);
+  }
+}
+
 template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
 void ArtMethod::VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) {
   if (LIKELY(!declaring_class_.IsNull())) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index eeece90..77d799f 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -51,6 +51,17 @@
 extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
                                              const char*);
 
+ArtMethod* ArtMethod::GetSingleImplementation() {
+  DCHECK(!IsNative());
+  if (!IsAbstract()) {
+    // A non-abstract method's single implementation is itself.
+    return this;
+  }
+  // TODO: add single-implementation logic for abstract method by storing it
+  // in ptr_sized_fields_.
+  return nullptr;
+}
+
 ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
                                           jobject jlr_method) {
   ObjPtr<mirror::Executable> executable = soa.Decode<mirror::Executable>(jlr_method);
@@ -345,7 +356,7 @@
   CHECK(!IsFastNative()) << PrettyMethod();
   CHECK(native_method != nullptr) << PrettyMethod();
   if (is_fast) {
-    SetAccessFlags(GetAccessFlags() | kAccFastNative);
+    AddAccessFlags(kAccFastNative);
   }
   SetEntryPointFromJni(native_method);
 }
@@ -503,7 +514,7 @@
     if (oat_file == nullptr) {
       return nullptr;
     }
-    return oat_file->DexBegin() + header->vmap_table_offset_;
+    return oat_file->DexBegin() + header->GetVmapTableOffset();
   } else {
     return oat_method.GetVmapTable();
   }
@@ -601,7 +612,7 @@
   DCHECK(method_header->Contains(pc))
       << PrettyMethod()
       << " " << std::hex << pc << " " << oat_entry_point
-      << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
+      << " " << (uintptr_t)(method_header->GetCode() + method_header->GetCodeSize());
   return method_header;
 }
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 00fab65..2ae8147 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -85,9 +85,29 @@
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE uint32_t GetAccessFlags();
 
+  // This version should only be called when it's certain there is no
+  // concurrent access, so there is no need to guarantee atomicity. For
+  // example, before the method is linked.
   void SetAccessFlags(uint32_t new_access_flags) {
-    // Not called within a transaction.
-    access_flags_ = new_access_flags;
+    access_flags_.store(new_access_flags, std::memory_order_relaxed);
+  }
+
+  // This setter guarantees atomicity.
+  void AddAccessFlags(uint32_t flag) {
+    uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+    uint32_t new_access_flags;
+    do {
+      new_access_flags = old_access_flags | flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
+  }
+
+  // This setter guarantees atomicity.
+  void ClearAccessFlags(uint32_t flag) {
+    uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+    uint32_t new_access_flags;
+    do {
+      new_access_flags = old_access_flags & ~flag;
+    } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
   }
 
   // Approximate what kind of method call would be used for this method.
@@ -142,39 +162,7 @@
     return (GetAccessFlags() & kAccIntrinsic) != 0;
   }
 
-  void SetIntrinsic(uint32_t intrinsic) {
-    DCHECK(IsUint<8>(intrinsic));
-    uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
-        kAccIntrinsic |
-        (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
-    if (kIsDebugBuild) {
-      uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
-      bool is_constructor = IsConstructor();
-      bool is_synchronized = IsSynchronized();
-      bool skip_access_checks = SkipAccessChecks();
-      bool is_fast_native = IsFastNative();
-      bool is_copied = IsCopied();
-      bool is_miranda = IsMiranda();
-      bool is_default = IsDefault();
-      bool is_default_conflict = IsDefaultConflicting();
-      bool is_compilable = IsCompilable();
-      bool must_count_locks = MustCountLocks();
-      SetAccessFlags(new_value);
-      DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
-      DCHECK_EQ(is_constructor, IsConstructor());
-      DCHECK_EQ(is_synchronized, IsSynchronized());
-      DCHECK_EQ(skip_access_checks, SkipAccessChecks());
-      DCHECK_EQ(is_fast_native, IsFastNative());
-      DCHECK_EQ(is_copied, IsCopied());
-      DCHECK_EQ(is_miranda, IsMiranda());
-      DCHECK_EQ(is_default, IsDefault());
-      DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
-      DCHECK_EQ(is_compilable, IsCompilable());
-      DCHECK_EQ(must_count_locks, MustCountLocks());
-    } else {
-      SetAccessFlags(new_value);
-    }
-  }
+  ALWAYS_INLINE void SetIntrinsic(uint32_t intrinsic) REQUIRES_SHARED(Locks::mutator_lock_);
 
   uint32_t GetIntrinsic() {
     DCHECK(IsIntrinsic());
@@ -259,7 +247,7 @@
 
   void SetSkipAccessChecks() {
     DCHECK(!SkipAccessChecks());
-    SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
+    AddAccessFlags(kAccSkipAccessChecks);
   }
 
   // Should this method be run in the interpreter and count locks (e.g., failed structured-
@@ -461,6 +449,26 @@
     return DataOffset(kRuntimePointerSize);
   }
 
+  ALWAYS_INLINE bool HasSingleImplementation() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  ALWAYS_INLINE void SetHasSingleImplementation(bool single_impl) {
+    DCHECK(!IsIntrinsic()) << "conflict with intrinsic bits";
+    if (single_impl) {
+      AddAccessFlags(kAccSingleImplementation);
+    } else {
+      ClearAccessFlags(kAccSingleImplementation);
+    }
+  }
+
+  ArtMethod* GetSingleImplementation()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  ALWAYS_INLINE void SetSingleImplementation(ArtMethod* method, PointerSize pointer_size) {
+    DCHECK(!IsNative());
+    DCHECK(IsAbstract());  // Non-abstract method's single implementation is just itself.
+    SetDataPtrSize(method, pointer_size);
+  }
+
   void* GetEntryPointFromJni() {
     DCHECK(IsNative());
     return GetEntryPointFromJniPtrSize(kRuntimePointerSize);
@@ -655,7 +663,10 @@
   GcRoot<mirror::Class> declaring_class_;
 
   // Access flags; low 16 bits are defined by spec.
-  uint32_t access_flags_;
+  // Getting and setting these flags needs to be atomic when concurrent access
+  // is possible, e.g. after this method's class is linked, such as when
+  // setting the verifier or single-implementation flags.
+  std::atomic<std::uint32_t> access_flags_;
 
   /* Dex file fields. The defining dex file is available via declaring_class_->dex_cache_ */
 
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 62cd2a7..2feb28a 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -94,6 +94,7 @@
   kArenaAllocGraphChecker,
   kArenaAllocVerifier,
   kArenaAllocCallingConvention,
+  kArenaAllocCHA,
   kNumArenaAllocKinds
 };
 
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index e8ef69f..6665f95 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -58,6 +58,7 @@
 Mutex* Locks::reference_queue_soft_references_lock_ = nullptr;
 Mutex* Locks::reference_queue_weak_references_lock_ = nullptr;
 Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::cha_lock_ = nullptr;
 Mutex* Locks::thread_list_lock_ = nullptr;
 ConditionVariable* Locks::thread_exit_cond_ = nullptr;
 Mutex* Locks::thread_suspend_count_lock_ = nullptr;
@@ -955,6 +956,7 @@
     DCHECK(logging_lock_ != nullptr);
     DCHECK(mutator_lock_ != nullptr);
     DCHECK(profiler_lock_ != nullptr);
+    DCHECK(cha_lock_ != nullptr);
     DCHECK(thread_list_lock_ != nullptr);
     DCHECK(thread_suspend_count_lock_ != nullptr);
     DCHECK(trace_lock_ != nullptr);
@@ -1014,6 +1016,10 @@
     DCHECK(breakpoint_lock_ == nullptr);
     breakpoint_lock_ = new ReaderWriterMutex("breakpoint lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kCHALock);
+    DCHECK(cha_lock_ == nullptr);
+    cha_lock_ = new Mutex("CHA lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
     DCHECK(classlinker_classes_lock_ == nullptr);
     classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 7e73e0d..21b5bb9 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -97,6 +97,7 @@
   kMonitorPoolLock,
   kClassLinkerClassesLock,  // TODO rename.
   kJitCodeCacheLock,
+  kCHALock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
@@ -627,9 +628,12 @@
   // TODO: improve name, perhaps instrumentation_update_lock_.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
 
+  // Guards Class Hierarchy Analysis (CHA).
+  static Mutex* cha_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(cha_lock_);
 
   // Signaled when threads terminate. Used to determine when all non-daemons have terminated.
   static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_);
diff --git a/runtime/cha.cc b/runtime/cha.cc
new file mode 100644
index 0000000..be675a8
--- /dev/null
+++ b/runtime/cha.cc
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "thread_pool.h"
+
+namespace art {
+
+void ClassHierarchyAnalysis::AddDependency(ArtMethod* method,
+                                           ArtMethod* dependent_method,
+                                           OatQuickMethodHeader* dependent_header) {
+  auto it = cha_dependency_map_.find(method);
+  if (it == cha_dependency_map_.end()) {
+    cha_dependency_map_[method] =
+        new std::vector<std::pair<art::ArtMethod*, art::OatQuickMethodHeader*>>();
+    it = cha_dependency_map_.find(method);
+  } else {
+    DCHECK(it->second != nullptr);
+  }
+  it->second->push_back(std::make_pair(dependent_method, dependent_header));
+}
+
+std::vector<std::pair<ArtMethod*, OatQuickMethodHeader*>>*
+    ClassHierarchyAnalysis::GetDependents(ArtMethod* method) {
+  auto it = cha_dependency_map_.find(method);
+  if (it != cha_dependency_map_.end()) {
+    DCHECK(it->second != nullptr);
+    return it->second;
+  }
+  return nullptr;
+}
+
+void ClassHierarchyAnalysis::RemoveDependencyFor(ArtMethod* method) {
+  auto it = cha_dependency_map_.find(method);
+  if (it != cha_dependency_map_.end()) {
+    auto dependents = it->second;
+    cha_dependency_map_.erase(it);
+    delete dependents;
+  }
+}
+
+void ClassHierarchyAnalysis::RemoveDependentsWithMethodHeaders(
+    const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+  // Iterate through all entries in the dependency map and remove any entry that
+  // contains one of those in method_headers.
+  for (auto map_it = cha_dependency_map_.begin(); map_it != cha_dependency_map_.end(); ) {
+    auto dependents = map_it->second;
+    for (auto vec_it = dependents->begin(); vec_it != dependents->end(); ) {
+      OatQuickMethodHeader* method_header = vec_it->second;
+      auto it = std::find(method_headers.begin(), method_headers.end(), method_header);
+      if (it != method_headers.end()) {
+        vec_it = dependents->erase(vec_it);
+      } else {
+        vec_it++;
+      }
+    }
+    // Remove the map entry if there are no more dependents.
+    if (dependents->empty()) {
+      map_it = cha_dependency_map_.erase(map_it);
+      delete dependents;
+    } else {
+      map_it++;
+    }
+  }
+}
+
+// This stack visitor walks the stack and, for compiled frames with one of the
+// given method headers, sets the should_deoptimize flag on the stack to 1.
+// TODO: also set the register value to 1 when should_deoptimize is allocated in
+// a register.
+class CHAStackVisitor FINAL  : public StackVisitor {
+ public:
+  CHAStackVisitor(Thread* thread_in,
+                  Context* context,
+                  const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+        method_headers_(method_headers) {
+  }
+
+  bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* method = GetMethod();
+    if (method == nullptr || method->IsRuntimeMethod() || method->IsNative()) {
+      return true;
+    }
+    if (GetCurrentQuickFrame() == nullptr) {
+      // Not compiled code.
+      return true;
+    }
+    // Method may have multiple versions of compiled code. Check
+    // the method header to see if it has should_deoptimize flag.
+    const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+    if (!method_header->HasShouldDeoptimizeFlag()) {
+      // This compiled version doesn't have the should_deoptimize flag. Skip.
+      return true;
+    }
+    auto it = std::find(method_headers_.begin(), method_headers_.end(), method_header);
+    if (it == method_headers_.end()) {
+      // Not in the list of method headers that should be deoptimized.
+      return true;
+    }
+
+    // The compiled code on the stack is not valid anymore. Need to deoptimize.
+    SetShouldDeoptimizeFlag();
+
+    return true;
+  }
+
+ private:
+  void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
+    QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+    size_t frame_size = frame_info.FrameSizeInBytes();
+    uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+    size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) *
+        GetBytesPerGprSpillLocation(kRuntimeISA);
+    size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) *
+        GetBytesPerFprSpillLocation(kRuntimeISA);
+    size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+    uint8_t* should_deoptimize_addr = sp + offset;
+    // Set deoptimization flag to 1.
+    DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1);
+    *should_deoptimize_addr = 1;
+  }
+
+  // Set of method headers for compiled code that should be deoptimized.
+  const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHAStackVisitor);
+};
+
+class CHACheckpoint FINAL : public Closure {
+ public:
+  explicit CHACheckpoint(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      : barrier_(0),
+        method_headers_(method_headers) {}
+
+  void Run(Thread* thread) OVERRIDE {
+    // Note thread and self may not be equal if thread was already suspended at
+    // the point of the request.
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    CHAStackVisitor visitor(thread, nullptr, method_headers_);
+    visitor.WalkStack();
+    barrier_.Pass(self);
+  }
+
+  void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
+    Thread* self = Thread::Current();
+    ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+    barrier_.Increment(self, threads_running_checkpoint);
+  }
+
+ private:
+  // The barrier to be passed through and for the requestor to wait upon.
+  Barrier barrier_;
+  // List of method headers for invalidated compiled code.
+  const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CHACheckpoint);
+};
+
+void ClassHierarchyAnalysis::VerifyNonSingleImplementation(mirror::Class* verify_class,
+                                                           uint16_t verify_index) {
+  // Grab cha_lock_ to make sure all single-implementation updates are seen.
+  PointerSize image_pointer_size =
+      Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+  while (verify_class != nullptr) {
+    if (verify_index >= verify_class->GetVTableLength()) {
+      return;
+    }
+    ArtMethod* verify_method = verify_class->GetVTableEntry(verify_index, image_pointer_size);
+    DCHECK(!verify_method->HasSingleImplementation())
+        << "class: " << verify_class->PrettyClass()
+        << " verify_method: " << verify_method->PrettyMethod(true);
+    verify_class = verify_class->GetSuperClass();
+  }
+}
+
+void ClassHierarchyAnalysis::CheckSingleImplementationInfo(
+    Handle<mirror::Class> klass,
+    ArtMethod* virtual_method,
+    ArtMethod* method_in_super,
+    std::unordered_set<ArtMethod*>& invalidated_single_impl_methods) {
+  // TODO: if klass is not instantiable, virtual_method isn't invocable yet, so
+  // even if it overrides, it doesn't invalidate the single-implementation
+  // assumption.
+
+  DCHECK_NE(virtual_method, method_in_super);
+  DCHECK(method_in_super->GetDeclaringClass()->IsResolved()) << "class isn't resolved";
+  // If virtual_method doesn't come from a default interface method, it should
+  // be supplied by klass.
+  DCHECK(virtual_method->IsCopied() ||
+         virtual_method->GetDeclaringClass() == klass.Get());
+
+  // A new virtual_method should mark method_in_super as
+  // non-single-implementation (if not marked already).
+  // We don't grab cha_lock_: the single-implementation flag won't be set back
+  // to true once it's set to false.
+  if (!method_in_super->HasSingleImplementation()) {
+    // method_in_super already has multiple implementations. All methods in the
+    // same vtable slot in its super classes should already be marked as
+    // non-single-implementation.
+    if (kIsDebugBuild) {
+      VerifyNonSingleImplementation(klass->GetSuperClass()->GetSuperClass(),
+                                    method_in_super->GetMethodIndex());
+    }
+    return;
+  }
+
+  // Native methods don't have the single-implementation flag set.
+  DCHECK(!method_in_super->IsNative());
+  // Invalidate method_in_super's single-implementation status.
+  invalidated_single_impl_methods.insert(method_in_super);
+}
+
+void ClassHierarchyAnalysis::InitSingleImplementationFlag(Handle<mirror::Class> klass,
+                                                          ArtMethod* method) {
+  DCHECK(method->IsCopied() || method->GetDeclaringClass() == klass.Get());
+  if (klass->IsFinal() || method->IsFinal()) {
+    // Final classes or methods do not need CHA for devirtualization.
+    // This frees up modifier bits for intrinsics, which currently are only
+    // used for static/final methods or methods of final classes.
+    return;
+  }
+  if (method->IsNative()) {
+    // A native method's invocation overhead is already high and it
+    // cannot be inlined, so it's not worthwhile to devirtualize the
+    // call, which would add a deoptimization point.
+    DCHECK(!method->HasSingleImplementation());
+  } else {
+    method->SetHasSingleImplementation(true);
+    if (method->IsAbstract()) {
+      // There is no real implementation yet.
+      // TODO: implement single-implementation logic for abstract methods.
+      DCHECK(method->GetSingleImplementation() == nullptr);
+    } else {
+      // The single implementation of a non-abstract method is itself.
+      DCHECK_EQ(method->GetSingleImplementation(), method);
+    }
+  }
+}
+
+void ClassHierarchyAnalysis::UpdateAfterLoadingOf(Handle<mirror::Class> klass) {
+  if (klass->IsInterface()) {
+    return;
+  }
+  mirror::Class* super_class = klass->GetSuperClass();
+  if (super_class == nullptr) {
+    return;
+  }
+
+  // Keeps track of all methods whose single-implementation assumption
+  // is invalidated by linking `klass`.
+  std::unordered_set<ArtMethod*> invalidated_single_impl_methods;
+
+  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+  // Do an entry-by-entry comparison of vtable contents with super's vtable.
+  for (int32_t i = 0; i < super_class->GetVTableLength(); ++i) {
+    ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+    ArtMethod* method_in_super = super_class->GetVTableEntry(i, image_pointer_size);
+    if (method == method_in_super) {
+      // vtable slot entry is inherited from super class.
+      continue;
+    }
+    InitSingleImplementationFlag(klass, method);
+    CheckSingleImplementationInfo(klass,
+                                  method,
+                                  method_in_super,
+                                  invalidated_single_impl_methods);
+  }
+
+  // For new virtual methods that don't override a super class method.
+  for (int32_t i = super_class->GetVTableLength(); i < klass->GetVTableLength(); ++i) {
+    ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+    InitSingleImplementationFlag(klass, method);
+  }
+
+  Runtime* const runtime = Runtime::Current();
+  if (!invalidated_single_impl_methods.empty()) {
+    Thread* self = Thread::Current();
+    // Method headers for compiled code to be invalidated.
+    std::unordered_set<OatQuickMethodHeader*> dependent_method_headers;
+
+    {
+      // We do this under cha_lock_. Committing code also grabs this lock to
+      // make sure the code is only committed when all single-implementation
+      // assumptions are still true.
+      MutexLock cha_mu(self, *Locks::cha_lock_);
+      // Invalidate compiled methods that assume some virtual calls have only
+      // single implementations.
+      for (ArtMethod* invalidated : invalidated_single_impl_methods) {
+        if (!invalidated->HasSingleImplementation()) {
+          // It might already have been invalidated while other class linking
+          // was going on.
+          continue;
+        }
+        invalidated->SetHasSingleImplementation(false);
+
+        if (runtime->IsAotCompiler()) {
+          // No need to invalidate any compiled code as the AotCompiler doesn't
+          // run any code.
+          continue;
+        }
+
+        // Invalidate all dependents.
+        auto dependents = GetDependents(invalidated);
+        if (dependents == nullptr) {
+          continue;
+        }
+        for (const auto& dependent : *dependents) {
+          ArtMethod* method = dependent.first;
+          OatQuickMethodHeader* method_header = dependent.second;
+          VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
+          DCHECK(runtime->UseJitCompilation());
+          runtime->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+              method, method_header);
+          dependent_method_headers.insert(method_header);
+        }
+        RemoveDependencyFor(invalidated);
+      }
+    }
+
+    if (dependent_method_headers.empty()) {
+      return;
+    }
+    // Deoptimize compiled code on the stack that should have been invalidated.
+    CHACheckpoint checkpoint(dependent_method_headers);
+    size_t threads_running_checkpoint = runtime->GetThreadList()->RunCheckpoint(&checkpoint);
+    if (threads_running_checkpoint != 0) {
+      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/runtime/cha.h b/runtime/cha.h
new file mode 100644
index 0000000..ada5c89
--- /dev/null
+++ b/runtime/cha.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CHA_H_
+#define ART_RUNTIME_CHA_H_
+
+#include "art_method.h"
+#include "base/enums.h"
+#include "base/mutex.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "oat_quick_method_header.h"
+#include <unordered_map>
+#include <unordered_set>
+
+namespace art {
+
+/**
+ * Class Hierarchy Analysis (CHA) tries to devirtualize virtual calls into
+ * direct calls based on the info generated by analyzing class hierarchies.
+ * If a class is not subclassed, or even if it's subclassed but one of its
+ * virtual methods isn't overridden, a virtual call for that method can be
+ * changed into a direct call.
+ *
+ * Each virtual method carries a single-implementation status. The status is
+ * incrementally maintained at the end of class linking, when method
+ * overriding takes effect.
+ *
+ * The compiler takes advantage of a method's single-implementation info.
+ * If method A has the single-implementation flag set, the compiler
+ * devirtualizes the virtual call for method A into a direct call, and
+ * further tries to inline the direct call as a result. The compiler also
+ * registers a dependency recording that the compiled code depends on the
+ * assumption that method A has a single implementation.
+ *
+ * When single-implementation info is updated at the end of class linking
+ * and method A's single-implementation status is invalidated, all compiled
+ * code that depends on the assumption that method A has a single
+ * implementation needs to be invalidated. Method entrypoints that have this
+ * dependency are updated as a result. Method A can later be recompiled with
+ * less aggressive assumptions.
+ *
+ * For live compiled code that's on the stack, deoptimization is initiated
+ * to force the invalidated compiled code into interpreter mode to guarantee
+ * correctness. The deoptimization mechanism used is a hybrid of
+ * synchronous and asynchronous deoptimization. The synchronous part checks
+ * a hidden local variable flag for the method and, if it is true, initiates
+ * deoptimization. The asynchronous part issues a checkpoint that walks the
+ * stack and, for any compiled code on the stack that should be deoptimized,
+ * sets the hidden local variable to true.
+ *
+ * The cha_lock_ needs to be held when updating single-implementation status
+ * and when registering/unregistering CHA dependencies. Registering a CHA
+ * dependency and making the compiled code visible also need to be atomic.
+ * Otherwise, we may miss invalidating CHA dependents, or make compiled code
+ * visible even after it is invalidated. Care needs to be taken between
+ * cha_lock_ and JitCodeCache::lock_ to guarantee this atomicity.
+ *
+ * We base our CHA on dynamically linked class profiles instead of doing static
+ * analysis. Static analysis can be too aggressive due to dynamic class loading
+ * at runtime, and too conservative since some classes may never really be
+ * loaded at runtime.
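+ *
+ * A typical flow in this CL (summary for readability, not new behavior):
+ *   1. During JIT compilation, the compiler collects the methods whose
+ *      single-implementation status it relied on; JitCodeCache::CommitCode
+ *      re-checks those flags under cha_lock_ and calls AddDependency() for
+ *      each of them before making the compiled code visible.
+ *   2. When the class linker links a class that overrides one of those
+ *      methods, UpdateAfterLoadingOf() clears the single-implementation flag,
+ *      invalidates the dependent compiled code via the JIT code cache, and
+ *      runs a checkpoint that sets the should_deoptimize flag for frames
+ *      still executing that code on the stack.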
+ */
+class ClassHierarchyAnalysis {
+ public:
+  // Types for recording CHA dependencies.
+  // For invalidating a CHA dependency, we need to know both the ArtMethod and
+  // the method header. If the ArtMethod has compiled code with the method header
+  // as the entrypoint, we update the entrypoint to the interpreter bridge.
+  // We will also deoptimize frames that are currently executing the code of
+  // the method header.
+  typedef std::pair<ArtMethod*, OatQuickMethodHeader*> MethodAndMethodHeaderPair;
+  typedef std::vector<MethodAndMethodHeaderPair> ListOfDependentPairs;
+
+  ClassHierarchyAnalysis() {}
+
+  // Add a dependency that compiled code with `dependent_header` for `dependent_method`
+  // assumes that virtual `method` has single-implementation.
+  void AddDependency(ArtMethod* method,
+                     ArtMethod* dependent_method,
+                     OatQuickMethodHeader* dependent_header) REQUIRES(Locks::cha_lock_);
+
+  // Return compiled code that assumes that `method` has single-implementation.
+  std::vector<MethodAndMethodHeaderPair>* GetDependents(ArtMethod* method)
+      REQUIRES(Locks::cha_lock_);
+
+  // Remove dependency tracking for compiled code that assumes that
+  // `method` has single-implementation.
+  void RemoveDependencyFor(ArtMethod* method) REQUIRES(Locks::cha_lock_);
+
+  // Remove from cha_dependency_map_ all entries that contain an OatQuickMethodHeader from
+  // the given `method_headers` set.
+  // This is used when some compiled code is freed.
+  void RemoveDependentsWithMethodHeaders(
+      const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      REQUIRES(Locks::cha_lock_);
+
+  // Update CHA info for methods that `klass` overrides, after loading `klass`.
+  void UpdateAfterLoadingOf(Handle<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  void InitSingleImplementationFlag(Handle<mirror::Class> klass, ArtMethod* method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // `virtual_method` in `klass` overrides `method_in_super`.
+  // This will invalidate some assumptions on single-implementation.
+  // Append methods that should have their single-implementation flag invalidated
+  // to `invalidated_single_impl_methods`.
+  void CheckSingleImplementationInfo(
+      Handle<mirror::Class> klass,
+      ArtMethod* virtual_method,
+      ArtMethod* method_in_super,
+      std::unordered_set<ArtMethod*>& invalidated_single_impl_methods)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Verify all methods in the same vtable slot from verify_class and its supers
+  // don't have single-implementation.
+  void VerifyNonSingleImplementation(mirror::Class* verify_class, uint16_t verify_index)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // A map that maps a method to a set of compiled code that assumes that method has a
+  // single implementation, which is used to do CHA-based devirtualization.
+  std::unordered_map<ArtMethod*, ListOfDependentPairs*> cha_dependency_map_
+    GUARDED_BY(Locks::cha_lock_);
+
+  DISALLOW_COPY_AND_ASSIGN(ClassHierarchyAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_CHA_H_
diff --git a/runtime/cha_test.cc b/runtime/cha_test.cc
new file mode 100644
index 0000000..d2f335e
--- /dev/null
+++ b/runtime/cha_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class CHATest : public CommonRuntimeTest {};
+
+// Mocks some methods.
+#define METHOD1 (reinterpret_cast<ArtMethod*>(8u))
+#define METHOD2 (reinterpret_cast<ArtMethod*>(16u))
+#define METHOD3 (reinterpret_cast<ArtMethod*>(24u))
+
+// Mocks some method headers.
+#define METHOD_HEADER1 (reinterpret_cast<OatQuickMethodHeader*>(128u))
+#define METHOD_HEADER2 (reinterpret_cast<OatQuickMethodHeader*>(136u))
+#define METHOD_HEADER3 (reinterpret_cast<OatQuickMethodHeader*>(144u))
+
+TEST_F(CHATest, CHACheckDependency) {
+  ClassHierarchyAnalysis cha;
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+
+  cha.AddDependency(METHOD1, METHOD2, METHOD_HEADER2);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  auto dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD2);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+
+  cha.AddDependency(METHOD1, METHOD3, METHOD_HEADER3);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 2u);
+  ASSERT_EQ(dependents->at(0).first, METHOD2);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+  ASSERT_EQ(dependents->at(1).first, METHOD3);
+  ASSERT_EQ(dependents->at(1).second, METHOD_HEADER3);
+
+  std::unordered_set<OatQuickMethodHeader*> headers;
+  headers.insert(METHOD_HEADER2);
+  cha.RemoveDependentsWithMethodHeaders(headers);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD3);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER3);
+
+  cha.AddDependency(METHOD2, METHOD1, METHOD_HEADER1);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD1);
+  ASSERT_EQ(dependents->size(), 1u);
+  dependents = cha.GetDependents(METHOD2);
+  ASSERT_EQ(dependents->size(), 1u);
+
+  headers.insert(METHOD_HEADER3);
+  cha.RemoveDependentsWithMethodHeaders(headers);
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  dependents = cha.GetDependents(METHOD2);
+  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents->at(0).first, METHOD1);
+  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER1);
+
+  cha.RemoveDependencyFor(METHOD2);
+  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+}
+
+}  // namespace art
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0de647f..f13060a 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -40,6 +40,7 @@
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
 #include "base/value_object.h"
+#include "cha.h"
 #include "class_linker-inl.h"
 #include "class_table-inl.h"
 #include "compiler_callbacks.h"
@@ -96,6 +97,7 @@
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
 #include "trace.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
@@ -5111,6 +5113,12 @@
     if (klass->ShouldHaveImt()) {
       klass->SetImt(imt, image_pointer_size_);
     }
+
+    // Update CHA info based on whether we override methods.
+    // Have to do this before setting the class as resolved, which allows
+    // instantiation of klass.
+    Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(klass);
+
     // This will notify waiters on klass that saw the not yet resolved
     // class in the class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5154,6 +5162,11 @@
       }
     }
 
+    // Update CHA info based on whether we override methods.
+    // Have to do this before setting the class as resolved, which allows
+    // instantiation of klass.
+    Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(h_new_class);
+
     // This will notify waiters on temp class that saw the not yet resolved class in the
     // class_table_ during EnsureResolved.
     mirror::Class::SetStatus(klass, mirror::Class::kStatusRetired, self);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2ae989a..19f3099 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -23,6 +23,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
+#include "cha.h"
 #include "debugger_interface.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
@@ -217,7 +218,9 @@
                                   const uint8_t* code,
                                   size_t code_size,
                                   bool osr,
-                                  Handle<mirror::ObjectArray<mirror::Object>> roots) {
+                                  Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                  bool has_should_deoptimize_flag,
+                                  const ArenaSet<ArtMethod*>& cha_single_implementation_list) {
   uint8_t* result = CommitCodeInternal(self,
                                        method,
                                        stack_map,
@@ -228,7 +231,9 @@
                                        code,
                                        code_size,
                                        osr,
-                                       roots);
+                                       roots,
+                                       has_should_deoptimize_flag,
+                                       cha_single_implementation_list);
   if (result == nullptr) {
     // Retry.
     GarbageCollectCache(self);
@@ -242,7 +247,9 @@
                                 code,
                                 code_size,
                                 osr,
-                                roots);
+                                roots,
+                                has_should_deoptimize_flag,
+                                cha_single_implementation_list);
   }
   return result;
 }
@@ -359,7 +366,7 @@
   }
 }
 
-void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
+void JitCodeCache::FreeCode(const void* code_ptr) {
   uintptr_t allocation = FromCodeToAllocation(code_ptr);
   // Notify native debugger that we are about to remove the code.
   // It does nothing if we are not using native debugger.
@@ -368,41 +375,69 @@
   FreeCode(reinterpret_cast<uint8_t*>(allocation));
 }
 
+void JitCodeCache::FreeAllMethodHeaders(
+    const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+  {
+    MutexLock mu(Thread::Current(), *Locks::cha_lock_);
+    Runtime::Current()->GetClassHierarchyAnalysis()
+        ->RemoveDependentsWithMethodHeaders(method_headers);
+  }
+
+  // We need to remove entries in method_headers from CHA dependencies
+  // first (as done above), since once we do FreeCode() below, the memory can
+  // be reused, so it's possible for the same method_header to start
+  // representing different compiled code.
+  MutexLock mu(Thread::Current(), lock_);
+  ScopedCodeCacheWrite scc(code_map_.get());
+  for (const OatQuickMethodHeader* method_header : method_headers) {
+    FreeCode(method_header->GetCode());
+  }
+}
+
 void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
-  MutexLock mu(self, lock_);
-  // We do not check if a code cache GC is in progress, as this method comes
-  // with the classlinker_classes_lock_ held, and suspending ourselves could
-  // lead to a deadlock.
+  // We use a set to first collect all method_headers whose code needs to be
+  // removed. We need to free the underlying code after we remove CHA
+  // dependencies for entries in this set, and it's more efficient to iterate
+  // through the CHA dependency map just once with an unordered_set.
+  std::unordered_set<OatQuickMethodHeader*> method_headers;
   {
-    ScopedCodeCacheWrite scc(code_map_.get());
-    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-      if (alloc.ContainsUnsafe(it->second)) {
-        FreeCode(it->first, it->second);
-        it = method_code_map_.erase(it);
+    MutexLock mu(self, lock_);
+    // We do not check if a code cache GC is in progress, as this method comes
+    // with the classlinker_classes_lock_ held, and suspending ourselves could
+    // lead to a deadlock.
+    {
+      ScopedCodeCacheWrite scc(code_map_.get());
+      for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+        if (alloc.ContainsUnsafe(it->second)) {
+          method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+          it = method_code_map_.erase(it);
+        } else {
+          ++it;
+        }
+      }
+    }
+    for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+      if (alloc.ContainsUnsafe(it->first)) {
+        // Note that the code has already been pushed to method_headers in the loop
+        // above and is going to be removed in FreeCode() below.
+        it = osr_code_map_.erase(it);
+      } else {
+        ++it;
+      }
+    }
+    for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+      ProfilingInfo* info = *it;
+      if (alloc.ContainsUnsafe(info->GetMethod())) {
+        info->GetMethod()->SetProfilingInfo(nullptr);
+        FreeData(reinterpret_cast<uint8_t*>(info));
+        it = profiling_infos_.erase(it);
       } else {
         ++it;
       }
     }
   }
-  for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
-    if (alloc.ContainsUnsafe(it->first)) {
-      // Note that the code has already been removed in the loop above.
-      it = osr_code_map_.erase(it);
-    } else {
-      ++it;
-    }
-  }
-  for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
-    ProfilingInfo* info = *it;
-    if (alloc.ContainsUnsafe(info->GetMethod())) {
-      info->GetMethod()->SetProfilingInfo(nullptr);
-      FreeData(reinterpret_cast<uint8_t*>(info));
-      it = profiling_infos_.erase(it);
-    } else {
-      ++it;
-    }
-  }
+  FreeAllMethodHeaders(method_headers);
 }
 
 bool JitCodeCache::IsWeakAccessEnabled(Thread* self) const {
@@ -464,7 +499,10 @@
                                           const uint8_t* code,
                                           size_t code_size,
                                           bool osr,
-                                          Handle<mirror::ObjectArray<mirror::Object>> roots) {
+                                          Handle<mirror::ObjectArray<mirror::Object>> roots,
+                                          bool has_should_deoptimize_flag,
+                                          const ArenaSet<ArtMethod*>&
+                                              cha_single_implementation_list) {
   DCHECK(stack_map != nullptr);
   size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
   // Ensure the header ends up at expected instruction alignment.
@@ -506,12 +544,44 @@
       // https://patchwork.kernel.org/patch/9047921/
       FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
                             reinterpret_cast<char*>(code_ptr + code_size));
+      DCHECK(!Runtime::Current()->IsAotCompiler());
+      if (has_should_deoptimize_flag) {
+        method_header->SetHasShouldDeoptimizeFlag();
+      }
     }
 
     number_of_compilations_++;
   }
   // We need to update the entry point in the runnable state for the instrumentation.
   {
+    // Need cha_lock_ for checking all single-implementation flags and for
+    // registering dependencies.
+    MutexLock cha_mu(self, *Locks::cha_lock_);
+    bool single_impl_still_valid = true;
+    for (ArtMethod* single_impl : cha_single_implementation_list) {
+      if (!single_impl->HasSingleImplementation()) {
+        // We simply discard the compiled code. Clear the
+        // counter so that it may be recompiled later. Hopefully the
+        // class hierarchy will be more stable when compilation is retried.
+        single_impl_still_valid = false;
+        method->ClearCounter();
+        break;
+      }
+    }
+
+    // Discard the code if any single-implementation assumptions are now invalid.
+    if (!single_impl_still_valid) {
+      VLOG(jit) << "JIT discarded jitted code due to invalid single-implementation assumptions.";
+      return nullptr;
+    }
+    for (ArtMethod* single_impl : cha_single_implementation_list) {
+      Runtime::Current()->GetClassHierarchyAnalysis()->AddDependency(
+          single_impl, method, method_header);
+    }
+
+    // The following also needs to be guarded by cha_lock_. Otherwise it's
+    // possible that the compiled code is considered invalidated by some class
+    // linking, but below we would still make it the valid code for the method.
     MutexLock mu(self, lock_);
     method_code_map_.Put(code_ptr, method);
     // Fill the root table before updating the entry point.
@@ -535,7 +605,8 @@
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
         << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
-        << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
+        << reinterpret_cast<const void*>(method_header->GetEntryPoint() +
+                                         method_header->GetCodeSize());
     histogram_code_memory_use_.AddValue(code_size);
     if (code_size > kCodeSizeLogThreshold) {
       LOG(INFO) << "JIT allocated "
@@ -836,20 +907,23 @@
 
 void JitCodeCache::RemoveUnmarkedCode(Thread* self) {
   ScopedTrace trace(__FUNCTION__);
-  MutexLock mu(self, lock_);
-  ScopedCodeCacheWrite scc(code_map_.get());
-  // Iterate over all compiled code and remove entries that are not marked.
-  for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
-    const void* code_ptr = it->first;
-    ArtMethod* method = it->second;
-    uintptr_t allocation = FromCodeToAllocation(code_ptr);
-    if (GetLiveBitmap()->Test(allocation)) {
-      ++it;
-    } else {
-      FreeCode(code_ptr, method);
-      it = method_code_map_.erase(it);
+  std::unordered_set<OatQuickMethodHeader*> method_headers;
+  {
+    MutexLock mu(self, lock_);
+    ScopedCodeCacheWrite scc(code_map_.get());
+    // Iterate over all compiled code and remove entries that are not marked.
+    for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+      const void* code_ptr = it->first;
+      uintptr_t allocation = FromCodeToAllocation(code_ptr);
+      if (GetLiveBitmap()->Test(allocation)) {
+        ++it;
+      } else {
+        method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+        it = method_code_map_.erase(it);
+      }
     }
   }
+  FreeAllMethodHeaders(method_headers);
 }
 
 void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index be2cec5..30e2efb 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -20,6 +20,7 @@
 #include "instrumentation.h"
 
 #include "atomic.h"
+#include "base/arena_containers.h"
 #include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -91,6 +92,11 @@
       REQUIRES(!lock_);
 
   // Allocate and write code and its metadata to the code cache.
+  // `cha_single_implementation_list` needs to be registered via CHA (if it's
+  // still valid), since the compiled code still needs to be invalidated if the
+  // single-implementation assumptions are violated later. This needs to be done
+  // even if `has_should_deoptimize_flag` is false, which can happen due to CHA
+  // guard elimination.
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
                       uint8_t* stack_map,
@@ -101,7 +107,9 @@
                       const uint8_t* code,
                       size_t code_size,
                       bool osr,
-                      Handle<mirror::ObjectArray<mirror::Object>> roots)
+                      Handle<mirror::ObjectArray<mirror::Object>> roots,
+                      bool has_should_deoptimize_flag,
+                      const ArenaSet<ArtMethod*>& cha_single_implementation_list)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
 
@@ -230,7 +238,9 @@
                               const uint8_t* code,
                               size_t code_size,
                               bool osr,
-                              Handle<mirror::ObjectArray<mirror::Object>> roots)
+                              Handle<mirror::ObjectArray<mirror::Object>> roots,
+                              bool has_should_deoptimize_flag,
+                              const ArenaSet<ArtMethod*>& cha_single_implementation_list)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -245,8 +255,13 @@
   bool WaitForPotentialCollectionToComplete(Thread* self)
       REQUIRES(lock_) REQUIRES(!Locks::mutator_lock_);
 
-  // Free in the mspace allocations taken by 'method'.
-  void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
+  // Remove CHA dependents and underlying allocations for entries in `method_headers`.
+  void FreeAllMethodHeaders(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+      REQUIRES(!lock_)
+      REQUIRES(!Locks::cha_lock_);
+
+  // Free in the mspace allocations for `code_ptr`.
+  void FreeCode(const void* code_ptr) REQUIRES(lock_);
 
   // Number of bytes allocated in the code cache.
   size_t CodeCacheSizeLocked() REQUIRES(lock_);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5def65e..5fdf8f3 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -238,7 +238,7 @@
 template<VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption>
 inline PointerArray* Class::GetVTable() {
-  DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
+  DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
   return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
 }
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index a1110d9..8a01043 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -71,6 +71,11 @@
 // class.
 // TODO Might want to re-arrange some of these so that we can have obsolete + intrinsic methods.
 static constexpr uint32_t kAccObsoleteMethod =        0x04000000;  // method (runtime)
+
+// Set by the class linker on a method that has only one implementation, so
+// that a virtual call to it can be devirtualized.
+static constexpr uint32_t kAccSingleImplementation =  0x08000000;  // method (runtime)
+
 static constexpr uint32_t kAccIntrinsic  =            0x80000000;  // method (runtime)
 
 // Special runtime-only flags.
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 2376889..5565565 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -272,7 +272,7 @@
   if (code == 0) {
     return pc == 0;
   }
-  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
   return code <= pc && pc <= (code + code_size);
 }
 
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index d7d0c4f..721fab9 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -44,7 +44,7 @@
   if (method_header == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const uint8_t*>(&method_header->code_size_) - begin_;
+  return reinterpret_cast<const uint8_t*>(method_header->GetCodeSizeAddr()) - begin_;
 }
 
 inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
@@ -52,7 +52,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FrameSizeInBytes();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FrameSizeInBytes();
 }
 
 inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
@@ -60,7 +60,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.CoreSpillMask();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().CoreSpillMask();
 }
 
 inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
@@ -68,7 +68,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FpSpillMask();
 }
 
 inline uint32_t OatFile::OatMethod::GetVmapTableOffset() const {
@@ -81,7 +81,7 @@
   if (method_header == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const uint8_t*>(&method_header->vmap_table_offset_) - begin_;
+  return reinterpret_cast<const uint8_t*>(method_header->GetVmapTableOffsetAddr()) - begin_;
 }
 
 inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
@@ -89,7 +89,7 @@
   if (code == nullptr) {
     return nullptr;
   }
-  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
+  uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetVmapTableOffset();
   if (UNLIKELY(offset == 0u)) {
     return nullptr;
   }
@@ -101,7 +101,7 @@
   if (code == nullptr) {
     return 0u;
   }
-  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+  return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
 }
 
 inline uint32_t OatFile::OatMethod::GetCodeOffset() const {
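
Note: the repeated `[-1]` indexing above relies on the method header being laid out immediately before the code it describes. A minimal standalone sketch of that layout trick follows; the struct and names here are illustrative, not ART's.

#include <cassert>
#include <cstdint>
#include <new>

// Illustrative only: a header placed immediately before its payload can be
// recovered from the payload pointer with [-1] indexing, which is what the
// diff does with OatQuickMethodHeader and the compiled code that follows it.
struct Header {
  uint32_t payload_size;
};

int main() {
  // One buffer holding the header followed by 16 payload bytes.
  alignas(Header) unsigned char storage[sizeof(Header) + 16];
  new (storage) Header{16u};
  unsigned char* payload = storage + sizeof(Header);

  // Given only the payload pointer, step back one Header to reach it.
  const Header* recovered = &reinterpret_cast<const Header*>(payload)[-1];
  assert(recovered->payload_size == 16u);
  return 0;
}
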
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 4afca7d..3cdde5a 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -58,7 +58,7 @@
   }
 
   bool IsOptimized() const {
-    return code_size_ != 0 && vmap_table_offset_ != 0;
+    return GetCodeSize() != 0 && vmap_table_offset_ != 0;
   }
 
   const void* GetOptimizedCodeInfoPtr() const {
@@ -81,7 +81,23 @@
   }
 
   uint32_t GetCodeSize() const {
-    return code_size_;
+    return code_size_ & kCodeSizeMask;
+  }
+
+  const uint32_t* GetCodeSizeAddr() const {
+    return &code_size_;
+  }
+
+  uint32_t GetVmapTableOffset() const {
+    return vmap_table_offset_;
+  }
+
+  void SetVmapTableOffset(uint32_t offset) {
+    vmap_table_offset_ = offset;
+  }
+
+  const uint32_t* GetVmapTableOffsetAddr() const {
+    return &vmap_table_offset_;
   }
 
   const uint8_t* GetVmapTable() const {
@@ -96,7 +112,7 @@
       // On Thumb-2, the pc is offset by one.
       code_start++;
     }
-    return code_start <= pc && pc <= (code_start + code_size_);
+    return code_start <= pc && pc <= (code_start + GetCodeSize());
   }
 
   const uint8_t* GetEntryPoint() const {
@@ -130,11 +146,25 @@
 
   uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
 
+  void SetHasShouldDeoptimizeFlag() {
+    DCHECK_EQ(code_size_ & kShouldDeoptimizeMask, 0u);
+    code_size_ |= kShouldDeoptimizeMask;
+  }
+
+  bool HasShouldDeoptimizeFlag() const {
+    return (code_size_ & kShouldDeoptimizeMask) != 0;
+  }
+
+ private:
+  static constexpr uint32_t kShouldDeoptimizeMask = 0x80000000;
+  static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;
+
   // The offset in bytes from the start of the vmap table to the end of the header.
   uint32_t vmap_table_offset_;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
-  // The code size in bytes.
+  // The code size in bytes. The highest bit is used to signify whether the compiled
+  // code associated with this method header has a should_deoptimize flag.
   uint32_t code_size_;
   // The actual code.
   uint8_t code_[0];
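
Note: a standalone sketch of the bit packing this header change relies on. The mask constants mirror the ones added above; the surrounding code is illustrative only, not ART's.

#include <cassert>
#include <cstdint>

// The top bit of the code-size word doubles as the should_deoptimize marker,
// so readers must mask it off before using the size.
constexpr uint32_t kShouldDeoptimizeMask = 0x80000000;
constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;

int main() {
  uint32_t code_size = 0x100;          // plain size, flag clear
  assert((code_size & kShouldDeoptimizeMask) == 0);

  code_size |= kShouldDeoptimizeMask;  // what SetHasShouldDeoptimizeFlag() does
  assert((code_size & kShouldDeoptimizeMask) != 0);  // HasShouldDeoptimizeFlag()
  assert((code_size & kCodeSizeMask) == 0x100);      // GetCodeSize() still 0x100
  return 0;
}
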
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 68b956b..92e00ec 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -62,6 +62,7 @@
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
+#include "cha.h"
 #include "class_linker-inl.h"
 #include "compiler_callbacks.h"
 #include "debugger.h"
@@ -349,6 +350,7 @@
   delete monitor_list_;
   delete monitor_pool_;
   delete class_linker_;
+  delete cha_;
   delete heap_;
   delete intern_table_;
   delete oat_file_manager_;
@@ -1203,6 +1205,7 @@
 
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
+  cha_ = new ClassHierarchyAnalysis;
   if (GetHeap()->HasBootImageSpace()) {
     bool result = class_linker_->InitFromBootImage(&error_msg);
     if (!result) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 4f31887..e6b3128 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -76,6 +76,7 @@
 }  // namespace verifier
 class ArenaPool;
 class ArtMethod;
+class ClassHierarchyAnalysis;
 class ClassLinker;
 class Closure;
 class CompilerCallbacks;
@@ -674,6 +675,10 @@
   void AddSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
   void RemoveSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
 
+  ClassHierarchyAnalysis* GetClassHierarchyAnalysis() {
+    return cha_;
+  }
+
   NO_RETURN
   static void Aborter(const char* abort_message);
 
@@ -921,6 +926,8 @@
   // Generic system-weak holders.
   std::vector<gc::AbstractSystemWeakHolder*> system_weak_holders_;
 
+  ClassHierarchyAnalysis* cha_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
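
Note: with the new member and accessor in place, runtime code can reach the analysis through the usual Runtime singleton, e.g. (usage sketch):

// Usage sketch: the getter added above, reached via the Runtime singleton.
ClassHierarchyAnalysis* cha = Runtime::Current()->GetClassHierarchyAnalysis();
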
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 167a30b..f20aa20 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -648,7 +648,7 @@
     return;
   }
 
-  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
+  uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->GetCodeSize();
   uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
   CHECK(code_start <= pc && pc <= (code_start + code_size))
       << method->PrettyMethod()
diff --git a/runtime/stack.h b/runtime/stack.h
index e879214..d02e4b7 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -66,6 +66,11 @@
 struct ShadowFrameDeleter;
 using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
 
+// Size in bytes of the should_deoptimize flag on stack.
+// Only 4 bytes are needed regardless of the architecture; the frame size
+// calculation automatically aligns the final frame size.
+static constexpr size_t kShouldDeoptimizeFlagSize = 4;
+
 // Counting locks by storing object pointers into a vector. Duplicate entries mark recursive locks.
 // The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
 // thread roots).
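
Note: a small illustration of the point made in the comment above: reserve 4 bytes for the flag and let the usual frame-size alignment absorb it. The 16-byte frame alignment used here is an assumption for the example, not taken from this CL.

#include <cassert>
#include <cstddef>

constexpr size_t kShouldDeoptimizeFlagSize = 4;
constexpr size_t kAssumedFrameAlignment = 16;  // assumption for illustration

constexpr size_t AlignUp(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

int main() {
  size_t frame_size = 72;  // some unaligned spill-area + outgoing-args total
  frame_size += kShouldDeoptimizeFlagSize;  // reserve the flag slot
  frame_size = AlignUp(frame_size, kAssumedFrameAlignment);
  assert(frame_size == 80);  // the alignment step absorbs the extra 4 bytes
  return 0;
}
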
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 7137db8..461c172 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -410,15 +410,15 @@
       result.kind = kSoftFailure;
       if (method != nullptr &&
           !CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+        method->AddAccessFlags(kAccCompileDontBother);
       }
     }
     if (method != nullptr) {
       if (verifier.HasInstructionThatWillThrow()) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+        method->AddAccessFlags(kAccCompileDontBother);
       }
       if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
-        method->SetAccessFlags(method->GetAccessFlags() | kAccMustCountLocks);
+        method->AddAccessFlags(kAccMustCountLocks);
       }
     }
   } else {