author Mingyao Yang <mingyao@google.com> 2016-08-02 11:02:54 -0700
committer Mingyao Yang <mingyao@google.com> 2016-12-01 11:15:47 -0800
commit 063fc772b5b8aed7d769cd7cccb6ddc7619326ee (patch)
tree bc165781989087a998721991504e589a7d5b0926
parent 48d08a4233ee4450b0d5073d41445f9dd1f17191 (diff)
Class Hierarchy Analysis (CHA)
The class linker now tracks whether a method has a single implementation and,
if so, the JIT compiler will try to devirtualize a virtual call for the method
into a direct call.

If the single-implementation assumption is violated due to additional class
linking, compiled code that makes the assumption is invalidated.
Deoptimization is triggered for compiled code live on stack. Instead of
patching return pc's on stack, a CHA guard is added which checks a hidden
should_deoptimize flag for deoptimization. This approach limits the number of
deoptimization points.

This CL does not devirtualize abstract/interface method invocation.

Slides on CHA:
https://docs.google.com/a/google.com/presentation/d/1Ax6cabP1vM44aLOaJU3B26n5fTE9w5YU-1CRevIDsBc/edit?usp=sharing

Change-Id: I18bf716a601b6413b46312e925a6ad9e4008efa4
Test: ART_TEST_JIT=true m test-art-host/target-run-test test-art-host-gtest
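For illustration only (not part of this change), the sketch below shows the kind of
hierarchy that CHA-based devirtualization reasons about, in the spirit of
test/616-cha; class and method names here are hypothetical. While Base.foo() is the
only implementation, a JIT-compiled caller can invoke it directly behind a CHA
guard; once a subclass overriding foo() is linked, the single-implementation
assumption is invalidated, the should_deoptimize flag is set for frames live on
stack, and the caller is later recompiled without the assumption.

// Hypothetical Java example. While only Base defines foo(), the JIT may
// devirtualize b.foo() in callFoo() into a direct call to Base.foo(),
// guarded by the hidden should_deoptimize flag. Linking Sub, which
// overrides foo(), invalidates that assumption.
class Base {
  int foo() { return 1; }
}

class Sub extends Base {
  @Override
  int foo() { return 2; }
}

public class Main {
  static int callFoo(Base b) {
    // Virtual call site that CHA can treat as a direct call while Base.foo()
    // is the single implementation.
    return b.foo();
  }

  public static void main(String[] args) {
    Base base = new Base();
    int sum = 0;
    for (int i = 0; i < 100000; i++) {
      sum += callFoo(base);  // Warm up so the JIT compiles callFoo().
    }
    // Loading/linking Sub violates the single-implementation assumption for
    // Base.foo(); compiled callers are invalidated and deoptimized if live.
    sum += callFoo(new Sub());
    System.out.println(sum);
  }
}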
-rw-r--r--  compiler/oat_writer.cc | 8
-rw-r--r--  compiler/optimizing/code_generator.cc | 1
-rw-r--r--  compiler/optimizing/code_generator.h | 6
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 20
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 17
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 10
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 10
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 16
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 17
-rw-r--r--  compiler/optimizing/inliner.cc | 73
-rw-r--r--  compiler/optimizing/inliner.h | 20
-rw-r--r--  compiler/optimizing/nodes.h | 42
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 4
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 2
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 4
-rw-r--r--  oatdump/oatdump.cc | 3
-rw-r--r--  runtime/Android.bp | 2
-rw-r--r--  runtime/art_method-inl.h | 49
-rw-r--r--  runtime/art_method.cc | 17
-rw-r--r--  runtime/art_method.h | 85
-rw-r--r--  runtime/base/arena_allocator.h | 1
-rw-r--r--  runtime/base/mutex.cc | 6
-rw-r--r--  runtime/base/mutex.h | 6
-rw-r--r--  runtime/cha.cc | 356
-rw-r--r--  runtime/cha.h | 144
-rw-r--r--  runtime/cha_test.cc | 93
-rw-r--r--  runtime/class_linker.cc | 13
-rw-r--r--  runtime/jit/jit_code_cache.cc | 162
-rw-r--r--  runtime/jit/jit_code_cache.h | 23
-rw-r--r--  runtime/mirror/class-inl.h | 2
-rw-r--r--  runtime/modifiers.h | 5
-rw-r--r--  runtime/native_stack_dump.cc | 2
-rw-r--r--  runtime/oat_file-inl.h | 14
-rw-r--r--  runtime/oat_quick_method_header.h | 38
-rw-r--r--  runtime/runtime.cc | 3
-rw-r--r--  runtime/runtime.h | 7
-rw-r--r--  runtime/stack.cc | 2
-rw-r--r--  runtime/stack.h | 5
-rw-r--r--  runtime/verifier/method_verifier.cc | 6
-rw-r--r--  test/616-cha/expected.txt | 1
-rw-r--r--  test/616-cha/info.txt | 1
-rw-r--r--  test/616-cha/src/Main.java | 255
-rw-r--r--  test/common/runtime_state.cc | 13
43 files changed, 1444 insertions, 120 deletions
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 153aff40dc..a7a451fb76 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -793,7 +793,7 @@ class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
// Update quick method header.
DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
- uint32_t vmap_table_offset = method_header->vmap_table_offset_;
+ uint32_t vmap_table_offset = method_header->GetVmapTableOffset();
// The code offset was 0 when the mapping/vmap table offset was set, so it's set
// to 0-offset and we need to adjust it by code_offset.
uint32_t code_offset = quick_code_offset - thumb_offset;
@@ -935,7 +935,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
// If vdex is enabled, we only emit the stack map of compiled code. The quickening info will
// be in the vdex file.
if (!compiled_method->GetQuickCode().empty() || !kIsVdexEnabled) {
- DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
+ DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset(), 0u);
ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
uint32_t map_size = map.size() * sizeof(map[0]);
@@ -949,7 +949,7 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
});
// Code offset is not initialized yet, so set the map offset to 0u-offset.
DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
- oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
+ oat_class->method_headers_[method_offsets_index_].SetVmapTableOffset(0u - offset);
}
}
++method_offsets_index_;
@@ -1406,7 +1406,7 @@ class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
size_t file_offset = file_offset_;
OutputStream* out = out_;
- uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].vmap_table_offset_;
+ uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetVmapTableOffset();
uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
++method_offsets_index_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 9f6b78a82c..fa6a5225e7 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -304,6 +304,7 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
SetFrameSize(RoundUp(
first_register_slot_in_slow_path_
+ maximum_safepoint_spill_size
+ + (GetGraph()->HasShouldDeoptimizeFlag() ? kShouldDeoptimizeFlagSize : 0)
+ FrameEntrySpillSize(),
kStackAlignment));
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a5d19abe92..4b11e7c699 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -307,6 +307,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
return POPCOUNT(GetSlowPathSpills(locations, core_registers));
}
+ size_t GetStackOffsetOfShouldDeoptimizeFlag() const {
+ DCHECK(GetGraph()->HasShouldDeoptimizeFlag());
+ DCHECK_GE(GetFrameSize(), FrameEntrySpillSize() + kShouldDeoptimizeFlagSize);
+ return GetFrameSize() - FrameEntrySpillSize() - kShouldDeoptimizeFlagSize;
+ }
+
// Record native to dex mapping for a suspend point. Required by runtime.
void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
// Check whether we have already recorded mapping at this PC.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1f5981682b..ed6eef1b55 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1329,6 +1329,13 @@ void CodeGeneratorARM::GenerateFrameEntry() {
__ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
__ cfi().RelOffsetForMany(DWARFReg(S0), 0, fpu_spill_mask_, kArmWordSize);
}
+
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ __ mov(IP, ShifterOperand(0));
+ __ StoreToOffset(kStoreWord, IP, SP, -kShouldDeoptimizeFlagSize);
+ }
+
int adjust = GetFrameSize() - FrameEntrySpillSize();
__ AddConstant(SP, -adjust);
__ cfi().AdjustCFAOffset(adjust);
@@ -1944,6 +1951,19 @@ void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+void LocationsBuilderARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(flag, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ __ LoadFromOffset(kLoadWord,
+ flag->GetLocations()->Out().AsRegister<Register>(),
+ SP,
+ codegen_->GetStackOffsetOfShouldDeoptimizeFlag());
+}
+
void LocationsBuilderARM::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
if (Primitive::IsFloatingPointType(select->GetType())) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index ab6a33fbd9..6eebd69a04 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1273,6 +1273,12 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
frame_size - GetCoreSpillSize());
GetAssembler()->SpillRegisters(GetFramePreservedFPRegisters(),
frame_size - FrameEntrySpillSize());
+
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
+ __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
+ }
}
}
@@ -3235,6 +3241,17 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(flag, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ __ Ldr(OutputRegister(flag),
+ MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
return condition->IsCondition() &&
Primitive::IsFloatingPointType(condition->InputAt(0)->GetType());
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 572d900909..61dabfabaa 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4688,6 +4688,16 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) {
}
}
+void LocationsBuilderMIPS::VisitShouldDeoptimizeFlag(
+ HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+ // TODO: to be implemented.
+}
+
+void InstructionCodeGeneratorMIPS::VisitShouldDeoptimizeFlag(
+ HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+ // TODO: to be implemented.
+}
+
void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations);
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index b5e98714e6..b1f9b1db53 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -2636,6 +2636,16 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(
+ HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+ // TODO: to be implemented.
+}
+
+void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(
+ HShouldDeoptimizeFlag* flag ATTRIBUTE_UNUSED) {
+ // TODO: to be implemented.
+}
+
void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
if (Primitive::IsFloatingPointType(select->GetType())) {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 12aa03c4af..d6e92ccb81 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1059,6 +1059,11 @@ void CodeGeneratorX86::GenerateFrameEntry() {
}
}
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ __ movl(Address(ESP, -kShouldDeoptimizeFlagSize), Immediate(0));
+ }
+
int adjust = GetFrameSize() - FrameEntrySpillSize();
__ subl(ESP, Immediate(adjust));
__ cfi().AdjustCFAOffset(adjust);
@@ -1676,6 +1681,17 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(flag, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ __ movl(flag->GetLocations()->Out().AsRegister<Register>(),
+ Address(ESP, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
static bool SelectCanUseCMOV(HSelect* select) {
// There are no conditional move instructions for XMMs.
if (Primitive::IsFloatingPointType(select->GetType())) {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 22f7f6b52b..4474decf59 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1326,6 +1326,12 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
}
}
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize),
+ Immediate(0));
+ }
+
// Save the current method if we need it. Note that we do not
// do this in HCurrentMethod, as the instruction might have been removed
// in the SSA graph.
@@ -1747,6 +1753,17 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ LocationSummary* locations = new (GetGraph()->GetArena())
+ LocationSummary(flag, LocationSummary::kNoCall);
+ locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
+ __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
+ Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
+}
+
static bool SelectCanUseCMOV(HSelect* select) {
// There are no conditional move instructions for XMMs.
if (Primitive::IsFloatingPointType(select->GetType())) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 01e89bb304..8d93867230 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -292,6 +292,21 @@ static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr;
}
+ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
+ if (!resolved_method->HasSingleImplementation()) {
+ return nullptr;
+ }
+ if (Runtime::Current()->IsAotCompiler()) {
+ // No CHA-based devirtualization for AOT compiler (yet).
+ return nullptr;
+ }
+ if (outermost_graph_->IsCompilingOsr()) {
+ // We do not support HDeoptimize in OSR methods.
+ return nullptr;
+ }
+ return resolved_method->GetSingleImplementation();
+}
+
bool HInliner::TryInline(HInvoke* invoke_instruction) {
if (invoke_instruction->IsInvokeUnresolved()) {
return false; // Don't bother to move further if we know the method is unresolved.
@@ -317,10 +332,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
actual_method = FindVirtualOrInterfaceTarget(invoke_instruction, resolved_method);
}
+ bool cha_devirtualize = false;
+ if (actual_method == nullptr) {
+ ArtMethod* method = TryCHADevirtualization(resolved_method);
+ if (method != nullptr) {
+ cha_devirtualize = true;
+ actual_method = method;
+ }
+ }
+
if (actual_method != nullptr) {
- bool result = TryInlineAndReplace(invoke_instruction, actual_method, /* do_rtp */ true);
+ bool result = TryInlineAndReplace(invoke_instruction,
+ actual_method,
+ /* do_rtp */ true,
+ cha_devirtualize);
if (result && !invoke_instruction->IsInvokeStaticOrDirect()) {
- MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+ if (cha_devirtualize) {
+ // Add dependency due to devirtualization. We've assumed resolved_method
+ // has single implementation.
+ outermost_graph_->AddCHASingleImplementationDependency(resolved_method);
+ MaybeRecordStat(kCHAInline);
+ } else {
+ MaybeRecordStat(kInlinedInvokeVirtualOrInterface);
+ }
}
return result;
}
@@ -438,7 +472,10 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
HInstruction* cursor = invoke_instruction->GetPrevious();
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
- if (!TryInlineAndReplace(invoke_instruction, resolved_method, /* do_rtp */ false)) {
+ if (!TryInlineAndReplace(invoke_instruction,
+ resolved_method,
+ /* do_rtp */ false,
+ /* cha_devirtualize */ false)) {
return false;
}
@@ -465,6 +502,25 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
return true;
}
+void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
+ uint32_t dex_pc,
+ HInstruction* cursor,
+ HBasicBlock* bb_cursor) {
+ HInstruction* deopt_flag = new (graph_->GetArena()) HShouldDeoptimizeFlag(dex_pc);
+ HInstruction* should_deopt = new (graph_->GetArena()) HNotEqual(
+ deopt_flag, graph_->GetIntConstant(0, dex_pc));
+ HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(should_deopt, dex_pc);
+
+ if (cursor != nullptr) {
+ bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
+ } else {
+ bb_cursor->InsertInstructionBefore(deopt_flag, bb_cursor->GetFirstInstruction());
+ }
+ bb_cursor->InsertInstructionAfter(should_deopt, deopt_flag);
+ bb_cursor->InsertInstructionAfter(deopt, should_deopt);
+ deopt->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+}
+
HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
HInstruction* cursor,
HBasicBlock* bb_cursor,
@@ -787,8 +843,14 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
return true;
}
-bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* method, bool do_rtp) {
+bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
+ ArtMethod* method,
+ bool do_rtp,
+ bool cha_devirtualize) {
HInstruction* return_replacement = nullptr;
+ uint32_t dex_pc = invoke_instruction->GetDexPc();
+ HInstruction* cursor = invoke_instruction->GetPrevious();
+ HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
if (!TryBuildAndInline(invoke_instruction, method, &return_replacement)) {
if (invoke_instruction->IsInvokeInterface()) {
// Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
@@ -826,6 +888,9 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* metho
return false;
}
}
+ if (cha_devirtualize) {
+ AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor);
+ }
if (return_replacement != nullptr) {
invoke_instruction->ReplaceWith(return_replacement);
}
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index a2b4fc96c4..ffebd97cb8 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -62,8 +62,12 @@ class HInliner : public HOptimization {
// Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
// reference type propagation can run after the inlining. If the inlining is successful, this
- // method will replace and remove the `invoke_instruction`.
- bool TryInlineAndReplace(HInvoke* invoke_instruction, ArtMethod* resolved_method, bool do_rtp)
+ // method will replace and remove the `invoke_instruction`. If `cha_devirtualize` is true,
+ // a CHA guard needs to be added for the inlining.
+ bool TryInlineAndReplace(HInvoke* invoke_instruction,
+ ArtMethod* resolved_method,
+ bool do_rtp,
+ bool cha_devirtualize)
REQUIRES_SHARED(Locks::mutator_lock_);
bool TryBuildAndInline(HInvoke* invoke_instruction,
@@ -118,6 +122,18 @@ class HInliner : public HOptimization {
Handle<mirror::ObjectArray<mirror::Class>> classes)
REQUIRES_SHARED(Locks::mutator_lock_);
+ // Try CHA-based devirtualization to change virtual method calls into
+ // direct calls.
+ // Returns the actual method that resolved_method can be devirtualized to.
+ ArtMethod* TryCHADevirtualization(ArtMethod* resolved_method)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Add a CHA guard for a CHA-based devirtualized call. A CHA guard checks a
+ // should_deoptimize flag and if it's true, does deoptimization.
+ void AddCHAGuard(HInstruction* invoke_instruction,
+ uint32_t dex_pc,
+ HInstruction* cursor,
+ HBasicBlock* bb_cursor);
HInstanceFieldGet* BuildGetReceiverClass(ClassLinker* class_linker,
HInstruction* receiver,
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7ab04e15fc..e3f4d8f035 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -333,7 +333,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_current_method_(nullptr),
inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
- osr_(osr) {
+ osr_(osr),
+ cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) {
blocks_.reserve(kDefaultNumberOfBlocks);
}
@@ -536,6 +537,20 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool IsCompilingOsr() const { return osr_; }
+ ArenaSet<ArtMethod*>& GetCHASingleImplementationList() {
+ return cha_single_implementation_list_;
+ }
+
+ void AddCHASingleImplementationDependency(ArtMethod* method) {
+ cha_single_implementation_list_.insert(method);
+ }
+
+ bool HasShouldDeoptimizeFlag() const {
+ // TODO: if all CHA guards can be eliminated, there is no need for the flag
+ // even if cha_single_implementation_list_ is not empty.
+ return !cha_single_implementation_list_.empty();
+ }
+
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
@@ -672,6 +687,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// compiled code entries which the interpreter can directly jump to.
const bool osr_;
+ // List of methods that are assumed to have single implementation.
+ ArenaSet<ArtMethod*> cha_single_implementation_list_;
+
friend class SsaBuilder; // For caching constants.
friend class SsaLivenessAnalysis; // For the linear order.
friend class HInliner; // For the reverse post order.
@@ -1240,6 +1258,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(ClinitCheck, Instruction) \
M(Compare, BinaryOperation) \
M(CurrentMethod, Instruction) \
+ M(ShouldDeoptimizeFlag, Instruction) \
M(Deoptimize, Instruction) \
M(Div, BinaryOperation) \
M(DivZeroCheck, Instruction) \
@@ -2875,6 +2894,27 @@ class HDeoptimize FINAL : public HTemplateInstruction<1> {
DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
};
+// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
+// The compiled code checks this flag value in a guard before devirtualized call and
+// if it's true, starts to do deoptimization.
+// It has a 4-byte slot on stack.
+// TODO: allocate a register for this flag.
+class HShouldDeoptimizeFlag FINAL : public HExpression<0> {
+ public:
+ // TODO: use SideEffects to aid eliminating some CHA guards.
+ explicit HShouldDeoptimizeFlag(uint32_t dex_pc)
+ : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) {
+ }
+
+ // We don't eliminate CHA guards yet.
+ bool CanBeMoved() const OVERRIDE { return false; }
+
+ DECLARE_INSTRUCTION(ShouldDeoptimizeFlag);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HShouldDeoptimizeFlag);
+};
+
// Represents the ArtMethod that was passed as a first argument to
// the method. It is used by instructions that depend on it, like
// instructions that work with the dex cache.
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 2382b728df..e15e33dc78 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1208,7 +1208,9 @@ bool OptimizingCompiler::JitCompile(Thread* self,
code_allocator.GetMemory().data(),
code_allocator.GetSize(),
osr,
- roots);
+ roots,
+ codegen->GetGraph()->HasShouldDeoptimizeFlag(),
+ codegen->GetGraph()->GetCHASingleImplementationList());
if (code == nullptr) {
code_cache->ClearData(self, stack_map_data, roots_data);
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index c8d1ce0bd5..203b1ec7ec 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -27,6 +27,7 @@ namespace art {
enum MethodCompilationStat {
kAttemptCompilation = 0,
+ kCHAInline,
kCompiled,
kInlinedInvoke,
kReplacedInvokeWithSimplePattern,
@@ -106,6 +107,7 @@ class OptimizingCompilerStats {
std::string name;
switch (stat) {
case kAttemptCompilation : name = "AttemptCompilation"; break;
+ case kCHAInline : name = "CHAInline"; break;
case kCompiled : name = "Compiled"; break;
case kInlinedInvoke : name = "InlinedInvoke"; break;
case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break;
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 5991791a15..59523a93a0 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -87,6 +87,10 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint
// Adjust the stack slot, now that we know the number of them for each type.
// The way this implementation lays out the stack is the following:
// [parameter slots ]
+ // [art method (caller) ]
+ // [entry spill (core) ]
+ // [entry spill (float) ]
+ // [should_deoptimize flag] (this is optional)
// [catch phi spill slots ]
// [double spill slots ]
// [long spill slots ]
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index a1984a7d9f..692a951c97 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1053,7 +1053,8 @@ class OatDumper {
if (options_.absolute_addresses_) {
vios->Stream() << StringPrintf("%p ", oat_method.GetVmapTable());
}
- uint32_t vmap_table_offset = method_header == nullptr ? 0 : method_header->vmap_table_offset_;
+ uint32_t vmap_table_offset = method_header ==
+ nullptr ? 0 : method_header->GetVmapTableOffset();
vios->Stream() << StringPrintf("(offset=0x%08x)\n", vmap_table_offset);
size_t vmap_table_offset_limit =
diff --git a/runtime/Android.bp b/runtime/Android.bp
index c6f479ff40..08be5b2979 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -45,6 +45,7 @@ cc_defaults {
"base/timing_logger.cc",
"base/unix_file/fd_file.cc",
"base/unix_file/random_access_file_utils.cc",
+ "cha.cc",
"check_jni.cc",
"class_linker.cc",
"class_table.cc",
@@ -514,6 +515,7 @@ art_cc_test {
"base/transform_iterator_test.cc",
"base/variant_map_test.cc",
"base/unix_file/fd_file_test.cc",
+ "cha_test.cc",
"class_linker_test.cc",
"compiler_filter_test.cc",
"dex_file_test.cc",
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 730a9c32fd..ef03bb3dd4 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -105,7 +105,7 @@ inline uint32_t ArtMethod::GetAccessFlags() {
DoGetAccessFlagsHelper<kReadBarrierOption>(this);
}
}
- return access_flags_;
+ return access_flags_.load(std::memory_order_relaxed);
}
inline uint16_t ArtMethod::GetMethodIndex() {
@@ -452,6 +452,53 @@ inline mirror::Class* ArtMethod::GetReturnType(bool resolve, PointerSize pointer
return type;
}
+inline bool ArtMethod::HasSingleImplementation() {
+ if (IsFinal() || GetDeclaringClass()->IsFinal()) {
+ // We don't set kAccSingleImplementation for these cases since intrinsics
+ // can also use the flag.
+ return true;
+ }
+ return (GetAccessFlags() & kAccSingleImplementation) != 0;
+}
+
+inline void ArtMethod::SetIntrinsic(uint32_t intrinsic) {
+ DCHECK(IsUint<8>(intrinsic));
+ // Currently we only do intrinsics for static/final methods or methods of final
+ // classes. We don't set kHasSingleImplementation for those methods.
+ DCHECK(IsStatic() || IsFinal() || GetDeclaringClass()->IsFinal()) <<
+ "Potential conflict with kAccSingleImplementation";
+ uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
+ kAccIntrinsic |
+ (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
+ if (kIsDebugBuild) {
+ uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
+ bool is_constructor = IsConstructor();
+ bool is_synchronized = IsSynchronized();
+ bool skip_access_checks = SkipAccessChecks();
+ bool is_fast_native = IsFastNative();
+ bool is_copied = IsCopied();
+ bool is_miranda = IsMiranda();
+ bool is_default = IsDefault();
+ bool is_default_conflict = IsDefaultConflicting();
+ bool is_compilable = IsCompilable();
+ bool must_count_locks = MustCountLocks();
+ SetAccessFlags(new_value);
+ DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
+ DCHECK_EQ(is_constructor, IsConstructor());
+ DCHECK_EQ(is_synchronized, IsSynchronized());
+ DCHECK_EQ(skip_access_checks, SkipAccessChecks());
+ DCHECK_EQ(is_fast_native, IsFastNative());
+ DCHECK_EQ(is_copied, IsCopied());
+ DCHECK_EQ(is_miranda, IsMiranda());
+ DCHECK_EQ(is_default, IsDefault());
+ DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
+ DCHECK_EQ(is_compilable, IsCompilable());
+ DCHECK_EQ(must_count_locks, MustCountLocks());
+ } else {
+ SetAccessFlags(new_value);
+ }
+}
+
template<ReadBarrierOption kReadBarrierOption, typename RootVisitorType>
void ArtMethod::VisitRoots(RootVisitorType& visitor, PointerSize pointer_size) {
if (LIKELY(!declaring_class_.IsNull())) {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index eeece90be5..77d799fec4 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -51,6 +51,17 @@ extern "C" void art_quick_invoke_stub(ArtMethod*, uint32_t*, uint32_t, Thread*,
extern "C" void art_quick_invoke_static_stub(ArtMethod*, uint32_t*, uint32_t, Thread*, JValue*,
const char*);
+ArtMethod* ArtMethod::GetSingleImplementation() {
+ DCHECK(!IsNative());
+ if (!IsAbstract()) {
+ // A non-abstract method's single implementation is itself.
+ return this;
+ }
+ // TODO: add single-implementation logic for abstract method by storing it
+ // in ptr_sized_fields_.
+ return nullptr;
+}
+
ArtMethod* ArtMethod::FromReflectedMethod(const ScopedObjectAccessAlreadyRunnable& soa,
jobject jlr_method) {
ObjPtr<mirror::Executable> executable = soa.Decode<mirror::Executable>(jlr_method);
@@ -345,7 +356,7 @@ void ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
CHECK(!IsFastNative()) << PrettyMethod();
CHECK(native_method != nullptr) << PrettyMethod();
if (is_fast) {
- SetAccessFlags(GetAccessFlags() | kAccFastNative);
+ AddAccessFlags(kAccFastNative);
}
SetEntryPointFromJni(native_method);
}
@@ -503,7 +514,7 @@ const uint8_t* ArtMethod::GetQuickenedInfo(PointerSize pointer_size) {
if (oat_file == nullptr) {
return nullptr;
}
- return oat_file->DexBegin() + header->vmap_table_offset_;
+ return oat_file->DexBegin() + header->GetVmapTableOffset();
} else {
return oat_method.GetVmapTable();
}
@@ -601,7 +612,7 @@ const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
DCHECK(method_header->Contains(pc))
<< PrettyMethod()
<< " " << std::hex << pc << " " << oat_entry_point
- << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
+ << " " << (uintptr_t)(method_header->GetCode() + method_header->GetCodeSize());
return method_header;
}
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 00fab65241..2ae8147bde 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -85,9 +85,29 @@ class ArtMethod FINAL {
template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
ALWAYS_INLINE uint32_t GetAccessFlags();
+ // This version should only be called when it's certain there is no
+ // concurrency so there is no need to guarantee atomicity. For example,
+ // before the method is linked.
void SetAccessFlags(uint32_t new_access_flags) {
- // Not called within a transaction.
- access_flags_ = new_access_flags;
+ access_flags_.store(new_access_flags, std::memory_order_relaxed);
+ }
+
+ // This setter guarantees atomicity.
+ void AddAccessFlags(uint32_t flag) {
+ uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+ uint32_t new_access_flags;
+ do {
+ new_access_flags = old_access_flags | flag;
+ } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
+ }
+
+ // This setter guarantees atomicity.
+ void ClearAccessFlags(uint32_t flag) {
+ uint32_t old_access_flags = access_flags_.load(std::memory_order_relaxed);
+ uint32_t new_access_flags;
+ do {
+ new_access_flags = old_access_flags & ~flag;
+ } while (!access_flags_.compare_exchange_weak(old_access_flags, new_access_flags));
}
// Approximate what kind of method call would be used for this method.
@@ -142,39 +162,7 @@ class ArtMethod FINAL {
return (GetAccessFlags() & kAccIntrinsic) != 0;
}
- void SetIntrinsic(uint32_t intrinsic) {
- DCHECK(IsUint<8>(intrinsic));
- uint32_t new_value = (GetAccessFlags() & kAccFlagsNotUsedByIntrinsic) |
- kAccIntrinsic |
- (intrinsic << POPCOUNT(kAccFlagsNotUsedByIntrinsic));
- if (kIsDebugBuild) {
- uint32_t java_flags = (GetAccessFlags() & kAccJavaFlagsMask);
- bool is_constructor = IsConstructor();
- bool is_synchronized = IsSynchronized();
- bool skip_access_checks = SkipAccessChecks();
- bool is_fast_native = IsFastNative();
- bool is_copied = IsCopied();
- bool is_miranda = IsMiranda();
- bool is_default = IsDefault();
- bool is_default_conflict = IsDefaultConflicting();
- bool is_compilable = IsCompilable();
- bool must_count_locks = MustCountLocks();
- SetAccessFlags(new_value);
- DCHECK_EQ(java_flags, (GetAccessFlags() & kAccJavaFlagsMask));
- DCHECK_EQ(is_constructor, IsConstructor());
- DCHECK_EQ(is_synchronized, IsSynchronized());
- DCHECK_EQ(skip_access_checks, SkipAccessChecks());
- DCHECK_EQ(is_fast_native, IsFastNative());
- DCHECK_EQ(is_copied, IsCopied());
- DCHECK_EQ(is_miranda, IsMiranda());
- DCHECK_EQ(is_default, IsDefault());
- DCHECK_EQ(is_default_conflict, IsDefaultConflicting());
- DCHECK_EQ(is_compilable, IsCompilable());
- DCHECK_EQ(must_count_locks, MustCountLocks());
- } else {
- SetAccessFlags(new_value);
- }
- }
+ ALWAYS_INLINE void SetIntrinsic(uint32_t intrinsic) REQUIRES_SHARED(Locks::mutator_lock_);
uint32_t GetIntrinsic() {
DCHECK(IsIntrinsic());
@@ -259,7 +247,7 @@ class ArtMethod FINAL {
void SetSkipAccessChecks() {
DCHECK(!SkipAccessChecks());
- SetAccessFlags(GetAccessFlags() | kAccSkipAccessChecks);
+ AddAccessFlags(kAccSkipAccessChecks);
}
// Should this method be run in the interpreter and count locks (e.g., failed structured-
@@ -461,6 +449,26 @@ class ArtMethod FINAL {
return DataOffset(kRuntimePointerSize);
}
+ ALWAYS_INLINE bool HasSingleImplementation() REQUIRES_SHARED(Locks::mutator_lock_);
+
+ ALWAYS_INLINE void SetHasSingleImplementation(bool single_impl) {
+ DCHECK(!IsIntrinsic()) << "conflict with intrinsic bits";
+ if (single_impl) {
+ AddAccessFlags(kAccSingleImplementation);
+ } else {
+ ClearAccessFlags(kAccSingleImplementation);
+ }
+ }
+
+ ArtMethod* GetSingleImplementation()
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ ALWAYS_INLINE void SetSingleImplementation(ArtMethod* method, PointerSize pointer_size) {
+ DCHECK(!IsNative());
+ DCHECK(IsAbstract()); // Non-abstract method's single implementation is just itself.
+ SetDataPtrSize(method, pointer_size);
+ }
+
void* GetEntryPointFromJni() {
DCHECK(IsNative());
return GetEntryPointFromJniPtrSize(kRuntimePointerSize);
@@ -655,7 +663,10 @@ class ArtMethod FINAL {
GcRoot<mirror::Class> declaring_class_;
// Access flags; low 16 bits are defined by spec.
- uint32_t access_flags_;
+ // Getting and setting this flag needs to be atomic when concurrency is
+ // possible, e.g. after this method's class is linked, such as when setting
+ // verifier flags and the single-implementation flag.
+ std::atomic<std::uint32_t> access_flags_;
/* Dex file fields. The defining dex file is available via declaring_class_->dex_cache_ */
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 62cd2a7561..2feb28a778 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -94,6 +94,7 @@ enum ArenaAllocKind {
kArenaAllocGraphChecker,
kArenaAllocVerifier,
kArenaAllocCallingConvention,
+ kArenaAllocCHA,
kNumArenaAllocKinds
};
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index e8ef69f778..6665f95bba 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -58,6 +58,7 @@ Mutex* Locks::reference_queue_phantom_references_lock_ = nullptr;
Mutex* Locks::reference_queue_soft_references_lock_ = nullptr;
Mutex* Locks::reference_queue_weak_references_lock_ = nullptr;
Mutex* Locks::runtime_shutdown_lock_ = nullptr;
+Mutex* Locks::cha_lock_ = nullptr;
Mutex* Locks::thread_list_lock_ = nullptr;
ConditionVariable* Locks::thread_exit_cond_ = nullptr;
Mutex* Locks::thread_suspend_count_lock_ = nullptr;
@@ -955,6 +956,7 @@ void Locks::Init() {
DCHECK(logging_lock_ != nullptr);
DCHECK(mutator_lock_ != nullptr);
DCHECK(profiler_lock_ != nullptr);
+ DCHECK(cha_lock_ != nullptr);
DCHECK(thread_list_lock_ != nullptr);
DCHECK(thread_suspend_count_lock_ != nullptr);
DCHECK(trace_lock_ != nullptr);
@@ -1014,6 +1016,10 @@ void Locks::Init() {
DCHECK(breakpoint_lock_ == nullptr);
breakpoint_lock_ = new ReaderWriterMutex("breakpoint lock", current_lock_level);
+ UPDATE_CURRENT_LOCK_LEVEL(kCHALock);
+ DCHECK(cha_lock_ == nullptr);
+ cha_lock_ = new Mutex("CHA lock", current_lock_level);
+
UPDATE_CURRENT_LOCK_LEVEL(kClassLinkerClassesLock);
DCHECK(classlinker_classes_lock_ == nullptr);
classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 7e73e0dbd9..21b5bb926f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -97,6 +97,7 @@ enum LockLevel {
kMonitorPoolLock,
kClassLinkerClassesLock, // TODO rename.
kJitCodeCacheLock,
+ kCHALock,
kBreakpointLock,
kMonitorLock,
kMonitorListLock,
@@ -627,9 +628,12 @@ class Locks {
// TODO: improve name, perhaps instrumentation_update_lock_.
static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
+ // Guards Class Hierarchy Analysis (CHA).
+ static Mutex* cha_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+
// The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
// attaching and detaching.
- static Mutex* thread_list_lock_ ACQUIRED_AFTER(deoptimization_lock_);
+ static Mutex* thread_list_lock_ ACQUIRED_AFTER(cha_lock_);
// Signaled when threads terminate. Used to determine when all non-daemons have terminated.
static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_);
diff --git a/runtime/cha.cc b/runtime/cha.cc
new file mode 100644
index 0000000000..be675a82c8
--- /dev/null
+++ b/runtime/cha.cc
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread.h"
+#include "thread_list.h"
+#include "thread_pool.h"
+
+namespace art {
+
+void ClassHierarchyAnalysis::AddDependency(ArtMethod* method,
+ ArtMethod* dependent_method,
+ OatQuickMethodHeader* dependent_header) {
+ auto it = cha_dependency_map_.find(method);
+ if (it == cha_dependency_map_.end()) {
+ cha_dependency_map_[method] =
+ new std::vector<std::pair<art::ArtMethod*, art::OatQuickMethodHeader*>>();
+ it = cha_dependency_map_.find(method);
+ } else {
+ DCHECK(it->second != nullptr);
+ }
+ it->second->push_back(std::make_pair(dependent_method, dependent_header));
+}
+
+std::vector<std::pair<ArtMethod*, OatQuickMethodHeader*>>*
+ ClassHierarchyAnalysis::GetDependents(ArtMethod* method) {
+ auto it = cha_dependency_map_.find(method);
+ if (it != cha_dependency_map_.end()) {
+ DCHECK(it->second != nullptr);
+ return it->second;
+ }
+ return nullptr;
+}
+
+void ClassHierarchyAnalysis::RemoveDependencyFor(ArtMethod* method) {
+ auto it = cha_dependency_map_.find(method);
+ if (it != cha_dependency_map_.end()) {
+ auto dependents = it->second;
+ cha_dependency_map_.erase(it);
+ delete dependents;
+ }
+}
+
+void ClassHierarchyAnalysis::RemoveDependentsWithMethodHeaders(
+ const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+ // Iterate through all entries in the dependency map and remove any entry that
+ // contains one of those in method_headers.
+ for (auto map_it = cha_dependency_map_.begin(); map_it != cha_dependency_map_.end(); ) {
+ auto dependents = map_it->second;
+ for (auto vec_it = dependents->begin(); vec_it != dependents->end(); ) {
+ OatQuickMethodHeader* method_header = vec_it->second;
+ auto it = std::find(method_headers.begin(), method_headers.end(), method_header);
+ if (it != method_headers.end()) {
+ vec_it = dependents->erase(vec_it);
+ } else {
+ vec_it++;
+ }
+ }
+ // Remove the map entry if there are no more dependents.
+ if (dependents->empty()) {
+ map_it = cha_dependency_map_.erase(map_it);
+ delete dependents;
+ } else {
+ map_it++;
+ }
+ }
+}
+
+// This stack visitor walks the stack and for compiled code with certain method
+// headers, sets the should_deoptimize flag on stack to 1.
+// TODO: also set the register value to 1 when should_deoptimize is allocated in
+// a register.
+class CHAStackVisitor FINAL : public StackVisitor {
+ public:
+ CHAStackVisitor(Thread* thread_in,
+ Context* context,
+ const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+ : StackVisitor(thread_in, context, StackVisitor::StackWalkKind::kSkipInlinedFrames),
+ method_headers_(method_headers) {
+ }
+
+ bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+ ArtMethod* method = GetMethod();
+ if (method == nullptr || method->IsRuntimeMethod() || method->IsNative()) {
+ return true;
+ }
+ if (GetCurrentQuickFrame() == nullptr) {
+ // Not compiled code.
+ return true;
+ }
+ // Method may have multiple versions of compiled code. Check
+ // the method header to see if it has should_deoptimize flag.
+ const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
+ if (!method_header->HasShouldDeoptimizeFlag()) {
+ // This compiled version doesn't have should_deoptimize flag. Skip.
+ return true;
+ }
+ auto it = std::find(method_headers_.begin(), method_headers_.end(), method_header);
+ if (it == method_headers_.end()) {
+ // Not in the list of method headers that should be deoptimized.
+ return true;
+ }
+
+ // The compiled code on stack is not valid anymore. Need to deoptimize.
+ SetShouldDeoptimizeFlag();
+
+ return true;
+ }
+
+ private:
+ void SetShouldDeoptimizeFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
+ QuickMethodFrameInfo frame_info = GetCurrentQuickFrameInfo();
+ size_t frame_size = frame_info.FrameSizeInBytes();
+ uint8_t* sp = reinterpret_cast<uint8_t*>(GetCurrentQuickFrame());
+ size_t core_spill_size = POPCOUNT(frame_info.CoreSpillMask()) *
+ GetBytesPerGprSpillLocation(kRuntimeISA);
+ size_t fpu_spill_size = POPCOUNT(frame_info.FpSpillMask()) *
+ GetBytesPerFprSpillLocation(kRuntimeISA);
+ size_t offset = frame_size - core_spill_size - fpu_spill_size - kShouldDeoptimizeFlagSize;
+ uint8_t* should_deoptimize_addr = sp + offset;
+ // Set deoptimization flag to 1.
+ DCHECK(*should_deoptimize_addr == 0 || *should_deoptimize_addr == 1);
+ *should_deoptimize_addr = 1;
+ }
+
+ // Set of method headers for compiled code that should be deoptimized.
+ const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+ DISALLOW_COPY_AND_ASSIGN(CHAStackVisitor);
+};
+
+class CHACheckpoint FINAL : public Closure {
+ public:
+ explicit CHACheckpoint(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+ : barrier_(0),
+ method_headers_(method_headers) {}
+
+ void Run(Thread* thread) OVERRIDE {
+ // Note thread and self may not be equal if thread was already suspended at
+ // the point of the request.
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ CHAStackVisitor visitor(thread, nullptr, method_headers_);
+ visitor.WalkStack();
+ barrier_.Pass(self);
+ }
+
+ void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
+ Thread* self = Thread::Current();
+ ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
+ barrier_.Increment(self, threads_running_checkpoint);
+ }
+
+ private:
+ // The barrier to be passed through and for the requestor to wait upon.
+ Barrier barrier_;
+ // List of method headers for invalidated compiled code.
+ const std::unordered_set<OatQuickMethodHeader*>& method_headers_;
+
+ DISALLOW_COPY_AND_ASSIGN(CHACheckpoint);
+};
+
+void ClassHierarchyAnalysis::VerifyNonSingleImplementation(mirror::Class* verify_class,
+ uint16_t verify_index) {
+ // Grab cha_lock_ to make sure all single-implementation updates are seen.
+ PointerSize image_pointer_size =
+ Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+ while (verify_class != nullptr) {
+ if (verify_index >= verify_class->GetVTableLength()) {
+ return;
+ }
+ ArtMethod* verify_method = verify_class->GetVTableEntry(verify_index, image_pointer_size);
+ DCHECK(!verify_method->HasSingleImplementation())
+ << "class: " << verify_class->PrettyClass()
+ << " verify_method: " << verify_method->PrettyMethod(true);
+ verify_class = verify_class->GetSuperClass();
+ }
+}
+
+void ClassHierarchyAnalysis::CheckSingleImplementationInfo(
+ Handle<mirror::Class> klass,
+ ArtMethod* virtual_method,
+ ArtMethod* method_in_super,
+ std::unordered_set<ArtMethod*>& invalidated_single_impl_methods) {
+ // TODO: if klass is not instantiable, virtual_method isn't invocable yet so
+ // even if it overrides, it doesn't invalidate single-implementation
+ // assumption.
+
+ DCHECK_NE(virtual_method, method_in_super);
+ DCHECK(method_in_super->GetDeclaringClass()->IsResolved()) << "class isn't resolved";
+ // If virtual_method doesn't come from a default interface method, it should
+ // be supplied by klass.
+ DCHECK(virtual_method->IsCopied() ||
+ virtual_method->GetDeclaringClass() == klass.Get());
+
+ // A new virtual_method should set method_in_super to
+ // non-single-implementation (if not set already).
+ // We don't grab cha_lock_. Single-implementation flag won't be set to true
+ // again once it's set to false.
+ if (!method_in_super->HasSingleImplementation()) {
+ // method_in_super already has multiple implementations. All methods in the
+ // same vtable slots in its super classes should have
+ // non-single-implementation already.
+ if (kIsDebugBuild) {
+ VerifyNonSingleImplementation(klass->GetSuperClass()->GetSuperClass(),
+ method_in_super->GetMethodIndex());
+ }
+ return;
+ }
+
+ // Native methods don't have single-implementation flag set.
+ DCHECK(!method_in_super->IsNative());
+ // Invalidate method_in_super's single-implementation status.
+ invalidated_single_impl_methods.insert(method_in_super);
+}
+
+void ClassHierarchyAnalysis::InitSingleImplementationFlag(Handle<mirror::Class> klass,
+ ArtMethod* method) {
+ DCHECK(method->IsCopied() || method->GetDeclaringClass() == klass.Get());
+ if (klass->IsFinal() || method->IsFinal()) {
+ // Final classes or methods do not need CHA for devirtualization.
+ // This frees up modifier bits for intrinsics which currently are only
+ // used for static methods or methods of final classes.
+ return;
+ }
+ if (method->IsNative()) {
+ // Native method's invocation overhead is already high and it
+ // cannot be inlined. It's not worthwhile to devirtualize the
+ // call which can add a deoptimization point.
+ DCHECK(!method->HasSingleImplementation());
+ } else {
+ method->SetHasSingleImplementation(true);
+ if (method->IsAbstract()) {
+ // There is no real implementation yet.
+ // TODO: implement single-implementation logic for abstract methods.
+ DCHECK(method->GetSingleImplementation() == nullptr);
+ } else {
+ // Single implementation of non-abstract method is itself.
+ DCHECK_EQ(method->GetSingleImplementation(), method);
+ }
+ }
+}
+
+void ClassHierarchyAnalysis::UpdateAfterLoadingOf(Handle<mirror::Class> klass) {
+ if (klass->IsInterface()) {
+ return;
+ }
+ mirror::Class* super_class = klass->GetSuperClass();
+ if (super_class == nullptr) {
+ return;
+ }
+
+ // Keeps track of all methods whose single-implementation assumption
+ // is invalidated by linking `klass`.
+ std::unordered_set<ArtMethod*> invalidated_single_impl_methods;
+
+ PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+ // Do an entry-by-entry comparison of vtable contents with super's vtable.
+ for (int32_t i = 0; i < super_class->GetVTableLength(); ++i) {
+ ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+ ArtMethod* method_in_super = super_class->GetVTableEntry(i, image_pointer_size);
+ if (method == method_in_super) {
+ // vtable slot entry is inherited from super class.
+ continue;
+ }
+ InitSingleImplementationFlag(klass, method);
+ CheckSingleImplementationInfo(klass,
+ method,
+ method_in_super,
+ invalidated_single_impl_methods);
+ }
+
+ // For new virtual methods that don't override.
+ for (int32_t i = super_class->GetVTableLength(); i < klass->GetVTableLength(); ++i) {
+ ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
+ InitSingleImplementationFlag(klass, method);
+ }
+
+ Runtime* const runtime = Runtime::Current();
+ if (!invalidated_single_impl_methods.empty()) {
+ Thread *self = Thread::Current();
+ // Method headers for compiled code to be invalidated.
+ std::unordered_set<OatQuickMethodHeader*> dependent_method_headers;
+
+ {
+ // We do this under cha_lock_. Committing code also grabs this lock to
+ // make sure the code is only committed when all single-implementation
+ // assumptions are still true.
+ MutexLock cha_mu(self, *Locks::cha_lock_);
+ // Invalidate compiled methods that assume some virtual calls have only
+ // single implementations.
+ for (ArtMethod* invalidated : invalidated_single_impl_methods) {
+ if (!invalidated->HasSingleImplementation()) {
+ // It might have been invalidated already when other class linking is
+ // going on.
+ continue;
+ }
+ invalidated->SetHasSingleImplementation(false);
+
+ if (runtime->IsAotCompiler()) {
+ // No need to invalidate any compiled code as the AotCompiler doesn't
+ // run any code.
+ continue;
+ }
+
+ // Invalidate all dependents.
+ auto dependents = GetDependents(invalidated);
+ if (dependents == nullptr) {
+ continue;
+ }
+ for (const auto& dependent : *dependents) {
+ ArtMethod* method = dependent.first;
+ OatQuickMethodHeader* method_header = dependent.second;
+ VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
+ DCHECK(runtime->UseJitCompilation());
+ runtime->GetJit()->GetCodeCache()->InvalidateCompiledCodeFor(
+ method, method_header);
+ dependent_method_headers.insert(method_header);
+ }
+ RemoveDependencyFor(invalidated);
+ }
+ }
+
+ if (dependent_method_headers.empty()) {
+ return;
+ }
+ // Deoptimize compiled code on stack that should have been invalidated.
+ CHACheckpoint checkpoint(dependent_method_headers);
+ size_t threads_running_checkpoint = runtime->GetThreadList()->RunCheckpoint(&checkpoint);
+ if (threads_running_checkpoint != 0) {
+ checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
+ }
+ }
+}
+
+} // namespace art
diff --git a/runtime/cha.h b/runtime/cha.h
new file mode 100644
index 0000000000..ada5c89981
--- /dev/null
+++ b/runtime/cha.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_CHA_H_
+#define ART_RUNTIME_CHA_H_
+
+#include "art_method.h"
+#include "base/enums.h"
+#include "base/mutex.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "oat_quick_method_header.h"
+#include <unordered_map>
+#include <unordered_set>
+
+namespace art {
+
+/**
+ * Class Hierarchy Analysis (CHA) tries to devirtualize virtual calls into
+ * direct calls based on the info generated by analyzing class hierarchies.
+ * If a class is not subclassed, or even if it's subclassed but one of its
+ * virtual methods isn't overridden, a virtual call for that method can be
+ * changed into a direct call.
+ *
+ * Each virtual method carries a single-implementation status. The status is
+ * incrementally maintained at the end of class linking time when method
+ * overriding takes effect.
+ *
+ * The compiler takes advantage of the single-implementation info of a
+ * method. If a method A has the single-implementation flag set, the compiler
+ * devirtualizes the virtual call for method A into a direct call, and
+ * further tries to inline the direct call as a result. The compiler will
+ * also register a dependency that the compiled code depends on the
+ * assumption that method A has single-implementation status.
+ *
+ * When single-implementation info is updated at the end of class linking,
+ * and if method A's single-implementation status is invalidated, all compiled
+ * code that depends on the assumption that method A has single-implementation
+ * status needs to be invalidated. Method entrypoints that have this dependency
+ * will be updated as a result. Method A can later be recompiled with less
+ * aggressive assumptions.
+ *
+ * For live compiled code that's on stack, deoptimization will be initiated
+ * to force the invalidated compiled code into interpreter mode to guarantee
+ * correctness. The deoptimization mechanism used is a hybrid of
+ * synchronous and asynchronous deoptimization. The synchronous deoptimization
+ * part checks a hidden local variable flag for the method, and if true,
+ * initiates deoptimization. The asynchronous deoptimization part issues a
+ * checkpoint that walks the stack and for any compiled code on the stack
+ * that should be deoptimized, sets the hidden local variable value to true.
+ *
+ * A cha_lock_ needs to be held for updating single-implementation status,
+ * and registering/unregistering CHA dependencies. Registering CHA dependency
+ * and making compiled code visible also need to be atomic. Otherwise, we
+ * may miss invalidating CHA dependents or making compiled code visible even
+ * after it is invalidated. Care needs to be taken between cha_lock_ and
+ * JitCodeCache::lock_ to guarantee the atomicity.
+ *
+ * We base our CHA on dynamically linked class profiles instead of doing static
+ * analysis. Static analysis can be too aggressive due to dynamic class loading
+ * at runtime, and too conservative since some classes may not be really loaded
+ * at runtime.
+ */
+class ClassHierarchyAnalysis {
+ public:
+ // Types for recording CHA dependencies.
+ // For invalidating CHA dependency, we need to know both the ArtMethod and
+ // the method header. If the ArtMethod has compiled code with the method header
+ // as the entrypoint, we update the entrypoint to the interpreter bridge.
+ // We will also deoptimize frames that are currently executing the code of
+ // the method header.
+ typedef std::pair<ArtMethod*, OatQuickMethodHeader*> MethodAndMethodHeaderPair;
+ typedef std::vector<MethodAndMethodHeaderPair> ListOfDependentPairs;
+
+ ClassHierarchyAnalysis() {}
+
+ // Add a dependency that compiled code with `dependent_header` for `dependent_method`
+ // assumes that virtual `method` has single-implementation.
+ void AddDependency(ArtMethod* method,
+ ArtMethod* dependent_method,
+ OatQuickMethodHeader* dependent_header) REQUIRES(Locks::cha_lock_);
+
+ // Return compiled code that assumes that `method` has single-implementation.
+ std::vector<MethodAndMethodHeaderPair>* GetDependents(ArtMethod* method)
+ REQUIRES(Locks::cha_lock_);
+
+ // Remove dependency tracking for compiled code that assumes that
+ // `method` has single-implementation.
+ void RemoveDependencyFor(ArtMethod* method) REQUIRES(Locks::cha_lock_);
+
+ // Remove from cha_dependency_map_ all entries that contain OatQuickMethodHeader from
+ // the given `method_headers` set.
+ // This is used when some compiled code is freed.
+ void RemoveDependentsWithMethodHeaders(
+ const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+ REQUIRES(Locks::cha_lock_);
+
+ // Update CHA info for methods that `klass` overrides, after loading `klass`.
+ void UpdateAfterLoadingOf(Handle<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+ void InitSingleImplementationFlag(Handle<mirror::Class> klass, ArtMethod* method)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // `virtual_method` in `klass` overrides `method_in_super`.
+ // This will invalidate some assumptions on single-implementation.
+ // Append methods that should have their single-implementation flag invalidated
+ // to `invalidated_single_impl_methods`.
+ void CheckSingleImplementationInfo(
+ Handle<mirror::Class> klass,
+ ArtMethod* virtual_method,
+ ArtMethod* method_in_super,
+ std::unordered_set<ArtMethod*>& invalidated_single_impl_methods)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Verify that none of the methods in vtable slot `verify_index` of `verify_class`
+ // and its super classes has single-implementation status.
+ void VerifyNonSingleImplementation(mirror::Class* verify_class, uint16_t verify_index)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // A map that maps a method to a set of compiled code that assumes that method has a
+ // single implementation, which is used to do CHA-based devirtualization.
+ std::unordered_map<ArtMethod*, ListOfDependentPairs*> cha_dependency_map_
+ GUARDED_BY(Locks::cha_lock_);
+
+ DISALLOW_COPY_AND_ASSIGN(ClassHierarchyAnalysis);
+};
+
+} // namespace art
+
+#endif // ART_RUNTIME_CHA_H_
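
As a reading aid (not part of the patch), here is a minimal, self-contained C++ sketch of the dependency-map bookkeeping that ClassHierarchyAnalysis declares above. Method and MethodHeader are placeholder types standing in for ArtMethod and OatQuickMethodHeader, and the cha_lock_ requirement is omitted.

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

struct Method {};        // stand-in for ArtMethod
struct MethodHeader {};  // stand-in for OatQuickMethodHeader

class ChaDependencyMapSketch {
 public:
  using DependentPair = std::pair<Method*, MethodHeader*>;

  // Compiled code `header` for `dependent` assumes `method` has a single implementation.
  void AddDependency(Method* method, Method* dependent, MethodHeader* header) {
    deps_[method].emplace_back(dependent, header);
  }

  // Returns nullptr if nothing depends on `method` being single-implementation.
  const std::vector<DependentPair>* GetDependents(Method* method) const {
    auto it = deps_.find(method);
    return it == deps_.end() ? nullptr : &it->second;
  }

  // Called once `method` gains another implementation and its dependents are invalidated.
  void RemoveDependencyFor(Method* method) { deps_.erase(method); }

  // Called when compiled code is freed: drop every pair whose header is going away.
  void RemoveDependentsWithMethodHeaders(const std::unordered_set<MethodHeader*>& headers) {
    for (auto it = deps_.begin(); it != deps_.end();) {
      std::vector<DependentPair>& pairs = it->second;
      pairs.erase(std::remove_if(pairs.begin(), pairs.end(),
                                 [&](const DependentPair& p) { return headers.count(p.second) != 0; }),
                  pairs.end());
      it = pairs.empty() ? deps_.erase(it) : std::next(it);
    }
  }

 private:
  std::unordered_map<Method*, std::vector<DependentPair>> deps_;
};

int main() {
  Method m1, m2;
  MethodHeader h2;
  ChaDependencyMapSketch cha;
  cha.AddDependency(&m1, &m2, &h2);  // code h2 (compiled for m2) assumes m1 is single-implementation
  std::printf("dependents of m1: %zu\n", cha.GetDependents(&m1)->size());
  cha.RemoveDependentsWithMethodHeaders({&h2});  // h2's code is being freed
  std::printf("m1 still tracked: %s\n", cha.GetDependents(&m1) != nullptr ? "yes" : "no");
  return 0;
}

The real class stores pointers to heap-allocated ListOfDependentPairs and is guarded by Locks::cha_lock_; the sketch only mirrors the add/query/remove shapes that cha_test.cc below exercises.
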
diff --git a/runtime/cha_test.cc b/runtime/cha_test.cc
new file mode 100644
index 0000000000..d2f335e951
--- /dev/null
+++ b/runtime/cha_test.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cha.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class CHATest : public CommonRuntimeTest {};
+
+// Mocks some methods.
+#define METHOD1 (reinterpret_cast<ArtMethod*>(8u))
+#define METHOD2 (reinterpret_cast<ArtMethod*>(16u))
+#define METHOD3 (reinterpret_cast<ArtMethod*>(24u))
+
+// Mocks some method headers.
+#define METHOD_HEADER1 (reinterpret_cast<OatQuickMethodHeader*>(128u))
+#define METHOD_HEADER2 (reinterpret_cast<OatQuickMethodHeader*>(136u))
+#define METHOD_HEADER3 (reinterpret_cast<OatQuickMethodHeader*>(144u))
+
+TEST_F(CHATest, CHACheckDependency) {
+ ClassHierarchyAnalysis cha;
+ MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+
+ ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+
+ cha.AddDependency(METHOD1, METHOD2, METHOD_HEADER2);
+ ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+ auto dependents = cha.GetDependents(METHOD1);
+ ASSERT_EQ(dependents->size(), 1u);
+ ASSERT_EQ(dependents->at(0).first, METHOD2);
+ ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+
+ cha.AddDependency(METHOD1, METHOD3, METHOD_HEADER3);
+ ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+ dependents = cha.GetDependents(METHOD1);
+ ASSERT_EQ(dependents->size(), 2u);
+ ASSERT_EQ(dependents->at(0).first, METHOD2);
+ ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+ ASSERT_EQ(dependents->at(1).first, METHOD3);
+ ASSERT_EQ(dependents->at(1).second, METHOD_HEADER3);
+
+ std::unordered_set<OatQuickMethodHeader*> headers;
+ headers.insert(METHOD_HEADER2);
+ cha.RemoveDependentsWithMethodHeaders(headers);
+ ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+ dependents = cha.GetDependents(METHOD1);
+ ASSERT_EQ(dependents->size(), 1u);
+ ASSERT_EQ(dependents->at(0).first, METHOD3);
+ ASSERT_EQ(dependents->at(0).second, METHOD_HEADER3);
+
+ cha.AddDependency(METHOD2, METHOD1, METHOD_HEADER1);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+ dependents = cha.GetDependents(METHOD1);
+ ASSERT_EQ(dependents->size(), 1u);
+ dependents = cha.GetDependents(METHOD2);
+ ASSERT_EQ(dependents->size(), 1u);
+
+ headers.insert(METHOD_HEADER3);
+ cha.RemoveDependentsWithMethodHeaders(headers);
+ ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+ dependents = cha.GetDependents(METHOD2);
+ ASSERT_EQ(dependents->size(), 1u);
+ ASSERT_EQ(dependents->at(0).first, METHOD1);
+ ASSERT_EQ(dependents->at(0).second, METHOD_HEADER1);
+
+ cha.RemoveDependencyFor(METHOD2);
+ ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
+ ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+}
+
+} // namespace art
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 0de647f509..f13060a1fd 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -40,6 +40,7 @@
#include "base/time_utils.h"
#include "base/unix_file/fd_file.h"
#include "base/value_object.h"
+#include "cha.h"
#include "class_linker-inl.h"
#include "class_table-inl.h"
#include "compiler_callbacks.h"
@@ -96,6 +97,7 @@
#include "ScopedLocalRef.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
+#include "thread_list.h"
#include "trace.h"
#include "utils.h"
#include "utils/dex_cache_arrays_layout-inl.h"
@@ -5111,6 +5113,12 @@ bool ClassLinker::LinkClass(Thread* self,
if (klass->ShouldHaveImt()) {
klass->SetImt(imt, image_pointer_size_);
}
+
+ // Update CHA info based on whether we override methods.
+ // Have to do this before setting the class as resolved, which would allow
+ // instantiation of klass.
+ Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(klass);
+
// This will notify waiters on klass that saw the not yet resolved
// class in the class_table_ during EnsureResolved.
mirror::Class::SetStatus(klass, mirror::Class::kStatusResolved, self);
@@ -5154,6 +5162,11 @@ bool ClassLinker::LinkClass(Thread* self,
}
}
+ // Update CHA info based on whether we override methods.
+ // Have to do this before setting the class as resolved, which would allow
+ // instantiation of klass.
+ Runtime::Current()->GetClassHierarchyAnalysis()->UpdateAfterLoadingOf(h_new_class);
+
// This will notify waiters on temp class that saw the not yet resolved class in the
// class_table_ during EnsureResolved.
mirror::Class::SetStatus(klass, mirror::Class::kStatusRetired, self);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2ae989a239..19f309951e 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -23,6 +23,7 @@
#include "base/stl_util.h"
#include "base/systrace.h"
#include "base/time_utils.h"
+#include "cha.h"
#include "debugger_interface.h"
#include "entrypoints/runtime_asm_entrypoints.h"
#include "gc/accounting/bitmap-inl.h"
@@ -217,7 +218,9 @@ uint8_t* JitCodeCache::CommitCode(Thread* self,
const uint8_t* code,
size_t code_size,
bool osr,
- Handle<mirror::ObjectArray<mirror::Object>> roots) {
+ Handle<mirror::ObjectArray<mirror::Object>> roots,
+ bool has_should_deoptimize_flag,
+ const ArenaSet<ArtMethod*>& cha_single_implementation_list) {
uint8_t* result = CommitCodeInternal(self,
method,
stack_map,
@@ -228,7 +231,9 @@ uint8_t* JitCodeCache::CommitCode(Thread* self,
code,
code_size,
osr,
- roots);
+ roots,
+ has_should_deoptimize_flag,
+ cha_single_implementation_list);
if (result == nullptr) {
// Retry.
GarbageCollectCache(self);
@@ -242,7 +247,9 @@ uint8_t* JitCodeCache::CommitCode(Thread* self,
code,
code_size,
osr,
- roots);
+ roots,
+ has_should_deoptimize_flag,
+ cha_single_implementation_list);
}
return result;
}
@@ -359,7 +366,7 @@ void JitCodeCache::SweepRootTables(IsMarkedVisitor* visitor) {
}
}
-void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UNUSED) {
+void JitCodeCache::FreeCode(const void* code_ptr) {
uintptr_t allocation = FromCodeToAllocation(code_ptr);
// Notify native debugger that we are about to remove the code.
// It does nothing if we are not using native debugger.
@@ -368,41 +375,69 @@ void JitCodeCache::FreeCode(const void* code_ptr, ArtMethod* method ATTRIBUTE_UN
FreeCode(reinterpret_cast<uint8_t*>(allocation));
}
+void JitCodeCache::FreeAllMethodHeaders(
+ const std::unordered_set<OatQuickMethodHeader*>& method_headers) {
+ {
+ MutexLock mu(Thread::Current(), *Locks::cha_lock_);
+ Runtime::Current()->GetClassHierarchyAnalysis()
+ ->RemoveDependentsWithMethodHeaders(method_headers);
+ }
+
+ // We need to remove the entries in method_headers from CHA dependencies
+ // first, since once we do FreeCode() below, the memory can be reused,
+ // so it's possible for the same method_header to start representing
+ // different compiled code.
+ MutexLock mu(Thread::Current(), lock_);
+ ScopedCodeCacheWrite scc(code_map_.get());
+ for (const OatQuickMethodHeader* method_header : method_headers) {
+ FreeCode(method_header->GetCode());
+ }
+}
+
void JitCodeCache::RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) {
ScopedTrace trace(__PRETTY_FUNCTION__);
- MutexLock mu(self, lock_);
- // We do not check if a code cache GC is in progress, as this method comes
- // with the classlinker_classes_lock_ held, and suspending ourselves could
- // lead to a deadlock.
+ // We use a set to first collect all method_headers whose code needs to be
+ // removed. We need to free the underlying code only after we remove CHA
+ // dependencies for entries in this set, and it's more efficient to iterate
+ // through the CHA dependency map just once with an unordered_set.
+ std::unordered_set<OatQuickMethodHeader*> method_headers;
{
- ScopedCodeCacheWrite scc(code_map_.get());
- for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
- if (alloc.ContainsUnsafe(it->second)) {
- FreeCode(it->first, it->second);
- it = method_code_map_.erase(it);
+ MutexLock mu(self, lock_);
+ // We do not check if a code cache GC is in progress, as this method comes
+ // with the classlinker_classes_lock_ held, and suspending ourselves could
+ // lead to a deadlock.
+ {
+ ScopedCodeCacheWrite scc(code_map_.get());
+ for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+ if (alloc.ContainsUnsafe(it->second)) {
+ method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+ it = method_code_map_.erase(it);
+ } else {
+ ++it;
+ }
+ }
+ }
+ for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
+ if (alloc.ContainsUnsafe(it->first)) {
+ // Note that the code has already been pushed to method_headers in the loop
+ // above and is going to be removed in FreeCode() below.
+ it = osr_code_map_.erase(it);
} else {
++it;
}
}
- }
- for (auto it = osr_code_map_.begin(); it != osr_code_map_.end();) {
- if (alloc.ContainsUnsafe(it->first)) {
- // Note that the code has already been removed in the loop above.
- it = osr_code_map_.erase(it);
- } else {
- ++it;
- }
- }
- for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
- ProfilingInfo* info = *it;
- if (alloc.ContainsUnsafe(info->GetMethod())) {
- info->GetMethod()->SetProfilingInfo(nullptr);
- FreeData(reinterpret_cast<uint8_t*>(info));
- it = profiling_infos_.erase(it);
- } else {
- ++it;
+ for (auto it = profiling_infos_.begin(); it != profiling_infos_.end();) {
+ ProfilingInfo* info = *it;
+ if (alloc.ContainsUnsafe(info->GetMethod())) {
+ info->GetMethod()->SetProfilingInfo(nullptr);
+ FreeData(reinterpret_cast<uint8_t*>(info));
+ it = profiling_infos_.erase(it);
+ } else {
+ ++it;
+ }
}
}
+ FreeAllMethodHeaders(method_headers);
}
bool JitCodeCache::IsWeakAccessEnabled(Thread* self) const {
@@ -464,7 +499,10 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
const uint8_t* code,
size_t code_size,
bool osr,
- Handle<mirror::ObjectArray<mirror::Object>> roots) {
+ Handle<mirror::ObjectArray<mirror::Object>> roots,
+ bool has_should_deoptimize_flag,
+ const ArenaSet<ArtMethod*>&
+ cha_single_implementation_list) {
DCHECK(stack_map != nullptr);
size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
// Ensure the header ends up at expected instruction alignment.
@@ -506,12 +544,44 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
// https://patchwork.kernel.org/patch/9047921/
FlushInstructionCache(reinterpret_cast<char*>(code_ptr),
reinterpret_cast<char*>(code_ptr + code_size));
+ DCHECK(!Runtime::Current()->IsAotCompiler());
+ if (has_should_deoptimize_flag) {
+ method_header->SetHasShouldDeoptimizeFlag();
+ }
}
number_of_compilations_++;
}
// We need to update the entry point in the runnable state for the instrumentation.
{
+ // Need cha_lock_ for checking all single-implementation flags and for
+ // registering dependencies.
+ MutexLock cha_mu(self, *Locks::cha_lock_);
+ bool single_impl_still_valid = true;
+ for (ArtMethod* single_impl : cha_single_implementation_list) {
+ if (!single_impl->HasSingleImplementation()) {
+ // We simply discard the compiled code. Clear the
+ // counter so that it may be recompiled later. Hopefully the
+ // class hierarchy will be more stable when compilation is retried.
+ single_impl_still_valid = false;
+ method->ClearCounter();
+ break;
+ }
+ }
+
+ // Discard the code if any single-implementation assumptions are now invalid.
+ if (!single_impl_still_valid) {
+ VLOG(jit) << "JIT discarded jitted code due to invalid single-implementation assumptions.";
+ return nullptr;
+ }
+ for (ArtMethod* single_impl : cha_single_implementation_list) {
+ Runtime::Current()->GetClassHierarchyAnalysis()->AddDependency(
+ single_impl, method, method_header);
+ }
+
+ // The following also needs to be guarded by cha_lock_. Otherwise it's
+ // possible that the compiled code is considered invalidated by some class linking,
+ // but below we would still publish it as valid code for the method.
MutexLock mu(self, lock_);
method_code_map_.Put(code_ptr, method);
// Fill the root table before updating the entry point.
@@ -535,7 +605,8 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
<< " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
<< " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
<< reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
- << reinterpret_cast<const void*>(method_header->GetEntryPoint() + method_header->code_size_);
+ << reinterpret_cast<const void*>(method_header->GetEntryPoint() +
+ method_header->GetCodeSize());
histogram_code_memory_use_.AddValue(code_size);
if (code_size > kCodeSizeLogThreshold) {
LOG(INFO) << "JIT allocated "
@@ -836,20 +907,23 @@ void JitCodeCache::GarbageCollectCache(Thread* self) {
void JitCodeCache::RemoveUnmarkedCode(Thread* self) {
ScopedTrace trace(__FUNCTION__);
- MutexLock mu(self, lock_);
- ScopedCodeCacheWrite scc(code_map_.get());
- // Iterate over all compiled code and remove entries that are not marked.
- for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
- const void* code_ptr = it->first;
- ArtMethod* method = it->second;
- uintptr_t allocation = FromCodeToAllocation(code_ptr);
- if (GetLiveBitmap()->Test(allocation)) {
- ++it;
- } else {
- FreeCode(code_ptr, method);
- it = method_code_map_.erase(it);
+ std::unordered_set<OatQuickMethodHeader*> method_headers;
+ {
+ MutexLock mu(self, lock_);
+ ScopedCodeCacheWrite scc(code_map_.get());
+ // Iterate over all compiled code and remove entries that are not marked.
+ for (auto it = method_code_map_.begin(); it != method_code_map_.end();) {
+ const void* code_ptr = it->first;
+ uintptr_t allocation = FromCodeToAllocation(code_ptr);
+ if (GetLiveBitmap()->Test(allocation)) {
+ ++it;
+ } else {
+ method_headers.insert(OatQuickMethodHeader::FromCodePointer(it->first));
+ it = method_code_map_.erase(it);
+ }
}
}
+ FreeAllMethodHeaders(method_headers);
}
void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) {
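
To make the commit-time ordering above easier to follow, here is a standalone C++ sketch (not ART code): under one lock, re-check every single-implementation assumption, register the CHA dependencies, and only then publish the code. All names below are placeholders; the real code coordinates Locks::cha_lock_ with JitCodeCache::lock_.

#include <cstdio>
#include <mutex>
#include <unordered_map>
#include <vector>

struct Method { bool has_single_implementation = true; };

std::mutex cha_lock;                                         // stands in for Locks::cha_lock_
std::unordered_map<Method*, std::vector<Method*>> cha_deps;  // assumption -> dependent methods
std::unordered_map<Method*, const void*> published_code;     // method -> published entry point

// Returns false (and publishes nothing) if any assumption was invalidated
// between compilation and commit; the method can then be recompiled later.
bool CommitCode(Method* method, const void* code,
                const std::vector<Method*>& single_impl_assumptions) {
  std::lock_guard<std::mutex> guard(cha_lock);
  for (Method* assumed : single_impl_assumptions) {
    if (!assumed->has_single_implementation) {
      return false;  // discard the compiled code
    }
  }
  for (Method* assumed : single_impl_assumptions) {
    cha_deps[assumed].push_back(method);  // register the dependency before publishing
  }
  published_code[method] = code;  // publish while still holding the lock
  return true;
}

int main() {
  Method callee, caller;
  static const char fake_code[] = "entry";
  std::printf("committed: %d\n", CommitCode(&caller, fake_code, {&callee}));
  callee.has_single_implementation = false;  // e.g. a subclass overriding callee got linked
  std::printf("committed after invalidation: %d\n", CommitCode(&caller, fake_code, {&callee}));
  return 0;
}
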
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index be2cec5091..30e2efbac5 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -20,6 +20,7 @@
#include "instrumentation.h"
#include "atomic.h"
+#include "base/arena_containers.h"
#include "base/histogram-inl.h"
#include "base/macros.h"
#include "base/mutex.h"
@@ -91,6 +92,11 @@ class JitCodeCache {
REQUIRES(!lock_);
// Allocate and write code and its metadata to the code cache.
+ // The methods in `cha_single_implementation_list` need to be registered as CHA
+ // dependencies (if the assumptions are still valid), since the compiled code still
+ // needs to be invalidated if a single-implementation assumption is violated later.
+ // This needs to be done even if `has_should_deoptimize_flag` is false, which can
+ // happen due to CHA guard elimination.
uint8_t* CommitCode(Thread* self,
ArtMethod* method,
uint8_t* stack_map,
@@ -101,7 +107,9 @@ class JitCodeCache {
const uint8_t* code,
size_t code_size,
bool osr,
- Handle<mirror::ObjectArray<mirror::Object>> roots)
+ Handle<mirror::ObjectArray<mirror::Object>> roots,
+ bool has_should_deoptimize_flag,
+ const ArenaSet<ArtMethod*>& cha_single_implementation_list)
REQUIRES_SHARED(Locks::mutator_lock_)
REQUIRES(!lock_);
@@ -230,7 +238,9 @@ class JitCodeCache {
const uint8_t* code,
size_t code_size,
bool osr,
- Handle<mirror::ObjectArray<mirror::Object>> roots)
+ Handle<mirror::ObjectArray<mirror::Object>> roots,
+ bool has_should_deoptimize_flag,
+ const ArenaSet<ArtMethod*>& cha_single_implementation_list)
REQUIRES(!lock_)
REQUIRES_SHARED(Locks::mutator_lock_);
@@ -245,8 +255,13 @@ class JitCodeCache {
bool WaitForPotentialCollectionToComplete(Thread* self)
REQUIRES(lock_) REQUIRES(!Locks::mutator_lock_);
- // Free in the mspace allocations taken by 'method'.
- void FreeCode(const void* code_ptr, ArtMethod* method) REQUIRES(lock_);
+ // Remove CHA dependents and underlying allocations for entries in `method_headers`.
+ void FreeAllMethodHeaders(const std::unordered_set<OatQuickMethodHeader*>& method_headers)
+ REQUIRES(!lock_)
+ REQUIRES(!Locks::cha_lock_);
+
+ // Free in the mspace allocations for `code_ptr`.
+ void FreeCode(const void* code_ptr) REQUIRES(lock_);
// Number of bytes allocated in the code cache.
size_t CodeCacheSizeLocked() REQUIRES(lock_);
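
The free-time ordering behind FreeAllMethodHeaders can be sketched the same way (standalone C++, placeholder mutexes and containers): purge the CHA dependency entries keyed by the headers being freed first, and only then release the code, since freed memory may be reused and the same header address could later represent different compiled code.

#include <algorithm>
#include <mutex>
#include <unordered_map>
#include <unordered_set>
#include <vector>

struct MethodHeader {};

std::mutex cha_lock;    // stands in for Locks::cha_lock_
std::mutex cache_lock;  // stands in for JitCodeCache::lock_

std::unordered_map<int, std::vector<MethodHeader*>> cha_deps;  // method id -> dependent headers
std::unordered_set<MethodHeader*> code_cache;                  // "allocated" compiled code

void FreeAllMethodHeadersSketch(const std::unordered_set<MethodHeader*>& headers) {
  {
    // Step 1: under the CHA lock, purge dependency entries that reference headers being freed.
    std::lock_guard<std::mutex> guard(cha_lock);
    for (auto& entry : cha_deps) {
      std::vector<MethodHeader*>& vec = entry.second;
      vec.erase(std::remove_if(vec.begin(), vec.end(),
                               [&](MethodHeader* h) { return headers.count(h) != 0; }),
                vec.end());
    }
  }
  // Step 2: only now free the code, under the cache lock; freed memory may be
  // reused, so stale dependency entries must already be gone.
  std::lock_guard<std::mutex> guard(cache_lock);
  for (MethodHeader* header : headers) {
    code_cache.erase(header);
    delete header;
  }
}

int main() {
  MethodHeader* header = new MethodHeader();
  code_cache.insert(header);
  cha_deps[1].push_back(header);
  FreeAllMethodHeadersSketch({header});
  return 0;
}
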
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5def65e4d4..5fdf8f3c3a 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -238,7 +238,7 @@ inline ArtMethod* Class::GetVirtualMethodUnchecked(size_t i, PointerSize pointer
template<VerifyObjectFlags kVerifyFlags,
ReadBarrierOption kReadBarrierOption>
inline PointerArray* Class::GetVTable() {
- DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
+ DCHECK(IsLoaded<kVerifyFlags>() || IsErroneous<kVerifyFlags>());
return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
OFFSET_OF_OBJECT_MEMBER(Class, vtable_));
}
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index a1110d92a5..8a01043062 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -71,6 +71,11 @@ static constexpr uint32_t kAccMustCountLocks = 0x02000000; // method (ru
// class.
// TODO Might want to re-arrange some of these so that we can have obsolete + intrinsic methods.
static constexpr uint32_t kAccObsoleteMethod = 0x04000000; // method (runtime)
+
+// Set by the class linker for a method that has only one implementation for a
+// virtual call.
+static constexpr uint32_t kAccSingleImplementation = 0x08000000; // method (runtime)
+
static constexpr uint32_t kAccIntrinsic = 0x80000000; // method (runtime)
// Special runtime-only flags.
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 23768899bd..5565565c7b 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -272,7 +272,7 @@ static bool PcIsWithinQuickCode(ArtMethod* method, uintptr_t pc) NO_THREAD_SAFET
if (code == 0) {
return pc == 0;
}
- uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+ uintptr_t code_size = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
return code <= pc && pc <= (code + code_size);
}
diff --git a/runtime/oat_file-inl.h b/runtime/oat_file-inl.h
index d7d0c4f733..721fab993f 100644
--- a/runtime/oat_file-inl.h
+++ b/runtime/oat_file-inl.h
@@ -44,7 +44,7 @@ inline uint32_t OatFile::OatMethod::GetQuickCodeSizeOffset() const {
if (method_header == nullptr) {
return 0u;
}
- return reinterpret_cast<const uint8_t*>(&method_header->code_size_) - begin_;
+ return reinterpret_cast<const uint8_t*>(method_header->GetCodeSizeAddr()) - begin_;
}
inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
@@ -52,7 +52,7 @@ inline size_t OatFile::OatMethod::GetFrameSizeInBytes() const {
if (code == nullptr) {
return 0u;
}
- return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FrameSizeInBytes();
+ return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FrameSizeInBytes();
}
inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
@@ -60,7 +60,7 @@ inline uint32_t OatFile::OatMethod::GetCoreSpillMask() const {
if (code == nullptr) {
return 0u;
}
- return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.CoreSpillMask();
+ return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().CoreSpillMask();
}
inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
@@ -68,7 +68,7 @@ inline uint32_t OatFile::OatMethod::GetFpSpillMask() const {
if (code == nullptr) {
return 0u;
}
- return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].frame_info_.FpSpillMask();
+ return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetFrameInfo().FpSpillMask();
}
inline uint32_t OatFile::OatMethod::GetVmapTableOffset() const {
@@ -81,7 +81,7 @@ inline uint32_t OatFile::OatMethod::GetVmapTableOffsetOffset() const {
if (method_header == nullptr) {
return 0u;
}
- return reinterpret_cast<const uint8_t*>(&method_header->vmap_table_offset_) - begin_;
+ return reinterpret_cast<const uint8_t*>(method_header->GetVmapTableOffsetAddr()) - begin_;
}
inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
@@ -89,7 +89,7 @@ inline const uint8_t* OatFile::OatMethod::GetVmapTable() const {
if (code == nullptr) {
return nullptr;
}
- uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].vmap_table_offset_;
+ uint32_t offset = reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetVmapTableOffset();
if (UNLIKELY(offset == 0u)) {
return nullptr;
}
@@ -101,7 +101,7 @@ inline uint32_t OatFile::OatMethod::GetQuickCodeSize() const {
if (code == nullptr) {
return 0u;
}
- return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].code_size_;
+ return reinterpret_cast<const OatQuickMethodHeader*>(code)[-1].GetCodeSize();
}
inline uint32_t OatFile::OatMethod::GetCodeOffset() const {
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 4afca7d828..3cdde5a065 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -58,7 +58,7 @@ class PACKED(4) OatQuickMethodHeader {
}
bool IsOptimized() const {
- return code_size_ != 0 && vmap_table_offset_ != 0;
+ return GetCodeSize() != 0 && vmap_table_offset_ != 0;
}
const void* GetOptimizedCodeInfoPtr() const {
@@ -81,7 +81,23 @@ class PACKED(4) OatQuickMethodHeader {
}
uint32_t GetCodeSize() const {
- return code_size_;
+ return code_size_ & kCodeSizeMask;
+ }
+
+ const uint32_t* GetCodeSizeAddr() const {
+ return &code_size_;
+ }
+
+ uint32_t GetVmapTableOffset() const {
+ return vmap_table_offset_;
+ }
+
+ void SetVmapTableOffset(uint32_t offset) {
+ vmap_table_offset_ = offset;
+ }
+
+ const uint32_t* GetVmapTableOffsetAddr() const {
+ return &vmap_table_offset_;
}
const uint8_t* GetVmapTable() const {
@@ -96,7 +112,7 @@ class PACKED(4) OatQuickMethodHeader {
// On Thumb-2, the pc is offset by one.
code_start++;
}
- return code_start <= pc && pc <= (code_start + code_size_);
+ return code_start <= pc && pc <= (code_start + GetCodeSize());
}
const uint8_t* GetEntryPoint() const {
@@ -130,11 +146,25 @@ class PACKED(4) OatQuickMethodHeader {
uint32_t ToDexPc(ArtMethod* method, const uintptr_t pc, bool abort_on_failure = true) const;
+ void SetHasShouldDeoptimizeFlag() {
+ DCHECK_EQ(code_size_ & kShouldDeoptimizeMask, 0u);
+ code_size_ |= kShouldDeoptimizeMask;
+ }
+
+ bool HasShouldDeoptimizeFlag() const {
+ return (code_size_ & kShouldDeoptimizeMask) != 0;
+ }
+
+ private:
+ static constexpr uint32_t kShouldDeoptimizeMask = 0x80000000;
+ static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;
+
// The offset in bytes from the start of the vmap table to the end of the header.
uint32_t vmap_table_offset_;
// The stack frame information.
QuickMethodFrameInfo frame_info_;
- // The code size in bytes.
+ // The code size in bytes. The highest bit is used to signify whether the compiled
+ // code associated with this method header has the should_deoptimize flag.
uint32_t code_size_;
// The actual code.
uint8_t code_[0];
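
Here is a standalone C++ sketch (not ART code) of the bit-packing used above: the should_deoptimize flag lives in the top bit of the 32-bit code size, so readers mask it out and the setter only flips that one bit.

#include <cassert>
#include <cstdint>
#include <cstdio>

class CodeSizeFieldSketch {
 public:
  explicit CodeSizeFieldSketch(uint32_t code_size) : code_size_(code_size) {
    assert((code_size & kShouldDeoptimizeMask) == 0);  // real code sizes never use the top bit
  }

  // Readers must mask out the flag bit to recover the actual size.
  uint32_t GetCodeSize() const { return code_size_ & kCodeSizeMask; }

  // The setter only flips the top bit and leaves the size untouched.
  void SetHasShouldDeoptimizeFlag() { code_size_ |= kShouldDeoptimizeMask; }

  bool HasShouldDeoptimizeFlag() const {
    return (code_size_ & kShouldDeoptimizeMask) != 0;
  }

 private:
  static constexpr uint32_t kShouldDeoptimizeMask = 0x80000000u;
  static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;

  uint32_t code_size_;
};

int main() {
  CodeSizeFieldSketch field(0x1234u);
  field.SetHasShouldDeoptimizeFlag();
  std::printf("size=0x%x flag=%d\n", field.GetCodeSize(), field.HasShouldDeoptimizeFlag());
  return 0;
}
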
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 68b956b17e..92e00ec155 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -62,6 +62,7 @@
#include "base/stl_util.h"
#include "base/systrace.h"
#include "base/unix_file/fd_file.h"
+#include "cha.h"
#include "class_linker-inl.h"
#include "compiler_callbacks.h"
#include "debugger.h"
@@ -349,6 +350,7 @@ Runtime::~Runtime() {
delete monitor_list_;
delete monitor_pool_;
delete class_linker_;
+ delete cha_;
delete heap_;
delete intern_table_;
delete oat_file_manager_;
@@ -1203,6 +1205,7 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
class_linker_ = new ClassLinker(intern_table_);
+ cha_ = new ClassHierarchyAnalysis;
if (GetHeap()->HasBootImageSpace()) {
bool result = class_linker_->InitFromBootImage(&error_msg);
if (!result) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 4f31887b72..e6b3128675 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -76,6 +76,7 @@ namespace verifier {
} // namespace verifier
class ArenaPool;
class ArtMethod;
+class ClassHierarchyAnalysis;
class ClassLinker;
class Closure;
class CompilerCallbacks;
@@ -674,6 +675,10 @@ class Runtime {
void AddSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
void RemoveSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
+ ClassHierarchyAnalysis* GetClassHierarchyAnalysis() {
+ return cha_;
+ }
+
NO_RETURN
static void Aborter(const char* abort_message);
@@ -921,6 +926,8 @@ class Runtime {
// Generic system-weak holders.
std::vector<gc::AbstractSystemWeakHolder*> system_weak_holders_;
+ ClassHierarchyAnalysis* cha_;
+
DISALLOW_COPY_AND_ASSIGN(Runtime);
};
std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 167a30ba2b..f20aa201aa 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -648,7 +648,7 @@ static void AssertPcIsWithinQuickCode(ArtMethod* method, uintptr_t pc)
return;
}
- uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
+ uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->GetCodeSize();
uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
CHECK(code_start <= pc && pc <= (code_start + code_size))
<< method->PrettyMethod()
diff --git a/runtime/stack.h b/runtime/stack.h
index e879214c75..d02e4b71d1 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -66,6 +66,11 @@ std::ostream& operator<<(std::ostream& os, const VRegKind& rhs);
struct ShadowFrameDeleter;
using ShadowFrameAllocaUniquePtr = std::unique_ptr<ShadowFrame, ShadowFrameDeleter>;
+// Size in bytes of the should_deoptimize flag on the stack.
+// We only need 4 bytes for our purpose regardless of the architecture; frame size
+// calculation will automatically align the final frame size.
+static constexpr size_t kShouldDeoptimizeFlagSize = 4;
+
// Counting locks by storing object pointers into a vector. Duplicate entries mark recursive locks.
// The vector will be visited with the ShadowFrame during GC (so all the locked-on objects are
// thread roots).
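
As a purely illustrative simulation (not ART code), the following standalone C++ sketch models the hybrid deoptimization described in cha.h: a checkpoint walks the stack and sets the hidden should_deoptimize flag for frames running invalidated code, and the CHA guard in compiled code checks that flag synchronously. The real flag is a 4-byte stack slot sized by kShouldDeoptimizeFlagSize; everything below is a placeholder.

#include <cstdio>
#include <unordered_set>
#include <vector>

struct Frame {
  const void* code;        // which compiled code the frame is executing
  bool should_deoptimize;  // the hidden flag reserved in the frame
};

// "Checkpoint" part: mark every frame whose compiled code has been invalidated.
void MarkInvalidatedFrames(std::vector<Frame>& stack,
                           const std::unordered_set<const void*>& invalidated_code) {
  for (Frame& frame : stack) {
    if (invalidated_code.count(frame.code) != 0) {
      frame.should_deoptimize = true;
    }
  }
}

// "CHA guard" part: compiled code checks the flag before using devirtualized calls.
void ChaGuard(const Frame& frame, const char* label) {
  if (frame.should_deoptimize) {
    std::printf("%s: deoptimize, continue in the interpreter\n", label);
  } else {
    std::printf("%s: fast path (devirtualized call)\n", label);
  }
}

int main() {
  static const char code_a[] = "A";
  static const char code_b[] = "B";
  std::vector<Frame> stack = {{code_a, false}, {code_b, false}};
  ChaGuard(stack[0], "frame0");            // fast path
  MarkInvalidatedFrames(stack, {code_a});  // code_a's single-implementation assumption broke
  ChaGuard(stack[0], "frame0");            // now deoptimizes
  ChaGuard(stack[1], "frame1");            // unaffected
  return 0;
}
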
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 7137db8773..461c172487 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -410,15 +410,15 @@ MethodVerifier::FailureData MethodVerifier::VerifyMethod(Thread* self,
result.kind = kSoftFailure;
if (method != nullptr &&
!CanCompilerHandleVerificationFailure(verifier.encountered_failure_types_)) {
- method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+ method->AddAccessFlags(kAccCompileDontBother);
}
}
if (method != nullptr) {
if (verifier.HasInstructionThatWillThrow()) {
- method->SetAccessFlags(method->GetAccessFlags() | kAccCompileDontBother);
+ method->AddAccessFlags(kAccCompileDontBother);
}
if ((verifier.encountered_failure_types_ & VerifyError::VERIFY_ERROR_LOCKING) != 0) {
- method->SetAccessFlags(method->GetAccessFlags() | kAccMustCountLocks);
+ method->AddAccessFlags(kAccMustCountLocks);
}
}
} else {
diff --git a/test/616-cha/expected.txt b/test/616-cha/expected.txt
new file mode 100644
index 0000000000..6a5618ebc6
--- /dev/null
+++ b/test/616-cha/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha/info.txt b/test/616-cha/info.txt
new file mode 100644
index 0000000000..50e3b0db18
--- /dev/null
+++ b/test/616-cha/info.txt
@@ -0,0 +1 @@
+Test for Class Hierarchy Analysis (CHA).
diff --git a/test/616-cha/src/Main.java b/test/616-cha/src/Main.java
new file mode 100644
index 0000000000..787318dd67
--- /dev/null
+++ b/test/616-cha/src/Main.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main1 {
+ String getName() {
+ return "Main1";
+ }
+
+ void printError(String msg) {
+ System.out.println(msg);
+ }
+
+ void foo(int i) {
+ if (i != 1) {
+ printError("error1");
+ }
+ }
+
+ int getValue1() {
+ return 1;
+ }
+ int getValue2() {
+ return 2;
+ }
+ int getValue3() {
+ return 3;
+ }
+ int getValue4() {
+ return 4;
+ }
+ int getValue5() {
+ return 5;
+ }
+ int getValue6() {
+ return 6;
+ }
+}
+
+class Main2 extends Main1 {
+ String getName() {
+ return "Main2";
+ }
+
+ void foo(int i) {
+ if (i != 2) {
+ printError("error2");
+ }
+ }
+}
+
+class Main3 extends Main1 {
+ String getName() {
+ return "Main3";
+ }
+}
+
+public class Main {
+ static Main1 sMain1;
+ static Main1 sMain2;
+
+ static boolean sIsOptimizing = true;
+ static boolean sHasJIT = true;
+ static volatile boolean sOtherThreadStarted;
+
+ // sMain1.foo() will always be Main1.foo() before Main2 is loaded/linked.
+ // So sMain1.foo() can be devirtualized to Main1.foo() and be inlined.
+ // After Dummy.createMain2() links in Main2, the live testOverride() frames on
+ // the stack should be deoptimized.
+ static void testOverride(boolean createMain2, boolean wait, boolean setHasJIT) {
+ if (setHasJIT) {
+ if (isInterpreted()) {
+ sHasJIT = false;
+ }
+ return;
+ }
+
+ if (createMain2 && (sIsOptimizing || sHasJIT)) {
+ assertIsManaged();
+ }
+
+ sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+
+ if (createMain2) {
+ // Wait for the other thread to start.
+ while (!sOtherThreadStarted);
+ // Create a Main2 instance and assign it to sMain2.
+ // sMain1 is kept the same.
+ sMain2 = Dummy.createMain2();
+ // Wake up the other thread.
+ synchronized(Main.class) {
+ Main.class.notify();
+ }
+ } else if (wait) {
+ // This is the other thread.
+ synchronized(Main.class) {
+ sOtherThreadStarted = true;
+ // Wait for Main2 to be linked and for deoptimization to be triggered.
+ try {
+ Main.class.wait();
+ } catch (Exception e) {
+ }
+ }
+ }
+
+ // There should be a deoptimization here right after Main2 is linked by
+ // calling Dummy.createMain2(), even though sMain1 didn't change.
+ // The behavior here would be different if an inline cache were used, which
+ // doesn't deoptimize since sMain1 still hits the type cache.
+ sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+ if ((createMain2 || wait) && sHasJIT && !sIsOptimizing) {
+ // This method should be deoptimized right after Main2 is created.
+ assertIsInterpreted();
+ }
+
+ if (sMain2 != null) {
+ sMain2.foo(sMain2.getClass() == Main1.class ? 1 : 2);
+ }
+ }
+
+ static Main1[] sArray;
+
+ static long calcValue(Main1 m) {
+ return m.getValue1()
+ + m.getValue2() * 2
+ + m.getValue3() * 3
+ + m.getValue4() * 4
+ + m.getValue5() * 5
+ + m.getValue6() * 6;
+ }
+
+ static long testNoOverrideLoop(int count) {
+ long sum = 0;
+ for (int i=0; i<count; i++) {
+ sum += calcValue(sArray[0]);
+ sum += calcValue(sArray[1]);
+ sum += calcValue(sArray[2]);
+ }
+ return sum;
+ }
+
+ static void testNoOverride() {
+ sArray = new Main1[3];
+ sArray[0] = new Main1();
+ sArray[1] = Dummy.createMain2();
+ sArray[2] = Dummy.createMain3();
+ long sum = 0;
+ // Loop enough to get methods JITed.
+ for (int i=0; i<100; i++) {
+ testNoOverrideLoop(1);
+ }
+ ensureJitCompiled(Main.class, "testNoOverrideLoop");
+ ensureJitCompiled(Main.class, "calcValue");
+
+ long t1 = System.currentTimeMillis();
+ sum = testNoOverrideLoop(100000);
+ long t2 = System.currentTimeMillis();
+ if (sum != 27300000L) {
+ System.out.println("Unexpected result.");
+ }
+ }
+
+ private static void assertSingleImplementation(Class<?> clazz, String method_name, boolean b) {
+ if (hasSingleImplementation(clazz, method_name) != b) {
+ System.out.println(clazz + "." + method_name +
+ " does not have the expected single-implementation value of " + b);
+ }
+ }
+
+ // Test scenarios under which CHA-based devirtualization happens,
+ // and in which class loading that overrides a method can invalidate compiled code.
+ // Also test the pure non-overriding case, which is mainly for checking the form
+ // of the generated code.
+ public static void main(String[] args) {
+ System.loadLibrary(args[0]);
+
+ // Check some boot-image methods.
+ assertSingleImplementation(java.util.ArrayList.class, "size", true);
+ // java.util.LinkedHashMap overrides get().
+ assertSingleImplementation(java.util.HashMap.class, "get", false);
+
+ // We don't set the single-implementation modifier bit for final classes or methods
+ // since we can devirtualize without CHA in those cases. However, hasSingleImplementation()
+ // should still return true for those cases.
+ assertSingleImplementation(java.lang.String.class, "charAt", true);
+ assertSingleImplementation(java.lang.Thread.class, "join", true);
+ // We don't set the single-implementation modifier bit for native methods.
+ assertSingleImplementation(java.lang.Thread.class, "isInterrupted", false);
+
+ if (isInterpreted()) {
+ sIsOptimizing = false;
+ }
+
+ // sMain1 is an instance of Main1. Main2 hasn't been loaded yet.
+ sMain1 = new Main1();
+
+ // Loop enough to get testOverride() JITed.
+ for (int i=0; i<100; i++) {
+ testOverride(false, false, false);
+ }
+
+ ensureJitCompiled(Main.class, "testOverride");
+ testOverride(false, false, true);
+
+ if (sHasJIT && !sIsOptimizing) {
+ assertSingleImplementation(Main1.class, "foo", true);
+ } else {
+ // Main2 is verified ahead-of-time so it's linked in already.
+ }
+ assertSingleImplementation(Main1.class, "getValue1", true);
+
+ // Create another thread that also calls sMain1.foo().
+ // This tests suspending and deoptimizing another thread.
+ new Thread() {
+ public void run() {
+ testOverride(false, true, false);
+ }
+ }.start();
+
+ // This will create Main2 instance in the middle of testOverride().
+ testOverride(true, false, false);
+ assertSingleImplementation(Main1.class, "foo", false);
+ assertSingleImplementation(Main1.class, "getValue1", true);
+
+ testNoOverride();
+ }
+
+ private static native void ensureJitCompiled(Class<?> itf, String method_name);
+ private static native void assertIsInterpreted();
+ private static native void assertIsManaged();
+ private static native boolean isInterpreted();
+ private static native boolean hasSingleImplementation(Class<?> clazz, String method_name);
+}
+
+// Allocate Main2/Main3 in another class so that verification of Main doesn't trigger their loading.
+class Dummy {
+ static Main1 createMain2() {
+ return new Main2();
+ }
+ static Main1 createMain3() {
+ return new Main3();
+ }
+}
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index a2eb370326..9cfa3247c8 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -157,4 +157,17 @@ extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
}
}
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_hasSingleImplementation(JNIEnv* env,
+ jclass,
+ jclass cls,
+ jstring method_name) {
+ ArtMethod* method = nullptr;
+ ScopedObjectAccess soa(Thread::Current());
+ ScopedUtfChars chars(env, method_name);
+ CHECK(chars.c_str() != nullptr);
+ method = soa.Decode<mirror::Class>(cls)->FindDeclaredVirtualMethodByName(
+ chars.c_str(), kRuntimePointerSize);
+ return method->HasSingleImplementation();
+}
+
} // namespace art