Card mark holding class instead of declaring class

For profiling info, we need to mark the card of the holding class
instead of declaring class. This is required for GC correctness since
the GC relies on the card table to track cross space references.

Test: test-art-host ART_TEST_JIT=true

Bug: 30655270

Change-Id: Ia4690219ded0df38032b644440273e06bc303956
diff --git a/runtime/base/array_slice.h b/runtime/base/array_slice.h
index 19ad302..32283d0 100644
--- a/runtime/base/array_slice.h
+++ b/runtime/base/array_slice.h
@@ -129,6 +129,10 @@
     return element_size_;
   }
 
+  bool Contains(const T* element) const {
+    return &AtUnchecked(0) <= element && element < &AtUnchecked(size_);
+  }
+
  private:
   T& AtUnchecked(size_t index) {
     return *reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(array_) + index * element_size_);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 3d7624d..a4e05bd 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -88,7 +88,6 @@
   kTracingUniqueMethodsLock,
   kTracingStreamingLock,
   kDeoptimizedMethodsLock,
-  kJitCodeCacheLock,
   kClassLoaderClassesLock,
   kDefaultMutexLevel,
   kMarkSweepLargeObjectLock,
@@ -99,6 +98,7 @@
   kMonitorPoolLock,
   kMethodVerifiersLock,
   kClassLinkerClassesLock,  // TODO rename.
+  kJitCodeCacheLock,
   kBreakpointLock,
   kMonitorLock,
   kMonitorListLock,
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 534f53d..46722ec 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3560,32 +3560,40 @@
     }
     LOG(INFO) << "Loaded class " << descriptor << source;
   }
-  WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  mirror::ClassLoader* const class_loader = klass->GetClassLoader();
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
-  mirror::Class* existing = class_table->Lookup(descriptor, hash);
-  if (existing != nullptr) {
-    return existing;
-  }
-  if (kIsDebugBuild &&
-      !klass->IsTemp() &&
-      class_loader == nullptr &&
-      dex_cache_boot_image_class_lookup_required_) {
-    // Check a class loaded with the system class loader matches one in the image if the class
-    // is in the image.
-    existing = LookupClassFromBootImage(descriptor);
+  {
+    WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+    mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+    ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
+    mirror::Class* existing = class_table->Lookup(descriptor, hash);
     if (existing != nullptr) {
-      CHECK_EQ(klass, existing);
+      return existing;
+    }
+    if (kIsDebugBuild &&
+        !klass->IsTemp() &&
+        class_loader == nullptr &&
+        dex_cache_boot_image_class_lookup_required_) {
+      // Check a class loaded with the system class loader matches one in the image if the class
+      // is in the image.
+      existing = LookupClassFromBootImage(descriptor);
+      if (existing != nullptr) {
+        CHECK_EQ(klass, existing);
+      }
+    }
+    VerifyObject(klass);
+    class_table->InsertWithHash(klass, hash);
+    if (class_loader != nullptr) {
+      // This is necessary because we need to have the card dirtied for remembered sets.
+      Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
+    }
+    if (log_new_class_table_roots_) {
+      new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
     }
   }
-  VerifyObject(klass);
-  class_table->InsertWithHash(klass, hash);
-  if (class_loader != nullptr) {
-    // This is necessary because we need to have the card dirtied for remembered sets.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
-  }
-  if (log_new_class_table_roots_) {
-    new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
+  if (kIsDebugBuild) {
+    // Test that copied methods correctly can find their holder.
+    for (ArtMethod& method : klass->GetCopiedMethods(image_pointer_size_)) {
+      CHECK_EQ(GetHoldingClassOfCopiedMethod(&method), klass);
+    }
   }
   return nullptr;
 }
@@ -8105,19 +8113,27 @@
 
 void ClassLinker::CleanupClassLoaders() {
   Thread* const self = Thread::Current();
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
-    const ClassLoaderData& data = *it;
-    // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
-    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
-    if (class_loader != nullptr) {
-      ++it;
-    } else {
-      VLOG(class_linker) << "Freeing class loader";
-      DeleteClassLoader(self, data);
-      it = class_loaders_.erase(it);
+  std::vector<ClassLoaderData> to_delete;
+  // Do the delete outside the lock to avoid lock violation in jit code cache.
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
+      const ClassLoaderData& data = *it;
+      // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
+      auto* const class_loader =
+          down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
+      if (class_loader != nullptr) {
+        ++it;
+      } else {
+        VLOG(class_linker) << "Freeing class loader";
+        to_delete.push_back(data);
+        it = class_loaders_.erase(it);
+      }
     }
   }
+  for (ClassLoaderData& data : to_delete) {
+    DeleteClassLoader(self, data);
+  }
 }
 
 std::set<DexCacheResolvedClasses> ClassLinker::GetResolvedClasses(bool ignore_boot_classes) {
@@ -8236,6 +8252,33 @@
   return ret;
 }
 
+class ClassLinker::FindVirtualMethodHolderVisitor : public ClassVisitor {
+ public:
+  FindVirtualMethodHolderVisitor(const ArtMethod* method, PointerSize pointer_size)
+      : method_(method),
+        pointer_size_(pointer_size) {}
+
+  bool operator()(mirror::Class* klass) SHARED_REQUIRES(Locks::mutator_lock_) OVERRIDE {
+    if (klass->GetVirtualMethodsSliceUnchecked(pointer_size_).Contains(method_)) {
+      holder_ = klass;
+    }
+    // Return false to stop searching if holder_ is not null.
+    return holder_ == nullptr;
+  }
+
+  mirror::Class* holder_ = nullptr;
+  const ArtMethod* const method_;
+  const PointerSize pointer_size_;
+};
+
+mirror::Class* ClassLinker::GetHoldingClassOfCopiedMethod(ArtMethod* method) {
+  ScopedTrace trace(__FUNCTION__);  // Since this function is slow, have a trace to notify people.
+  CHECK(method->IsCopied());
+  FindVirtualMethodHolderVisitor visitor(method, image_pointer_size_);
+  VisitClasses(&visitor);
+  return visitor.holder_;
+}
+
 // Instantiate ResolveMethod.
 template ArtMethod* ClassLinker::ResolveMethod<ClassLinker::kForceICCECheck>(
     const DexFile& dex_file,
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index fcc6b23..c3ab8c5 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -648,6 +648,10 @@
       SHARED_REQUIRES(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
+  // Get the actual holding class for a copied method. Pretty slow, don't call often.
+  mirror::Class* GetHoldingClassOfCopiedMethod(ArtMethod* method)
+      SHARED_REQUIRES(Locks::mutator_lock_);
+
   struct DexCacheData {
     // Weak root to the DexCache. Note: Do not decode this unnecessarily or else class unloading may
     // not work properly.
@@ -676,7 +680,6 @@
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   static void DeleteClassLoader(Thread* self, const ClassLoaderData& data)
-      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void VisitClassLoaders(ClassLoaderVisitor* visitor) const
@@ -1168,6 +1171,7 @@
   // Image pointer size.
   PointerSize image_pointer_size_;
 
+  class FindVirtualMethodHolderVisitor;
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index 3734bcc..0304d0d 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -195,7 +195,8 @@
     return root_.IsNull();
   }
 
-  ALWAYS_INLINE GcRoot(MirrorType* ref = nullptr) SHARED_REQUIRES(Locks::mutator_lock_);
+  ALWAYS_INLINE GcRoot() {}
+  explicit ALWAYS_INLINE GcRoot(MirrorType* ref) SHARED_REQUIRES(Locks::mutator_lock_);
 
  private:
   // Root visitors take pointers to root_ and place them in CompressedReference** arrays. We use a
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index d52030f..cff2354 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -692,9 +692,6 @@
   DCHECK(this_object != nullptr);
   ProfilingInfo* info = caller->GetProfilingInfo(kRuntimePointerSize);
   if (info != nullptr) {
-    // Since the instrumentation is marked from the declaring class we need to mark the card so
-    // that mod-union tables and card rescanning know about the update.
-    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(caller->GetDeclaringClass());
     info->AddInvokeInfo(dex_pc, this_object->GetClass());
   }
 }
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 6dc1578..1938221 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -146,7 +146,6 @@
   // Remove all methods in our cache that were allocated by 'alloc'.
   void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
       REQUIRES(!lock_)
-      REQUIRES(Locks::classlinker_classes_lock_)
       SHARED_REQUIRES(Locks::mutator_lock_);
 
   void ClearGcRootsInInlineCaches(Thread* self) REQUIRES(!lock_);
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 07c8051..216df2f 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -25,10 +25,33 @@
 
 namespace art {
 
+ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
+      : number_of_inline_caches_(entries.size()),
+        method_(method),
+        is_method_being_compiled_(false),
+        is_osr_method_being_compiled_(false),
+        current_inline_uses_(0),
+        saved_entry_point_(nullptr) {
+  memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
+  for (size_t i = 0; i < number_of_inline_caches_; ++i) {
+    cache_[i].dex_pc_ = entries[i];
+  }
+  if (method->IsCopied()) {
+    // GetHoldingClassOfCopiedMethod is expensive, but creating a profiling info for a copied method
+    // appears to happen very rarely in practice.
+    holding_class_ = GcRoot<mirror::Class>(
+        Runtime::Current()->GetClassLinker()->GetHoldingClassOfCopiedMethod(method));
+  } else {
+    holding_class_ = GcRoot<mirror::Class>(method->GetDeclaringClass());
+  }
+  DCHECK(!holding_class_.IsNull());
+}
+
 bool ProfilingInfo::Create(Thread* self, ArtMethod* method, bool retry_allocation) {
   // Walk over the dex instructions of the method and keep track of
   // instructions we are interested in profiling.
   DCHECK(!method->IsNative());
+
   const DexFile::CodeItem& code_item = *method->GetCodeItem();
   const uint16_t* code_ptr = code_item.insns_;
   const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
@@ -93,6 +116,14 @@
         --i;
       } else {
         // We successfully set `cls`, just return.
+        // Since the instrumentation is marked from the declaring class we need to mark the card so
+        // that mod-union tables and card rescanning know about the update.
+        // Note that the declaring class is not necessarily the holding class if the method is
+        // copied. We need the card mark to be in the holding class since that is from where we
+        // will visit the profiling info.
+        if (!holding_class_.IsNull()) {
+          Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(holding_class_.Read());
+        }
         return;
       }
     }
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index d04d2de..a890fbb 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -105,6 +105,7 @@
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
   void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    visitor.VisitRootIfNonNull(holding_class_.AddressWithoutBarrier());
     for (size_t i = 0; i < number_of_inline_caches_; ++i) {
       InlineCache* cache = &cache_[i];
       for (size_t j = 0; j < InlineCache::kIndividualCacheSize; ++j) {
@@ -166,18 +167,7 @@
   }
 
  private:
-  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
-      : number_of_inline_caches_(entries.size()),
-        method_(method),
-        is_method_being_compiled_(false),
-        is_osr_method_being_compiled_(false),
-        current_inline_uses_(0),
-        saved_entry_point_(nullptr) {
-    memset(&cache_, 0, number_of_inline_caches_ * sizeof(InlineCache));
-    for (size_t i = 0; i < number_of_inline_caches_; ++i) {
-      cache_[i].dex_pc_ = entries[i];
-    }
-  }
+  ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries);
 
   // Number of instructions we are profiling in the ArtMethod.
   const uint32_t number_of_inline_caches_;
@@ -185,6 +175,9 @@
   // Method this profiling info is for.
   ArtMethod* const method_;
 
+  // Holding class for the method in case method is a copied method.
+  GcRoot<mirror::Class> holding_class_;
+
   // Whether the ArtMethod is currently being compiled. This flag
   // is implicitly guarded by the JIT code cache lock.
   // TODO: Make the JIT code cache lock global.