Drop object counting from the allocation fast path.

Use cases that need object counts should already install the
instrumented entrypoints, which perform the counting.
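
A minimal C++ sketch of the TLAB fast path, using illustrative names
rather than the actual ART declarations, to show the counter being
dropped:

  #include <cstddef>
  #include <cstdint>

  // Thread-local bump-pointer (TLAB) allocation, simplified.
  struct Thread {
    uint8_t* tlab_pos;            // cf. THREAD_LOCAL_POS_OFFSET
    uint8_t* tlab_end;            // cf. THREAD_LOCAL_END_OFFSET
    size_t thread_local_objects;  // cf. THREAD_LOCAL_OBJECTS_OFFSET
  };

  inline void* AllocTlab(Thread* self, size_t size) {
    uint8_t* pos = self->tlab_pos;
    if (static_cast<size_t>(self->tlab_end - pos) < size) {
      return nullptr;             // fall back to the slow path
    }
    self->tlab_pos = pos + size;  // store new thread_local_pos
    // Removed by this change:
    //   self->thread_local_objects++;
    // Counting now happens only in the instrumented entrypoints.
    return pos;
  }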

Test: test.py
Bug: 289500504
Change-Id: I7859a825d5e8e0a281d8ef0860ab381acecd0567
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index f060411..e2b5bca 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1175,9 +1175,6 @@
     //
     // (Note: The actual check is done by checking that the object's class pointer is non-null.
     // Also, unlike rosalloc, the object can never be observed as null).
-    ldr    r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
-    add    r1, r1, #1
-    str    r1, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
     POISON_HEAP_REF r0
     str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
     mov    r0, r2
@@ -1236,9 +1233,6 @@
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
     add    r2, r2, r3
     str    r2, [rSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
-    ldr    r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
-    add    r2, r2, #1
-    str    r2, [rSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
     POISON_HEAP_REF r0
     str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
     str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8f613d9..694c76a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1460,9 +1460,6 @@
     // See Class::SetStatus() in class.cc for more details.
     bhi    \slowPathLabel
     str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
-    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
-    add    x5, x5, #1
-    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
     POISON_HEAP_REF w0
     str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
     mov    x0, x4
@@ -1535,9 +1532,6 @@
     mov    x0, \xTemp0
     add    \xTemp0, \xTemp0, \xTemp1
     str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
-    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
-    add    \xTemp0, \xTemp0, #1
-    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
     POISON_HEAP_REF \wClass
     str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
     str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index bff674f..8fd854f 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -972,7 +972,6 @@
                                                         // as allocated object.
     addl %edx, %ecx                                     // Add the object size.
     movl %ecx, THREAD_LOCAL_POS_OFFSET(%ebx)            // Update thread_local_pos.
-    incl THREAD_LOCAL_OBJECTS_OFFSET(%ebx)              // Increase thread_local_objects.
                                                         // Store the class pointer in the header.
                                                         // No fence needed for x86.
     POISON_HEAP_REF eax
@@ -1034,7 +1033,6 @@
     movl THREAD_LOCAL_POS_OFFSET(%ebx), %edi
     addl %edi, %edx                                            // Add the object size.
    movl %edx, THREAD_LOCAL_POS_OFFSET(%ebx)                   // Update thread_local_pos.
-    addl LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%ebx)         // Increase thread_local_objects.
                                                                // Store the class pointer in the
                                                                // header.
                                                                // No fence needed for x86.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index ae8f4bd..01d9133 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -930,7 +930,6 @@
     cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
     ja   RAW_VAR(slowPathLabel)
     movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
-    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
                                                                // Store the class pointer in the
                                                                // header.
                                                                // No fence needed for x86.
@@ -952,7 +951,6 @@
     cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
     ja   RAW_VAR(slowPathLabel)
     movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
-    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
                                                                // Store the class pointer in the
                                                                // header.
                                                                // No fence needed for x86.
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index e6c4d99..fb64ddd 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -95,7 +95,6 @@
       region_space_bitmap_(nullptr),
       heap_mark_bitmap_(nullptr),
       live_stack_freeze_size_(0),
-      from_space_num_objects_at_first_pause_(0),
       from_space_num_bytes_at_first_pause_(0),
       mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
@@ -103,7 +102,6 @@
       gc_count_(0),
       reclaimed_bytes_ratio_sum_(0.f),
       cumulative_bytes_moved_(0),
-      cumulative_objects_moved_(0),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
       measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
       mark_from_read_barrier_measurements_(false),
@@ -490,18 +488,7 @@
         << thread->GetState() << " thread " << thread << " self " << self;
     thread->SetIsGcMarkingAndUpdateEntrypoints(true);
     if (use_tlab_ && thread->HasTlab()) {
-      // We should not reuse the partially utilized TLABs revoked here as they
-      // are going to be part of from-space.
-      if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
-        // This must come before the revoke.
-        size_t thread_local_objects = thread->GetThreadLocalObjectsAllocated();
-        concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread, /*reuse=*/ false);
-        reinterpret_cast<Atomic<size_t>*>(
-            &concurrent_copying_->from_space_num_objects_at_first_pause_)->
-                fetch_add(thread_local_objects, std::memory_order_relaxed);
-      } else {
-        concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread, /*reuse=*/ false);
-      }
+      concurrent_copying_->region_space_->RevokeThreadLocalBuffers(thread, /*reuse=*/ false);
     }
     if (kUseThreadLocalAllocationStack) {
       thread->RevokeThreadLocalAllocationStack();
@@ -588,7 +575,6 @@
     cc->SwapStacks();
     if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
       cc->RecordLiveStackFreezeSize(self);
-      cc->from_space_num_objects_at_first_pause_ = cc->region_space_->GetObjectsAllocated();
       cc->from_space_num_bytes_at_first_pause_ = cc->region_space_->GetBytesAllocated();
     }
     cc->is_marking_ = true;
@@ -2786,18 +2772,13 @@
     TimingLogger::ScopedTiming split2("RecordFree", GetTimings());
     // Don't include thread-locals that are in the to-space.
     const uint64_t from_bytes = region_space_->GetBytesAllocatedInFromSpace();
-    const uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
     const uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
-    const uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
     uint64_t to_bytes = bytes_moved_.load(std::memory_order_relaxed) + bytes_moved_gc_thread_;
     cumulative_bytes_moved_ += to_bytes;
     uint64_t to_objects = objects_moved_.load(std::memory_order_relaxed) + objects_moved_gc_thread_;
-    cumulative_objects_moved_ += to_objects;
     if (kEnableFromSpaceAccountingCheck) {
-      CHECK_EQ(from_space_num_objects_at_first_pause_, from_objects + unevac_from_objects);
       CHECK_EQ(from_space_num_bytes_at_first_pause_, from_bytes + unevac_from_bytes);
     }
-    CHECK_LE(to_objects, from_objects);
     // to_bytes <= from_bytes is only approximately true, because objects expand a little when
     // copying to non-moving space in near-OOM situations.
     if (from_bytes > 0) {
@@ -2815,10 +2796,9 @@
                                     &cleared_objects,
                                     /*clear_bitmap*/ !young_gen_,
                                     should_eagerly_release_memory);
-      // `cleared_bytes` and `cleared_objects` may be greater than the from space equivalents since
+      // `cleared_bytes` may be greater than its from-space equivalent since
       // RegionSpace::ClearFromSpace may clear empty unevac regions.
       CHECK_GE(cleared_bytes, from_bytes);
-      CHECK_GE(cleared_objects, from_objects);
     }
 
     // If we need to release available memory to the OS, go over all free
@@ -2834,11 +2814,10 @@
     uint64_t freed_objects = cleared_objects - to_objects;
     if (kVerboseMode) {
       LOG(INFO) << "RecordFree:"
-                << " from_bytes=" << from_bytes << " from_objects=" << from_objects
+                << " from_bytes=" << from_bytes
                 << " unevac_from_bytes=" << unevac_from_bytes
-                << " unevac_from_objects=" << unevac_from_objects
-                << " to_bytes=" << to_bytes << " to_objects=" << to_objects
-                << " freed_bytes=" << freed_bytes << " freed_objects=" << freed_objects
+                << " to_bytes=" << to_bytes
+                << " freed_bytes=" << freed_bytes
                 << " from_space size=" << region_space_->FromSpaceSize()
                 << " unevac_from_space size=" << region_space_->UnevacFromSpaceSize()
                 << " to_space size=" << region_space_->ToSpaceSize();
@@ -3933,7 +3912,6 @@
      << " " << (young_gen_ ? "minor" : "major") << " GCs\n";
 
   os << "Cumulative bytes moved " << cumulative_bytes_moved_ << "\n";
-  os << "Cumulative objects moved " << cumulative_objects_moved_ << "\n";
 
   os << "Peak regions allocated "
      << region_space_->GetMaxPeakNumNonFreeRegions() << " ("
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 888c38a..db09852 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -390,7 +390,6 @@
   // A cache of Heap::GetMarkBitmap().
   accounting::HeapBitmap* heap_mark_bitmap_;
   size_t live_stack_freeze_size_;
-  size_t from_space_num_objects_at_first_pause_;  // Computed if kEnableFromSpaceAccountingCheck
   size_t from_space_num_bytes_at_first_pause_;  // Computed if kEnableFromSpaceAccountingCheck
   Atomic<int> is_mark_stack_push_disallowed_;
   enum MarkStackMode {
@@ -439,7 +438,6 @@
   size_t objects_moved_gc_thread_;
   uint64_t bytes_scanned_;
   uint64_t cumulative_bytes_moved_;
-  uint64_t cumulative_objects_moved_;
 
   // The skipped blocks are memory blocks/chunks that were copies of
   // objects that were unused due to lost races (CAS failures) at
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 10eb4a0..f35be96 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -355,7 +355,6 @@
   const uint64_t total_ns = logger.GetTotalNs();
   const double seconds = NsToMs(total_ns) / 1000.0;
   const uint64_t freed_bytes = GetTotalFreedBytes();
-  const uint64_t freed_objects = GetTotalFreedObjects();
   const uint64_t scanned_bytes = GetTotalScannedBytes();
   {
     MutexLock mu(Thread::Current(), pause_histogram_lock_);
@@ -388,10 +387,8 @@
   const double cpu_seconds = NsToMs(GetTotalCpuTime()) / 1000.0;
   os << GetName() << " total time: " << PrettyDuration(total_ns)
      << " mean time: " << PrettyDuration(total_ns / iterations) << "\n"
-     << GetName() << " freed: " << freed_objects
-     << " objects with total size " << PrettySize(freed_bytes) << "\n"
-     << GetName() << " throughput: " << freed_objects / seconds << "/s / "
-     << PrettySize(freed_bytes / seconds) << "/s"
+     << GetName() << " freed: " << PrettySize(freed_bytes) << "\n"
+     << GetName() << " throughput: " << PrettySize(freed_bytes / seconds) << "/s"
      << "  per cpu-time: "
      << static_cast<uint64_t>(freed_bytes / cpu_seconds) << "/s / "
      << PrettySize(freed_bytes / cpu_seconds) << "/s\n"
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 2539766..478c14c 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -1268,11 +1268,7 @@
        << PrettySize(GetBytesFreedEver() / total_seconds) << "/s"
        << " per cpu-time: "
        << PrettySize(GetBytesFreedEver() / total_cpu_seconds) << "/s\n";
-    os << "Mean GC object throughput: "
-       << (GetObjectsFreedEver() / total_seconds) << " objects/s\n";
   }
-  uint64_t total_objects_allocated = GetObjectsAllocatedEver();
-  os << "Total number of allocations " << total_objects_allocated << "\n";
   os << "Total bytes allocated " << PrettySize(GetBytesAllocatedEver()) << "\n";
   os << "Total bytes freed " << PrettySize(GetBytesFreedEver()) << "\n";
   os << "Free memory " << PrettySize(GetFreeMemory()) << "\n";
@@ -2078,15 +2074,6 @@
   return total;
 }
 
-uint64_t Heap::GetObjectsAllocatedEver() const {
-  uint64_t total = GetObjectsFreedEver();
-  // If we are detached, we can't use GetObjectsAllocated since we can't change thread states.
-  if (Thread::Current() != nullptr) {
-    total += GetObjectsAllocated();
-  }
-  return total;
-}
-
 uint64_t Heap::GetBytesAllocatedEver() const {
   // Force the returned value to be monotonically increasing, in the sense that if this is called
   // at A and B, such that A happens-before B, then the call at B returns a value no smaller than
@@ -2876,8 +2863,8 @@
     }
     LOG(INFO) << gc_cause << " " << collector->GetName()
               << (is_sampled ? " (sampled)" : "")
-              << " GC freed "  << current_gc_iteration_.GetFreedObjects() << "("
-              << PrettySize(current_gc_iteration_.GetFreedBytes()) << ") AllocSpace objects, "
+              << " GC freed "
+              << PrettySize(current_gc_iteration_.GetFreedBytes()) << " AllocSpace bytes, "
               << current_gc_iteration_.GetFreedLargeObjects() << "("
               << PrettySize(current_gc_iteration_.GetFreedLargeObjectBytes()) << ") LOS objects, "
               << percent_free << "% free, " << PrettySize(current_heap_size) << "/"
@@ -3628,7 +3615,7 @@
 
 void Heap::DumpForSigQuit(std::ostream& os) {
   os << "Heap: " << GetPercentFree() << "% free, " << PrettySize(GetBytesAllocated()) << "/"
-     << PrettySize(GetTotalMemory()) << "; " << GetObjectsAllocated() << " objects\n";
+     << PrettySize(GetTotalMemory()) << "\n";
   {
     os << "Image spaces:\n";
     ScopedObjectAccess soa(Thread::Current());
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 3e7aaf3..a66b3ba 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -558,18 +558,9 @@
   size_t GetObjectsAllocated() const
       REQUIRES(!Locks::heap_bitmap_lock_);
 
-  // Returns the total number of objects allocated since the heap was created.
-  uint64_t GetObjectsAllocatedEver() const;
-
   // Returns the total number of bytes allocated since the heap was created.
   uint64_t GetBytesAllocatedEver() const;
 
-  // Returns the total number of objects freed since the heap was created.
-  // With default memory order, this should be viewed only as a hint.
-  uint64_t GetObjectsFreedEver(std::memory_order mo = std::memory_order_relaxed) const {
-    return total_objects_freed_ever_.load(mo);
-  }
-
   // Returns the total number of bytes freed since the heap was created.
   // With default memory order, this should be viewed only as a hint.
   uint64_t GetBytesFreedEver(std::memory_order mo = std::memory_order_relaxed) const {
diff --git a/tools/cpp-define-generator/thread.def b/tools/cpp-define-generator/thread.def
index 97033fc..5cc5f71 100644
--- a/tools/cpp-define-generator/thread.def
+++ b/tools/cpp-define-generator/thread.def
@@ -45,8 +45,6 @@
            art::Thread::ThreadLocalAllocStackTopOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_LOCAL_END_OFFSET,
            art::Thread::ThreadLocalEndOffset<art::kRuntimePointerSize>().Int32Value())
-ASM_DEFINE(THREAD_LOCAL_OBJECTS_OFFSET,
-           art::Thread::ThreadLocalObjectsOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_LOCAL_POS_OFFSET,
            art::Thread::ThreadLocalPosOffset<art::kRuntimePointerSize>().Int32Value())
 ASM_DEFINE(THREAD_ROSALLOC_RUNS_OFFSET,