Make atomics usage for allocator counters consistent.

Use memory_order_relaxed atomics everywhere. Document what that means.

We were previously using seq_cst updates in some places. The only
benefit of that might have been to make the expected invariants
between the counters, e.g. bytes_used <= total_bytes_used, actually
hold. But they did not hold anyway, since no care was taken to
update the counters in the correct order, and we were already using
relaxed (and even volatile) accesses in other places.

Update max_bytes_used atomically, so that it can't decrease.
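
For reference, here is a minimal standalone sketch of the relaxed
counter plus monotonic-max pattern. The RecordAllocation, bytes_used
and max_bytes_used names below are illustrative stand-ins for a single
tag's counters, not the ART globals touched by this change.

  #include <atomic>
  #include <cstddef>

  // Illustrative stand-ins for one tag's counters (not the ART globals).
  std::atomic<std::size_t> bytes_used{0};
  std::atomic<std::size_t> max_bytes_used{0};

  inline void RecordAllocation(std::size_t bytes) {
    // Relaxed ordering is enough: these counters are statistics, not
    // synchronization, so readers only see approximate values.
    std::size_t new_bytes =
        bytes_used.fetch_add(bytes, std::memory_order_relaxed) + bytes;
    std::size_t max_bytes = max_bytes_used.load(std::memory_order_relaxed);
    // Only ever raise the maximum. A failed compare_exchange_weak reloads
    // max_bytes, so the loop ends as soon as another thread has published
    // a larger value.
    while (max_bytes < new_bytes &&
           !max_bytes_used.compare_exchange_weak(max_bytes, new_bytes,
                                                 std::memory_order_relaxed)) {
    }
  }

  int main() {
    RecordAllocation(64);
    RecordAllocation(128);
    // Single-threaded check: 192 bytes live, maximum observed is 192.
    return max_bytes_used.load(std::memory_order_relaxed) == 192 ? 0 : 1;
  }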

Bug: 31023171
Test: Build and boot AOSP
Change-Id: Icfca919d48c67899acb1798f5357f17e956099a6
diff --git a/libartbase/base/allocator.cc b/libartbase/base/allocator.cc
index 17da789..c7be4e0 100644
--- a/libartbase/base/allocator.cc
+++ b/libartbase/base/allocator.cc
@@ -76,7 +76,7 @@
 
 // These globals are safe since they don't have any non-trivial destructors.
 Atomic<size_t> g_bytes_used[kAllocatorTagCount];
-volatile size_t g_max_bytes_used[kAllocatorTagCount];
+Atomic<size_t> g_max_bytes_used[kAllocatorTagCount];
 Atomic<uint64_t> g_total_bytes_used[kAllocatorTagCount];
 
 void Dump(std::ostream& os) {
@@ -84,7 +84,7 @@
     os << "Dumping native memory usage\n";
     for (size_t i = 0; i < kAllocatorTagCount; ++i) {
       uint64_t bytes_used = g_bytes_used[i].load(std::memory_order_relaxed);
-      uint64_t max_bytes_used = g_max_bytes_used[i];
+      uint64_t max_bytes_used = g_max_bytes_used[i].load(std::memory_order_relaxed);
       uint64_t total_bytes_used = g_total_bytes_used[i].load(std::memory_order_relaxed);
       if (total_bytes_used != 0) {
         os << static_cast<AllocatorTag>(i) << " active=" << bytes_used << " max="
diff --git a/libartbase/base/allocator.h b/libartbase/base/allocator.h
index 7ddbacf..662f78e 100644
--- a/libartbase/base/allocator.h
+++ b/libartbase/base/allocator.h
@@ -71,12 +71,14 @@
 
 namespace TrackedAllocators {
 
+// We use memory_order_relaxed updates of the following counters. Values are treated as approximate
+// wherever concurrent updates are possible.
 // Running count of number of bytes used for this kind of allocation. Increased by allocations,
 // decreased by deallocations.
 extern Atomic<size_t> g_bytes_used[kAllocatorTagCount];
 
 // Largest value of bytes used seen.
-extern volatile size_t g_max_bytes_used[kAllocatorTagCount];
+extern Atomic<size_t> g_max_bytes_used[kAllocatorTagCount];
 
 // Total number of bytes allocated of this kind.
 extern Atomic<uint64_t> g_total_bytes_used[kAllocatorTagCount];
@@ -84,15 +86,17 @@
 void Dump(std::ostream& os);
 
 inline void RegisterAllocation(AllocatorTag tag, size_t bytes) {
-  g_total_bytes_used[tag].fetch_add(bytes, std::memory_order_seq_cst);
-  size_t new_bytes = g_bytes_used[tag].fetch_add(bytes, std::memory_order_seq_cst) + bytes;
-  if (g_max_bytes_used[tag] < new_bytes) {
-    g_max_bytes_used[tag] = new_bytes;
+  g_total_bytes_used[tag].fetch_add(bytes, std::memory_order_relaxed);
+  size_t new_bytes = g_bytes_used[tag].fetch_add(bytes, std::memory_order_relaxed) + bytes;
+  size_t max_bytes = g_max_bytes_used[tag].load(std::memory_order_relaxed);
+  while (max_bytes < new_bytes
+    && !g_max_bytes_used[tag].compare_exchange_weak(max_bytes /* updated */, new_bytes,
+                                                    std::memory_order_relaxed)) {
   }
 }
 
 inline void RegisterFree(AllocatorTag tag, size_t bytes) {
-  g_bytes_used[tag].fetch_sub(bytes, std::memory_order_seq_cst);
+  g_bytes_used[tag].fetch_sub(bytes, std::memory_order_relaxed);
 }
 
 }  // namespace TrackedAllocators