Do not use atomic increment in allocation as a fence.
A sequentially consistent fetch_and_add implemented with ARMv8 acquire/release
operations is not a fence: it orders other memory accesses only relative to the
read-modify-write itself, so a store before it and a load after it can still be
reordered past each other. Don't use it as one.
The result may also be somewhat faster, since a sequentially consistent
increment requires more fencing than the relaxed increment plus explicit
fence that the allocation path actually needs.
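For illustration, here is a minimal standalone sketch of the two patterns
(not code from this patch; payload, published, and counter are made-up names
standing in for the object contents, its reachability, and
num_bytes_allocated_):

    #include <atomic>

    std::atomic<int> payload{0};        // object contents being published
    std::atomic<bool> published{false}; // flag that makes the object reachable
    std::atomic<size_t> counter{0};     // bookkeeping, like num_bytes_allocated_

    // Broken: on ARMv8 the seq_cst increment compiles to an LDAXR/STLXR
    // pair, which orders other accesses only relative to the RMW itself,
    // so payload may become visible after published.
    void publish_broken() {
      payload.store(42, std::memory_order_relaxed);
      counter.fetch_add(1, std::memory_order_seq_cst);  // not a fence!
      published.store(true, std::memory_order_relaxed);
    }

    // Fixed: an explicit release fence orders the initialization before
    // the publishing store, and the increment itself can be relaxed.
    void publish_fixed() {
      payload.store(42, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_release);
      counter.fetch_add(1, std::memory_order_relaxed);
      published.store(true, std::memory_order_relaxed);
    }

    void consume() {
      if (published.load(std::memory_order_acquire)) {
        // Reads 42 after publish_fixed(); publish_broken() gives no
        // such guarantee.
        (void)payload.load(std::memory_order_relaxed);
      }
    }

The release-fence/acquire-load pairing above is essentially the ordering the
allocation path now gets from QuasiAtomic::ThreadFenceForConstructor().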
Bug: 16377103
Change-Id: I5b1add098d3488aa755f140612e54521b80aa749
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 0faa3c6..d4a7f37 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -278,6 +278,10 @@
return this->fetch_add(value, std::memory_order_seq_cst); // Return old_value.
}

+ T FetchAndAddRelaxed(const T value) {
+ return this->fetch_add(value, std::memory_order_relaxed); // Return old_value.
+ }
+
T FetchAndSubSequentiallyConsistent(const T value) {
return this->fetch_sub(value, std::memory_order_seq_cst); // Return old value.
}
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index d1ab587..f437830 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -145,9 +145,9 @@
WriteBarrierField(obj, mirror::Object::ClassOffset(), klass);
}
pre_fence_visitor(obj, usable_size);
+ QuasiAtomic::ThreadFenceForConstructor();
new_num_bytes_allocated = static_cast<size_t>(
- num_bytes_allocated_.FetchAndAddSequentiallyConsistent(bytes_tl_bulk_allocated))
- + bytes_tl_bulk_allocated;
+ num_bytes_allocated_.FetchAndAddRelaxed(bytes_tl_bulk_allocated)) + bytes_tl_bulk_allocated;
}
if (kIsDebugBuild && Runtime::Current()->IsStarted()) {
CHECK_LE(obj->SizeOf(), usable_size);