Fix Object::Clone()'s pre-fence barrier.

Pass in a pre-fence barrier object that sets the array length,
instead of setting it after returning from AllocObject().
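
For illustration only (simplified, standalone code; FakeArray, CloneArray and
this AllocObject() are hypothetical, not ART's actual classes): the allocator
runs the pre-fence visitor on the new object and only then issues the
publishing fence, so any thread (or a concurrent moving collector) that can
observe the object also observes its length.

  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <new>

  struct FakeArray { int32_t length; };

  template <typename PreFenceVisitor>
  FakeArray* AllocObject(size_t num_bytes, const PreFenceVisitor& pre_fence_visitor) {
    FakeArray* obj = static_cast<FakeArray*>(::operator new(num_bytes));
    pre_fence_visitor(obj);  // Initialize fields that must be visible on publication.
    std::atomic_thread_fence(std::memory_order_release);  // The "pre-fence" barrier.
    return obj;
  }

  FakeArray* CloneArray(int32_t length) {
    auto set_length = [length](FakeArray* a) { a->length = length; };
    return AllocObject(sizeof(FakeArray), set_length);
    // Writing a->length here, after AllocObject() has returned, leaves a window
    // where a concurrent collector can scan the array with a garbage length.
  }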

Fix another potential bug caused by using the wrong default pre-fence
barrier parameter value. Since the default appears error-prone, remove
it and make the pre-fence barrier an explicit parameter.
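
A minimal sketch of the failure mode (simplified declarations, not the actual
ART signatures): with a defaulted visitor, a call site that forgets to forward
pre_fence_visitor still compiles and silently falls back to VoidFunctor,
dropping the initialization. With the default removed, the same mistake
becomes a compile error.

  #include <cstddef>

  struct VoidFunctor {
    template <typename... Args> void operator()(Args&&...) const {}
  };

  template <typename PreFenceVisitor = VoidFunctor>
  void* AllocObject(size_t num_bytes,
                    const PreFenceVisitor& pre_fence_visitor = VoidFunctor());

  template <typename PreFenceVisitor>
  void* RetryAllocation(size_t num_bytes, const PreFenceVisitor& pre_fence_visitor) {
    // Bug: pre_fence_visitor is not forwarded, so this silently falls back to
    // VoidFunctor and the object is published without its length set.
    return AllocObject(num_bytes);
  }

This is the shape of the allocator-change restart path in heap-inl.h below,
which previously re-invoked AllocObject() without forwarding pre_fence_visitor.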

Fix another potential moving GC bug caused by a missing SirtRef.
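
The pattern, in standalone form (Object, StackRoot and HoldAcrossGc are
illustrative stand-ins, not ART's SirtRef implementation): a raw object
pointer held across a call that may trigger a moving collection has to be
registered as a root the GC can update, and re-read afterwards, because the
collector may have relocated the object.

  #include <cstdint>

  struct Object { int32_t data; };

  class StackRoot {
   public:
    explicit StackRoot(Object* obj) : obj_(obj) {}  // Registration with the GC would go here.
    ~StackRoot() {}                                 // Unregistration would go here.
    Object* get() const { return obj_; }
   private:
    Object* obj_;  // A moving collector rewrites this slot when it relocates the object.
  };

  Object* HoldAcrossGc(Object* obj) {
    StackRoot root(obj);  // Without the root, obj would dangle after a moving GC.
    // CollectGarbage();  // May relocate *obj and update the root's slot.
    return root.get();    // Reload the possibly relocated address.
  }

PushOnAllocationStack() below follows this pattern with SirtRefNoVerify
(object verification is skipped because obj is not on the allocation stack
yet) and now takes mirror::Object**, so the caller's pointer is updated as
well.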

Bug: 13097759
Change-Id: I466aa0e50f9e1a5dbf20be5a195edee619c7514e
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index f5f6f16..920741f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1997,6 +1997,8 @@
         CHECK_LE(obj_size, kLargeSizeThreshold)
             << "A run slot contains a large object " << Dump();
         CHECK_EQ(SizeToIndex(obj_size), idx)
+            << PrettyTypeOf(obj) << " "
+            << "obj_size=" << obj_size << ", idx=" << idx << " "
             << "A run slot contains an object with wrong size " << Dump();
       }
     }
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 25f20d6..a06f272 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -65,7 +65,7 @@
       bool after_is_current_allocator = allocator == GetCurrentAllocator();
       if (is_current_allocator && !after_is_current_allocator) {
         // If the allocator changed, we need to restart the allocation.
-        return AllocObject<kInstrumented>(self, klass, byte_count);
+        return AllocObject<kInstrumented>(self, klass, byte_count, pre_fence_visitor);
       }
       return nullptr;
     }
@@ -111,7 +111,7 @@
     DCHECK(!Runtime::Current()->HasStatsEnabled());
   }
   if (AllocatorHasAllocationStack(allocator)) {
-    PushOnAllocationStack(self, obj);
+    PushOnAllocationStack(self, &obj);
   }
   if (kInstrumented) {
     if (Dbg::IsAllocTrackingEnabled()) {
@@ -135,28 +135,34 @@
 // The size of a thread-local allocation stack in the number of references.
 static constexpr size_t kThreadLocalAllocationStackSize = 128;
 
-inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object* obj) {
+inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object** obj) {
   if (kUseThreadLocalAllocationStack) {
-    bool success = self->PushOnThreadLocalAllocationStack(obj);
+    bool success = self->PushOnThreadLocalAllocationStack(*obj);
     if (UNLIKELY(!success)) {
       // Slow path. Allocate a new thread-local allocation stack.
       mirror::Object** start_address;
       mirror::Object** end_address;
       while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize,
                                                 &start_address, &end_address)) {
+        // Disable verify object in SirtRef as obj isn't on the alloc stack yet.
+        SirtRefNoVerify<mirror::Object> ref(self, *obj);
         CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+        *obj = ref.get();
       }
       self->SetThreadLocalAllocationStack(start_address, end_address);
       // Retry on the new thread-local allocation stack.
-      success = self->PushOnThreadLocalAllocationStack(obj);
+      success = self->PushOnThreadLocalAllocationStack(*obj);
       // Must succeed.
       CHECK(success);
     }
   } else {
     // This is safe to do since the GC will never free objects which are neither in the allocation
     // stack or the live bitmap.
-    while (!allocation_stack_->AtomicPushBack(obj)) {
+    while (!allocation_stack_->AtomicPushBack(*obj)) {
+      // Disable verify object in SirtRef as obj isn't on the alloc stack yet.
+      SirtRefNoVerify<mirror::Object> ref(self, *obj);
       CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+      *obj = ref.get();
     }
   }
 }
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 5879757..ffb4e59 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -158,28 +158,28 @@
   ~Heap();
 
   // Allocates and initializes storage for an object instance.
-  template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor>
+  template <bool kInstrumented, typename PreFenceVisitor>
   mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes,
-                              const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
+                              const PreFenceVisitor& pre_fence_visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
                                                          GetCurrentAllocator(),
                                                          pre_fence_visitor);
   }
 
-  template <bool kInstrumented, typename PreFenceVisitor = VoidFunctor>
+  template <bool kInstrumented, typename PreFenceVisitor>
   mirror::Object* AllocNonMovableObject(Thread* self, mirror::Class* klass, size_t num_bytes,
-                                        const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
+                                        const PreFenceVisitor& pre_fence_visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return AllocObjectWithAllocator<kInstrumented, true>(self, klass, num_bytes,
                                                          GetCurrentNonMovingAllocator(),
                                                          pre_fence_visitor);
   }
 
-  template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor = VoidFunctor>
+  template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
   ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(
       Thread* self, mirror::Class* klass, size_t byte_count, AllocatorType allocator,
-      const PreFenceVisitor& pre_fence_visitor = VoidFunctor())
+      const PreFenceVisitor& pre_fence_visitor)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   AllocatorType GetCurrentAllocator() const {
@@ -691,7 +691,8 @@
   void SignalHeapTrimDaemon(Thread* self);
 
   // Push an object onto the allocation stack.
-  void PushOnAllocationStack(Thread* self, mirror::Object* obj);
+  void PushOnAllocationStack(Thread* self, mirror::Object** obj)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // What kind of concurrency behavior is the runtime after? Currently true for concurrent mark
   // sweep GC, false for other GC types.