Improve allocation speed.

- Improves the Ritz MemAllocTest benchmark result by ~500 ms (~5%) on Nexus 4.
- Move the memset() call that zeroes the allocated memory out of the lock
  region (see the first sketch below).
- De-virtualize/inline the allocation call chain into Heap::AllocObject()
  (see the second sketch below).
- Replace Heap::measure_allocation_time_ with the compile-time constant
  kMeasureAllocationTime.
- Guard the VerifyObject() call with a compile-time heap-verification check
  so it compiles away when verification is disabled.
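
The memset and timing-flag changes follow a common pattern; here is a
minimal, self-contained C++11 sketch (Space, AllocLocked() and the
std::mutex are stand-ins for illustration, not ART's own types):

    #include <chrono>
    #include <cstddef>
    #include <cstdlib>
    #include <cstring>
    #include <mutex>

    // Compile-time flag standing in for kMeasureAllocationTime: unlike the
    // old measure_allocation_time_ member, a static const false lets the
    // compiler delete the timing branches entirely.
    static const bool kMeasureAllocationTime = false;

    // Hypothetical space type; AllocLocked() stands in for the dlmalloc
    // call plus the bookkeeping that must stay under the lock.
    struct Space {
      std::mutex lock_;
      long long total_alloc_time_ns_ = 0;

      void* AllocLocked(size_t n) { return malloc(n); }

      void* Alloc(size_t n) {
        using Clock = std::chrono::steady_clock;
        Clock::time_point start;
        if (kMeasureAllocationTime) {  // constant-folded away when false
          start = Clock::now();
        }
        void* obj;
        {
          std::lock_guard<std::mutex> mu(lock_);  // hold the lock only for
          obj = AllocLocked(n);                   // the allocator state
        }
        if (obj != nullptr) {
          // Zero the memory outside the critical section, so other threads
          // can allocate while this one clears its own block.
          memset(obj, 0, n);
        }
        if (kMeasureAllocationTime && obj != nullptr) {
          total_alloc_time_ns_ += std::chrono::duration_cast<
              std::chrono::nanoseconds>(Clock::now() - start).count();
        }
        return obj;
      }
    };

The de-virtualization works by templating the allocation entry point on the
concrete space type, so overload resolution picks a statically bound fast
path for the common space (again a sketch with assumed names, not the
actual ART signatures):

    struct AllocSpace {
      virtual ~AllocSpace() {}
      virtual void* Alloc(size_t n) = 0;
    };

    struct DlMallocSpace : AllocSpace {
      void* Alloc(size_t n) { return AllocNonvirtual(n); }
      void* AllocNonvirtual(size_t n) { return malloc(n); }  // inlinable
    };

    // Generic version: virtual dispatch through the base-class vtable.
    void* TryToAllocate(AllocSpace* space, size_t n) {
      return space->Alloc(n);
    }

    // Overload selected at compile time for the common space: no vtable
    // lookup, and AllocNonvirtual() can be inlined into the caller.
    void* TryToAllocate(DlMallocSpace* space, size_t n) {
      return space->AllocNonvirtual(n);
    }

    // Templating Allocate() on the space type makes the compiler bind the
    // DlMallocSpace overload above when T = DlMallocSpace.
    template <class T>
    void* Allocate(T* space, size_t n) {
      return TryToAllocate(space, n);
    }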

Bug: 9986565
Change-Id: Ib70b6d051a80ec329788b30256565561f031da2a
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index d27290b..9a9e00c 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -21,6 +21,7 @@
 
 #include <limits>
 #include <vector>
+#include <valgrind.h>
 
 #include "base/stl_util.h"
 #include "common_throws.h"
@@ -34,6 +35,7 @@
 #include "gc/collector/mark_sweep-inl.h"
 #include "gc/collector/partial_mark_sweep.h"
 #include "gc/collector/sticky_mark_sweep.h"
+#include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
@@ -66,6 +68,8 @@
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static const size_t kMinConcurrentRemainingBytes = 128 * KB;
 const double Heap::kDefaultTargetUtilization = 0.5;
+// If true, measure the total allocation time.
+static const bool kMeasureAllocationTime = false;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity,
@@ -118,9 +122,9 @@
       max_free_(max_free),
       target_utilization_(target_utilization),
       total_wait_time_(0),
-      measure_allocation_time_(false),
       total_allocation_time_(0),
-      verify_object_mode_(kHeapVerificationNotPermitted) {
+      verify_object_mode_(kHeapVerificationNotPermitted),
+      running_on_valgrind_(RUNNING_ON_VALGRIND) {
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
@@ -347,7 +351,7 @@
   }
   os << "Total number of allocations: " << total_objects_allocated << "\n";
   os << "Total bytes allocated " << PrettySize(total_bytes_allocated) << "\n";
-  if (measure_allocation_time_) {
+  if (kMeasureAllocationTime) {
     os << "Total time spent allocating: " << PrettyDuration(allocation_time) << "\n";
     os << "Mean allocation time: " << PrettyDuration(allocation_time / total_objects_allocated)
        << "\n";
@@ -445,8 +449,9 @@
 
   mirror::Object* obj = NULL;
   size_t size = 0;
+  size_t bytes_allocated;
   uint64_t allocation_start = 0;
-  if (UNLIKELY(measure_allocation_time_)) {
+  if (UNLIKELY(kMeasureAllocationTime)) {
     allocation_start = NanoTime() / kTimeAdjust;
   }
 
@@ -458,18 +463,20 @@
       byte_count >= large_object_threshold_ && have_zygote_space_ && c->IsPrimitiveArray();
   if (UNLIKELY(large_object_allocation)) {
     size = RoundUp(byte_count, kPageSize);
-    obj = Allocate(self, large_object_space_, size);
+    obj = Allocate(self, large_object_space_, size, &bytes_allocated);
+    DCHECK(obj == NULL || size == bytes_allocated);
     // Make sure that our large object didn't get placed anywhere within the space interval or else
     // it breaks the immune range.
     DCHECK(obj == NULL ||
            reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
            reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
   } else {
-    obj = Allocate(self, alloc_space_, byte_count);
+    obj = Allocate(self, alloc_space_, byte_count, &bytes_allocated);
+    DCHECK(obj == NULL || byte_count <= bytes_allocated);
+    size = bytes_allocated;
 
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
-    size = alloc_space_->AllocationSize(obj);
   }
 
   if (LIKELY(obj != NULL)) {
@@ -487,9 +494,11 @@
       SirtRef<mirror::Object> ref(self, obj);
       RequestConcurrentGC(self);
     }
-    VerifyObject(obj);
+    if (kDesiredHeapVerification > kNoHeapVerification) {
+      VerifyObject(obj);
+    }
 
-    if (UNLIKELY(measure_allocation_time_)) {
+    if (UNLIKELY(kMeasureAllocationTime)) {
       total_allocation_time_.fetch_add(NanoTime() / kTimeAdjust - allocation_start);
     }
 
@@ -645,7 +654,7 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-void Heap::RecordAllocation(size_t size, mirror::Object* obj) {
+inline void Heap::RecordAllocation(size_t size, mirror::Object* obj) {
   DCHECK(obj != NULL);
   DCHECK_GT(size, 0u);
   num_bytes_allocated_.fetch_add(size);
@@ -684,37 +693,55 @@
   }
 }
 
-mirror::Object* Heap::TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                    bool grow) {
-  // Should we try to use a CAS here and fix up num_bytes_allocated_ later with AllocationSize?
-  if (num_bytes_allocated_ + alloc_size > max_allowed_footprint_) {
-    // max_allowed_footprint_ <= growth_limit_ so it is safe to check in here.
-    if (num_bytes_allocated_ + alloc_size > growth_limit_) {
-      // Completely out of memory.
-      return NULL;
-    }
-  }
-
-  return space->Alloc(self, alloc_size);
+inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size) {
+  return num_bytes_allocated_ + alloc_size > growth_limit_;
 }
 
-mirror::Object* Heap::Allocate(Thread* self, space::AllocSpace* space, size_t alloc_size) {
+inline mirror::Object* Heap::TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                           bool grow, size_t* bytes_allocated) {
+  if (IsOutOfMemoryOnAllocation(alloc_size)) {
+    return NULL;
+  }
+  return space->Alloc(self, alloc_size, bytes_allocated);
+}
+
+// DlMallocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                           bool grow, size_t* bytes_allocated) {
+  if (IsOutOfMemoryOnAllocation(alloc_size)) {
+    return NULL;
+  }
+  if (!running_on_valgrind_) {
+    return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+  } else {
+    return space->Alloc(self, alloc_size, bytes_allocated);
+  }
+}
+
+template <class T>
+inline mirror::Object* Heap::Allocate(Thread* self, T* space, size_t alloc_size, size_t* bytes_allocated) {
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
   // done in the runnable state where suspension is expected.
   DCHECK_EQ(self->GetState(), kRunnable);
   self->AssertThreadSuspensionIsAllowable();
 
-  mirror::Object* ptr = TryToAllocate(self, space, alloc_size, false);
+  mirror::Object* ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
   if (ptr != NULL) {
     return ptr;
   }
+  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
+}
+
+mirror::Object* Heap::AllocateInternalWithGc(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                             size_t* bytes_allocated) {
+  mirror::Object* ptr;
 
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
   collector::GcType last_gc = WaitForConcurrentGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocate(self, space, alloc_size, false);
+    ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
     if (ptr != NULL) {
       return ptr;
     }
@@ -749,7 +776,7 @@
       i = static_cast<size_t>(gc_type_ran);
 
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocate(self, space, alloc_size, false);
+      ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
       if (ptr != NULL) {
         return ptr;
       }
@@ -758,7 +785,7 @@
 
   // Allocations have failed after GCs;  this is an exceptional state.
   // Try harder, growing the heap if necessary.
-  ptr = TryToAllocate(self, space, alloc_size, true);
+  ptr = TryToAllocate(self, space, alloc_size, true, bytes_allocated);
   if (ptr != NULL) {
     return ptr;
   }
@@ -773,7 +800,7 @@
 
   // We don't need a WaitForConcurrentGcToComplete here either.
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseForAlloc, true);
-  return TryToAllocate(self, space, alloc_size, true);
+  return TryToAllocate(self, space, alloc_size, true, bytes_allocated);
 }
 
 void Heap::SetTargetHeapUtilization(float target) {
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 7615f98..54cf287 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -412,15 +412,31 @@
  private:
   // Allocates uninitialized storage. Passing in a null space tries to place the object in the
   // large object space.
-  mirror::Object* Allocate(Thread* self, space::AllocSpace* space, size_t num_bytes)
+  template <class T> mirror::Object* Allocate(Thread* self, T* space, size_t num_bytes, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  // Handles the slow path of Allocate(): runs the GC and retries the
+  // allocation after an initial allocation attempt has failed.
+  mirror::Object* AllocateInternalWithGc(Thread* self, space::AllocSpace* space, size_t num_bytes,
+                                         size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Try to allocate a number of bytes; this function never does any GCs.
-  mirror::Object* TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, bool grow)
+  mirror::Object* TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, bool grow,
+                                size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Try to allocate a number of bytes; this function never does any GCs. DlMallocSpace-specialized version.
+  mirror::Object* TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size, bool grow,
+                                size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  bool IsOutOfMemoryOnAllocation(size_t alloc_size);
+
   // Pushes a list of cleared references out to the managed heap.
   void EnqueueClearedReferences(mirror::Object** cleared_references);
 
@@ -635,7 +651,6 @@
   uint64_t total_wait_time_;
 
   // Cumulative time spent allocating, in NanoTime() / kTimeAdjust units.
-  const bool measure_allocation_time_;
   AtomicInteger total_allocation_time_;
 
   // The current state of heap verification, may be enabled or disabled.
@@ -643,6 +658,8 @@
 
   std::vector<collector::MarkSweep*> mark_sweep_collectors_;
 
+  const bool running_on_valgrind_;
+
   friend class collector::MarkSweep;
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
new file mode 100644
index 0000000..849157f
--- /dev/null
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
+#define ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
+
+#include "dlmalloc_space.h"
+
+namespace art {
+namespace gc {
+namespace space {
+
+inline mirror::Object* DlMallocSpace::AllocNonvirtual(Thread* self, size_t num_bytes,
+                                                      size_t* bytes_allocated) {
+  mirror::Object* obj;
+  {
+    MutexLock mu(self, lock_);
+    obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+  }
+  if (obj != NULL) {
+    // Zero freshly allocated memory, done while not holding the space's lock.
+    memset(obj, 0, num_bytes);
+  }
+  return obj;
+}
+
+inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
+  if (result != NULL) {
+    if (kDebugSpaces) {
+      CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
+            << ") not in bounds of allocation space " << *this;
+    }
+    size_t allocation_size = AllocationSizeNonvirtual(result);
+    if (bytes_allocated != NULL) {
+      *bytes_allocated = allocation_size;
+    }
+    num_bytes_allocated_ += allocation_size;
+    total_bytes_allocated_ += allocation_size;
+    ++total_objects_allocated_;
+    ++num_objects_allocated_;
+  }
+  return result;
+}
+
+}  // namespace space
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_SPACE_DLMALLOC_SPACE_INL_H_
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index d539aa2..8b99e96 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "dlmalloc_space.h"
+#include "dlmalloc_space-inl.h"
 #include "gc/accounting/card_table.h"
 #include "gc/heap.h"
 #include "runtime.h"
@@ -46,8 +47,9 @@
 // A specialization of DlMallocSpace that provides information to Valgrind about allocations.
 class ValgrindDlMallocSpace : public DlMallocSpace {
  public:
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes) {
-    void* obj_with_rdz = DlMallocSpace::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes);
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    void* obj_with_rdz = DlMallocSpace::AllocWithGrowth(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                                        bytes_allocated);
     if (obj_with_rdz == NULL) {
       return NULL;
     }
@@ -59,8 +61,9 @@
     return result;
   }
 
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes) {
-    void* obj_with_rdz = DlMallocSpace::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes);
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+    void* obj_with_rdz = DlMallocSpace::Alloc(self, num_bytes + 2 * kValgrindRedZoneBytes,
+                                              bytes_allocated);
     if (obj_with_rdz == NULL) {
      return NULL;
     }
@@ -234,37 +237,27 @@
   mark_bitmap_->SetName(temp_name);
 }
 
-mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes) {
-  mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_calloc(mspace_, 1, num_bytes));
-  if (result != NULL) {
-    if (kDebugSpaces) {
-      CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
-            << ") not in bounds of allocation space " << *this;
-    }
-    size_t allocation_size = InternalAllocationSize(result);
-    num_bytes_allocated_ += allocation_size;
-    total_bytes_allocated_ += allocation_size;
-    ++total_objects_allocated_;
-    ++num_objects_allocated_;
+mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  return AllocNonvirtual(self, num_bytes, bytes_allocated);
+}
+
+mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
+  mirror::Object* result;
+  {
+    MutexLock mu(self, lock_);
+    // Grow as much as possible within the mspace.
+    size_t max_allowed = Capacity();
+    mspace_set_footprint_limit(mspace_, max_allowed);
+    // Try the allocation.
+    result = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
+    // Shrink back down as small as possible.
+    size_t footprint = mspace_footprint(mspace_);
+    mspace_set_footprint_limit(mspace_, footprint);
   }
-  return result;
-}
-
-mirror::Object* DlMallocSpace::Alloc(Thread* self, size_t num_bytes) {
-  MutexLock mu(self, lock_);
-  return AllocWithoutGrowthLocked(num_bytes);
-}
-
-mirror::Object* DlMallocSpace::AllocWithGrowth(Thread* self, size_t num_bytes) {
-  MutexLock mu(self, lock_);
-  // Grow as much as possible within the mspace.
-  size_t max_allowed = Capacity();
-  mspace_set_footprint_limit(mspace_, max_allowed);
-  // Try the allocation.
-  mirror::Object* result = AllocWithoutGrowthLocked(num_bytes);
-  // Shrink back down as small as possible.
-  size_t footprint = mspace_footprint(mspace_);
-  mspace_set_footprint_limit(mspace_, footprint);
+  if (result != NULL) {
+    // Zero freshly allocated memory, done while not holding the space's lock.
+    memset(result, 0, num_bytes);
+  }
   // Return the new allocation or NULL.
   CHECK(!kDebugSpaces || result == NULL || Contains(result));
   return result;
@@ -415,8 +408,7 @@
 
 // Virtual functions can't get inlined.
 inline size_t DlMallocSpace::InternalAllocationSize(const mirror::Object* obj) {
-  return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
-      kChunkOverhead;
+  return AllocationSizeNonvirtual(obj);
 }
 
 size_t DlMallocSpace::AllocationSize(const mirror::Object* obj) {
diff --git a/runtime/gc/space/dlmalloc_space.h b/runtime/gc/space/dlmalloc_space.h
index c15d0ba..c498ef8 100644
--- a/runtime/gc/space/dlmalloc_space.h
+++ b/runtime/gc/space/dlmalloc_space.h
@@ -50,16 +50,24 @@
                                size_t capacity, byte* requested_begin);
 
   // Allocate num_bytes, allowing the underlying mspace to grow.
-  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes);
+  virtual mirror::Object* AllocWithGrowth(Thread* self, size_t num_bytes,
+                                          size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
 
   // Allocate num_bytes without allowing the underlying mspace to grow.
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes);
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
 
   // Return the storage space required by obj.
   virtual size_t AllocationSize(const mirror::Object* obj);
   virtual size_t Free(Thread* self, mirror::Object* ptr);
   virtual size_t FreeList(Thread* self, size_t num_ptrs, mirror::Object** ptrs);
 
+  mirror::Object* AllocNonvirtual(Thread* self, size_t num_bytes, size_t* bytes_allocated);
+
+  size_t AllocationSizeNonvirtual(const mirror::Object* obj) {
+    return mspace_usable_size(const_cast<void*>(reinterpret_cast<const void*>(obj))) +
+        kChunkOverhead;
+  }
+
   void* MoreCore(intptr_t increment);
 
   void* GetMspace() const {
@@ -141,7 +149,8 @@
 
  private:
   size_t InternalAllocationSize(const mirror::Object* obj);
-  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+  mirror::Object* AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated)
+      EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
   bool Init(size_t initial_size, size_t maximum_size, size_t growth_size, byte* requested_base);
 
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index d7db561..832c6fa 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -55,7 +55,7 @@
   return new LargeObjectMapSpace(name);
 }
 
-mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes) {
+mirror::Object* LargeObjectMapSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   MemMap* mem_map = MemMap::MapAnonymous("large object space allocation", NULL, num_bytes,
                                          PROT_READ | PROT_WRITE);
   if (mem_map == NULL) {
@@ -66,6 +66,9 @@
   large_objects_.push_back(obj);
   mem_maps_.Put(obj, mem_map);
   size_t allocation_size = mem_map->Size();
+  if (bytes_allocated != NULL) {
+    *bytes_allocated = allocation_size;
+  }
   num_bytes_allocated_ += allocation_size;
   total_bytes_allocated_ += allocation_size;
   ++num_objects_allocated_;
@@ -239,7 +242,7 @@
   return chunk->GetSize();
 }
 
-mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes) {
+mirror::Object* FreeListSpace::Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) {
   MutexLock mu(self, lock_);
   num_bytes = RoundUp(num_bytes, kAlignment);
   Chunk temp;
@@ -262,6 +265,9 @@
     AddFreeChunk(AddrFromChunk(new_chunk), chunk_size - num_bytes, chunk);
   }
 
+  if (bytes_allocated != NULL) {
+    *bytes_allocated = num_bytes;
+  }
   num_objects_allocated_++;
   total_objects_allocated_++;
   num_bytes_allocated_ += num_bytes;
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 8cd5088..31659db 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -83,7 +83,7 @@
 
   // Return the storage space required by obj.
   size_t AllocationSize(const mirror::Object* obj);
-  mirror::Object* Alloc(Thread* self, size_t num_bytes);
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* ptr);
   void Walk(DlMallocSpace::WalkCallback, void* arg);
   // TODO: disabling thread safety analysis as this may be called when we already hold lock_.
@@ -110,7 +110,7 @@
   static FreeListSpace* Create(const std::string& name, byte* requested_begin, size_t capacity);
 
   size_t AllocationSize(const mirror::Object* obj) EXCLUSIVE_LOCKS_REQUIRED(lock_);
-  mirror::Object* Alloc(Thread* self, size_t num_bytes);
+  mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated);
   size_t Free(Thread* self, mirror::Object* obj);
   bool Contains(const mirror::Object* obj) const;
   void Walk(DlMallocSpace::WalkCallback callback, void* arg);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index bc6e818..231cabc 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -154,8 +154,10 @@
   // Number of objects allocated since the space was created.
   virtual uint64_t GetTotalObjectsAllocated() const = 0;
 
-  // Allocate num_bytes without allowing growth.
-  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes) = 0;
+  // Allocate num_bytes without allowing growth. If the allocation
+  // succeeds, the output parameter bytes_allocated is set to the number of
+  // bytes actually allocated, which is >= num_bytes.
+  virtual mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated) = 0;
 
   // Return the storage space required by obj.
   virtual size_t AllocationSize(const mirror::Object* obj) = 0;
diff --git a/runtime/gc/space/space_test.cc b/runtime/gc/space/space_test.cc
index 3003140..66b8c11 100644
--- a/runtime/gc/space/space_test.cc
+++ b/runtime/gc/space/space_test.cc
@@ -88,32 +88,35 @@
     Thread* self = Thread::Current();
 
     // Succeeds, fits without adjusting the footprint limit.
-    mirror::Object* ptr1 = space->Alloc(self, 1 * MB);
+    mirror::Object* ptr1 = space->Alloc(self, 1 * MB, NULL);
     EXPECT_TRUE(ptr1 != NULL);
 
     // Fails, requires a higher footprint limit.
-    mirror::Object* ptr2 = space->Alloc(self, 8 * MB);
+    mirror::Object* ptr2 = space->Alloc(self, 8 * MB, NULL);
     EXPECT_TRUE(ptr2 == NULL);
 
     // Succeeds, adjusts the footprint.
-    mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB);
+    size_t ptr3_bytes_allocated;
+    mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
     EXPECT_TRUE(ptr3 != NULL);
+    EXPECT_LE(8U * MB, ptr3_bytes_allocated);
 
     // Fails, requires a higher footprint limit.
-    mirror::Object* ptr4 = space->Alloc(self, 8 * MB);
+    mirror::Object* ptr4 = space->Alloc(self, 8 * MB, NULL);
     EXPECT_TRUE(ptr4 == NULL);
 
     // Also fails, requires a higher allowed footprint.
-    mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB);
+    mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, NULL);
     EXPECT_TRUE(ptr5 == NULL);
 
     // Release some memory.
     size_t free3 = space->AllocationSize(ptr3);
+    EXPECT_EQ(free3, ptr3_bytes_allocated);
     EXPECT_EQ(free3, space->Free(self, ptr3));
     EXPECT_LE(8U * MB, free3);
 
     // Succeeds, now that memory has been freed.
-    void* ptr6 = space->AllocWithGrowth(self, 9 * MB);
+    void* ptr6 = space->AllocWithGrowth(self, 9 * MB, NULL);
     EXPECT_TRUE(ptr6 != NULL);
 
     // Final clean up.
@@ -122,22 +125,22 @@
     EXPECT_LE(1U * MB, free1);
 
     // Make sure that the zygote space isn't directly at the start of the space.
-    space->Alloc(self, 1U * MB);
+    space->Alloc(self, 1U * MB, NULL);
     space = space->CreateZygoteSpace("alloc space");
 
     // Make space findable to the heap, will also delete space when runtime is cleaned up
     AddContinuousSpace(space);
 
     // Succeeds, fits without adjusting the footprint limit.
-    ptr1 = space->Alloc(self, 1 * MB);
+    ptr1 = space->Alloc(self, 1 * MB, NULL);
     EXPECT_TRUE(ptr1 != NULL);
 
     // Fails, requires a higher footprint limit.
-    ptr2 = space->Alloc(self, 8 * MB);
+    ptr2 = space->Alloc(self, 8 * MB, NULL);
     EXPECT_TRUE(ptr2 == NULL);
 
     // Succeeds, adjusts the footprint.
-    ptr3 = space->AllocWithGrowth(self, 2 * MB);
+    ptr3 = space->AllocWithGrowth(self, 2 * MB, NULL);
     EXPECT_TRUE(ptr3 != NULL);
     space->Free(self, ptr3);
 
@@ -156,32 +159,35 @@
   AddContinuousSpace(space);
 
   // Succeeds, fits without adjusting the footprint limit.
-  mirror::Object* ptr1 = space->Alloc(self, 1 * MB);
+  mirror::Object* ptr1 = space->Alloc(self, 1 * MB, NULL);
   EXPECT_TRUE(ptr1 != NULL);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr2 = space->Alloc(self, 8 * MB);
+  mirror::Object* ptr2 = space->Alloc(self, 8 * MB, NULL);
   EXPECT_TRUE(ptr2 == NULL);
 
   // Succeeds, adjusts the footprint.
-  mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB);
+  size_t ptr3_bytes_allocated;
+  mirror::Object* ptr3 = space->AllocWithGrowth(self, 8 * MB, &ptr3_bytes_allocated);
   EXPECT_TRUE(ptr3 != NULL);
+  EXPECT_LE(8U * MB, ptr3_bytes_allocated);
 
   // Fails, requires a higher footprint limit.
-  mirror::Object* ptr4 = space->Alloc(self, 8 * MB);
+  mirror::Object* ptr4 = space->Alloc(self, 8 * MB, NULL);
   EXPECT_TRUE(ptr4 == NULL);
 
   // Also fails, requires a higher allowed footprint.
-  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB);
+  mirror::Object* ptr5 = space->AllocWithGrowth(self, 8 * MB, NULL);
   EXPECT_TRUE(ptr5 == NULL);
 
   // Release some memory.
   size_t free3 = space->AllocationSize(ptr3);
+  EXPECT_EQ(free3, ptr3_bytes_allocated);
   space->Free(self, ptr3);
   EXPECT_LE(8U * MB, free3);
 
   // Succeeds, now that memory has been freed.
-  void* ptr6 = space->AllocWithGrowth(self, 9 * MB);
+  void* ptr6 = space->AllocWithGrowth(self, 9 * MB, NULL);
   EXPECT_TRUE(ptr6 != NULL);
 
   // Final clean up.
@@ -201,7 +207,7 @@
   // Succeeds, fits without adjusting the max allowed footprint.
   mirror::Object* lots_of_objects[1024];
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    lots_of_objects[i] = space->Alloc(self, 16);
+    lots_of_objects[i] = space->Alloc(self, 16, NULL);
     EXPECT_TRUE(lots_of_objects[i] != NULL);
   }
 
@@ -213,7 +219,7 @@
 
   // Succeeds, fits by adjusting the max allowed footprint.
   for (size_t i = 0; i < arraysize(lots_of_objects); i++) {
-    lots_of_objects[i] = space->AllocWithGrowth(self, 1024);
+    lots_of_objects[i] = space->AllocWithGrowth(self, 1024, NULL);
     EXPECT_TRUE(lots_of_objects[i] != NULL);
   }
 
@@ -276,9 +282,9 @@
       }
       mirror::Object* object;
       if (round <= 1) {
-        object = space->Alloc(self, alloc_size);
+        object = space->Alloc(self, alloc_size, NULL);
       } else {
-        object = space->AllocWithGrowth(self, alloc_size);
+        object = space->AllocWithGrowth(self, alloc_size, NULL);
       }
       footprint = mspace_footprint(mspace);
       EXPECT_GE(space->Size(), footprint);  // invariant
@@ -355,9 +361,9 @@
   mirror::Object* large_object;
   size_t three_quarters_space = (growth_limit / 2) + (growth_limit / 4);
   if (round <= 1) {
-    large_object = space->Alloc(self, three_quarters_space);
+    large_object = space->Alloc(self, three_quarters_space, NULL);
   } else {
-    large_object = space->AllocWithGrowth(self, three_quarters_space);
+    large_object = space->AllocWithGrowth(self, three_quarters_space, NULL);
   }
   EXPECT_TRUE(large_object != NULL);