Split the allocation path into 'instrumented' and 'uninstrumented'
variants.

The instrumented path is equivalent to the existing allocation path,
which checks for three instrumentation mechanisms (debugger allocation
tracking, runtime allocation stats collection, and Valgrind) on every
allocation. The uninstrumented path skips these checks. We use the
uninstrumented path by default and switch to the instrumented path only
while at least one of the three mechanisms is enabled. The
uninstrumented version of Heap::AllocObject() is inlined.
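
As an illustration of the mechanism, below is a minimal, self-contained
C++ sketch of the entrypoint-swapping pattern: a flag selects between a
fast and an instrumented function pointer once, instead of branching on
every allocation. In the actual change this is done per architecture by
SetQuickAllocEntryPointsInstrumented()/ResetQuickAllocEntryPoints(),
toggled through Runtime::InstrumentQuickAllocEntryPoints(); the names
and the printf stand-in for tracking below are invented for the example
and are not the ART API:

    #include <cstddef>
    #include <cstdio>
    #include <cstdlib>

    // Table of allocation entrypoints, analogous to QuickEntryPoints.
    struct EntryPoints {
      void* (*alloc_object)(std::size_t);
    };

    // cf. quick_alloc_entry_points_instrumented in the patch below.
    static bool g_instrumented = false;

    // Fast path: no per-allocation instrumentation checks.
    static void* AllocFast(std::size_t n) { return std::malloc(n); }

    // Slow path: extra bookkeeping (a stand-in for debugger tracking,
    // stats collection, and Valgrind support).
    static void* AllocInstrumented(std::size_t n) {
      void* obj = std::malloc(n);
      std::printf("alloc %zu bytes -> %p\n", n, obj);
      return obj;
    }

    // cf. ResetQuickAllocEntryPoints(): select the variant once, so
    // the per-allocation "is instrumentation enabled?" branch
    // disappears from the common case.
    static void ResetEntryPoints(EntryPoints* ep) {
      ep->alloc_object = g_instrumented ? AllocInstrumented : AllocFast;
    }

    int main() {
      EntryPoints ep;
      ResetEntryPoints(&ep);           // default: uninstrumented
      void* a = ep.alloc_object(16);
      g_instrumented = true;           // e.g. debugger enables tracking
      ResetEntryPoints(&ep);           // swap callers to the slow path
      void* b = ep.alloc_object(32);
      std::free(a);
      std::free(b);
      return 0;
    }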

This change improves the Ritz MemAllocTest by ~4% on Nexus 4 and ~3%
on Host/x86.

Bug: 9986565
Change-Id: I3e68dfff6789d77bbdcea98457b694e1b5fcef5f
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 9e6902d..e6e13be 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -42,6 +42,13 @@
 extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
 extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
 
+extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
@@ -133,6 +140,30 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
+static bool quick_alloc_entry_points_instrumented = false;
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  quick_alloc_entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  if (quick_alloc_entry_points_instrumented) {
+    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
+    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
+  } else {
+    qpoints->pAllocArray = art_quick_alloc_array;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
+    qpoints->pAllocObject = art_quick_alloc_object;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  }
+}
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -147,12 +178,7 @@
   ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
 
   // Alloc
-  qpoints->pAllocArray = art_quick_alloc_array;
-  qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-  qpoints->pAllocObject = art_quick_alloc_object;
-  qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-  qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-  qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index a77ce01..5b2dd6c 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -706,6 +706,17 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_object
 
+    .extern artAllocObjectFromCodeInstrumented
+ENTRY art_quick_alloc_object_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r2, r9                     @ pass Thread::Current
+    mov    r3, sp                     @ pass SP
+    bl     artAllocObjectFromCodeInstrumented     @ (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_object_instrumented
+
     /*
      * Called by managed code to allocate an object when the caller doesn't know whether it has
      * access to the created type.
@@ -721,6 +732,17 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_object_with_access_check
 
+    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_object_with_access_check_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r2, r9                     @ pass Thread::Current
+    mov    r3, sp                     @ pass SP
+    bl     artAllocObjectFromCodeWithAccessCheckInstrumented  @ (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_object_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array.
      */
@@ -741,6 +763,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_array
 
+    .extern artAllocArrayFromCodeInstrumented
+ENTRY art_quick_alloc_array_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, SP)
+    bl     artAllocArrayFromCodeInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array when the caller doesn't know whether it has
      * access to the created type.
@@ -762,6 +801,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_alloc_array_with_access_check
 
+    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_array_with_access_check_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, SP)
+    bl     artAllocArrayFromCodeWithAccessCheckInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -782,6 +838,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_check_and_alloc_array
 
+    .extern artCheckAndAllocArrayFromCodeInstrumented
+ENTRY art_quick_check_and_alloc_array_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread*, SP)
+    bl     artCheckAndAllocArrayFromCodeInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_check_and_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -802,6 +875,23 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_check_and_alloc_array_with_access_check
 
+    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  @ save callee saves in case of GC
+    mov    r3, r9                     @ pass Thread::Current
+    mov    r12, sp
+    str    r12, [sp, #-16]!           @ expand the frame and pass SP
+    .pad #16
+    .cfi_adjust_cfa_offset 16
+    @ artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread*, SP)
+    bl     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+    add    sp, #16                    @ strip the extra frame
+    .cfi_adjust_cfa_offset -16
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_RESULT_IS_NON_ZERO
+    DELIVER_PENDING_EXCEPTION
+END art_quick_check_and_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 40d7cd9..3d08298 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -41,6 +41,13 @@
 extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
 extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
 
+extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+
 // Cast entrypoints.
 extern "C" uint32_t artIsAssignableFromCode(const mirror::Class* klass,
                                             const mirror::Class* ref_class);
@@ -134,6 +141,30 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
+static bool quick_alloc_entry_points_instrumented = false;
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  quick_alloc_entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  if (quick_alloc_entry_points_instrumented) {
+    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
+    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
+  } else {
+    qpoints->pAllocArray = art_quick_alloc_array;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
+    qpoints->pAllocObject = art_quick_alloc_object;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  }
+}
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -148,12 +179,7 @@
   ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
 
   // Alloc
-  qpoints->pAllocArray = art_quick_alloc_array;
-  qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-  qpoints->pAllocObject = art_quick_alloc_object;
-  qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-  qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-  qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 004fda6..cb82606 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -770,6 +770,16 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_object
 
+    .extern artAllocObjectFromCodeInstrumented
+ENTRY art_quick_alloc_object_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a2, rSELF                # pass Thread::Current
+    jal     artAllocObjectFromCodeInstrumented    # (uint32_t type_idx, Method* method, Thread*, $sp)
+    move    $a3, $sp                  # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_object_instrumented
+
     /*
      * Called by managed code to allocate an object when the caller doesn't know whether it has
      * access to the created type.
@@ -784,6 +794,16 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_object_with_access_check
 
+    .extern artAllocObjectFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_object_with_access_check_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a2, rSELF                # pass Thread::Current
+    jal     artAllocObjectFromCodeWithAccessCheckInstrumented  # (uint32_t type_idx, Method* method, Thread*, $sp)
+    move    $a3, $sp                  # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_object_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array.
      */
@@ -798,6 +818,17 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_array
 
+    .extern artAllocArrayFromCodeInstrumented
+ENTRY art_quick_alloc_array_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t component_count, Thread*, $sp)
+    jal     artAllocArrayFromCodeInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array when the caller doesn't know whether it has
      * access to the created type.
@@ -813,6 +844,17 @@
     RETURN_IF_NONZERO
 END art_quick_alloc_array_with_access_check
 
+    .extern artAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_alloc_array_with_access_check_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, component_count, Thread*, $sp)
+    jal     artAllocArrayFromCodeWithAccessCheckInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -827,6 +869,17 @@
     RETURN_IF_NONZERO
 END art_quick_check_and_alloc_array
 
+    .extern artCheckAndAllocArrayFromCodeInstrumented
+ENTRY art_quick_check_and_alloc_array_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, Method* method, int32_t count, Thread*, $sp)
+    jal     artCheckAndAllocArrayFromCodeInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_check_and_alloc_array_instrumented
+
     /*
      * Called by managed code to allocate an array in a special case for FILLED_NEW_ARRAY.
      */
@@ -841,6 +894,17 @@
     RETURN_IF_NONZERO
 END art_quick_check_and_alloc_array_with_access_check
 
+    .extern artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+ENTRY art_quick_check_and_alloc_array_with_access_check_instrumented
+    GENERATE_GLOBAL_POINTER
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  # save callee saves in case of GC
+    move    $a3, rSELF                # pass Thread::Current
+    # artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(type_idx, method, count, Thread*, $sp)
+    jal     artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented
+    sw      $sp, 16($sp)              # pass $sp
+    RETURN_IF_NONZERO
+END art_quick_check_and_alloc_array_with_access_check_instrumented
+
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
      */
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index abc2990..4c87e07 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -40,6 +40,13 @@
 extern "C" void* art_quick_check_and_alloc_array(uint32_t, void*, int32_t);
 extern "C" void* art_quick_check_and_alloc_array_with_access_check(uint32_t, void*, int32_t);
 
+extern "C" void* art_quick_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_alloc_object_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_alloc_object_with_access_check_instrumented(uint32_t type_idx, void* method);
+extern "C" void* art_quick_check_and_alloc_array_instrumented(uint32_t, void*, int32_t);
+extern "C" void* art_quick_check_and_alloc_array_with_access_check_instrumented(uint32_t, void*, int32_t);
+
 // Cast entrypoints.
 extern "C" uint32_t art_quick_is_assignable(const mirror::Class* klass,
                                                 const mirror::Class* ref_class);
@@ -116,6 +123,30 @@
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_throw_stack_overflow(void*);
 
+static bool quick_alloc_entry_points_instrumented = false;
+
+void SetQuickAllocEntryPointsInstrumented(bool instrumented) {
+  quick_alloc_entry_points_instrumented = instrumented;
+}
+
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints) {
+  if (quick_alloc_entry_points_instrumented) {
+    qpoints->pAllocArray = art_quick_alloc_array_instrumented;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check_instrumented;
+    qpoints->pAllocObject = art_quick_alloc_object_instrumented;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check_instrumented;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array_instrumented;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check_instrumented;
+  } else {
+    qpoints->pAllocArray = art_quick_alloc_array;
+    qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
+    qpoints->pAllocObject = art_quick_alloc_object;
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
+    qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
+    qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  }
+}
+
 void InitEntryPoints(InterpreterEntryPoints* ipoints, JniEntryPoints* jpoints,
                      PortableEntryPoints* ppoints, QuickEntryPoints* qpoints) {
   // Interpreter
@@ -130,12 +161,7 @@
   ppoints->pPortableToInterpreterBridge = art_portable_to_interpreter_bridge;
 
   // Alloc
-  qpoints->pAllocArray = art_quick_alloc_array;
-  qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check;
-  qpoints->pAllocObject = art_quick_alloc_object;
-  qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check;
-  qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array;
-  qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check;
+  ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
   qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index dbf552f..06b2203 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -389,6 +389,13 @@
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array, artCheckAndAllocArrayFromCode, RETURN_IF_EAX_NOT_ZERO
 THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check, artCheckAndAllocArrayFromCodeWithAccessCheck, RETURN_IF_EAX_NOT_ZERO
 
+TWO_ARG_DOWNCALL art_quick_alloc_object_instrumented, artAllocObjectFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
+TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check_instrumented, artAllocObjectFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_alloc_array_instrumented, artAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check_instrumented, artAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_instrumented, artCheckAndAllocArrayFromCodeInstrumented, RETURN_IF_EAX_NOT_ZERO
+THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check_instrumented, artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented, RETURN_IF_EAX_NOT_ZERO
+
 TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_EAX_NOT_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_EAX_NOT_ZERO
 TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_EAX_NOT_ZERO
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 2eca734..e57137f 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -3487,7 +3487,9 @@
       recent_allocation_records_ = new AllocRecord[gAllocRecordMax];
       CHECK(recent_allocation_records_ != NULL);
     }
+    Runtime::Current()->InstrumentQuickAllocEntryPoints();
   } else {
+    Runtime::Current()->UninstrumentQuickAllocEntryPoints();
     delete[] recent_allocation_records_;
     recent_allocation_records_ = NULL;
   }
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 52f8c81..d9c9e31 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -33,20 +33,20 @@
 
 namespace art {
 
-// Helper function to allocate array for FILLED_NEW_ARRAY.
-mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
-                                          int32_t component_count, Thread* self,
-                                          bool access_check) {
+static inline bool CheckFilledNewArrayAlloc(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                            int32_t component_count, Thread* self,
+                                            bool access_check, mirror::Class** klass_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   if (UNLIKELY(component_count < 0)) {
     ThrowNegativeArraySizeException(component_count);
-    return NULL;  // Failure
+    return false;  // Failure
   }
-  mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->Get(type_idx);
+  mirror::Class* klass = referrer->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
   if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
     klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, referrer);
     if (klass == NULL) {  // Error
       DCHECK(self->IsExceptionPending());
-      return NULL;  // Failure
+      return false;  // Failure
     }
   }
   if (UNLIKELY(klass->IsPrimitive() && !klass->IsPrimitiveInt())) {
@@ -60,18 +60,40 @@
                                "Found type %s; filled-new-array not implemented for anything but \'int\'",
                                PrettyDescriptor(klass).c_str());
     }
-    return NULL;  // Failure
-  } else {
-    if (access_check) {
-      mirror::Class* referrer_klass = referrer->GetDeclaringClass();
-      if (UNLIKELY(!referrer_klass->CanAccess(klass))) {
-        ThrowIllegalAccessErrorClass(referrer_klass, klass);
-        return NULL;  // Failure
-      }
-    }
-    DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
-    return mirror::Array::Alloc(self, klass, component_count);
+    return false;  // Failure
   }
+  if (access_check) {
+    mirror::Class* referrer_klass = referrer->GetDeclaringClass();
+    if (UNLIKELY(!referrer_klass->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer_klass, klass);
+      return false;  // Failure
+    }
+  }
+  DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
+  *klass_ptr = klass;
+  return true;
+}
+
+// Helper function to allocate array for FILLED_NEW_ARRAY.
+mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                          int32_t component_count, Thread* self,
+                                          bool access_check) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
+    return NULL;
+  }
+  return mirror::Array::AllocUninstrumented(self, klass, component_count);
+}
+
+// Helper function to allocate array for FILLED_NEW_ARRAY.
+mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* referrer,
+                                                      int32_t component_count, Thread* self,
+                                                      bool access_check) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckFilledNewArrayAlloc(type_idx, referrer, component_count, self, access_check, &klass))) {
+    return NULL;
+  }
+  return mirror::Array::AllocInstrumented(self, klass, component_count);
 }
 
 mirror::ArtField* FindFieldFromCode(uint32_t field_idx, const mirror::ArtMethod* referrer,
@@ -405,5 +427,4 @@
     return zero;
   }
 }
-
 }  // namespace art
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index fff7b71..e87dc96 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -40,6 +40,42 @@
   class Object;
 }  // namespace mirror
 
+static inline bool CheckObjectAlloc(uint32_t type_idx, mirror::ArtMethod* method,
+                                    Thread* self,
+                                    bool access_check,
+                                    mirror::Class** klass_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  Runtime* runtime = Runtime::Current();
+  if (UNLIKELY(klass == NULL)) {
+    klass = runtime->GetClassLinker()->ResolveType(type_idx, method);
+    if (klass == NULL) {
+      DCHECK(self->IsExceptionPending());
+      return false;  // Failure
+    }
+  }
+  if (access_check) {
+    if (UNLIKELY(!klass->IsInstantiable())) {
+      ThrowLocation throw_location = self->GetCurrentLocationForThrow();
+      self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
+                              PrettyDescriptor(klass).c_str());
+      return false;  // Failure
+    }
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      return false;  // Failure
+    }
+  }
+  if (!klass->IsInitialized() &&
+      !runtime->GetClassLinker()->EnsureInitialized(klass, true, true)) {
+    DCHECK(self->IsExceptionPending());
+    return false;  // Failure
+  }
+  *klass_ptr = klass;
+  return true;
+}
+
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
 // When verification/compiler hasn't been able to verify access, optionally perform an access
@@ -48,34 +84,50 @@
                                                   Thread* self,
                                                   bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->Get(type_idx);
-  Runtime* runtime = Runtime::Current();
-  if (UNLIKELY(klass == NULL)) {
-    klass = runtime->GetClassLinker()->ResolveType(type_idx, method);
-    if (klass == NULL) {
-      DCHECK(self->IsExceptionPending());
-      return NULL;  // Failure
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
+    return NULL;
+  }
+  return klass->AllocObjectUninstrumented(self);
+}
+
+static inline mirror::Object* AllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                              Thread* self,
+                                                              bool access_check)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckObjectAlloc(type_idx, method, self, access_check, &klass))) {
+    return NULL;
+  }
+  return klass->AllocObjectInstrumented(self);
+}
+
+static inline bool CheckArrayAlloc(uint32_t type_idx, mirror::ArtMethod* method,
+                                   int32_t component_count,
+                                   bool access_check, mirror::Class** klass_ptr)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (UNLIKELY(component_count < 0)) {
+    ThrowNegativeArraySizeException(component_count);
+    return false;  // Failure
+  }
+  mirror::Class* klass = method->GetDexCacheResolvedTypes()->GetWithoutChecks(type_idx);
+  if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
+    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
+    if (klass == NULL) {  // Error
+      DCHECK(Thread::Current()->IsExceptionPending());
+      return false;  // Failure
     }
+    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
   }
   if (access_check) {
-    if (UNLIKELY(!klass->IsInstantiable())) {
-      ThrowLocation throw_location = self->GetCurrentLocationForThrow();
-      self->ThrowNewException(throw_location, "Ljava/lang/InstantiationError;",
-                              PrettyDescriptor(klass).c_str());
-      return NULL;  // Failure
-    }
     mirror::Class* referrer = method->GetDeclaringClass();
     if (UNLIKELY(!referrer->CanAccess(klass))) {
       ThrowIllegalAccessErrorClass(referrer, klass);
-      return NULL;  // Failure
+      return false;  // Failure
     }
   }
-  if (!klass->IsInitialized() &&
-      !runtime->GetClassLinker()->EnsureInitialized(klass, true, true)) {
-    DCHECK(self->IsExceptionPending());
-    return NULL;  // Failure
-  }
-  return klass->AllocObject(self);
+  *klass_ptr = klass;
+  return true;
 }
 
 // Given the context of a calling Method, use its DexCache to resolve a type to an array Class. If
@@ -86,27 +138,22 @@
                                                 int32_t component_count,
                                                 Thread* self, bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
-  if (UNLIKELY(component_count < 0)) {
-    ThrowNegativeArraySizeException(component_count);
-    return NULL;  // Failure
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
+    return NULL;
   }
-  mirror::Class* klass = method->GetDexCacheResolvedTypes()->Get(type_idx);
-  if (UNLIKELY(klass == NULL)) {  // Not in dex cache so try to resolve
-    klass = Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method);
-    if (klass == NULL) {  // Error
-      DCHECK(Thread::Current()->IsExceptionPending());
-      return NULL;  // Failure
-    }
-    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
+  return mirror::Array::AllocUninstrumented(self, klass, component_count);
+}
+
+static inline mirror::Array* AllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                            int32_t component_count,
+                                                            Thread* self, bool access_check)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  mirror::Class* klass;
+  if (UNLIKELY(!CheckArrayAlloc(type_idx, method, component_count, access_check, &klass))) {
+    return NULL;
   }
-  if (access_check) {
-    mirror::Class* referrer = method->GetDeclaringClass();
-    if (UNLIKELY(!referrer->CanAccess(klass))) {
-      ThrowIllegalAccessErrorClass(referrer, klass);
-      return NULL;  // Failure
-    }
-  }
-  return mirror::Array::Alloc(self, klass, component_count);
+  return mirror::Array::AllocInstrumented(self, klass, component_count);
 }
 
 extern mirror::Array* CheckAndAllocArrayFromCode(uint32_t type_idx, mirror::ArtMethod* method,
@@ -114,6 +161,11 @@
                                                  Thread* self, bool access_check)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+extern mirror::Array* CheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                             int32_t component_count,
+                                                             Thread* self, bool access_check)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
 // Type of find field operation for fast and slow case.
 enum FindFieldType {
   InstanceObjectRead,
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 420e63a..6f7b1ab 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -76,4 +76,57 @@
   return CheckAndAllocArrayFromCode(type_idx, method, component_count, self, true);
 }
 
+extern "C" mirror::Object* artAllocObjectFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                              Thread* self, mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocObjectFromCodeInstrumented(type_idx, method, self, false);
+}
+
+extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
+                                                                             mirror::ArtMethod* method,
+                                                                             Thread* self,
+                                                                             mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocObjectFromCodeInstrumented(type_idx, method, self, true);
+}
+
+extern "C" mirror::Array* artAllocArrayFromCodeInstrumented(uint32_t type_idx, mirror::ArtMethod* method,
+                                                            int32_t component_count, Thread* self,
+                                                            mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
+}
+
+extern "C" mirror::Array* artAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
+                                                                           mirror::ArtMethod* method,
+                                                                           int32_t component_count,
+                                                                           Thread* self,
+                                                                           mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return AllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
+}
+
+extern "C" mirror::Array* artCheckAndAllocArrayFromCodeInstrumented(uint32_t type_idx,
+                                                                    mirror::ArtMethod* method,
+                                                                    int32_t component_count, Thread* self,
+                                                                    mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, false);
+}
+
+extern "C" mirror::Array* artCheckAndAllocArrayFromCodeWithAccessCheckInstrumented(uint32_t type_idx,
+                                                                                   mirror::ArtMethod* method,
+                                                                                   int32_t component_count,
+                                                                                   Thread* self,
+                                                                                   mirror::ArtMethod** sp)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  FinishCalleeSaveFrameSetup(self, sp, Runtime::kRefsOnly);
+  return CheckAndAllocArrayFromCodeInstrumented(type_idx, method, component_count, self, true);
+}
+
 }  // namespace art
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
new file mode 100644
index 0000000..b7ef77c
--- /dev/null
+++ b/runtime/gc/heap-inl.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_HEAP_INL_H_
+#define ART_RUNTIME_GC_HEAP_INL_H_
+
+#include "heap.h"
+
+#include "debugger.h"
+#include "gc/space/dlmalloc_space-inl.h"
+#include "gc/space/large_object_space.h"
+#include "object_utils.h"
+#include "runtime.h"
+#include "thread.h"
+#include "thread-inl.h"
+
+namespace art {
+namespace gc {
+
+inline mirror::Object* Heap::AllocObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
+  DebugCheckPreconditionsForAllocObject(c, byte_count);
+  mirror::Object* obj;
+  size_t bytes_allocated;
+  AllocationTimer alloc_timer(this, &obj);
+  bool large_object_allocation = TryAllocLargeObjectUninstrumented(self, c, byte_count,
+                                                                   &obj, &bytes_allocated);
+  if (LIKELY(!large_object_allocation)) {
+    // Non-large object allocation.
+    obj = AllocateUninstrumented(self, alloc_space_, byte_count, &bytes_allocated);
+    // Ensure that we did not allocate into a zygote space.
+    DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
+  }
+  if (LIKELY(obj != NULL)) {
+    obj->SetClass(c);
+    // Record allocation after since we want to use the atomic add for the atomic fence to guard
+    // the SetClass since we do not want the class to appear NULL in another thread.
+    size_t new_num_bytes_allocated = RecordAllocationUninstrumented(bytes_allocated, obj);
+    DCHECK(!Dbg::IsAllocTrackingEnabled());
+    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
+    if (kDesiredHeapVerification > kNoHeapVerification) {
+      VerifyObject(obj);
+    }
+    return obj;
+  }
+  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
+  return NULL;
+}
+
+inline size_t Heap::RecordAllocationUninstrumented(size_t size, mirror::Object* obj) {
+  DCHECK(obj != NULL);
+  DCHECK_GT(size, 0u);
+  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
+
+  DCHECK(!Runtime::Current()->HasStatsEnabled());
+
+  // This is safe to do since the GC will never free objects which are neither in the allocation
+  // stack nor the live bitmap.
+  while (!allocation_stack_->AtomicPushBack(obj)) {
+    CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
+  }
+
+  return old_num_bytes_allocated + size;
+}
+
+inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                                         bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  DCHECK(!running_on_valgrind_);
+  return space->Alloc(self, alloc_size, bytes_allocated);
+}
+
+// DlMallocSpace-specific version.
+inline mirror::Object* Heap::TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                                         bool grow, size_t* bytes_allocated) {
+  if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
+    return NULL;
+  }
+  DCHECK(!running_on_valgrind_);
+  return space->AllocNonvirtual(self, alloc_size, bytes_allocated);
+}
+
+template <class T>
+inline mirror::Object* Heap::AllocateUninstrumented(Thread* self, T* space, size_t alloc_size,
+                                                    size_t* bytes_allocated) {
+  // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
+  // done in the runnable state where suspension is expected.
+  DCHECK_EQ(self->GetState(), kRunnable);
+  self->AssertThreadSuspensionIsAllowable();
+
+  mirror::Object* ptr = TryToAllocateUninstrumented(self, space, alloc_size, false, bytes_allocated);
+  if (LIKELY(ptr != NULL)) {
+    return ptr;
+  }
+  return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
+}
+
+inline bool Heap::TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                                    mirror::Object** obj_ptr, size_t* bytes_allocated) {
+  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
+  if (UNLIKELY(large_object_allocation)) {
+    mirror::Object* obj = AllocateUninstrumented(self, large_object_space_, byte_count, bytes_allocated);
+    // Make sure that our large object didn't get placed anywhere within the space interval or else
+    // it breaks the immune range.
+    DCHECK(obj == NULL ||
+           reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
+           reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
+    *obj_ptr = obj;
+  }
+  return large_object_allocation;
+}
+
+inline void Heap::DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) {
+  DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
+         (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
+         ClassHelper(c).GetDescriptorAsStringPiece().length() == 0);
+  DCHECK_GE(byte_count, sizeof(mirror::Object));
+}
+
+inline Heap::AllocationTimer::AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr)
+    : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr) {
+  if (kMeasureAllocationTime) {
+    allocation_start_time_ = NanoTime() / kTimeAdjust;
+  }
+}
+
+inline Heap::AllocationTimer::~AllocationTimer() {
+  if (kMeasureAllocationTime) {
+    mirror::Object* allocated_obj = *allocated_obj_ptr_;
+    // Only if the allocation succeeded, record the time.
+    if (allocated_obj != NULL) {
+      uint64_t allocation_end_time = NanoTime() / kTimeAdjust;
+      heap_->total_allocation_time_.fetch_add(allocation_end_time - allocation_start_time_);
+    }
+  }
+}
+
+inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) {
+  // We need to have a zygote space or else our newly allocated large object can end up in the
+  // Zygote resulting in it being prematurely freed.
+  // We can only do this for primitive objects since large objects will not be within the card table
+  // range. This also means that we rely on SetClass not dirtying the object's card.
+  return byte_count >= kLargeObjectThreshold && have_zygote_space_ && c->IsPrimitiveArray();
+}
+
+inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow) {
+  size_t new_footprint = num_bytes_allocated_ + alloc_size;
+  if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
+    if (UNLIKELY(new_footprint > growth_limit_)) {
+      return true;
+    }
+    if (!concurrent_gc_) {
+      if (!grow) {
+        return true;
+      } else {
+        max_allowed_footprint_ = new_footprint;
+      }
+    }
+  }
+  return false;
+}
+
+inline void Heap::CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj) {
+  if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
+    // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
+    SirtRef<mirror::Object> ref(self, obj);
+    RequestConcurrentGC(self);
+  }
+}
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_HEAP_INL_H_
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index cefde04..c0e46ac 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -39,6 +39,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
+#include "heap-inl.h"
 #include "image.h"
 #include "invoke_arg_array_builder.h"
 #include "mirror/art_field-inl.h"
@@ -63,8 +64,6 @@
 static constexpr bool kDumpGcPerformanceOnShutdown = false;
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
 static constexpr size_t kMinConcurrentRemainingBytes = 128 * KB;
-// If true, measure the total allocation time.
-static constexpr bool kMeasureAllocationTime = false;
 
 Heap::Heap(size_t initial_size, size_t growth_limit, size_t min_free, size_t max_free,
            double target_utilization, size_t capacity, const std::string& original_image_file_name,
@@ -105,7 +104,6 @@
           :  std::numeric_limits<size_t>::max()),
       total_bytes_freed_ever_(0),
       total_objects_freed_ever_(0),
-      large_object_threshold_(3 * kPageSize),
       num_bytes_allocated_(0),
       native_bytes_allocated_(0),
       gc_memory_overhead_(0),
@@ -238,6 +236,11 @@
   }
 
   CHECK_NE(max_allowed_footprint_, 0U);
+
+  if (running_on_valgrind_) {
+    Runtime::Current()->InstrumentQuickAllocEntryPoints();
+  }
+
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() exiting";
   }
@@ -551,81 +554,69 @@
   }
 }
 
-mirror::Object* Heap::AllocObject(Thread* self, mirror::Class* c, size_t byte_count) {
-  DCHECK(c == NULL || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
-         (c->IsVariableSize() || c->GetObjectSize() == byte_count) ||
-         ClassHelper(c).GetDescriptorAsStringPiece().length() == 0);
-  DCHECK_GE(byte_count, sizeof(mirror::Object));
-
-  mirror::Object* obj = NULL;
-  size_t bytes_allocated = 0;
-  uint64_t allocation_start = 0;
-  if (UNLIKELY(kMeasureAllocationTime)) {
-    allocation_start = NanoTime() / kTimeAdjust;
+void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation) {
+  std::ostringstream oss;
+  int64_t total_bytes_free = GetFreeMemory();
+  oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
+      << " free bytes";
+  // If the allocation failed due to fragmentation, print out the largest continuous allocation.
+  if (!large_object_allocation && total_bytes_free >= byte_count) {
+    size_t max_contiguous_allocation = 0;
+    for (const auto& space : continuous_spaces_) {
+      if (space->IsDlMallocSpace()) {
+        space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
+      }
+    }
+    oss << "; failed due to fragmentation (largest possible contiguous allocation "
+        <<  max_contiguous_allocation << " bytes)";
   }
+  self->ThrowOutOfMemoryError(oss.str().c_str());
+}
 
-  // We need to have a zygote space or else our newly allocated large object can end up in the
-  // Zygote resulting in it being prematurely freed.
-  // We can only do this for primitive objects since large objects will not be within the card table
-  // range. This also means that we rely on SetClass not dirtying the object's card.
-  bool large_object_allocation =
-      byte_count >= large_object_threshold_ && have_zygote_space_ && c->IsPrimitiveArray();
+inline bool Heap::TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                                  mirror::Object** obj_ptr, size_t* bytes_allocated) {
+  bool large_object_allocation = ShouldAllocLargeObject(c, byte_count);
   if (UNLIKELY(large_object_allocation)) {
-    obj = Allocate(self, large_object_space_, byte_count, &bytes_allocated);
+    mirror::Object* obj = AllocateInstrumented(self, large_object_space_, byte_count, bytes_allocated);
     // Make sure that our large object didn't get placed anywhere within the space interval or else
     // it breaks the immune range.
     DCHECK(obj == NULL ||
            reinterpret_cast<byte*>(obj) < continuous_spaces_.front()->Begin() ||
            reinterpret_cast<byte*>(obj) >= continuous_spaces_.back()->End());
-  } else {
-    obj = Allocate(self, alloc_space_, byte_count, &bytes_allocated);
+    *obj_ptr = obj;
+  }
+  return large_object_allocation;
+}
+
+mirror::Object* Heap::AllocObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count) {
+  DebugCheckPreconditionsForAllocObject(c, byte_count);
+  mirror::Object* obj;
+  size_t bytes_allocated;
+  AllocationTimer alloc_timer(this, &obj);
+  bool large_object_allocation = TryAllocLargeObjectInstrumented(self, c, byte_count,
+                                                                 &obj, &bytes_allocated);
+  if (LIKELY(!large_object_allocation)) {
+    // Non-large object allocation.
+    obj = AllocateInstrumented(self, alloc_space_, byte_count, &bytes_allocated);
     // Ensure that we did not allocate into a zygote space.
     DCHECK(obj == NULL || !have_zygote_space_ || !FindSpaceFromObject(obj, false)->IsZygoteSpace());
   }
-
   if (LIKELY(obj != NULL)) {
     obj->SetClass(c);
-
     // Record allocation after since we want to use the atomic add for the atomic fence to guard
     // the SetClass since we do not want the class to appear NULL in another thread.
-    RecordAllocation(bytes_allocated, obj);
-
+    size_t new_num_bytes_allocated = RecordAllocationInstrumented(bytes_allocated, obj);
     if (Dbg::IsAllocTrackingEnabled()) {
       Dbg::RecordAllocation(c, byte_count);
     }
-    if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_) >= concurrent_start_bytes_)) {
-      // The SirtRef is necessary since the calls in RequestConcurrentGC are a safepoint.
-      SirtRef<mirror::Object> ref(self, obj);
-      RequestConcurrentGC(self);
-    }
+    CheckConcurrentGC(self, new_num_bytes_allocated, obj);
     if (kDesiredHeapVerification > kNoHeapVerification) {
       VerifyObject(obj);
     }
-
-    if (UNLIKELY(kMeasureAllocationTime)) {
-      total_allocation_time_.fetch_add(NanoTime() / kTimeAdjust - allocation_start);
-    }
-
     return obj;
-  } else {
-    std::ostringstream oss;
-    int64_t total_bytes_free = GetFreeMemory();
-    oss << "Failed to allocate a " << byte_count << " byte allocation with " << total_bytes_free
-        << " free bytes";
-    // If the allocation failed due to fragmentation, print out the largest continuous allocation.
-    if (!large_object_allocation && total_bytes_free >= byte_count) {
-      size_t max_contiguous_allocation = 0;
-      for (const auto& space : continuous_spaces_) {
-        if (space->IsDlMallocSpace()) {
-          space->AsDlMallocSpace()->Walk(MSpaceChunkCallback, &max_contiguous_allocation);
-        }
-      }
-      oss << "; failed due to fragmentation (largest possible contiguous allocation "
-          <<  max_contiguous_allocation << " bytes)";
-    }
-    self->ThrowOutOfMemoryError(oss.str().c_str());
-    return NULL;
   }
+  ThrowOutOfMemoryError(self, byte_count, large_object_allocation);
+  return NULL;
 }
 
 bool Heap::IsHeapAddress(const mirror::Object* obj) {
@@ -768,10 +759,10 @@
   GetLiveBitmap()->Walk(Heap::VerificationCallback, this);
 }
 
-inline void Heap::RecordAllocation(size_t size, mirror::Object* obj) {
+inline size_t Heap::RecordAllocationInstrumented(size_t size, mirror::Object* obj) {
   DCHECK(obj != NULL);
   DCHECK_GT(size, 0u);
-  num_bytes_allocated_.fetch_add(size);
+  size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
 
   if (Runtime::Current()->HasStatsEnabled()) {
     RuntimeStats* thread_stats = Thread::Current()->GetStats();
@@ -789,6 +780,8 @@
   while (!allocation_stack_->AtomicPushBack(obj)) {
     CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
   }
+
+  return old_num_bytes_allocated + size;
 }
 
 void Heap::RecordFree(size_t freed_objects, size_t freed_bytes) {
@@ -807,25 +800,8 @@
   }
 }
 
-inline bool Heap::IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow) {
-  size_t new_footprint = num_bytes_allocated_ + alloc_size;
-  if (UNLIKELY(new_footprint > max_allowed_footprint_)) {
-    if (UNLIKELY(new_footprint > growth_limit_)) {
-      return true;
-    }
-    if (!concurrent_gc_) {
-      if (!grow) {
-        return true;
-      } else {
-        max_allowed_footprint_ = new_footprint;
-      }
-    }
-  }
-  return false;
-}
-
-inline mirror::Object* Heap::TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size,
-                                           bool grow, size_t* bytes_allocated) {
+inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                                       bool grow, size_t* bytes_allocated) {
   if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
     return NULL;
   }
@@ -833,8 +809,8 @@
 }
 
 // DlMallocSpace-specific version.
-inline mirror::Object* Heap::TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
-                                           bool grow, size_t* bytes_allocated) {
+inline mirror::Object* Heap::TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                                       bool grow, size_t* bytes_allocated) {
   if (UNLIKELY(IsOutOfMemoryOnAllocation(alloc_size, grow))) {
     return NULL;
   }
@@ -846,15 +822,15 @@
 }
 
 template <class T>
-inline mirror::Object* Heap::Allocate(Thread* self, T* space, size_t alloc_size,
-                                      size_t* bytes_allocated) {
+inline mirror::Object* Heap::AllocateInstrumented(Thread* self, T* space, size_t alloc_size,
+                                                  size_t* bytes_allocated) {
   // Since allocation can cause a GC which will need to SuspendAll, make sure all allocations are
   // done in the runnable state where suspension is expected.
   DCHECK_EQ(self->GetState(), kRunnable);
   self->AssertThreadSuspensionIsAllowable();
 
-  mirror::Object* ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
-  if (ptr != NULL) {
+  mirror::Object* ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
+  if (LIKELY(ptr != NULL)) {
     return ptr;
   }
   return AllocateInternalWithGc(self, space, alloc_size, bytes_allocated);
@@ -869,7 +845,7 @@
   collector::GcType last_gc = WaitForConcurrentGcToComplete(self);
   if (last_gc != collector::kGcTypeNone) {
     // A GC was in progress and we blocked, retry allocation now that memory has been freed.
-    ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
+    ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
     if (ptr != NULL) {
       return ptr;
     }
@@ -904,7 +880,7 @@
       i = static_cast<size_t>(gc_type_ran);
 
       // Did we free sufficient memory for the allocation to succeed?
-      ptr = TryToAllocate(self, space, alloc_size, false, bytes_allocated);
+      ptr = TryToAllocateInstrumented(self, space, alloc_size, false, bytes_allocated);
       if (ptr != NULL) {
         return ptr;
       }
@@ -913,7 +889,7 @@
 
   // Allocations have failed after GCs;  this is an exceptional state.
   // Try harder, growing the heap if necessary.
-  ptr = TryToAllocate(self, space, alloc_size, true, bytes_allocated);
+  ptr = TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
   if (ptr != NULL) {
     return ptr;
   }
@@ -928,7 +904,7 @@
 
   // We don't need a WaitForConcurrentGcToComplete here either.
   CollectGarbageInternal(collector::kGcTypeFull, kGcCauseForAlloc, true);
-  return TryToAllocate(self, space, alloc_size, true, bytes_allocated);
+  return TryToAllocateInstrumented(self, space, alloc_size, true, bytes_allocated);
 }
 
 void Heap::SetTargetHeapUtilization(float target) {
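Note: only the instrumented path is renamed in the hunks above; the uninstrumented twins declared in heap.h below have the same shape minus the per-allocation checks. A minimal sketch of RecordAllocationUninstrumented, assuming it sits in heap-inl.h next to the instrumented version (it is not part of these hunks):

  inline size_t Heap::RecordAllocationUninstrumented(size_t size, mirror::Object* obj) {
    DCHECK(obj != NULL);
    DCHECK_GT(size, 0u);
    size_t old_num_bytes_allocated = static_cast<size_t>(num_bytes_allocated_.fetch_add(size));
    // No stats or debugger bookkeeping: this path is only installed while
    // instrumentation is off, so just assert that invariant.
    DCHECK(!Runtime::Current()->HasStatsEnabled());
    // As on the instrumented path, the GC never frees objects that are on the
    // allocation stack, so pushing (and collecting on overflow) is safe.
    while (!allocation_stack_->AtomicPushBack(obj)) {
      CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
    }
    return old_num_bytes_allocated + size;
  }

The returned running total is what CheckConcurrentGC (declared below) consumes as new_num_bytes_allocated, so callers need not re-read the atomic counter.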
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 0ac3cf0..ffd3034 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -101,6 +101,11 @@
 };
 static constexpr HeapVerificationMode kDesiredHeapVerification = kNoHeapVerification;
 
+// If true, measure the total allocation time.
+static constexpr bool kMeasureAllocationTime = false;
+// Primitive arrays larger than this size are put in the large object space.
+static constexpr size_t kLargeObjectThreshold = 3 * kPageSize;
+
 class Heap {
  public:
   static constexpr size_t kDefaultInitialSize = 2 * MB;
@@ -129,7 +134,17 @@
 
   // Allocates and initializes storage for an object instance.
   mirror::Object* AllocObject(Thread* self, mirror::Class* klass, size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocObjectInstrumented(self, klass, num_bytes);
+  }
+  mirror::Object* AllocObjectInstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  mirror::Object* AllocObjectUninstrumented(Thread* self, mirror::Class* klass, size_t num_bytes)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void DebugCheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  void ThrowOutOfMemoryError(size_t byte_count, bool large_object_allocation);
 
   void RegisterNativeAllocation(int bytes)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -425,9 +440,24 @@
   void AddModUnionTable(accounting::ModUnionTable* mod_union_table);
 
  private:
+  bool TryAllocLargeObjectInstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                       mirror::Object** obj_ptr, size_t* bytes_allocated)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool TryAllocLargeObjectUninstrumented(Thread* self, mirror::Class* c, size_t byte_count,
+                                         mirror::Object** obj_ptr, size_t* bytes_allocated)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count);
+  void CheckConcurrentGC(Thread* self, size_t new_num_bytes_allocated, mirror::Object* obj);
+
   // Allocates uninitialized storage. Passing in a null space tries to place the object in the
   // large object space.
-  template <class T> mirror::Object* Allocate(Thread* self, T* space, size_t num_bytes, size_t* bytes_allocated)
+  template <class T> mirror::Object* AllocateInstrumented(Thread* self, T* space, size_t num_bytes,
+                                                          size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  template <class T> mirror::Object* AllocateUninstrumented(Thread* self, T* space, size_t num_bytes,
+                                                            size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -439,17 +469,29 @@
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Try to allocate a number of bytes, this function never does any GCs.
-  mirror::Object* TryToAllocate(Thread* self, space::AllocSpace* space, size_t alloc_size, bool grow,
-                                size_t* bytes_allocated)
+  mirror::Object* TryToAllocateInstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                            bool grow, size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   // Try to allocate a number of bytes, this function never does any GCs. DlMallocSpace-specialized version.
-  mirror::Object* TryToAllocate(Thread* self, space::DlMallocSpace* space, size_t alloc_size, bool grow,
-                                size_t* bytes_allocated)
+  mirror::Object* TryToAllocateInstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                            bool grow, size_t* bytes_allocated)
       LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::AllocSpace* space, size_t alloc_size,
+                                              bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  mirror::Object* TryToAllocateUninstrumented(Thread* self, space::DlMallocSpace* space, size_t alloc_size,
+                                              bool grow, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(Locks::thread_suspend_count_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  void ThrowOutOfMemoryError(Thread* self, size_t byte_count, bool large_object_allocation)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
   bool IsOutOfMemoryOnAllocation(size_t alloc_size, bool grow);
 
   // Pushes a list of cleared references out to the managed heap.
@@ -459,7 +501,11 @@
   void RequestConcurrentGC(Thread* self) LOCKS_EXCLUDED(Locks::runtime_shutdown_lock_);
   bool IsGCRequestPending() const;
 
-  void RecordAllocation(size_t size, mirror::Object* object)
+  size_t RecordAllocationInstrumented(size_t size, mirror::Object* object)
+      LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
+  size_t RecordAllocationUninstrumented(size_t size, mirror::Object* object)
       LOCKS_EXCLUDED(GlobalSynchronization::heap_bitmap_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
@@ -608,9 +654,6 @@
   // Since the heap was created, how many objects have been freed.
   size_t total_objects_freed_ever_;
 
-  // Primitive objects larger than this size are put in the large object space.
-  const size_t large_object_threshold_;
-
   // Number of bytes allocated.  Adjusted after each allocation and free.
   AtomicInteger num_bytes_allocated_;
 
@@ -712,6 +755,16 @@
   friend class ScopedHeapLock;
   friend class space::SpaceTest;
 
+  class AllocationTimer {
+   private:
+    Heap* heap_;
+    mirror::Object** allocated_obj_ptr_;
+    uint64_t allocation_start_time_;
+   public:
+    AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr);
+    ~AllocationTimer();
+  };
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
 };
 
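AllocationTimer is an RAII helper gated on the kMeasureAllocationTime constant above; its definition is outside these hunks. A plausible sketch, assuming an atomic total_allocation_time_ counter on Heap and the runtime's NanoTime() clock (both assumptions, not shown in this patch):

  Heap::AllocationTimer::AllocationTimer(Heap* heap, mirror::Object** allocated_obj_ptr)
      : heap_(heap), allocated_obj_ptr_(allocated_obj_ptr),
        allocation_start_time_(kMeasureAllocationTime ? NanoTime() : 0) {}

  Heap::AllocationTimer::~AllocationTimer() {
    // Charge only successful allocations: the caller stores its result through
    // allocated_obj_ptr_ before the timer leaves scope.
    if (kMeasureAllocationTime && *allocated_obj_ptr_ != NULL) {
      heap_->total_allocation_time_.fetch_add(NanoTime() - allocation_start_time_);  // assumed field
    }
  }

Because kMeasureAllocationTime is compile-time false by default, the compiler can drop the timer from the default allocation path entirely.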
diff --git a/runtime/gc/space/dlmalloc_space-inl.h b/runtime/gc/space/dlmalloc_space-inl.h
index 5481141..242ef68 100644
--- a/runtime/gc/space/dlmalloc_space-inl.h
+++ b/runtime/gc/space/dlmalloc_space-inl.h
@@ -30,7 +30,7 @@
     MutexLock mu(self, lock_);
     obj = AllocWithoutGrowthLocked(num_bytes, bytes_allocated);
   }
-  if (obj != NULL) {
+  if (LIKELY(obj != NULL)) {
     // Zero freshly allocated memory, done while not holding the space's lock.
     memset(obj, 0, num_bytes);
   }
@@ -39,7 +39,7 @@
 
 inline mirror::Object* DlMallocSpace::AllocWithoutGrowthLocked(size_t num_bytes, size_t* bytes_allocated) {
   mirror::Object* result = reinterpret_cast<mirror::Object*>(mspace_malloc(mspace_, num_bytes));
-  if (result != NULL) {
+  if (LIKELY(result != NULL)) {
     if (kDebugSpaces) {
       CHECK(Contains(result)) << "Allocation (" << reinterpret_cast<void*>(result)
             << ") not in bounds of allocation space " << *this;
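The LIKELY/UNLIKELY annotations added above are branch-prediction hints, not behavior changes; in this codebase they are thin wrappers over GCC's __builtin_expect, roughly:

  #define LIKELY(x)    __builtin_expect((x), true)
  #define UNLIKELY(x)  __builtin_expect((x), false)

Marking the NULL outcomes unlikely keeps the zero-and-return success path laid out as straight-line code, which matters on a fast path taken by every allocation.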
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index c7b370f..c60e714 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -20,6 +20,7 @@
 #include "array.h"
 
 #include "class.h"
+#include "gc/heap-inl.h"
 #include "thread.h"
 #include "utils.h"
 
@@ -35,8 +36,9 @@
   return header_size + data_size;
 }
 
-inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                           size_t component_size) {
+static inline size_t ComputeArraySize(Thread* self, Class* array_class, int32_t component_count,
+                                      size_t component_size)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   DCHECK(array_class != NULL);
   DCHECK_GE(component_count, 0);
   DCHECK(array_class->IsArrayClass());
@@ -51,21 +53,49 @@
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
                                              PrettyDescriptor(array_class).c_str(),
                                              component_count).c_str());
-    return NULL;
+    return 0;  // failure
   }
+  return size;
+}
 
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  Array* array = down_cast<Array*>(heap->AllocObject(self, array_class, size));
+static inline Array* SetArrayLength(Array* array, size_t length) {
   if (LIKELY(array != NULL)) {
     DCHECK(array->IsArrayInstance());
-    array->SetLength(component_count);
+    array->SetLength(length);
   }
   return array;
 }
 
-inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count) {
+inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                       size_t component_size) {
+  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
+  if (UNLIKELY(size == 0)) {
+    return NULL;
+  }
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  Array* array = down_cast<Array*>(heap->AllocObjectInstrumented(self, array_class, size));
+  return SetArrayLength(array, component_count);
+}
+
+inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                         size_t component_size) {
+  size_t size = ComputeArraySize(self, array_class, component_count, component_size);
+  if (UNLIKELY(size == 0)) {
+    return NULL;
+  }
+  gc::Heap* heap = Runtime::Current()->GetHeap();
+  Array* array = down_cast<Array*>(heap->AllocObjectUninstrumented(self, array_class, size));
+  return SetArrayLength(array, component_count);
+}
+
+inline Array* Array::AllocInstrumented(Thread* self, Class* array_class, int32_t component_count) {
   DCHECK(array_class->IsArrayClass());
-  return Alloc(self, array_class, component_count, array_class->GetComponentSize());
+  return AllocInstrumented(self, array_class, component_count, array_class->GetComponentSize());
+}
+
+inline Array* Array::AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count) {
+  DCHECK(array_class->IsArrayClass());
+  return AllocUninstrumented(self, array_class, component_count, array_class->GetComponentSize());
 }
 
 }  // namespace mirror
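ComputeArraySize centralizes the length-to-byte math and overflow rejection that both Alloc variants now share; the top of the function is elided from this hunk. For illustration only, a hypothetical standalone version of the wrap-around test (names and structure assumed, not ART code):

  #include <cstddef>
  #include <cstdint>

  // Returns 0 on overflow, mirroring ComputeArraySize's failure sentinel.
  static size_t ComputeSizeChecked(size_t header_size, int32_t component_count,
                                   size_t component_size) {
    size_t data_size = static_cast<size_t>(component_count) * component_size;  // may wrap
    size_t size = header_size + data_size;
    // Dividing back must recover component_count, and adding the header must
    // not carry past SIZE_MAX either.
    bool overflow = (component_size != 0 &&
                     data_size / component_size != static_cast<size_t>(component_count)) ||
                    size < data_size;
    return overflow ? 0 : size;
  }

Returning 0 as the failure sentinel is what lets both AllocInstrumented and AllocUninstrumented share the UNLIKELY(size == 0) early-out above.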
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index db6132d..570dcaa 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -27,10 +27,24 @@
   // A convenience for code that doesn't know the component size,
   // and doesn't want to have to work it out itself.
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocInstrumented(self, array_class, component_count);
+  }
+  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
                       size_t component_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocInstrumented(self, array_class, component_count, component_size);
+  }
+  static Array* AllocUninstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                    size_t component_size)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  static Array* AllocInstrumented(Thread* self, Class* array_class, int32_t component_count,
+                                  size_t component_size)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   static Array* CreateMultiArray(Thread* self, Class* element_class, IntArray* dimensions)
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 438ce81..88cffb7 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -23,6 +23,7 @@
 #include "art_method.h"
 #include "class_loader.h"
 #include "dex_cache.h"
+#include "gc/heap-inl.h"
 #include "iftable.h"
 #include "object_array-inl.h"
 #include "runtime.h"
@@ -342,13 +343,22 @@
   SetFieldObject(OFFSET_OF_OBJECT_MEMBER(Class, name_), name, false);
 }
 
-inline Object* Class::AllocObject(Thread* self) {
+inline void Class::CheckObjectAlloc() {
   DCHECK(!IsArrayClass()) << PrettyClass(this);
   DCHECK(IsInstantiable()) << PrettyClass(this);
   // TODO: decide whether we want this check. It currently fails during bootstrap.
   // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
   DCHECK_GE(this->object_size_, sizeof(Object));
-  return Runtime::Current()->GetHeap()->AllocObject(self, this, this->object_size_);
+}
+
+inline Object* Class::AllocObjectInstrumented(Thread* self) {
+  CheckObjectAlloc();
+  return Runtime::Current()->GetHeap()->AllocObjectInstrumented(self, this, this->object_size_);
+}
+
+inline Object* Class::AllocObjectUninstrumented(Thread* self) {
+  CheckObjectAlloc();
+  return Runtime::Current()->GetHeap()->AllocObjectUninstrumented(self, this, this->object_size_);
 }
 
 }  // namespace mirror
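Class::AllocObject keeps its old name and forwards to the instrumented variant (see the class.h hunk below), so existing slow-path callers are unchanged; only code that knows instrumentation is off should pick the unchecked variant. A hypothetical call site:

  // Unchanged runtime code still compiles and behaves as before:
  mirror::Object* obj = klass->AllocObject(self);  // forwards to AllocObjectInstrumented
  // A caller on the default (uninstrumented) configuration can skip the checks:
  mirror::Object* fast = klass->AllocObjectUninstrumented(self);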
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index d97b603..4f8ab7d 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -371,7 +371,12 @@
   }
 
   // Creates a raw object instance but does not invoke the default constructor.
-  Object* AllocObject(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocObject(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+    return AllocObjectInstrumented(self);
+  }
+
+  Object* AllocObjectUninstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+  Object* AllocObjectInstrumented(Thread* self) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   bool IsVariableSize() const {
     // Classes and arrays vary in size, and so the object_size_ field cannot
@@ -764,6 +769,8 @@
   bool IsAssignableFromArray(const Class* klass) const
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  void CheckObjectAlloc() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+
   // defining class loader, or NULL for the "bootstrap" system loader
   ClassLoader* class_loader_;
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 86a8f1b..b4ce37f 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -99,7 +99,8 @@
       instrumentation_(),
       use_compile_time_class_path_(false),
       main_thread_group_(NULL),
-      system_thread_group_(NULL) {
+      system_thread_group_(NULL),
+      quick_alloc_entry_points_instrumentation_counter_(0) {
   for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
     callee_save_methods_[i] = NULL;
   }
@@ -1055,6 +1056,9 @@
     GetStats()->Clear(~0);
     // TODO: wouldn't it make more sense to clear _all_ threads' stats?
     Thread::Current()->GetStats()->Clear(~0);
+    InstrumentQuickAllocEntryPoints();
+  } else {
+    UninstrumentQuickAllocEntryPoints();
   }
   stats_enabled_ = new_state;
 }
@@ -1282,4 +1286,47 @@
   compile_time_class_paths_.Put(class_loader, class_path);
 }
 
+static void ResetQuickAllocEntryPointsForThread(Thread* thread, void* arg) {
+  thread->ResetQuickAllocEntryPointsForThread();
+}
+
+// Forward declaration; defined with the per-architecture quick entrypoint
+// initialization code.
+void SetQuickAllocEntryPointsInstrumented(bool instrumented);
+
+void Runtime::InstrumentQuickAllocEntryPoints() {
+  ThreadList* tl = thread_list_;
+  Thread* self = Thread::Current();
+  tl->SuspendAll();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    DCHECK_GE(quick_alloc_entry_points_instrumentation_counter_, 0);
+    int old_counter = quick_alloc_entry_points_instrumentation_counter_++;
+    if (old_counter == 0) {
+      // If it was disabled, enable it.
+      SetQuickAllocEntryPointsInstrumented(true);
+      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
+    }
+  }
+  tl->ResumeAll();
+}
+
+void Runtime::UninstrumentQuickAllocEntryPoints() {
+  ThreadList* tl = thread_list_;
+  Thread* self = Thread::Current();
+  tl->SuspendAll();
+  {
+    MutexLock mu(self, *Locks::runtime_shutdown_lock_);
+    MutexLock mu2(self, *Locks::thread_list_lock_);
+    DCHECK_GT(quick_alloc_entry_points_instrumentation_counter_, 0);
+    int new_counter = --quick_alloc_entry_points_instrumentation_counter_;
+    if (new_counter == 0) {
+      // Disable it if the counter becomes zero.
+      SetQuickAllocEntryPointsInstrumented(false);
+      tl->ForEach(ResetQuickAllocEntryPointsForThread, NULL);
+    }
+  }
+  tl->ResumeAll();
+}
+
 }  // namespace art
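Because the toggle is a counter rather than a flag, independent clients can nest enable/disable pairs safely; the entrypoint tables are only rewritten on the 0 -> 1 and 1 -> 0 transitions, each under SuspendAll. A usage sketch grounded in the code above:

  Runtime* runtime = Runtime::Current();
  runtime->InstrumentQuickAllocEntryPoints();    // 0 -> 1: installs the *_instrumented stubs
  runtime->InstrumentQuickAllocEntryPoints();    // 1 -> 2: already instrumented, no rewrite
  runtime->UninstrumentQuickAllocEntryPoints();  // 2 -> 1: stays instrumented
  runtime->UninstrumentQuickAllocEntryPoints();  // 1 -> 0: restores the default fast stubs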
diff --git a/runtime/runtime.h b/runtime/runtime.h
index d73940b..552cfdf 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -398,6 +398,9 @@
   const std::vector<const DexFile*>& GetCompileTimeClassPath(jobject class_loader);
   void SetCompileTimeClassPath(jobject class_loader, std::vector<const DexFile*>& class_path);
 
+  void InstrumentQuickAllocEntryPoints();
+  void UninstrumentQuickAllocEntryPoints();
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -514,6 +517,8 @@
   jobject main_thread_group_;
   jobject system_thread_group_;
 
+  int quick_alloc_entry_points_instrumentation_counter_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7e79ce1..67a614f 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -108,6 +108,13 @@
                   &quick_entrypoints_);
 }
 
+// Forward declaration; defined with the per-architecture quick entrypoint
+// initialization code.
+void ResetQuickAllocEntryPoints(QuickEntryPoints* qpoints);
+
+void Thread::ResetQuickAllocEntryPointsForThread() {
+  ResetQuickAllocEntryPoints(&quick_entrypoints_);
+}
+
 void Thread::SetDeoptimizationShadowFrame(ShadowFrame* sf) {
   deoptimization_shadow_frame_ = sf;
 }
diff --git a/runtime/thread.h b/runtime/thread.h
index dbf9736..2d9e009 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -576,6 +576,8 @@
 
   void AtomicClearFlag(ThreadFlag flag);
 
+  void ResetQuickAllocEntryPointsForThread();
+
  private:
   // We have no control over the size of 'bool', but want our boolean fields
   // to be 4-byte quantities.