Revert "Make object allocation entrypoints only take a class."

960-default-smali64 is failing.

This reverts commit 2b615ba29c4dfcf54aaf44955f2eac60f5080b2e.

Change-Id: Iebb8ee5a917fa84c5f01660ce432798524d078ef
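In short, this revert switches the object-allocation entrypoints back from taking an already-resolved class to taking a dex type index plus the referring ArtMethod*. A minimal C++ sketch of the two calling conventions, with signatures inferred from the call-site comments in the diff below (illustrative declarations, not the exact runtime headers):

    #include <cstdint>
    class Thread;
    class ArtMethod;
    namespace mirror { class Class; class Object; }

    // Before the revert: compiled code resolved the class itself, so the
    // entrypoint took only the class plus the current thread.
    extern "C" mirror::Object* artAllocObjectFromCodeResolvedRosAlloc(
        mirror::Class* klass, Thread* self);

    // After the revert: the entrypoint takes the dex type index and the
    // referring method, and resolves the class from the method's dex cache.
    extern "C" mirror::Object* artAllocObjectFromCodeRosAlloc(
        uint32_t type_idx, ArtMethod* method, Thread* self);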
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 4d4ebdc..a71ab4b 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1124,23 +1124,28 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_resolved_rosalloc
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
-    // r0: type/return value, r9: Thread::Current
-    // r1, r2, r3, r12: free.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
+    // r2, r3, r12: free.
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    r2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
+
     ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
                                                               // allocation stack has room.
                                                               // TODO: consider using ldrd.
     ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
     cmp    r3, r12
-    bhs    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
 
-    ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
+    ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
     cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                               // local allocation. Also does the
                                                               // initialized and finalizable checks.
-    bhs    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
                                                               // Compute the rosalloc bracket index
                                                               // from the size. Since the size is
                                                               // already aligned we can combine the
@@ -1154,7 +1159,7 @@
                                                               // Load the free list head (r3). This
                                                               // will be the return val.
     ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
-    cbz    r3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    cbz    r3, .Lart_quick_alloc_object_rosalloc_slow_path
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
     ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
                                                               // and update the list head with the
@@ -1167,8 +1172,8 @@
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
-    POISON_HEAP_REF r0
-    str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
+    POISON_HEAP_REF r2
+    str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that it also ensures ordering of
                                                               // the class status load with respect
@@ -1199,20 +1204,20 @@
     mov    r0, r3                                             // Set the return value and return.
     bx     lr
 
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lart_quick_alloc_object_rosalloc_slow_path:
     SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
-    mov    r1, r9                     @ pass Thread::Current
-    bl     artAllocObjectFromCodeResolvedRosAlloc     @ (mirror::Class* cls, Thread*)
+    mov    r2, r9                     @ pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_rosalloc
+END art_quick_alloc_object_rosalloc
 
-// The common fast path code for art_quick_alloc_object_resolved_tlab
-// and art_quick_alloc_object_resolved_region_tlab.
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 //
-// r0: type r9: Thread::Current, r1, r2, r3, r12: free.
-// Need to preserve r0 to the slow path.
-.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel
+// r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free.
+// Need to preserve r0 and r1 to the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    r2, \slowPathLabel                                 // Check null class
                                                               // Load thread_local_pos (r12) and
                                                               // thread_local_end (r3) with ldrd.
                                                               // Check constraints for ldrd.
@@ -1227,14 +1232,14 @@
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
                                                               // Reload old thread_local_pos (r0)
                                                               // for the return value.
-    ldr    r2, [r9, #THREAD_LOCAL_POS_OFFSET]
-    add    r1, r2, r3
+    ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
+    add    r1, r0, r3
     str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
     ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
     add    r1, r1, #1
     str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
-    POISON_HEAP_REF r0
-    str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
+    POISON_HEAP_REF r2
+    str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that the code after this allocation
                                                               // site will see the right values in
@@ -1242,46 +1247,71 @@
                                                               // Alternatively we could use "ishst"
                                                               // if we use load-acquire for the
                                                               // object size load.)
-    mov    r0, r2
     dmb    ish
     bx     lr
 .endm
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_tlab, TLAB).
-ENTRY art_quick_alloc_object_resolved_tlab
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
     // Fast path tlab allocation.
-    // r0: type, r9: Thread::Current
-    // r1, r2, r3, r12: free.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
+    // r2, r3, r12: free.
 #if defined(USE_READ_BARRIER)
     mvn    r0, #0                                             // Read barrier not supported here.
     bx     lr                                                 // Return -1.
 #endif
-    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
     SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
-    mov    r1, r9                                             // Pass Thread::Current.
-    bl     artAllocObjectFromCodeResolvedTLAB                 // (mirror::Class* klass, Thread*)
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_tlab
+END art_quick_alloc_object_tlab
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
-ENTRY art_quick_alloc_object_resolved_region_tlab
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+ENTRY art_quick_alloc_object_region_tlab
     // Fast path tlab allocation.
-    // r0: type, r9: Thread::Current, r1, r2, r3, r12: free.
+    // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free.
 #if !defined(USE_READ_BARRIER)
     eor    r0, r0, r0                                         // Read barrier must be enabled here.
     sub    r0, r0, #1                                         // Return -1.
     bx     lr
 #endif
-    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
-.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
+    ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
+                                                              // Load the class (r2)
+    ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+                                                              // Read barrier for class load.
+    ldr    r3, [r9, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_marking:
+    cbz    r2, .Lart_quick_alloc_object_region_tlab_slow_path  // Null check for loading lock word.
+    // Check the lock word for the mark bit; if marked, do the allocation.
+    ldr    r3, [r2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    ands   r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    bne    .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    push   {r0, r1, r3, lr}                                   // Save registers. r3 is pushed only
+                                                              // to align sp by 16 bytes.
+    mov    r0, r2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    r2, r0                                             // Get the (marked) class back.
+    pop    {r0, r1, r3, lr}
+    b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
     SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
-    mov    r1, r9                                             // Pass Thread::Current.
-    bl     artAllocObjectFromCodeResolvedRegionTLAB           // (mirror::Class* klass, Thread*)
+    mov    r2, r9                                             // Pass Thread::Current.
+    bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_region_tlab
+END art_quick_alloc_object_region_tlab
 
     /*
      * Called by managed code when the value in rSUSPEND has been decremented to 0.
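Before the arm64 hunks: the ARM fast path above is a bump-pointer allocation out of the thread's local buffer. A standalone C++ sketch of the same logic, using simplified stand-in types rather than ART's (the field names here are assumptions mirroring the THREAD_LOCAL_* offsets in the assembly):

    #include <cstddef>
    #include <cstdint>

    struct Class { uint32_t object_size_alloc_fast_path; };
    struct Object { Class* klass; };
    struct Thread {
      uint8_t* tlab_pos;      // THREAD_LOCAL_POS_OFFSET
      uint8_t* tlab_end;      // THREAD_LOCAL_END_OFFSET
      size_t tlab_objects;    // THREAD_LOCAL_OBJECTS_OFFSET
    };

    Object* TlabAllocFastPath(Thread* self, Class* klass) {
      if (klass == nullptr) {
        return nullptr;                    // cbz r2: null class, slow path.
      }
      size_t size = klass->object_size_alloc_fast_path;
      if (size > static_cast<size_t>(self->tlab_end - self->tlab_pos)) {
        return nullptr;                    // Does not fit: take the slow path.
      }
      uint8_t* old_pos = self->tlab_pos;   // Old thread_local_pos is the object.
      self->tlab_pos = old_pos + size;     // Store new thread_local_pos.
      self->tlab_objects++;                // Increment thread_local_objects.
      Object* obj = reinterpret_cast<Object*>(old_pos);
      obj->klass = klass;                  // Store the class pointer; the asm
                                           // then issues a "dmb ish" fence.
      return obj;
    }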
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 8b1e038..b88515f 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1669,6 +1669,7 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
 // Comment out allocators that have arm64 specific asm.
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
@@ -1681,23 +1682,27 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-ENTRY art_quick_alloc_object_resolved_rosalloc
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
-    // x0: type, xSELF(x19): Thread::Current
-    // x1-x7: free.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
     ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                               // allocation stack has room.
                                                               // ldp won't work due to large offset.
     ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
     cmp    x3, x4
-    bhs    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
-    ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
+    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
     cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                               // local allocation. Also does the
                                                               // finalizable and initialization
                                                               // checks.
-    bhs    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
                                                               // Compute the rosalloc bracket index
                                                               // from the size. Since the size is
                                                               // already aligned we can combine the
@@ -1710,7 +1715,7 @@
                                                               // Load the free list head (x3). This
                                                               // will be the return val.
     ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
-    cbz    x3, .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
     ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
                                                               // and update the list head with the
@@ -1723,8 +1728,8 @@
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
-    POISON_HEAP_REF w0
-    str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
+    POISON_HEAP_REF w2
+    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that it also ensures ordering of
                                                               // the object size load with respect
@@ -1754,13 +1759,13 @@
 
     mov    x0, x3                                             // Set the return value and return.
     ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
-    mov    x1, xSELF                                // pass Thread::Current
-    bl     artAllocObjectFromCodeResolvedRosAlloc   // (mirror::Class* klass, Thread*)
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
+    mov    x2, xSELF                       // pass Thread::Current
+    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_rosalloc
+END art_quick_alloc_object_rosalloc
 
 
 // The common fast path code for art_quick_alloc_array_region_tlab.
@@ -1829,6 +1834,16 @@
     ret
 .endm
 
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+//
+// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
+// x3-x7: free.
+// Need to preserve x0 and x1 to the slow path.
+.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
+    cbz    x2, \slowPathLabel                                 // Check null class
+    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
+.endm
+
 // TODO: delete ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since it is the same as
 // ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED.
 .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
@@ -1838,18 +1853,20 @@
 .macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
     ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
     ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
-    ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
+    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
     add    x6, x4, x7                                         // Add object size to tlab pos.
     cmp    x6, x5                                             // Check if it fits, overflow works
                                                               // since the tlab pos and end are 32
                                                               // bit values.
     bhi    \slowPathLabel
+    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
+    mov    x0, x4
     str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
     ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
     add    x5, x5, #1
     str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
-    POISON_HEAP_REF w0
-    str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
+    POISON_HEAP_REF w2
+    str    w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
                                                               // Fence. This is "ish" not "ishst" so
                                                               // that the code after this allocation
                                                               // site will see the right values in
@@ -1857,52 +1874,91 @@
                                                               // Alternatively we could use "ishst"
                                                               // if we use load-acquire for the
                                                               // object size load.)
-    mov    x0, x4
     dmb    ish
     ret
 .endm
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB).
-ENTRY art_quick_alloc_object_resolved_tlab
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
+ENTRY art_quick_alloc_object_tlab
     // Fast path tlab allocation.
-    // x0: type, xSELF(x19): Thread::Current
-    // x1-x7: free.
+    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // x2-x7: free.
 #if defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier not supported here.
     ret                                                       // Return -1.
 #endif
-    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME                    // Save callee saves in case of GC.
-    mov    x1, xSELF                              // Pass Thread::Current.
-    bl     artAllocObjectFromCodeResolvedTLAB     // (mirror::Class*, Thread*)
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
+    mov    x2, xSELF                     // Pass Thread::Current.
+    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-END art_quick_alloc_object_resolved_tlab
+END art_quick_alloc_object_tlab
 
 // The common code for art_quick_alloc_object_*region_tlab
-.macro GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB name, entrypoint, fast_path
+.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved, read_barrier
 ENTRY \name
     // Fast path region tlab allocation.
-    // x0: type, xSELF(x19): Thread::Current
-    // x1-x7: free.
+    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
+    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
+    // x2-x7: free.
 #if !defined(USE_READ_BARRIER)
     mvn    x0, xzr                                            // Read barrier must be enabled here.
     ret                                                       // Return -1.
 #endif
+.if \is_resolved
+    mov    x2, x0                                             // Class is already stored in x0.
+.else
+    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
+                                                              // Load the class (x2)
+    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
+    // If the class is null, go slow path. The check is required to read the lock word.
+    cbz    w2, .Lslow_path\name
+.endif
+.if \read_barrier
+    // Most common case: GC is not marking.
+    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x3, .Lmarking\name
+.endif
 .Ldo_allocation\name:
     \fast_path .Lslow_path\name
+.Lmarking\name:
+.if \read_barrier
+    // GC is marking, check the lock word of the class for the mark bit.
+    // Class is not null, check mark bit in lock word.
+    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    // If the bit is not zero, do the allocation.
+    tbnz   w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
+                                                              // The read barrier slow path. Mark
+                                                              // the class.
+    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32                   // Save registers (x0, x1, lr).
+    SAVE_REG xLR, 24                                          // Align sp by 16 bytes.
+    mov    x0, x2                                             // Pass the class as the first param.
+    bl     artReadBarrierMark
+    mov    x2, x0                                             // Get the (marked) class back.
+    RESTORE_REG xLR, 24
+    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32                // Restore registers.
+    b      .Ldo_allocation\name
+.endif
 .Lslow_path\name:
     SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
-    mov    x1, xSELF                           // Pass Thread::Current.
-    bl     \entrypoint                         // (mirror::Class*, Thread*)
+    mov    x2, xSELF                           // Pass Thread::Current.
+    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 END \name
 .endm
 
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED
-GENERATE_ALLOC_OBJECT_RESOLVED_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED
+// Use ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED since the null check is already done in GENERATE_ALLOC_OBJECT_REGION_TLAB.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 0, 1
+// No read barrier for the resolved or initialized cases since the caller is responsible for the
+// read barrier due to the to-space invariant.
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1, 0
+GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1, 0
 
 // TODO: We could use this macro for the normal tlab allocator too.
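The read-barrier branch of GENERATE_ALLOC_OBJECT_REGION_TLAB condenses to: skip the barrier entirely when the GC is not marking, skip the runtime call when the class's lock-word mark bit is already set, and otherwise call out to mark the class. A standalone C++ sketch (the shift value is an assumption for illustration; the runtime uses LOCK_WORD_MARK_BIT_SHIFT and artReadBarrierMark):

    #include <cstdint>

    constexpr int kLockWordMarkBitShift = 29;  // Assumed value, illustration only.

    struct Class { uint32_t lock_word; };

    // Stand-in for artReadBarrierMark: returns the marked (to-space) class.
    Class* ReadBarrierMark(Class* klass) { return klass; }

    Class* ReadBarrierOnClass(bool gc_is_marking, Class* klass) {
      if (!gc_is_marking) {
        return klass;                          // Most common case: GC not marking.
      }
      if ((klass->lock_word >> kLockWordMarkBitShift) & 1u) {
        return klass;                          // Mark bit set: already marked.
      }
      return ReadBarrierMark(klass);           // Slow path: mark the class.
    }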
 
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 964ea56..3e8cdc9 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1831,10 +1831,116 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread stack top offset
+    # t4: thread stack bottom offset
+    # v0: free list head
+    #
+    # t5, t6 : temps
+
+    lw    $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_32($a1)       # Load dex cache resolved types
+                                                               # array.
+
+    sll   $t5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT            # Shift the value.
+    addu  $t5, $t0, $t5                                        # Compute the index.
+    lw    $t0, 0($t5)                                          # Load class (t0).
+    beqz  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li    $t6, MIRROR_CLASS_STATUS_INITIALIZED
+    lw    $t5, MIRROR_CLASS_STATUS_OFFSET($t0)                 # Check class status.
+    bne   $t5, $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor   $t5, $t5, $t5
+    addu  $t0, $t0, $t5
+
+    lw    $t5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)           # Check if access flags has
+    li    $t6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE               # kAccClassIsFinalizable.
+    and   $t6, $t5, $t6
+    bnez  $t6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)        # Check if thread local allocation
+    lw    $t4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)        # stack has any room left.
+    bgeu  $t3, $t4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lw    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)            # Load object size (t1).
+    li    $t5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE          # Check if size is for a thread local
+                                                               # allocation.
+    bgtu  $t1, $t5, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size. Align the size up to the rosalloc bracket
+    # quantum size, divide by the quantum size, and subtract one.
+
+    addiu $t1, $t1, -1                                         # Decrease obj size and shift right
+    srl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT        # by quantum.
+
+    sll   $t2, $t1, POINTER_SIZE_SHIFT
+    addu  $t2, $t2, $s1
+    lw    $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)                # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+
+    lw    $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqz  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+    nop
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+
+    lw    $t5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the next pointer. The offsets are
+    # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw    $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+
+    sw    $v0, 0($t3)
+    addiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sw    $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+
+    lw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu $t5, $t5, -1
+    sw    $t5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                                          # Fence.
+
+    jalr  $zero, $ra
+    nop
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+
+    SETUP_SAVE_REFS_ONLY_FRAME
+    la    $t9, artAllocObjectFromCodeRosAlloc
+    jalr  $t9
+    move  $a2, $s1                                                # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
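The bracket-index computation above ("decrease obj size and shift right by quantum") works because rosalloc sizes on this path are already quantum-aligned, so aligning up, dividing by the quantum, and subtracting one collapse into a single shift. A small worked sketch in C++ (the shift value is an assumption; the runtime defines ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT):

    #include <cstddef>

    constexpr size_t kBracketQuantumSizeShift = 4;  // Assumed: 16-byte quantum.

    // For a quantum-aligned size, (size - 1) >> shift is the bracket index,
    // matching the addiu/srl pair in the assembly above.
    size_t BracketIndex(size_t size) {
      return (size - 1) >> kBracketQuantumSizeShift;
    }
    // E.g. with a 16-byte quantum: 16 -> 0, 32 -> 1, 48 -> 2.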
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 2a18d53..0861d2d 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1775,9 +1775,107 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
-GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+ENTRY art_quick_alloc_object_rosalloc
+
+    # Fast path rosalloc allocation
+    # a0: type_idx
+    # a1: ArtMethod*
+    # s1: Thread::Current
+    # -----------------------------
+    # t0: class
+    # t1: object size
+    # t2: rosalloc run
+    # t3: thread stack top offset
+    # a4: thread stack bottom offset
+    # v0: free list head
+    #
+    # a5, a6 : temps
+
+    ld     $t0, ART_METHOD_DEX_CACHE_TYPES_OFFSET_64($a1)   # Load dex cache resolved types array.
+
+    dsll   $a5, $a0, COMPRESSED_REFERENCE_SIZE_SHIFT        # Shift the value.
+    daddu  $a5, $t0, $a5                                    # Compute the index.
+    lwu    $t0, 0($a5)                                      # Load class (t0).
+    beqzc  $t0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    li     $a6, MIRROR_CLASS_STATUS_INITIALIZED
+    lwu    $a5, MIRROR_CLASS_STATUS_OFFSET($t0)             # Check class status.
+    bnec   $a5, $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Add a fake dependence from the following access flag and size loads to the status load. This
+    # is to prevent those loads from being reordered above the status load and reading wrong values.
+    xor    $a5, $a5, $a5
+    daddu  $t0, $t0, $a5
+
+    lwu    $a5, MIRROR_CLASS_ACCESS_FLAGS_OFFSET($t0)       # Check if access flags has
+    li     $a6, ACCESS_FLAGS_CLASS_IS_FINALIZABLE           # kAccClassIsFinalizable.
+    and    $a6, $a5, $a6
+    bnezc  $a6, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    ld     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)    # Check if thread local allocation stack
+    ld     $a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET($s1)    # has any room left.
+    bgeuc  $t3, $a4, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    lwu    $t1, MIRROR_CLASS_OBJECT_SIZE_OFFSET($t0)        # Load object size (t1).
+    li     $a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE      # Check if size is for a thread local
+                                                            # allocation.
+    bltuc  $a5, $t1, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Compute the rosalloc bracket index from the size. Align the size up to the rosalloc bracket
+    # quantum size, divide by the quantum size, and subtract one.
+    daddiu $t1, $t1, -1                                     # Decrease obj size and shift right by
+    dsrl   $t1, $t1, ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT    # quantum.
+
+    dsll   $t2, $t1, POINTER_SIZE_SHIFT
+    daddu  $t2, $t2, $s1
+    ld     $t2, THREAD_ROSALLOC_RUNS_OFFSET($t2)            # Load rosalloc run (t2).
+
+    # Load the free list head (v0).
+    # NOTE: this will be the return val.
+    ld     $v0, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+    beqzc  $v0, .Lart_quick_alloc_object_rosalloc_slow_path
+
+    # Load the next pointer of the head and update the list head with the next pointer.
+    ld     $a5, ROSALLOC_SLOT_NEXT_OFFSET($v0)
+    sd     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)($t2)
+
+    # Store the class pointer in the header. This also overwrites the next pointer. The offsets are
+    # asserted to match.
+
+#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
+#error "Class pointer needs to overwrite next pointer."
+#endif
+
+    POISON_HEAP_REF $t0
+    sw     $t0, MIRROR_OBJECT_CLASS_OFFSET($v0)
+
+    # Push the new object onto the thread local allocation stack and increment the thread local
+    # allocation stack top.
+    sw     $v0, 0($t3)
+    daddiu $t3, $t3, COMPRESSED_REFERENCE_SIZE
+    sd     $t3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET($s1)
+
+    # Decrement the size of the free list.
+    lw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+    addiu  $a5, $a5, -1
+    sw     $a5, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)($t2)
+
+    sync                                         # Fence.
+
+    jalr   $zero, $ra
+    .cpreturn                                    # Restore gp from t8 in branch delay slot.
+
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    SETUP_SAVE_REFS_ONLY_FRAME
+    jal    artAllocObjectFromCodeRosAlloc
+    move   $a2, $s1                              # Pass self as argument.
+    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+
+END art_quick_alloc_object_rosalloc
+
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 
     /*
      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
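Both MIPS fast paths pop the head of a rosalloc run's free list and then let the class-pointer store overwrite the slot's next pointer (the #if/#error above guards that the two offsets coincide). A standalone C++ sketch of the pop, with simplified stand-in types rather than ART's run structures:

    #include <cstdint>

    struct Slot { Slot* next; };
    struct FreeList {
      Slot* head;       // ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET
      uint32_t size;    // ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET
    };

    // Pop the head slot; a null return means the run is empty (slow path).
    // The assembly then stores the class pointer over slot->next, which is
    // why ROSALLOC_SLOT_NEXT_OFFSET must equal MIRROR_OBJECT_CLASS_OFFSET.
    Slot* PopSlot(FreeList* list) {
      Slot* head = list->head;
      if (head == nullptr) {
        return nullptr;
      }
      list->head = head->next;  // Update the list head with the next pointer.
      list->size--;             // Decrement the size of the free list.
      return head;
    }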
diff --git a/runtime/arch/quick_alloc_entrypoints.S b/runtime/arch/quick_alloc_entrypoints.S
index abd9046..db2fdca 100644
--- a/runtime/arch/quick_alloc_entrypoints.S
+++ b/runtime/arch/quick_alloc_entrypoints.S
@@ -15,13 +15,15 @@
  */
 
 .macro GENERATE_ALLOC_ENTRYPOINTS c_suffix, cxx_suffix
+// Called by managed code to allocate an object.
+TWO_ARG_DOWNCALL art_quick_alloc_object\c_suffix, artAllocObjectFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an object of a resolved class.
-ONE_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+TWO_ARG_DOWNCALL art_quick_alloc_object_resolved\c_suffix, artAllocObjectFromCodeResolved\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an object of an initialized class.
-ONE_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+TWO_ARG_DOWNCALL art_quick_alloc_object_initialized\c_suffix, artAllocObjectFromCodeInitialized\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an object when the caller doesn't know whether it has access
 // to the created type.
-ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks\c_suffix, artAllocObjectFromCodeWithChecks\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check\c_suffix, artAllocObjectFromCodeWithAccessCheck\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an array.
 THREE_ARG_DOWNCALL art_quick_alloc_array\c_suffix, artAllocArrayFromCode\cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 // Called by managed code to allocate an array of a resolved class.
@@ -59,12 +61,14 @@
 // Generate the allocation entrypoints for each allocator. This is used as an alternative to
 // GENERATE_ALL_ALLOC_ENTRYPOINTS for selectively implementing allocation fast paths in
 // hand-written assembly.
+#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
+  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
-  ONE_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
-  ONE_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
-  ONE_ARG_DOWNCALL art_quick_alloc_object_with_checks ## c_suffix, artAllocObjectFromCodeWithChecks ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
   THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
 #define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
@@ -89,7 +93,8 @@
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_REGION_TLAB_ALLOCATOR
 // This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
@@ -104,7 +109,8 @@
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_TLAB_ALLOCATOR
 // This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
@@ -123,6 +129,7 @@
 .endm
 
 .macro GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
@@ -135,6 +142,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
@@ -148,7 +156,8 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)
 
 // This is to be separately defined for each architecture to allow a hand-written assembly fast path.
-// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
@@ -160,6 +169,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
@@ -172,6 +182,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
@@ -184,6 +195,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
@@ -196,6 +208,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
@@ -208,6 +221,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
@@ -220,6 +234,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
@@ -232,6 +247,7 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)
 
+GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
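Each TWO_ARG_DOWNCALL line restored above emits a stub named art_quick_alloc_object<suffix> that saves the callee-save frame and calls a C++ entrypoint of the shape sketched below: resolve the type index through the referring method, then allocate. This is an illustrative outline with stub helpers, not ART's actual implementation:

    #include <cstdint>

    class Thread;
    class ArtMethod;
    namespace mirror { class Class; class Object; }

    // Illustrative helpers standing in for the runtime's resolution and
    // allocation steps.
    mirror::Class* ResolveType(uint32_t type_idx, ArtMethod* method);
    mirror::Object* Allocate(mirror::Class* klass, Thread* self);

    // Shape of artAllocObjectFromCode<suffix>(type_idx, method, self). On
    // failure it returns null with an exception pending, which the stub's
    // RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER macro then delivers.
    mirror::Object* AllocObjectFromCodeSketch(uint32_t type_idx,
                                              ArtMethod* method,
                                              Thread* self) {
      mirror::Class* klass = ResolveType(type_idx, method);
      if (klass == nullptr) {
        return nullptr;  // Resolution failed and threw.
      }
      return Allocate(klass, self);
    }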
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index ee65fa8..9e385f8 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -1062,8 +1062,12 @@
 
   EXPECT_FALSE(self->IsExceptionPending());
   {
-    size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U,
-                            StubTest::GetEntrypoint(self, kQuickAllocObjectWithChecks),
+    // Use an arbitrary method from c as the referrer.
+    size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex().index_),    // type_idx
+                            // arbitrary
+                            reinterpret_cast<size_t>(c->GetVirtualMethod(0, kRuntimePointerSize)),
+                            0U,
+                            StubTest::GetEntrypoint(self, kQuickAllocObject),
                             self);
 
     EXPECT_FALSE(self->IsExceptionPending());
@@ -1074,6 +1078,8 @@
   }
 
   {
+    // We can use null in the second argument as we do not need a method here (not used in
+    // resolved/initialized cases)
     size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U,
                             StubTest::GetEntrypoint(self, kQuickAllocObjectResolved),
                             self);
@@ -1086,6 +1092,8 @@
   }
 
   {
+    // We can use null in the second argument as we do not need a method here (not used in
+    // resolved/initialized cases)
     size_t result = Invoke3(reinterpret_cast<size_t>(c.Get()), 0u, 0U,
                             StubTest::GetEntrypoint(self, kQuickAllocObjectInitialized),
                             self);
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 62c29cf..c6f4c03 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -956,42 +956,52 @@
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
-    // eax: type/return value
-    // ecx, ebx, edx: free
+    // eax: uint32_t type_idx/return value, ecx: ArtMethod*
+    // ebx, edx: free
+    PUSH edi
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx  // Load dex cache resolved types array
+                                                        // Load the class (edx)
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    testl %edx, %edx                                    // Check null class
+    jz   .Lart_quick_alloc_object_rosalloc_slow_path
+
     movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
                                                         // Check if the thread local allocation
                                                         // stack has room
-    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %ecx
-    cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %ecx
-    jae  .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi
+    cmpl THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%ebx), %edi
+    jae  .Lart_quick_alloc_object_rosalloc_slow_path
 
-    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx  // Load the object size (ecx)
+    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %edi  // Load the object size (edi)
                                                         // Check if the size is for a thread
                                                         // local allocation. Also does the
                                                         // finalizable and initialization check.
-    cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %ecx
-    ja   .Lart_quick_alloc_object_resolved_rosalloc_slow_path
-    shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %ecx // Calculate the rosalloc bracket index
+    cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %edi
+    ja   .Lart_quick_alloc_object_rosalloc_slow_path
+    shrl LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %edi // Calculate the rosalloc bracket index
                                                             // from object size.
                                                         // Load thread local rosalloc run (ebx)
                                                         // Subtract __SIZEOF_POINTER__ to subtract
                                                         // one from edi as there is no 0 byte run
                                                         // and the size is already aligned.
-    movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %ecx, __SIZEOF_POINTER__), %ebx
+    movl (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%ebx, %edi, __SIZEOF_POINTER__), %ebx
                                                         // Load free_list head (edi),
                                                         // this will be the return value.
-    movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %ecx
-    jecxz   .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    movl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx), %edi
+    test %edi, %edi
+    jz   .Lart_quick_alloc_object_rosalloc_slow_path
                                                         // Point of no slow path. Won't go to
-                                                        // the slow path from here on.
+                                                        // the slow path from here on. OK to
+                                                        // clobber eax and ecx.
+    movl %edi, %eax
                                                         // Load the next pointer of the head
                                                         // and update head of free list with
                                                         // next pointer
-    movl ROSALLOC_SLOT_NEXT_OFFSET(%ecx), %edx
-    movl %edx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx)
+    movl ROSALLOC_SLOT_NEXT_OFFSET(%eax), %edi
+    movl %edi, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%ebx)
                                                         // Decrement size of free list by 1
     decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%ebx)
                                                         // Store the class pointer in the
@@ -1001,104 +1011,141 @@
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
-    POISON_HEAP_REF eax
-    movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%ecx)
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
     movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
                                                         // Push the new object onto the thread
                                                         // local allocation stack and
                                                         // increment the thread local
                                                         // allocation stack top.
-    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %eax
-    movl %ecx, (%eax)
-    addl LITERAL(COMPRESSED_REFERENCE_SIZE), %eax
-    movl %eax, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx)
+    movl THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx), %edi
+    movl %eax, (%edi)
+    addl LITERAL(COMPRESSED_REFERENCE_SIZE), %edi
+    movl %edi, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%ebx)
                                                         // No fence needed for x86.
-    movl %ecx, %eax                                     // Move object to return register
+    POP edi
     ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lart_quick_alloc_object_rosalloc_slow_path:
+    POP edi
     SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx          // save ref containing registers for GC
     // Outgoing argument set up
-    subl LITERAL(8), %esp                       // alignment padding
+    PUSH eax                      // alignment padding
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
     PUSH eax
-    call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc)  // cxx_name(arg0, Thread*)
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)  // cxx_name(arg0, arg1, Thread*)
     addl LITERAL(16), %esp                       // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_SAVE_REFS_ONLY_FRAME                 // restore frame up to return address
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER      // return or deliver exception
-END_FUNCTION art_quick_alloc_object_resolved_rosalloc
+END_FUNCTION art_quick_alloc_object_rosalloc
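
For readers following the assembly, a rough C++ rendering of the rosalloc fast path
restored above. This is a sketch only: the struct and field names (Slot, Run,
rosalloc_runs, alloc_stack_top, ...) are simplified stand-ins for the real runtime
types, and the two constants are assumed values, not the runtime's.

    #include <cstdint>

    // Simplified stand-ins for the runtime types used by the fast path.
    struct Slot { Slot* next; };
    struct Run { Slot* free_list_head; uint32_t free_list_size; };
    struct Class { uint32_t object_size_alloc_fast_path; };
    struct Object { Class* klass; };  // The class pointer overwrites Slot::next.
    struct Thread {
      void** alloc_stack_top;
      void** alloc_stack_end;
      Run* rosalloc_runs[16];
    };

    constexpr uint32_t kMaxThreadLocalBracketSize = 128;  // assumed value
    constexpr uint32_t kBracketQuantumSizeShift = 4;      // assumed value

    // Returns nullptr where the assembly branches to the slow path.
    inline void* RosAllocFastPath(Thread* self, Class* klass) {
      if (self->alloc_stack_top >= self->alloc_stack_end) {
        return nullptr;  // Thread-local allocation stack is full.
      }
      uint32_t size = klass->object_size_alloc_fast_path;
      if (size > kMaxThreadLocalBracketSize) {
        return nullptr;  // Too big, finalizable, or class not initialized.
      }
      // The size is already aligned, so the shift doubles as the bracket index.
      Run* run = self->rosalloc_runs[size >> kBracketQuantumSizeShift];
      Slot* slot = run->free_list_head;  // This will be the return value.
      if (slot == nullptr) {
        return nullptr;  // Bracket run is empty.
      }
      run->free_list_head = slot->next;  // Pop the head off the free list.
      run->free_list_size--;
      reinterpret_cast<Object*>(slot)->klass = klass;  // Overwrites the next pointer.
      *self->alloc_stack_top++ = slot;   // Push onto the thread-local alloc stack.
      return slot;
    }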
 
-// The common fast path code for art_quick_alloc_object_resolved_tlab
-// and art_quick_alloc_object_resolved_region_tlab.
+// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 //
-// EAX: type/return_value
-MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
+// EAX: type_idx/return_value, ECX: ArtMethod*, EDX: the class.
+MACRO1(ALLOC_OBJECT_TLAB_FAST_PATH, slowPathLabel)
+    testl %edx, %edx                                    // Check null class
+    jz   VAR(slowPathLabel)
     movl %fs:THREAD_SELF_OFFSET, %ebx                   // ebx = thread
     movl THREAD_LOCAL_END_OFFSET(%ebx), %edi            // Load thread_local_end.
     subl THREAD_LOCAL_POS_OFFSET(%ebx), %edi            // Compute the remaining buffer size.
-    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%eax), %ecx  // Load the object size.
-    cmpl %edi, %ecx                                     // Check if it fits.
+    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%edx), %esi  // Load the object size.
+    cmpl %edi, %esi                                     // Check if it fits.
     ja   VAR(slowPathLabel)
-    movl THREAD_LOCAL_POS_OFFSET(%ebx), %edx            // Load thread_local_pos
+    movl THREAD_LOCAL_POS_OFFSET(%ebx), %eax            // Load thread_local_pos
                                                         // as allocated object.
-    addl %edx, %ecx                                     // Add the object size.
-    movl %ecx, THREAD_LOCAL_POS_OFFSET(%ebx)            // Update thread_local_pos.
+    addl %eax, %esi                                     // Add the object size.
+    movl %esi, THREAD_LOCAL_POS_OFFSET(%ebx)            // Update thread_local_pos.
     incl THREAD_LOCAL_OBJECTS_OFFSET(%ebx)              // Increase thread_local_objects.
                                                         // Store the class pointer in the header.
                                                         // No fence needed for x86.
-    POISON_HEAP_REF eax
-    movl %eax, MIRROR_OBJECT_CLASS_OFFSET(%edx)
-    movl %edx, %eax
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%eax)
     POP edi
+    POP esi
     ret                                                 // Fast path succeeded.
 END_MACRO
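
As a cross-check on the macro above, a minimal C++ sketch of the same bump-pointer
logic. The field names are simplified stand-ins for the Thread::tlsPtr_ members the
assembly reads through THREAD_LOCAL_*_OFFSET.

    #include <cstddef>
    #include <cstdint>

    struct Class { uint32_t object_size_alloc_fast_path; };
    struct Object { Class* klass; };
    struct Thread {
      uint8_t* thread_local_pos;
      uint8_t* thread_local_end;
      size_t thread_local_objects;
    };

    // Returns nullptr where the macro jumps to slowPathLabel.
    inline Object* TlabAllocFastPath(Thread* self, Class* klass) {
      if (klass == nullptr) {
        return nullptr;  // Unresolved class: take the slow path.
      }
      size_t remaining =
          static_cast<size_t>(self->thread_local_end - self->thread_local_pos);
      size_t size = klass->object_size_alloc_fast_path;
      if (size > remaining) {
        return nullptr;  // Does not fit in the remaining TLAB buffer.
      }
      Object* obj = reinterpret_cast<Object*>(self->thread_local_pos);
      self->thread_local_pos += size;  // Bump the allocation cursor.
      self->thread_local_objects++;    // GC bookkeeping.
      obj->klass = klass;              // Store the class pointer; no fence on x86.
      return obj;
    }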
 
-// The common slow path code for art_quick_alloc_object_resolved_tlab
-// and art_quick_alloc_object_resolved_region_tlab.
-MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH, cxx_name)
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
+MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
     POP edi
+    POP esi
     SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx                 // save ref containing registers for GC
     // Outgoing argument set up
-    subl LITERAL(8), %esp                               // alignment padding
-    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH eax                                            // alignment padding
     pushl %fs:THREAD_SELF_OFFSET                        // pass Thread::Current()
     CFI_ADJUST_CFA_OFFSET(4)
+    PUSH ecx
     PUSH eax
-    call CALLVAR(cxx_name)                              // cxx_name(arg0, Thread*)
+    call CALLVAR(cxx_name)                              // cxx_name(arg0, arg1, Thread*)
     addl LITERAL(16), %esp
     CFI_ADJUST_CFA_OFFSET(-16)
     RESTORE_SAVE_REFS_ONLY_FRAME                        // restore frame up to return address
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER             // return or deliver exception
 END_MACRO
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be called
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be called
 // for CC if the GC is not marking.
-DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
+DEFINE_FUNCTION art_quick_alloc_object_tlab
     // Fast path tlab allocation.
-    // EAX: type
-    // EBX, ECX, EDX: free.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
+    PUSH esi
     PUSH edi
-    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
-.Lart_quick_alloc_object_resolved_tlab_slow_path:
-    ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
-END_FUNCTION art_quick_alloc_object_resolved_tlab
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                            // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
+END_FUNCTION art_quick_alloc_object_tlab
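
The two instructions feeding the fast path, rendered as C++. This is a sketch: the
real accessor is ArtMethod::GetDexCacheResolvedType (as in entrypoint_utils-inl.h
below), and the field name here is a stand-in. A compressed reference is a 32-bit
heap address, which is why the array index is scaled by COMPRESSED_REFERENCE_SIZE.

    #include <cstdint>

    struct Class;
    struct ArtMethod {
      uint32_t* dex_cache_resolved_types;  // Array of compressed Class references.
    };

    inline Class* LoadClassFromDexCache(ArtMethod* method, uint32_t type_idx) {
      // movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(method), base
      uint32_t* types = method->dex_cache_resolved_types;
      // movl 0(base, type_idx, COMPRESSED_REFERENCE_SIZE), klass
      uint32_t compressed = types[type_idx];
      // A null entry means the type is unresolved; the entrypoint then takes the
      // slow path, which resolves the type through the ClassLinker.
      return reinterpret_cast<Class*>(static_cast<uintptr_t>(compressed));
    }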
 
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
-DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
     // Fast path region tlab allocation.
-    // EAX: type/return value
-    // EBX, ECX, EDX: free.
+    // EAX: uint32_t type_idx/return value, ECX: ArtMethod*.
+    // EBX, EDX: free.
 #if !defined(USE_READ_BARRIER)
     int3
     int3
 #endif
+    PUSH esi
     PUSH edi
-    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
-.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
-    ALLOC_OBJECT_RESOLVED_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
-END_FUNCTION art_quick_alloc_object_resolved_region_tlab
-
+    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET_32(%ecx), %edx   // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                            // Load the class
+    movl 0(%edx, %eax, COMPRESSED_REFERENCE_SIZE), %edx
+                                                            // Read barrier for class load.
+    cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+    // Check the mark bit; if it is 1, return.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH eax
+    PUSH ecx
+    // Outgoing argument set up
+    subl MACRO_LITERAL(8), %esp                             // Alignment padding
+    CFI_ADJUST_CFA_OFFSET(8)
+    PUSH edx                                                // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                         // cxx_name(mirror::Object* obj)
+    movl %eax, %edx
+    addl MACRO_LITERAL(12), %esp
+    CFI_ADJUST_CFA_OFFSET(-12)
+    POP ecx
+    POP eax
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
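
The class-load read barrier above, in C++ form. This is a sketch: the lock-word
layout is simplified and the mark-bit constant is an assumed bit position, but
artReadBarrierMark is the entrypoint the assembly actually calls.

    #include <cstdint>

    struct Object { uint32_t monitor_; };  // The lock word in the object header.
    constexpr uint32_t kLockWordMarkBitMaskShifted = 1u << 29;  // assumed position

    extern "C" Object* artReadBarrierMark(Object* obj);

    inline Object* ClassLoadReadBarrier(Object* klass, bool is_gc_marking) {
      if (!is_gc_marking || klass == nullptr) {
        return klass;  // No barrier needed, or null falls through to the slow path.
      }
      if (klass->monitor_ & kLockWordMarkBitMaskShifted) {
        return klass;  // Mark bit is 1: already marked, nothing to do.
      }
      return artReadBarrierMark(klass);  // Mark and return the to-space reference.
    }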
 
 DEFINE_FUNCTION art_quick_resolve_string
     SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index facd563..4c46b08 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -983,6 +983,7 @@
 
 // Comment out allocators that have x86_64 specific asm.
 // Region TLAB:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
@@ -995,9 +996,11 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
 // Normal TLAB:
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
+// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
@@ -1006,25 +1009,29 @@
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
 
-
-// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
-DEFINE_FUNCTION art_quick_alloc_object_resolved_rosalloc
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
+DEFINE_FUNCTION art_quick_alloc_object_rosalloc
     // Fast path rosalloc allocation.
-    // RDI: mirror::Class*, RAX: return value
-    // RSI, RDX, RCX, R8, R9: free.
+    // RDI: type_idx, RSI: ArtMethod*, RAX: return value
+    // RDX, RCX, R8, R9: free.
+    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+                                                             // Load the class (edx)
+    movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
+    testl  %edx, %edx                                      // Check null class
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
                                                            // Check if the thread local
                                                            // allocation stack has room.
     movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
     movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
     cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
-    jae    .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    jae    .Lart_quick_alloc_object_rosalloc_slow_path
                                                            // Load the object size
-    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
+    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %eax
                                                            // Check if the size is for a thread
                                                            // local allocation. Also does the
                                                            // initialized and finalizable checks.
     cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
-    ja     .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    ja     .Lart_quick_alloc_object_rosalloc_slow_path
                                                            // Compute the rosalloc bracket index
                                                            // from the size.
     shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
@@ -1038,7 +1045,7 @@
                                                            // will be the return val.
     movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
     testq  %rax, %rax
-    jz     .Lart_quick_alloc_object_resolved_rosalloc_slow_path
+    jz     .Lart_quick_alloc_object_rosalloc_slow_path
     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                            // Push the new object onto the thread
                                                            // local allocation stack and
@@ -1059,17 +1066,17 @@
 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
 #error "Class pointer needs to overwrite next pointer."
 #endif
-    POISON_HEAP_REF edi
-    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    POISON_HEAP_REF edx
+    movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                            // Decrement the size of the free list
     decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                            // No fence necessary for x86.
     ret
-.Lart_quick_alloc_object_resolved_rosalloc_slow_path:
+.Lart_quick_alloc_object_rosalloc_slow_path:
     SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
     // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
-    call SYMBOL(artAllocObjectFromCodeResolvedRosAlloc)    // cxx_name(arg0, Thread*)
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call SYMBOL(artAllocObjectFromCodeRosAlloc)            // cxx_name(arg0, arg1, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
-END_FUNCTION art_quick_alloc_object_resolved_rosalloc
+END_FUNCTION art_quick_alloc_object_rosalloc
@@ -1088,19 +1095,19 @@
 // TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
 // ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
 //
-// RDI: the class, RAX: return value.
-// RCX, RSI, RDX: scratch, r8: Thread::Current().
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
 MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
 END_MACRO
 
 // The fast path code for art_quick_alloc_object_initialized_region_tlab.
 //
-// RDI: the class, RSI: ArtMethod*, RAX: return value.
-// RCX, RSI, RDX: scratch, r8: Thread::Current().
+// RDI: type_idx, RSI: ArtMethod*, RDX/EDX: the class, RAX: return value.
+// RCX: scratch, r8: Thread::Current().
 MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
     movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
-    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
+    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdx), %ecx // Load the object size.
     movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
     addq %rax, %rcx                                            // Add size to pos, note that these
                                                                // are both 32 bit ints, overflow
@@ -1113,8 +1120,8 @@
                                                                // Store the class pointer in the
                                                                // header.
                                                                // No fence needed for x86.
-    POISON_HEAP_REF edi
-    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
+    POISON_HEAP_REF edx
+    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
     ret                                                        // Fast path succeeded.
 END_MACRO
 
@@ -1157,14 +1164,12 @@
     ret                                                        // Fast path succeeded.
 END_MACRO
 
-
-// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
-// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
+// The common slow path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
 MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
     SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
     // Outgoing argument set up
-    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
-    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
+    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread::Current()
+    call CALLVAR(cxx_name)                                 // cxx_name(arg0, arg1, Thread*)
     RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
 END_MACRO
@@ -1179,11 +1184,26 @@
     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
 END_MACRO
 
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB). May be
+// called with CC if the GC is not active.
+DEFINE_FUNCTION art_quick_alloc_object_tlab
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+    // Might need to break down into multiple instructions to get the base address in a register.
+                                                               // Load the class
+    movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
+    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
+.Lart_quick_alloc_object_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeTLAB
+END_FUNCTION art_quick_alloc_object_tlab
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
 // called with CC if the GC is not active.
 DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
-    // RDI: mirror::Class* klass
-    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq %rdi, %rdx
     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
 .Lart_quick_alloc_object_resolved_tlab_slow_path:
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
@@ -1192,8 +1212,9 @@
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
 // May be called with CC if the GC is not active.
 DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
-    // RDI: mirror::Class* klass
-    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    movq %rdi, %rdx
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
 .Lart_quick_alloc_object_initialized_tlab_slow_path:
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
@@ -1271,12 +1292,49 @@
     ALLOC_ARRAY_TLAB_SLOW_PATH artAllocArrayFromCodeResolvedRegionTLAB
 END_FUNCTION art_quick_alloc_array_resolved_region_tlab
 
+// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB).
+DEFINE_FUNCTION art_quick_alloc_object_region_tlab
+    // Fast path region tlab allocation.
+    // RDI: uint32_t type_idx, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
+    ASSERT_USE_READ_BARRIER
+    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
+    movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx    // Load the class
+    // Null check so that we can load the lock word.
+    testl %edx, %edx
+    jz .Lart_quick_alloc_object_region_tlab_slow_path
+    // Since we have allocation entrypoint switching, we know the GC is marking.
+    // Check the mark bit; if it is 0, do the read barrier mark.
+    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%edx)
+    jz .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
+    // Use resolved one since we already did the null check.
+    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
+.Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
+    // The read barrier slow path. Mark the class.
+    PUSH rdi
+    PUSH rsi
+    subq LITERAL(8), %rsp // 16 byte alignment
+    // Outgoing argument set up
+    movq %rdx, %rdi                                            // Pass the class as the first param.
+    call SYMBOL(artReadBarrierMark)                            // cxx_name(mirror::Object* obj)
+    movq %rax, %rdx
+    addq LITERAL(8), %rsp
+    POP rsi
+    POP rdi
+    jmp .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
+.Lart_quick_alloc_object_region_tlab_slow_path:
+    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeRegionTLAB
+END_FUNCTION art_quick_alloc_object_region_tlab
+
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
 DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
     // Fast path region tlab allocation.
-    // RDI: mirror::Class* klass
-    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
     ASSERT_USE_READ_BARRIER
+    // No read barrier since the caller is responsible for that.
+    movq %rdi, %rdx
     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
 .Lart_quick_alloc_object_resolved_region_tlab_slow_path:
     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
@@ -1285,9 +1343,10 @@
 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
 DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
     // Fast path region tlab allocation.
-    // RDI: mirror::Class* klass
-    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
+    // RDI: mirror::Class* klass, RSI: ArtMethod*
+    // RDX, RCX, R8, R9: free. RAX: return val.
     ASSERT_USE_READ_BARRIER
+    movq %rdi, %rdx
     // No read barrier since the caller is responsible for that.
     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
 .Lart_quick_alloc_object_initialized_region_tlab_slow_path:
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index bfdddf7..e4972da 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -98,7 +98,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 469c45c..14c9c21 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -127,21 +127,43 @@
       self->GetManagedStack()->GetTopQuickFrame(), type, true /* do_caller_check */);
 }
 
-ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(mirror::Class* klass,
-                                                     Thread* self,
-                                                     bool* slow_path)
-    REQUIRES_SHARED(Locks::mutator_lock_)
-    REQUIRES(!Roles::uninterruptible_) {
-  if (UNLIKELY(!klass->IsInstantiable())) {
-    self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str());
+template <const bool kAccessCheck>
+ALWAYS_INLINE
+inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx,
+                                       ArtMethod* method,
+                                       Thread* self,
+                                       bool* slow_path) {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+  mirror::Class* klass = method->GetDexCacheResolvedType<false>(type_idx, pointer_size);
+  if (UNLIKELY(klass == nullptr)) {
+    klass = class_linker->ResolveType(type_idx, method);
     *slow_path = true;
-    return nullptr;  // Failure
+    if (klass == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;  // Failure
+    } else {
+      DCHECK(!self->IsExceptionPending());
+    }
   }
-  if (UNLIKELY(klass->IsClassClass())) {
-    ThrowIllegalAccessError(nullptr, "Class %s is inaccessible",
-                            klass->PrettyDescriptor().c_str());
-    *slow_path = true;
-    return nullptr;  // Failure
+  if (kAccessCheck) {
+    if (UNLIKELY(!klass->IsInstantiable())) {
+      self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str());
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
+    if (UNLIKELY(klass->IsClassClass())) {
+      ThrowIllegalAccessError(nullptr, "Class %s is inaccessible",
+                              klass->PrettyDescriptor().c_str());
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
+    mirror::Class* referrer = method->GetDeclaringClass();
+    if (UNLIKELY(!referrer->CanAccess(klass))) {
+      ThrowIllegalAccessErrorClass(referrer, klass);
+      *slow_path = true;
+      return nullptr;  // Failure
+    }
   }
   if (UNLIKELY(!klass->IsInitialized())) {
     StackHandleScope<1> hs(self);
@@ -169,9 +191,7 @@
 ALWAYS_INLINE
 inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
                                                           Thread* self,
-                                                          bool* slow_path)
-    REQUIRES_SHARED(Locks::mutator_lock_)
-    REQUIRES(!Roles::uninterruptible_) {
+                                                          bool* slow_path) {
   if (UNLIKELY(!klass->IsInitialized())) {
     StackHandleScope<1> hs(self);
     Handle<mirror::Class> h_class(hs.NewHandle(klass));
@@ -193,15 +213,18 @@
   return klass;
 }
 
-// Allocate an instance of klass. Throws InstantationError if klass is not instantiable,
-// or IllegalAccessError if klass is j.l.Class. Performs a clinit check too.
-template <bool kInstrumented>
+// Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
+// cannot be resolved, throw an error. If it can, use it to create an instance.
+// When the verifier/compiler has not been able to verify access, optionally perform an access
+// check.
+template <bool kAccessCheck, bool kInstrumented>
 ALWAYS_INLINE
-inline mirror::Object* AllocObjectFromCode(mirror::Class* klass,
+inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx,
+                                           ArtMethod* method,
                                            Thread* self,
                                            gc::AllocatorType allocator_type) {
   bool slow_path = false;
-  klass = CheckObjectAlloc(klass, self, &slow_path);
+  mirror::Class* klass = CheckObjectAlloc<kAccessCheck>(type_idx, method, self, &slow_path);
   if (UNLIKELY(slow_path)) {
     if (klass == nullptr) {
       return nullptr;
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 4794610..7cc136e 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -45,10 +45,27 @@
 class ScopedObjectAccessAlreadyRunnable;
 class Thread;
 
+template <const bool kAccessCheck>
+ALWAYS_INLINE inline mirror::Class* CheckObjectAlloc(dex::TypeIndex type_idx,
+                                                     ArtMethod* method,
+                                                     Thread* self,
+                                                     bool* slow_path)
+    REQUIRES_SHARED(Locks::mutator_lock_)
+    REQUIRES(!Roles::uninterruptible_);
+
+ALWAYS_INLINE inline mirror::Class* CheckClassInitializedForObjectAlloc(mirror::Class* klass,
+                                                                        Thread* self,
+                                                                        bool* slow_path)
+    REQUIRES_SHARED(Locks::mutator_lock_)
+    REQUIRES(!Roles::uninterruptible_);
+
 // Given the context of a calling Method, use its DexCache to resolve a type to a Class. If it
 // cannot be resolved, throw an error. If it can, use it to create an instance.
-template <bool kInstrumented>
-ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(mirror::Class* klass,
+// When the verifier/compiler has not been able to verify access, optionally perform an access
+// check.
+template <bool kAccessCheck, bool kInstrumented>
+ALWAYS_INLINE inline mirror::Object* AllocObjectFromCode(dex::TypeIndex type_idx,
+                                                         ArtMethod* method,
                                                          Thread* self,
                                                          gc::AllocatorType allocator_type)
     REQUIRES_SHARED(Locks::mutator_lock_)
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index 2d06508..82bb8e5 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -29,58 +29,87 @@
 
 static constexpr bool kUseTlabFastPath = true;
 
-template <bool kInitialized,
-          bool kFinalize,
-          bool kInstrumented,
-          gc::AllocatorType allocator_type>
-static ALWAYS_INLINE inline mirror::Object* artAllocObjectFromCode(
-    mirror::Class* klass,
-    Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-  DCHECK(klass != nullptr);
-  if (kUseTlabFastPath && !kInstrumented && allocator_type == gc::kAllocatorTypeTLAB) {
-    if (kInitialized || klass->IsInitialized()) {
-      if (!kFinalize || !klass->IsFinalizable()) {
-        size_t byte_count = klass->GetObjectSize();
-        byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment);
-        mirror::Object* obj;
-        if (LIKELY(byte_count < self->TlabSize())) {
-          obj = self->AllocTlab(byte_count);
-          DCHECK(obj != nullptr) << "AllocTlab can't fail";
-          obj->SetClass(klass);
-          if (kUseBakerReadBarrier) {
-            obj->AssertReadBarrierState();
-          }
-          QuasiAtomic::ThreadFenceForConstructor();
-          return obj;
-        }
-      }
-    }
-  }
-  if (kInitialized) {
-    return AllocObjectFromCodeInitialized<kInstrumented>(klass, self, allocator_type);
-  } else if (!kFinalize) {
-    return AllocObjectFromCodeResolved<kInstrumented>(klass, self, allocator_type);
-  } else {
-    return AllocObjectFromCode<kInstrumented>(klass, self, allocator_type);
-  }
-}
-
 #define GENERATE_ENTRYPOINTS_FOR_ALLOCATOR_INST(suffix, suffix2, instrumented_bool, allocator_type) \
-extern "C" mirror::Object* artAllocObjectFromCodeWithChecks##suffix##suffix2( \
-    mirror::Class* klass, Thread* self) \
+extern "C" mirror::Object* artAllocObjectFromCode##suffix##suffix2( \
+    uint32_t type_idx, ArtMethod* method, Thread* self) \
     REQUIRES_SHARED(Locks::mutator_lock_) { \
-  return artAllocObjectFromCode<false, true, instrumented_bool, allocator_type>(klass, self); \
+  ScopedQuickEntrypointChecks sqec(self); \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
+    mirror::Class* klass = method->GetDexCacheResolvedType<false>(dex::TypeIndex(type_idx), \
+                                                                  kRuntimePointerSize); \
+    if (LIKELY(klass != nullptr && klass->IsInitialized() && !klass->IsFinalizable())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
+  return AllocObjectFromCode<false, instrumented_bool>(dex::TypeIndex(type_idx), \
+                                                       method, \
+                                                       self, \
+                                                       allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeResolved##suffix##suffix2( \
-    mirror::Class* klass, Thread* self) \
+    mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     REQUIRES_SHARED(Locks::mutator_lock_) { \
-  return artAllocObjectFromCode<false, false, instrumented_bool, allocator_type>(klass, self); \
+  ScopedQuickEntrypointChecks sqec(self); \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
+    if (LIKELY(klass->IsInitialized())) { \
+      size_t byte_count = klass->GetObjectSize(); \
+      byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+      mirror::Object* obj; \
+      if (LIKELY(byte_count < self->TlabSize())) { \
+        obj = self->AllocTlab(byte_count); \
+        DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+        obj->SetClass(klass); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
+        } \
+        QuasiAtomic::ThreadFenceForConstructor(); \
+        return obj; \
+      } \
+    } \
+  } \
+  return AllocObjectFromCodeResolved<instrumented_bool>(klass, self, allocator_type); \
 } \
 extern "C" mirror::Object* artAllocObjectFromCodeInitialized##suffix##suffix2( \
-    mirror::Class* klass, Thread* self) \
+    mirror::Class* klass, ArtMethod* method ATTRIBUTE_UNUSED, Thread* self) \
     REQUIRES_SHARED(Locks::mutator_lock_) { \
-  return artAllocObjectFromCode<true, false, instrumented_bool, allocator_type>(klass, self); \
+  ScopedQuickEntrypointChecks sqec(self); \
+  if (kUseTlabFastPath && !(instrumented_bool) && (allocator_type) == gc::kAllocatorTypeTLAB) { \
+    size_t byte_count = klass->GetObjectSize(); \
+    byte_count = RoundUp(byte_count, gc::space::BumpPointerSpace::kAlignment); \
+    mirror::Object* obj; \
+    if (LIKELY(byte_count < self->TlabSize())) { \
+      obj = self->AllocTlab(byte_count); \
+      DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
+      obj->SetClass(klass); \
+      if (kUseBakerReadBarrier) { \
+        obj->AssertReadBarrierState(); \
+      } \
+      QuasiAtomic::ThreadFenceForConstructor(); \
+      return obj; \
+    } \
+  } \
+  return AllocObjectFromCodeInitialized<instrumented_bool>(klass, self, allocator_type); \
+} \
+extern "C" mirror::Object* artAllocObjectFromCodeWithAccessCheck##suffix##suffix2( \
+    uint32_t type_idx, ArtMethod* method, Thread* self) \
+    REQUIRES_SHARED(Locks::mutator_lock_) { \
+  ScopedQuickEntrypointChecks sqec(self); \
+  return AllocObjectFromCode<true, instrumented_bool>(dex::TypeIndex(type_idx), \
+                                                      method, \
+                                                      self, \
+                                                      allocator_type); \
 } \
 extern "C" mirror::Array* artAllocArrayFromCode##suffix##suffix2( \
     uint32_t type_idx, int32_t component_count, ArtMethod* method, Thread* self) \
@@ -191,9 +220,10 @@
 extern "C" void* art_quick_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_resolved##suffix(mirror::Class* klass, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \
-extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass); \
-extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass); \
-extern "C" void* art_quick_alloc_object_with_checks##suffix(mirror::Class* klass); \
+extern "C" void* art_quick_alloc_object##suffix(uint32_t type_idx, ArtMethod* ref); \
+extern "C" void* art_quick_alloc_object_resolved##suffix(mirror::Class* klass, ArtMethod* ref); \
+extern "C" void* art_quick_alloc_object_initialized##suffix(mirror::Class* klass, ArtMethod* ref); \
+extern "C" void* art_quick_alloc_object_with_access_check##suffix(uint32_t type_idx, ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array##suffix(uint32_t, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix(uint32_t, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_alloc_string_from_bytes##suffix(void*, int32_t, int32_t, int32_t); \
@@ -203,9 +233,9 @@
 extern "C" void* art_quick_alloc_array_resolved##suffix##_instrumented(mirror::Class* klass, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_alloc_object##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \
-extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass); \
-extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass); \
-extern "C" void* art_quick_alloc_object_with_checks##suffix##_instrumented(mirror::Class* klass); \
+extern "C" void* art_quick_alloc_object_resolved##suffix##_instrumented(mirror::Class* klass, ArtMethod* ref); \
+extern "C" void* art_quick_alloc_object_initialized##suffix##_instrumented(mirror::Class* klass, ArtMethod* ref); \
+extern "C" void* art_quick_alloc_object_with_access_check##suffix##_instrumented(uint32_t type_idx, ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented(uint32_t, int32_t, ArtMethod* ref); \
 extern "C" void* art_quick_alloc_string_from_bytes##suffix##_instrumented(void*, int32_t, int32_t, int32_t); \
@@ -216,9 +246,10 @@
     qpoints->pAllocArray = art_quick_alloc_array##suffix##_instrumented; \
     qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix##_instrumented; \
     qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix##_instrumented; \
+    qpoints->pAllocObject = art_quick_alloc_object##suffix##_instrumented; \
     qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix##_instrumented; \
     qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix##_instrumented; \
-    qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix##_instrumented; \
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix##_instrumented; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix##_instrumented; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix##_instrumented; \
     qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix##_instrumented; \
@@ -228,9 +259,10 @@
     qpoints->pAllocArray = art_quick_alloc_array##suffix; \
     qpoints->pAllocArrayResolved = art_quick_alloc_array_resolved##suffix; \
     qpoints->pAllocArrayWithAccessCheck = art_quick_alloc_array_with_access_check##suffix; \
+    qpoints->pAllocObject = art_quick_alloc_object##suffix; \
     qpoints->pAllocObjectResolved = art_quick_alloc_object_resolved##suffix; \
     qpoints->pAllocObjectInitialized = art_quick_alloc_object_initialized##suffix; \
-    qpoints->pAllocObjectWithChecks = art_quick_alloc_object_with_checks##suffix; \
+    qpoints->pAllocObjectWithAccessCheck = art_quick_alloc_object_with_access_check##suffix; \
     qpoints->pCheckAndAllocArray = art_quick_check_and_alloc_array##suffix; \
     qpoints->pCheckAndAllocArrayWithAccessCheck = art_quick_check_and_alloc_array_with_access_check##suffix; \
     qpoints->pAllocStringFromBytes = art_quick_alloc_string_from_bytes##suffix; \
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index 0911aeb..a1c5082 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -23,9 +23,10 @@
   V(AllocArray, void*, uint32_t, int32_t, ArtMethod*) \
   V(AllocArrayResolved, void*, mirror::Class*, int32_t, ArtMethod*) \
   V(AllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*) \
-  V(AllocObjectResolved, void*, mirror::Class*) \
-  V(AllocObjectInitialized, void*, mirror::Class*) \
-  V(AllocObjectWithChecks, void*, mirror::Class*) \
+  V(AllocObject, void*, uint32_t, ArtMethod*) \
+  V(AllocObjectResolved, void*, mirror::Class*, ArtMethod*) \
+  V(AllocObjectInitialized, void*, mirror::Class*, ArtMethod*) \
+  V(AllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*) \
   V(CheckAndAllocArray, void*, uint32_t, int32_t, ArtMethod*) \
   V(CheckAndAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*) \
   V(AllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t) \
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 6866abb..1283660 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -122,9 +122,9 @@
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
@@ -156,13 +156,13 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArray, pAllocArrayResolved, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayResolved, pAllocArrayWithAccessCheck,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayWithAccessCheck, pAllocObjectResolved,
-                         sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocArrayWithAccessCheck, pAllocObject, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObject, pAllocObjectResolved, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectResolved, pAllocObjectInitialized,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectInitialized, pAllocObjectWithChecks,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectInitialized, pAllocObjectWithAccessCheck,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectWithChecks, pCheckAndAllocArray,
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocObjectWithAccessCheck, pCheckAndAllocArray,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckAndAllocArray, pCheckAndAllocArrayWithAccessCheck,
                          sizeof(void*));
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index d7dfcd4..b0d7fb2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -508,8 +508,9 @@
             gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
             obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
           } else {
-            obj = AllocObjectFromCode<true>(
-                c.Ptr(),
+            obj = AllocObjectFromCode<do_access_check, true>(
+                dex::TypeIndex(inst->VRegB_21c()),
+                shadow_frame.GetMethod(),
                 self,
                 Runtime::Current()->GetHeap()->GetCurrentAllocator());
           }
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 369c261..c8c1563 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -375,9 +375,10 @@
       gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
       obj = mirror::String::AllocEmptyString<true>(self, allocator_type);
     } else {
-      obj = AllocObjectFromCode<true>(c,
-                                      self,
-                                      Runtime::Current()->GetHeap()->GetCurrentAllocator());
+      obj = AllocObjectFromCode<false, true>(dex::TypeIndex(inst->VRegB_21c()),
+                                             shadow_frame->GetMethod(),
+                                             self,
+                                             Runtime::Current()->GetHeap()->GetCurrentAllocator());
     }
   }
   if (UNLIKELY(obj == nullptr)) {
diff --git a/runtime/oat.h b/runtime/oat.h
index dc103e2..1fd906d 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '9', '5', '\0' };  // alloc entrypoints change
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '4', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 33c6a40..aff12ff 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -2627,9 +2627,10 @@
   QUICK_ENTRY_POINT_INFO(pAllocArray)
   QUICK_ENTRY_POINT_INFO(pAllocArrayResolved)
   QUICK_ENTRY_POINT_INFO(pAllocArrayWithAccessCheck)
+  QUICK_ENTRY_POINT_INFO(pAllocObject)
   QUICK_ENTRY_POINT_INFO(pAllocObjectResolved)
   QUICK_ENTRY_POINT_INFO(pAllocObjectInitialized)
-  QUICK_ENTRY_POINT_INFO(pAllocObjectWithChecks)
+  QUICK_ENTRY_POINT_INFO(pAllocObjectWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArray)
   QUICK_ENTRY_POINT_INFO(pCheckAndAllocArrayWithAccessCheck)
   QUICK_ENTRY_POINT_INFO(pAllocStringFromBytes)
diff --git a/runtime/thread.h b/runtime/thread.h
index 6308851..411d85f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1416,7 +1416,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
@@ -1540,12 +1540,12 @@
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
-    // Thread-local allocation pointer. Moved here to force alignment for thread_local_pos on ARM.
-    uint8_t* thread_local_start;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
+    // Thread-local allocation pointer.
+    uint8_t* thread_local_start;
 
     size_t thread_local_objects;
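
The net effect of the tlsPtr_ reordering, and the reason asm_support.h moves
THREAD_LOCAL_OBJECTS_OFFSET to THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__,
can be checked with a stand-in struct (a sketch; the real fields live inside
Thread::tlsPtr_ behind the entrypoint tables):

    #include <cstddef>
    #include <cstdint>

    // Stand-in for the tail of Thread::tlsPtr_ after this revert.
    struct TlsPtrSketch {
      uint8_t* thread_local_pos;    // pos/end stay consecutive for ldrd on ARM.
      uint8_t* thread_local_end;
      uint8_t* thread_local_start;  // Moved back behind pos/end.
      size_t thread_local_objects;
    };

    static_assert(offsetof(TlsPtrSketch, thread_local_objects) ==
                      offsetof(TlsPtrSketch, thread_local_end) + 2 * sizeof(void*),
                  "thread_local_objects sits two pointers past thread_local_end");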