Inline some type checks of instanceof and checkcast in nterp.

An app-startup profile showed that these type checks are hot.
Implement for arm/arm64/x64, and a reduced version on x86 due to lack of
available registers.

Test: test.py
Bug: 112676029
Change-Id: If2c2517b4012e5fc5d12b240ad3b8c28de261cec
diff --git a/runtime/interpreter/mterp/arm64ng/object.S b/runtime/interpreter/mterp/arm64ng/object.S
index 41131c2..c0e8e0a 100644
--- a/runtime/interpreter/mterp/arm64ng/object.S
+++ b/runtime/interpreter/mterp/arm64ng/object.S
@@ -1,55 +1,139 @@
 %def op_check_cast():
+%  slow_path = add_helper(lambda: op_check_cast_slow_path())
    // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE x1, 3f
-   TEST_IF_MARKING 4f
+   FETCH_FROM_THREAD_CACHE x1, 2f
 1:
    lsr     w2, wINST, #8               // w2<- A
    GET_VREG w0, w2                     // w0<- vA (object)
-   cbz     w0, 2f
-   bl      art_quick_check_instance_of
-2:
+   cbz     w0, .L${opcode}_resume
+   ldr     w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]
+   // Fast path: do a comparison without read barrier.
+   cmp     w1, w2
+   bne     ${slow_path}
+.L${opcode}_resume:
    FETCH_ADVANCE_INST 2
    GET_INST_OPCODE ip
    GOTO_OPCODE ip
-3:
+2:
+   EXPORT_PC
    mov     x0, xSELF
    ldr     x1, [sp]
    mov     x2, xPC
    bl      nterp_get_class_or_allocate_object
    mov     x1, x0
    b       1b
+
+%def op_check_cast_slow_path():
+   // We don't do read barriers for simplicity. However, this means that x1
+   // (and all other fetched objects) may be a from-space reference. That's OK as
+   // we only fetch constant information from the references.
+   // This also means that some of the comparisons below may lead to false negatives,
+   // but it will eventually be handled in the runtime.
+   ldr     w3, [x1, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+   tbnz    w3, #MIRROR_CLASS_IS_INTERFACE_FLAG_BIT, 2f
+   ldr     w3, [x1, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   cbnz    w3, 5f
+1:
+   ldr     w2, [x2, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   cmp     w1, w2
+   beq     .L${opcode}_resume
+   cbnz    w2, 1b
+2:
+   TEST_IF_MARKING 4f
+3:
+   EXPORT_PC
+   bl      art_quick_check_instance_of
+   b       .L${opcode}_resume
 4:
    bl      art_quick_read_barrier_mark_reg01
-   b       1b
+   b       3b
+5:
+   // Class in w1 is an array, w3 is the component type.
+   ldr     w2, [x2, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   // Check if object is an array.
+   cbz     w2, 2b
+   ldr     w4, [x3, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   // If the super class of the component type is not null, go slow path.
+   cbnz    w4, 2b
+   ldrh    w3, [x3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   // If the component type is primitive, go slow path.
+   cbnz    w3, 2b
+   // Check if the object is a primitive array.
+   ldrh    w2, [x2, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   cbz     w2, .L${opcode}_resume
+   // Go slow path for throwing the exception.
+   b 2b
 
 %def op_instance_of():
+%  slow_path = add_helper(lambda: op_instance_of_slow_path())
    /* instance-of vA, vB, class@CCCC */
    // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE x1, 3f
-   TEST_IF_MARKING 4f
+   FETCH_FROM_THREAD_CACHE x1, 2f
 1:
    lsr     w2, wINST, #12              // w2<- B
    GET_VREG w0, w2                     // w0<- vB (object)
-   cbz     w0, 2f
-   bl      artInstanceOfFromCode
-2:
+   cbz     w0, .L${opcode}_resume
+   ldr     w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]
+   // Fast path: do a comparison without read barrier.
+   cmp     w1, w2
+   bne     ${slow_path}
+.L${opcode}_set_one:
+   mov     w0, #1
+.L${opcode}_resume:
    ubfx    w1, wINST, #8, #4           // w1<- A
    SET_VREG w0, w1
    FETCH_ADVANCE_INST 2
    GET_INST_OPCODE ip
    GOTO_OPCODE ip
-3:
+2:
+   EXPORT_PC
    mov     x0, xSELF
    ldr     x1, [sp]
    mov     x2, xPC
    bl      nterp_get_class_or_allocate_object
    mov     x1, x0
    b       1b
+
+%def op_instance_of_slow_path():
+   // Go slow path if we are marking. Checking now allows
+   // not going to slow path if the super class hierarchy check fails.
+   TEST_IF_MARKING 4f
+   ldr     w3, [x1, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+   tbnz    w3, #MIRROR_CLASS_IS_INTERFACE_FLAG_BIT, 4f
+   ldr     w3, [x1, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   cbnz    w3, 3f
+1:
+   ldr     w2, [x2, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   cmp     w1, w2
+   beq     .L${opcode}_set_one
+   cbnz    w2, 1b
+2:
+   mov     w0, #0
+   b       .L${opcode}_resume
+3:
+   // Class in x1 is an array, x3 is the component type of x1, and x2 is the class of the object.
+   ldr     w2, [x2, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   // Check if object is an array.
+   cbz     w2, 2b
+   // Check if x1 is Object[]
+   ldr     w4, [x3, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   // If the super class of the component type is not null, x1 cannot be Object[]; go to slow path.
+   cbnz    w4, 5f
+   // Super class is null, so x1 could be either a primitive array class or Object[].
+   ldrh    w3, [x3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   // If x1 is a primitive array class, we know the check is false.
+   cbnz    w3, 2b
+   // Check if x2 is a primitive array class.
+   ldrh    w2, [x2, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   cmp     w2, #0
+   cset    w0, eq
+   b       .L${opcode}_resume
 4:
    bl      art_quick_read_barrier_mark_reg01
-   b       1b
+5:
+   EXPORT_PC
+   bl      artInstanceOfFromCode
+   b       .L${opcode}_resume
 
 %def op_iget_boolean():
 %  op_iget(load="ldrb", volatile_load="ldarb", maybe_extend="", wide="0", is_object="0")
diff --git a/runtime/interpreter/mterp/armng/object.S b/runtime/interpreter/mterp/armng/object.S
index 50f1c4f..0012e78 100644
--- a/runtime/interpreter/mterp/armng/object.S
+++ b/runtime/interpreter/mterp/armng/object.S
@@ -1,57 +1,141 @@
 %def op_check_cast():
+%  slow_path = add_helper(lambda: op_check_cast_slow_path())
    // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE r1, 3f
-   TEST_IF_MARKING 4f
+   FETCH_FROM_THREAD_CACHE r1, 2f
 1:
    lsr     r2, rINST, #8               // r2<- A
    GET_VREG r0, r2                     // r0<- vA (object)
    cmp     r0, #0
-   beq     2f
-   bl      art_quick_check_instance_of
-2:
+   beq     .L${opcode}_resume
+   ldr     r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
+   // Fast path: do a comparison without read barrier.
+   cmp     r1, r2
+   bne     ${slow_path}
+.L${opcode}_resume:
    FETCH_ADVANCE_INST 2
    GET_INST_OPCODE ip
    GOTO_OPCODE ip
-3:
+2:
+   EXPORT_PC
    mov     r0, rSELF
    ldr     r1, [sp]
    mov     r2, rPC
    bl      nterp_get_class_or_allocate_object
    mov     r1, r0
    b       1b
+
+%def op_check_cast_slow_path():
+   ldr     r3, [r1, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+   tst     r3, #MIRROR_CLASS_IS_INTERFACE_FLAG
+   bne     2f
+   ldr     r3, [r1, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   cmp     r3, #0
+   bne     5f
+1:
+   ldr     r2, [r2, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   cmp     r1, r2
+   beq     .L${opcode}_resume
+   cmp     r2, #0
+   bne     1b
+2:
+   TEST_IF_MARKING 4f
+3:
+   EXPORT_PC
+   bl      art_quick_check_instance_of
+   b       .L${opcode}_resume
 4:
    bl      art_quick_read_barrier_mark_reg01
-   b       1b
+   b       3b
+5:
+   // Class in r1 is an array, r3 is the component type.
+   ldr     r2, [r2, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   // Check if object is an array.
+   cmp     r2, #0
+   beq     2b
+   ldr     r4, [r3, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   cmp     r4, #0
+   // If the super class of the component type is not null, go slow path.
+   bne     2b
+   ldrh    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   // Check if the object is a primitive array.
+   ldrh    r2, [r2, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   orrs    r2, r3
+   beq     .L${opcode}_resume
+   // Go slow path for throwing the exception.
+   b 2b
 
 %def op_instance_of():
+%  slow_path = add_helper(lambda: op_instance_of_slow_path())
    /* instance-of vA, vB, class@CCCC */
    // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE r1, 3f
-   TEST_IF_MARKING 4f
+   FETCH_FROM_THREAD_CACHE r1, 2f
 1:
    lsr     r2, rINST, #12              // r2<- B
    GET_VREG r0, r2                     // r0<- vB (object)
    cmp     r0, #0
-   beq     2f
-   bl      artInstanceOfFromCode
-2:
+   beq     .L${opcode}_resume
+   ldr     r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]
+   // Fast path: do a comparison without read barrier.
+   cmp     r1, r2
+   bne     ${slow_path}
+.L${opcode}_set_one:
+   mov     r0, #1
+.L${opcode}_resume:
    ubfx    r1, rINST, #8, #4           // r1<- A
    SET_VREG r0, r1
    FETCH_ADVANCE_INST 2
    GET_INST_OPCODE ip
    GOTO_OPCODE ip
-3:
+2:
+   EXPORT_PC
    mov     r0, rSELF
    ldr     r1, [sp]
    mov     r2, rPC
    bl      nterp_get_class_or_allocate_object
    mov     r1, r0
    b       1b
+
+%def op_instance_of_slow_path():
+   // Go slow path if we are marking. Checking now allows
+   // not going to slow path if the super class hierarchy check fails.
+   TEST_IF_MARKING 4f
+   ldr     r3, [r1, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
+   tst     r3, #MIRROR_CLASS_IS_INTERFACE_FLAG
+   bne     4f
+   ldr     r3, [r1, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   cmp     r3, #0
+   bne     3f
+1:
+   ldr     r2, [r2, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   cmp     r1, r2
+   beq     .L${opcode}_set_one
+   cmp     r2, #0
+   bne     1b
+2:
+   mov     r0, #0
+   b       .L${opcode}_resume
+3:
+   // Class in r1 is an array, r3 is the component type.
+   ldr     r2, [r2, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]
+   // Check if object is an array.
+   cmp     r2, #0
+   beq     2b
+   ldr     r4, [r3, #MIRROR_CLASS_SUPER_CLASS_OFFSET]
+   cmp     r4, #0
+   bne     5f
+   ldrh    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   // Check if the object is a primitive array.
+   ldrh    r2, [r2, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
+   orr     r0, r2, r3
+   clz     r0, r0
+   lsrs    r0, r0, #5
+   b       .L${opcode}_resume
 4:
    bl      art_quick_read_barrier_mark_reg01
-   b       1b
+5:
+   EXPORT_PC
+   bl      artInstanceOfFromCode
+   b       .L${opcode}_resume
 
 %def op_iget_boolean():
 %  op_iget(load="ldrb", wide="0", is_object="0")
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 4980007..8ba3e3d 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -2170,64 +2170,6 @@
 NterpGetInstanceField:
   OP_IGET load="movl", wide=0
 
-NterpInstanceOf:
-    /* instance-of vA, vB, class@CCCC */
-   // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE %rsi, 2f
-   cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
-   jne 5f
-1:
-   movzbl  rINSTbl,%edi
-   sarl    $$4,%edi                          # edi<- B
-   GET_VREG %edi %rdi                        # edi<- vB (object)
-   andb    $$0xf,rINSTbl                     # rINST<- A
-   testl %edi, %edi
-   je 3f
-   call art_quick_instance_of
-   SET_VREG %eax, rINSTq            # fp[A] <- value
-4:
-   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-3:
-   SET_VREG %edi, rINSTq            # fp[A] <-0
-   jmp 4b
-2:
-   movq rSELF:THREAD_SELF_OFFSET, %rdi
-   movq 0(%rsp), %rsi
-   movq rPC, %rdx
-   call nterp_get_class_or_allocate_object
-   movq %rax, %rsi
-   jmp 1b
-5:
-   // 06 is %rsi
-   call art_quick_read_barrier_mark_reg06
-   jmp 1b
-
-NterpCheckCast:
-   // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE %rsi, 3f
-   cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
-   jne 4f
-1:
-   GET_VREG %edi, rINSTq
-   testl %edi, %edi
-   je 2f
-   call art_quick_check_instance_of
-2:
-   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-3:
-   movq rSELF:THREAD_SELF_OFFSET, %rdi
-   movq 0(%rsp), %rsi
-   movq rPC, %rdx
-   call nterp_get_class_or_allocate_object
-   movq %rax, %rsi
-   jmp 1b
-4:
-   // 06 is %rsi
-   call art_quick_read_barrier_mark_reg06
-   jmp 1b
-
 NterpHandleHotnessOverflow:
     leaq (rPC, rINSTq, 2), %rsi
     movq rFP, %rdx
diff --git a/runtime/interpreter/mterp/x86_64ng/object.S b/runtime/interpreter/mterp/x86_64ng/object.S
index 2b4e12c..9e1628c 100644
--- a/runtime/interpreter/mterp/x86_64ng/object.S
+++ b/runtime/interpreter/mterp/x86_64ng/object.S
@@ -1,5 +1,63 @@
 %def op_check_cast():
-  jmp NterpCheckCast
+%  slow_path = add_helper(lambda: op_check_cast_slow_path())
+   // Fast-path which gets the class from thread-local cache.
+   FETCH_FROM_THREAD_CACHE %rsi, 2f
+1:
+   GET_VREG %edi, rINSTq
+   testl %edi, %edi
+   je .L${opcode}_resume
+   // Fast path without read barriers.
+   cmpl MIRROR_OBJECT_CLASS_OFFSET(%edi), %esi
+   jne ${slow_path}
+.L${opcode}_resume:
+   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+   EXPORT_PC
+   movq rSELF:THREAD_SELF_OFFSET, %rdi
+   movq 0(%rsp), %rsi
+   movq rPC, %rdx
+   call nterp_get_class_or_allocate_object
+   movq %rax, %rsi
+   jmp 1b
+
+%def op_check_cast_slow_path():
+   testl $$MIRROR_CLASS_IS_INTERFACE_FLAG, MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rsi)
+   jne 2f
+   movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %eax
+   cmpl $$0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rsi)
+   jne 2f
+1:
+   movl MIRROR_CLASS_SUPER_CLASS_OFFSET(%eax), %eax
+   cmpl %eax, %esi
+   je .L${opcode}_resume
+   testl %eax, %eax
+   jne 1b
+2:
+   cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
+   jne 4f
+3:
+   EXPORT_PC
+   call art_quick_check_instance_of
+   jmp .L${opcode}_resume
+4:
+   // 06 is %rsi
+   call art_quick_read_barrier_mark_reg06
+   jmp 3b
+5:
+   movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%eax), %eax
+   // Check if object is an array.
+   testl %eax, %eax
+   je 2b
+   movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%esi), %ecx
+   cmpl $$0, MIRROR_CLASS_SUPER_CLASS_OFFSET(%ecx)
+   jne 2b
+   cmpw $$0, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%ecx)
+   jne 2b
+   // %ecx is Object[]
+   // Check if the object is a primitive array.
+   cmpw $$0, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%eax)
+   je .L${opcode}_resume
+   jmp 2b
 
 %def op_iget_boolean():
    jmp NterpGetBooleanInstanceField
@@ -20,7 +78,80 @@
    jmp NterpGetWideInstanceField
 
 %def op_instance_of():
-   jmp NterpInstanceOf
+%  slow_path = add_helper(lambda: op_instance_of_slow_path())
+    /* instance-of vA, vB, class@CCCC */
+   // Fast-path which gets the class from thread-local cache.
+   FETCH_FROM_THREAD_CACHE %rsi, .L${opcode}_init
+.L${opcode}_start:
+   movzbl  rINSTbl,%edi
+   sarl    $$4,%edi                          # edi<- B
+   GET_VREG %edi %rdi                        # edi<- vB (object)
+   andb    $$0xf,rINSTbl                     # rINST<- A
+   testl %edi, %edi
+   je .L${opcode}_set_vreg
+   // Fast path without read barriers.
+   cmpl MIRROR_OBJECT_CLASS_OFFSET(%edi), %esi
+   jne ${slow_path}
+.L${opcode}_set_one:
+   movl $$1, %edi
+.L${opcode}_set_vreg:
+   SET_VREG %edi, rINSTq
+.L${opcode}_resume:
+   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+
+%def op_instance_of_slow_path():
+   // Go slow path if we are marking. Checking now allows
+   // not going to slow path if the super class hierarchy check fails.
+   cmpq $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
+   jne 4f
+   testl $$MIRROR_CLASS_IS_INTERFACE_FLAG, MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rsi)
+   jne 5f
+   movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %eax
+   cmpl $$0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rsi)
+   jne 3f
+1:
+   movl MIRROR_CLASS_SUPER_CLASS_OFFSET(%eax), %eax
+   cmpl %eax, %esi
+   je .L${opcode}_set_one
+   testl %eax, %eax
+   jne 1b
+2:
+   SET_VREG $$0, rINSTq            # fp[A] <- 0
+   jmp       .L${opcode}_resume
+3:
+   movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%eax), %eax
+   // Check if object is an array.
+   testl %eax, %eax
+   je 2b
+   movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%esi), %ecx
+   cmpl $$0, MIRROR_CLASS_SUPER_CLASS_OFFSET(%ecx)
+   jne 5f
+   cmpw $$0, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%ecx)
+   jne 2b
+   // %ecx is Object[]
+   // Check if the object is a primitive array.
+   xorl %ecx, %ecx
+   cmpw $$0, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%eax)
+   sete %cl
+   SET_VREG %ecx, rINSTq
+   jmp .L${opcode}_resume
+4:
+   // 06 is %rsi
+   call art_quick_read_barrier_mark_reg06
+5:
+   EXPORT_PC
+   call artInstanceOfFromCode
+   SET_VREG %eax, rINSTq            # fp[A] <- value
+   jmp .L${opcode}_resume
+
+.L${opcode}_init:
+   EXPORT_PC
+   movq rSELF:THREAD_SELF_OFFSET, %rdi
+   movq 0(%rsp), %rsi
+   movq rPC, %rdx
+   call nterp_get_class_or_allocate_object
+   movq %rax, %rsi
+   jmp .L${opcode}_start
 
 %def op_iget():
    jmp NterpGetInstanceField
diff --git a/runtime/interpreter/mterp/x86ng/main.S b/runtime/interpreter/mterp/x86ng/main.S
index 7cc4126..b785f24 100644
--- a/runtime/interpreter/mterp/x86ng/main.S
+++ b/runtime/interpreter/mterp/x86ng/main.S
@@ -2233,65 +2233,6 @@
 NterpGetInstanceField:
   OP_IGET load="movl", wide=0
 
-NterpInstanceOf:
-   /* instance-of vA, vB, class@CCCC */
-   // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE %ecx, 2f
-   cmpl $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
-   jne 5f
-1:
-   movzbl  rINSTbl, %eax
-   sarl    $$4,%eax                          # eax<- B
-   GET_VREG %eax %eax                        # eax<- vB (object)
-   testl %eax, %eax
-   je 3f
-   call art_quick_instance_of
-   RESTORE_IBASE
-   FETCH_INST_CLEAR_OPCODE
-3:
-   andb    $$0xf,rINSTbl                     # rINST<- A
-   SET_VREG %eax, rINST                      # fp[A] <- value
-4:
-   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-2:
-   movl rSELF:THREAD_SELF_OFFSET, ARG0
-   movl 0(%esp), ARG1
-   movl rPC, ARG2
-   call nterp_get_class_or_allocate_object
-   movl %eax, %ecx
-   jmp 1b
-5:
-   // 01 is %ecx
-   call art_quick_read_barrier_mark_reg01
-   jmp 1b
-
-NterpCheckCast:
-   // Fast-path which gets the class from thread-local cache.
-   EXPORT_PC
-   FETCH_FROM_THREAD_CACHE %ecx, 3f
-   cmpl $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
-   jne 4f
-1:
-   GET_VREG %eax, rINST
-   testl %eax, %eax
-   je 2f
-   call art_quick_check_instance_of
-   RESTORE_IBASE
-2:
-   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
-3:
-   movl rSELF:THREAD_SELF_OFFSET, ARG0
-   movl 0(%esp), ARG1
-   movl rPC, ARG2
-   call nterp_get_class_or_allocate_object
-   movl %eax, %ecx
-   jmp 1b
-4:
-   // 01 is %ecx
-   call art_quick_read_barrier_mark_reg01
-   jmp 1b
-
 NterpCallSuspend:
     EXPORT_PC
     // Save branch offset.
diff --git a/runtime/interpreter/mterp/x86ng/object.S b/runtime/interpreter/mterp/x86ng/object.S
index 31c3fc3..395733f 100644
--- a/runtime/interpreter/mterp/x86ng/object.S
+++ b/runtime/interpreter/mterp/x86ng/object.S
@@ -1,8 +1,91 @@
 %def op_check_cast():
-  jmp NterpCheckCast
+%  slow_path = add_helper(lambda: op_check_cast_slow_path())
+   // Fast-path which gets the class from thread-local cache.
+   FETCH_FROM_THREAD_CACHE %ecx, 3f
+1:
+   GET_VREG %eax, rINST
+   testl %eax, %eax
+   je .L${opcode}_resume
+   // Fast path without read barriers.
+   cmpl MIRROR_OBJECT_CLASS_OFFSET(%eax), %ecx
+   jne ${slow_path}
+.L${opcode}_resume:
+   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+3:
+   EXPORT_PC
+   movl rSELF:THREAD_SELF_OFFSET, ARG0
+   movl 0(%esp), ARG1
+   movl rPC, ARG2
+   call nterp_get_class_or_allocate_object
+   movl %eax, %ecx
+   jmp 1b
+
+%def op_check_cast_slow_path():
+   cmpl $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
+   jne 2f
+1:
+   EXPORT_PC
+   call art_quick_check_instance_of
+   RESTORE_IBASE
+   jmp .L${opcode}_resume
+2:
+   // 01 is %ecx
+   call art_quick_read_barrier_mark_reg01
+   jmp 1b
 
 %def op_instance_of():
-   jmp NterpInstanceOf
+%  slow_path = add_helper(lambda: op_instance_of_slow_path())
+   /* instance-of vA, vB, class@CCCC */
+   // Fast-path which gets the class from thread-local cache.
+   FETCH_FROM_THREAD_CACHE %ecx, 2f
+1:
+   movzbl  rINSTbl, %eax
+   sarl    $$4,%eax                          # eax<- B
+   GET_VREG %eax %eax                        # eax<- vB (object)
+   testl %eax, %eax
+   je .L${opcode}_resume
+   // Fast path without read barriers.
+   cmpl MIRROR_OBJECT_CLASS_OFFSET(%eax), %ecx
+   jne ${slow_path}
+.L${opcode}_set_one:
+   movl $$1, %eax
+.L${opcode}_resume:
+   andb    $$0xf,rINSTbl                     # rINST<- A
+   SET_VREG %eax, rINST                      # fp[A] <- value
+   ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
+2:
+   EXPORT_PC
+   movl rSELF:THREAD_SELF_OFFSET, ARG0
+   movl 0(%esp), ARG1
+   movl rPC, ARG2
+   call nterp_get_class_or_allocate_object
+   movl %eax, %ecx
+   jmp 1b
+
+%def op_instance_of_slow_path():
+   cmpl $$0, rSELF:THREAD_READ_BARRIER_MARK_REG00_OFFSET
+   jne 2f
+   testl $$MIRROR_CLASS_IS_INTERFACE_FLAG, MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%ecx)
+   jne 3f
+   cmpl $$0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ecx)
+   jne 3f
+   movl MIRROR_OBJECT_CLASS_OFFSET(%eax), %eax
+1:
+   movl MIRROR_CLASS_SUPER_CLASS_OFFSET(%eax), %eax
+   cmpl %eax, %ecx
+   je .L${opcode}_set_one
+   testl %eax, %eax
+   jne 1b
+   jmp .L${opcode}_resume
+2:
+   // 01 is %ecx
+   call art_quick_read_barrier_mark_reg01
+3:
+   EXPORT_PC
+   call art_quick_instance_of
+   RESTORE_IBASE
+   FETCH_INST_CLEAR_OPCODE
+   jmp .L${opcode}_resume
 
 %def op_iget_boolean():
    jmp NterpGetBooleanInstanceField
diff --git a/tools/cpp-define-generator/mirror_class.def b/tools/cpp-define-generator/mirror_class.def
index 6df6c41..8cfd54e 100644
--- a/tools/cpp-define-generator/mirror_class.def
+++ b/tools/cpp-define-generator/mirror_class.def
@@ -44,3 +44,8 @@
            art::mirror::Class::ImtPtrOffset(art::PointerSize::k32).Int32Value())
 ASM_DEFINE(MIRROR_CLASS_IMT_PTR_OFFSET_64,
            art::mirror::Class::ImtPtrOffset(art::PointerSize::k64).Int32Value())
+ASM_DEFINE(MIRROR_CLASS_SUPER_CLASS_OFFSET,
+           art::mirror::Class::SuperClassOffset().Int32Value())
+ASM_DEFINE(MIRROR_CLASS_IS_INTERFACE_FLAG, art::kAccInterface)
+ASM_DEFINE(MIRROR_CLASS_IS_INTERFACE_FLAG_BIT,
+           art::WhichPowerOf2(art::kAccInterface))