Faster class retrieval in nterp.

Split `NterpGetClassOrAllocateObject` into two separate functions,
`NterpGetClass` and `NterpAllocateObject`, to avoid a slow `switch`
and an `if`.

Also deduplicate the pending-exception delivery code in the nterp
trampolines by sharing a single `nterp_deliver_pending_exception`.

Test: testrunner.py --host --interpreter --jit
Test: testrunner.py --target --interpreter --jit
Change-Id: I927af8f534c43d0bd760f555c0de1dd220f7df93
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index bfec8c0..a2aa686 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -485,11 +485,10 @@
 END_MACRO
 
 MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
-    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
-    testq %rcx, %rcx               // rcx == 0 ?
-    jnz 1f                         // if rcx != 0 goto 1
-    ret                            // return
-1:                                 // deliver exception on current thread
+    cmpq MACRO_LITERAL(0), %gs:THREAD_EXCEPTION_OFFSET  // compare exception field with 0
+    jne 1f                                              // if exception != 0 goto 1
+    ret                                                 // return
+1:                                                      // deliver exception on current thread
     DELIVER_PENDING_EXCEPTION
 END_MACRO
 
diff --git a/runtime/interpreter/mterp/arm64ng/array.S b/runtime/interpreter/mterp/arm64ng/array.S
index 6863662..9edbb22 100644
--- a/runtime/interpreter/mterp/arm64ng/array.S
+++ b/runtime/interpreter/mterp/arm64ng/array.S
@@ -179,7 +179,7 @@
    mov x0, xSELF
    ldr x1, [sp, 0]
    mov x2, xPC
-   bl nterp_get_class_or_allocate_object
+   bl nterp_get_class
    b 1b
 3:
    bl art_quick_read_barrier_mark_reg00
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index 0ee0435..99816c6 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -273,7 +273,9 @@
   bl \helper
   RESTORE_SAVE_REFS_ONLY_FRAME
   REFRESH_MARKING_REGISTER
-  RETURN_OR_DELIVER_PENDING_EXCEPTION
+  ldr xIP0, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
+  cbnz xIP0, nterp_deliver_pending_exception
+  ret
 END \name
 .endm
 
@@ -1910,11 +1912,16 @@
 NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
 NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
 NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
-NTERP_TRAMPOLINE nterp_get_class_or_allocate_object, NterpGetClassOrAllocateObject
+NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
+NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
 NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
 NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
 NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
 
+ENTRY nterp_deliver_pending_exception
+    DELIVER_PENDING_EXCEPTION
+END nterp_deliver_pending_exception
+
 // gen_mterp.py will inline the following definitions
 // within [ExecuteNterpImpl, EndExecuteNterpImpl).
 %def instruction_end():
diff --git a/runtime/interpreter/mterp/arm64ng/object.S b/runtime/interpreter/mterp/arm64ng/object.S
index df044d9..b843fcd 100644
--- a/runtime/interpreter/mterp/arm64ng/object.S
+++ b/runtime/interpreter/mterp/arm64ng/object.S
@@ -19,7 +19,7 @@
    mov     x0, xSELF
    ldr     x1, [sp]
    mov     x2, xPC
-   bl      nterp_get_class_or_allocate_object
+   bl      nterp_get_class
    mov     x1, x0
    b       1b
 
@@ -90,7 +90,7 @@
    mov     x0, xSELF
    ldr     x1, [sp]
    mov     x2, xPC
-   bl      nterp_get_class_or_allocate_object
+   bl      nterp_get_class
    mov     x1, x0
    b       1b
 
@@ -476,7 +476,7 @@
    mov     x0, xSELF
    ldr     x1, [sp]
    mov     x2, xPC
-   bl      nterp_get_class_or_allocate_object
+   bl      nterp_allocate_object
    b       1b
 3:
    bl      art_quick_read_barrier_mark_reg00
diff --git a/runtime/interpreter/mterp/arm64ng/other.S b/runtime/interpreter/mterp/arm64ng/other.S
index 1feafd5..3470ee8 100644
--- a/runtime/interpreter/mterp/arm64ng/other.S
+++ b/runtime/interpreter/mterp/arm64ng/other.S
@@ -66,7 +66,7 @@
    b 1b
 
 %def op_const_class():
-%  op_const_object(jumbo="0", helper="nterp_get_class_or_allocate_object")
+%  op_const_object(jumbo="0", helper="nterp_get_class")
 
 %def op_const_method_handle():
 %  op_const_object(jumbo="0")
diff --git a/runtime/interpreter/mterp/armng/array.S b/runtime/interpreter/mterp/armng/array.S
index 4ab418c..49c4e38 100644
--- a/runtime/interpreter/mterp/armng/array.S
+++ b/runtime/interpreter/mterp/armng/array.S
@@ -186,7 +186,7 @@
    mov r0, rSELF
    ldr r1, [sp]
    mov r2, rPC
-   bl nterp_get_class_or_allocate_object
+   bl nterp_get_class
    b 1b
 3:
    bl art_quick_read_barrier_mark_reg00
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
index 39cf537..9b739b0 100644
--- a/runtime/interpreter/mterp/armng/main.S
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -284,7 +284,10 @@
   bl \helper
   RESTORE_SAVE_REFS_ONLY_FRAME
   REFRESH_MARKING_REGISTER
-  RETURN_OR_DELIVER_PENDING_EXCEPTION
+  ldr ip, [rSELF, #THREAD_EXCEPTION_OFFSET]  @ Get exception field.
+  cmp ip, #0
+  bne nterp_deliver_pending_exception
+  bx lr
 END \name
 .endm
 
@@ -1991,11 +1994,16 @@
 NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
 NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
 NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
-NTERP_TRAMPOLINE nterp_get_class_or_allocate_object, NterpGetClassOrAllocateObject
+NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
+NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
 NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
 NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
 NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
 
+ENTRY nterp_deliver_pending_exception
+    DELIVER_PENDING_EXCEPTION
+END nterp_deliver_pending_exception
+
 // gen_mterp.py will inline the following definitions
 // within [ExecuteNterpImpl, EndExecuteNterpImpl).
 %def instruction_end():
diff --git a/runtime/interpreter/mterp/armng/object.S b/runtime/interpreter/mterp/armng/object.S
index 7deffaf..984565c 100644
--- a/runtime/interpreter/mterp/armng/object.S
+++ b/runtime/interpreter/mterp/armng/object.S
@@ -20,7 +20,7 @@
    mov     r0, rSELF
    ldr     r1, [sp]
    mov     r2, rPC
-   bl      nterp_get_class_or_allocate_object
+   bl      nterp_get_class
    mov     r1, r0
    b       1b
 
@@ -91,7 +91,7 @@
    mov     r0, rSELF
    ldr     r1, [sp]
    mov     r2, rPC
-   bl      nterp_get_class_or_allocate_object
+   bl      nterp_get_class
    mov     r1, r0
    b       1b
 
@@ -523,7 +523,7 @@
    mov     r0, rSELF
    ldr     r1, [sp]
    mov     r2, rPC
-   bl      nterp_get_class_or_allocate_object
+   bl      nterp_allocate_object
    b       1b
 3:
    bl      art_quick_read_barrier_mark_reg00
diff --git a/runtime/interpreter/mterp/armng/other.S b/runtime/interpreter/mterp/armng/other.S
index 3376808..7dfed62 100644
--- a/runtime/interpreter/mterp/armng/other.S
+++ b/runtime/interpreter/mterp/armng/other.S
@@ -67,7 +67,7 @@
    b 1b
 
 %def op_const_class():
-%  op_const_object(jumbo="0", helper="nterp_get_class_or_allocate_object")
+%  op_const_object(jumbo="0", helper="nterp_get_class")
 
 %def op_const_method_handle():
 %  op_const_object(jumbo="0")
diff --git a/runtime/interpreter/mterp/nterp.cc b/runtime/interpreter/mterp/nterp.cc
index 753dcc1..2498319 100644
--- a/runtime/interpreter/mterp/nterp.cc
+++ b/runtime/interpreter/mterp/nterp.cc
@@ -453,63 +453,71 @@
   return resolved_field->GetOffset().Uint32Value();
 }
 
-extern "C" mirror::Object* NterpGetClassOrAllocateObject(Thread* self,
-                                                         ArtMethod* caller,
-                                                         uint16_t* dex_pc_ptr)
+extern "C" mirror::Object* NterpGetClass(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   UpdateHotness(caller);
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  dex::TypeIndex index;
-  switch (inst->Opcode()) {
-    case Instruction::NEW_INSTANCE:
-      index = dex::TypeIndex(inst->VRegB_21c());
-      break;
-    case Instruction::CHECK_CAST:
-      index = dex::TypeIndex(inst->VRegB_21c());
-      break;
-    case Instruction::INSTANCE_OF:
-      index = dex::TypeIndex(inst->VRegC_22c());
-      break;
-    case Instruction::CONST_CLASS:
-      index = dex::TypeIndex(inst->VRegB_21c());
-      break;
-    case Instruction::NEW_ARRAY:
-      index = dex::TypeIndex(inst->VRegC_22c());
-      break;
-    default:
-      LOG(FATAL) << "Unreachable";
-  }
+  Instruction::Code opcode = inst->Opcode();
+  DCHECK(opcode == Instruction::CHECK_CAST ||
+         opcode == Instruction::INSTANCE_OF ||
+         opcode == Instruction::CONST_CLASS ||
+         opcode == Instruction::NEW_ARRAY);
+
+  // In release mode, this is just a simple load.
+  // In debug mode, this checks that we're using the correct instruction format.
+  dex::TypeIndex index = dex::TypeIndex(
+      (opcode == Instruction::CHECK_CAST || opcode == Instruction::CONST_CLASS)
+          ? inst->VRegB_21c()
+          : inst->VRegC_22c());
+
   ObjPtr<mirror::Class> c =
       ResolveVerifyAndClinit(index,
                              caller,
                              self,
                              /* can_run_clinit= */ false,
                              /* verify_access= */ !caller->SkipAccessChecks());
-  if (c == nullptr) {
+  if (UNLIKELY(c == nullptr)) {
     DCHECK(self->IsExceptionPending());
     return nullptr;
   }
 
-  if (inst->Opcode() == Instruction::NEW_INSTANCE) {
-    gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-    if (UNLIKELY(c->IsStringClass())) {
-      // We don't cache the class for strings as we need to special case their
-      // allocation.
-      return mirror::String::AllocEmptyString(self, allocator_type).Ptr();
-    } else {
-      if (!c->IsFinalizable() && c->IsInstantiable()) {
-        // Cache non-finalizable classes for next calls.
-        UpdateCache(self, dex_pc_ptr, c.Ptr());
-      }
-      return AllocObjectFromCode(c, self, allocator_type).Ptr();
-    }
-  } else {
-    // For all other cases, cache the class.
-    UpdateCache(self, dex_pc_ptr, c.Ptr());
-  }
+  UpdateCache(self, dex_pc_ptr, c.Ptr());
   return c.Ptr();
 }
 
+extern "C" mirror::Object* NterpAllocateObject(Thread* self,
+                                               ArtMethod* caller,
+                                               uint16_t* dex_pc_ptr)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  UpdateHotness(caller);
+  const Instruction* inst = Instruction::At(dex_pc_ptr);
+  DCHECK_EQ(inst->Opcode(), Instruction::NEW_INSTANCE);
+  dex::TypeIndex index = dex::TypeIndex(inst->VRegB_21c());
+  ObjPtr<mirror::Class> c =
+      ResolveVerifyAndClinit(index,
+                             caller,
+                             self,
+                             /* can_run_clinit= */ false,
+                             /* verify_access= */ !caller->SkipAccessChecks());
+  if (UNLIKELY(c == nullptr)) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
+  gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
+  if (UNLIKELY(c->IsStringClass())) {
+    // We don't cache the class for strings as we need to special case their
+    // allocation.
+    return mirror::String::AllocEmptyString(self, allocator_type).Ptr();
+  } else {
+    if (!c->IsFinalizable() && c->IsInstantiable()) {
+      // Cache non-finalizable classes for next calls.
+      UpdateCache(self, dex_pc_ptr, c.Ptr());
+    }
+    return AllocObjectFromCode(c, self, allocator_type).Ptr();
+  }
+}
+
 extern "C" mirror::Object* NterpLoadObject(Thread* self, ArtMethod* caller, uint16_t* dex_pc_ptr)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 6fc1777..a1890a1 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -237,7 +237,9 @@
   SETUP_SAVE_REFS_ONLY_FRAME
   call \helper
   RESTORE_SAVE_REFS_ONLY_FRAME
-  RETURN_OR_DELIVER_PENDING_EXCEPTION
+  cmpq LITERAL(0), %gs:THREAD_EXCEPTION_OFFSET
+  jne nterp_deliver_pending_exception
+  ret
 END_FUNCTION \name
 .endm
 
@@ -1938,7 +1940,7 @@
    movq rSELF:THREAD_SELF_OFFSET, %rdi
    movq 0(%rsp), %rsi
    movq rPC, %rdx
-   call nterp_get_class_or_allocate_object
+   call nterp_allocate_object
    jmp 1b
 3:
    // 07 is %rdi
@@ -1964,7 +1966,7 @@
    movq rSELF:THREAD_SELF_OFFSET, %rdi
    movq 0(%rsp), %rsi
    movq rPC, %rdx
-   call nterp_get_class_or_allocate_object
+   call nterp_get_class
    movq %rax, %rdi
    jmp 1b
 3:
@@ -2310,11 +2312,16 @@
 NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
 NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
 NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
-NTERP_TRAMPOLINE nterp_get_class_or_allocate_object, NterpGetClassOrAllocateObject
+NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
+NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
 NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
 NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
 NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
 
+DEFINE_FUNCTION nterp_deliver_pending_exception
+    DELIVER_PENDING_EXCEPTION
+END_FUNCTION nterp_deliver_pending_exception
+
 // gen_mterp.py will inline the following definitions
 // within [ExecuteNterpImpl, EndExecuteNterpImpl).
 %def instruction_end():
diff --git a/runtime/interpreter/mterp/x86_64ng/object.S b/runtime/interpreter/mterp/x86_64ng/object.S
index 140ea75..21a6e67 100644
--- a/runtime/interpreter/mterp/x86_64ng/object.S
+++ b/runtime/interpreter/mterp/x86_64ng/object.S
@@ -16,7 +16,7 @@
    movq rSELF:THREAD_SELF_OFFSET, %rdi
    movq 0(%rsp), %rsi
    movq rPC, %rdx
-   call nterp_get_class_or_allocate_object
+   call nterp_get_class
    movq %rax, %rsi
    jmp 1b
 
@@ -149,7 +149,7 @@
    movq rSELF:THREAD_SELF_OFFSET, %rdi
    movq 0(%rsp), %rsi
    movq rPC, %rdx
-   call nterp_get_class_or_allocate_object
+   call nterp_get_class
    movq %rax, %rsi
    jmp .L${opcode}_start
 
diff --git a/runtime/interpreter/mterp/x86_64ng/other.S b/runtime/interpreter/mterp/x86_64ng/other.S
index a72ee58..f789086 100644
--- a/runtime/interpreter/mterp/x86_64ng/other.S
+++ b/runtime/interpreter/mterp/x86_64ng/other.S
@@ -53,7 +53,7 @@
    jmp 1b
 
 %def op_const_class():
-%  op_const_object(jumbo="0", helper="nterp_get_class_or_allocate_object")
+%  op_const_object(jumbo="0", helper="nterp_get_class")
 
 %def op_const_method_handle():
 %  op_const_object(jumbo="0")
diff --git a/runtime/interpreter/mterp/x86ng/main.S b/runtime/interpreter/mterp/x86ng/main.S
index 42e2d18..5b0edd4 100644
--- a/runtime/interpreter/mterp/x86ng/main.S
+++ b/runtime/interpreter/mterp/x86ng/main.S
@@ -275,7 +275,9 @@
   RESTORE_IBASE
   FETCH_INST_CLEAR_OPCODE
   RESTORE_SAVE_REFS_ONLY_FRAME
-  RETURN_OR_DELIVER_PENDING_EXCEPTION
+  cmpl LITERAL(0), %fs:THREAD_EXCEPTION_OFFSET
+  jne nterp_deliver_pending_exception
+  ret
 END_FUNCTION \name
 .endm
 
@@ -2001,7 +2003,7 @@
    movl rSELF:THREAD_SELF_OFFSET, ARG0
    movl 0(%esp), ARG1
    movl rPC, ARG2
-   call nterp_get_class_or_allocate_object
+   call nterp_allocate_object
    jmp 1b
 3:
    // 00 is %eax
@@ -2029,7 +2031,7 @@
    movl rSELF:THREAD_SELF_OFFSET, ARG0
    movl 0(%esp), ARG1
    movl rPC, ARG2
-   call nterp_get_class_or_allocate_object
+   call nterp_get_class
    jmp 1b
 3:
    // 00 is %eax
@@ -2360,11 +2362,16 @@
 NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
 NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
 NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
-NTERP_TRAMPOLINE nterp_get_class_or_allocate_object, NterpGetClassOrAllocateObject
+NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
+NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
 NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
 NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
 NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
 
+DEFINE_FUNCTION nterp_deliver_pending_exception
+    DELIVER_PENDING_EXCEPTION
+END_FUNCTION nterp_deliver_pending_exception
+
 // gen_mterp.py will inline the following definitions
 // within [ExecuteNterpImpl, EndExecuteNterpImpl).
 %def instruction_end():
diff --git a/runtime/interpreter/mterp/x86ng/object.S b/runtime/interpreter/mterp/x86ng/object.S
index 1d11e10..39091ce 100644
--- a/runtime/interpreter/mterp/x86ng/object.S
+++ b/runtime/interpreter/mterp/x86ng/object.S
@@ -16,7 +16,7 @@
    movl rSELF:THREAD_SELF_OFFSET, ARG0
    movl 0(%esp), ARG1
    movl rPC, ARG2
-   call nterp_get_class_or_allocate_object
+   call nterp_get_class
    movl %eax, %ecx
    jmp 1b
 
@@ -58,7 +58,7 @@
    movl rSELF:THREAD_SELF_OFFSET, ARG0
    movl 0(%esp), ARG1
    movl rPC, ARG2
-   call nterp_get_class_or_allocate_object
+   call nterp_get_class
    movl %eax, %ecx
    jmp 1b
 
diff --git a/runtime/interpreter/mterp/x86ng/other.S b/runtime/interpreter/mterp/x86ng/other.S
index 4cf982c..6dd1ce3 100644
--- a/runtime/interpreter/mterp/x86ng/other.S
+++ b/runtime/interpreter/mterp/x86ng/other.S
@@ -53,7 +53,7 @@
    jmp 1b
 
 %def op_const_class():
-%  op_const_object(jumbo="0", helper="nterp_get_class_or_allocate_object")
+%  op_const_object(jumbo="0", helper="nterp_get_class")
 
 %def op_const_method_handle():
 %  op_const_object(jumbo="0")