Add a fast path for nterp entrypoint to avoid fetching the shorty.

When all parameters of a method are references, the nterp entrypoint does
not need to look at the shorty to set up the arguments. Record this with
the 0x00100000 access flag (kAccNterpEntryPointFastPathFlag), which is
free for non-native methods.

Test: test.py
Bug: 112676029
Change-Id: Ied9a253f7f7230045dd13188a5b806fb1d6d019d
diff --git a/libdexfile/dex/modifiers.h b/libdexfile/dex/modifiers.h
index 7b15cca..5fad46c 100644
--- a/libdexfile/dex/modifiers.h
+++ b/libdexfile/dex/modifiers.h
@@ -97,6 +97,9 @@
 // virtual call.
 static constexpr uint32_t kAccSingleImplementation =  0x08000000;  // method (runtime)
 
+// Whether nterp can take a fast path when entering this method (runtime; non-native)
+static constexpr uint32_t kAccNterpEntryPointFastPathFlag = 0x00100000;
+
 static constexpr uint32_t kAccPublicApi =             0x10000000;  // field, method
 static constexpr uint32_t kAccCorePlatformApi =       0x20000000;  // field, method
 
diff --git a/runtime/art_method.h b/runtime/art_method.h
index b2c211f..412da07 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -394,6 +394,16 @@
     ClearAccessFlags(kAccSkipAccessChecks);
   }
 
+  bool HasNterpEntryPointFastPathFlag() const {  // True iff the bit is set and kAccNative is not.
+    constexpr uint32_t mask = kAccNative | kAccNterpEntryPointFastPathFlag;
+    return (GetAccessFlags() & mask) == kAccNterpEntryPointFastPathFlag;
+  }
+
+  void SetNterpEntryPointFastPathFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(!IsNative());  // Per modifiers.h the bit is only for non-native methods.
+    AddAccessFlags(kAccNterpEntryPointFastPathFlag);
+  }
+
   // Returns true if this method could be overridden by a default method.
   bool IsOverridableByDefaultMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index e894037..2040263 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3838,6 +3838,17 @@
     } else {
       dst->SetCodeItem(dst->GetDexFile()->GetCodeItem(method.GetCodeItemOffset()));
     }
+    bool has_all_references = true;
+    const char* shorty = dst->GetShorty();
+    for (size_t i = 1, e = strlen(shorty); i < e; ++i) {
+      if (shorty[i] != 'L') {
+        has_all_references = false;
+        break;
+      }
+    }
+    if (has_all_references) {
+      dst->SetNterpEntryPointFastPathFlag();
+    }
   } else {
     dst->SetDataPtrSize(nullptr, image_pointer_size_);
     DCHECK_EQ(method.GetCodeItemOffset(), 0u);
diff --git a/runtime/image.cc b/runtime/image.cc
index 788c981..7c4cc39 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,8 +29,8 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-// Last change: Remove kAccMiranda, kAccDefaultConflict.
-const uint8_t ImageHeader::kImageVersion[] = { '0', '9', '7', '\0' };
+// Last change: Add kAccNterpEntryPointFastPathFlag.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '9', '8', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_reservation_size,
                          uint32_t component_count,
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index 3ba175d..27c4b7c 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -1438,6 +1438,25 @@
     b 1b
 .endm
 
+.macro SETUP_REFERENCE_PARAMETER_IN_GPR gpr32, regs, refs, ins, arg_offset, finished
+    str \gpr32, [\regs, \arg_offset]    // Store gpr32 at regs + arg_offset.
+    sub \ins, \ins, #1                  // One fewer input left to set up.
+    str \gpr32, [\refs, \arg_offset]    // Store the same value at refs + arg_offset.
+    add \arg_offset, \arg_offset, #4    // Step to the next 4-byte slot.
+    cbz \ins, \finished                 // No inputs left: branch to the finished label.
+.endm
+
+// Uses ip2 as temporary. The loop body runs before ins is tested, so ins must be non-zero.
+.macro SETUP_REFERENCE_PARAMETERS_IN_STACK regs, refs, ins, stack_ptr, arg_offset
+1:
+    ldr wip2, [\stack_ptr, \arg_offset]  // Load the next argument from stack_ptr + arg_offset.
+    sub \ins, \ins, #1                   // One fewer input left to copy.
+    str wip2, [\regs, \arg_offset]       // Store it at regs + arg_offset...
+    str wip2, [\refs, \arg_offset]       // ...and at refs + arg_offset.
+    add \arg_offset, \arg_offset, #4     // Step to the next 4-byte slot.
+    cbnz \ins, 1b                        // Loop while inputs remain.
+.endm
+
 %def entry():
 /*
  * ArtMethod entry point.
@@ -1465,6 +1484,23 @@
     ldr w26, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
     lsl x21, ip2, #2 // x21 is now the offset for inputs into the registers array.
 
+    tbz w26, #ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT, .Lsetup_slow_path
+    // Setup pointer to inputs in FP and pointer to inputs in REFS
+    add x10, xFP, x21
+    add x11, xREFS, x21
+    mov x12, #0
+    SETUP_REFERENCE_PARAMETER_IN_GPR w1, x10, x11, w15, x12, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR w2, x10, x11, w15, x12, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR w3, x10, x11, w15, x12, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR w4, x10, x11, w15, x12, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR w5, x10, x11, w15, x12, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR w6, x10, x11, w15, x12, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR w7, x10, x11, w15, x12, .Lxmm_setup_finished
+    add x28, x28, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK
+    SETUP_REFERENCE_PARAMETERS_IN_STACK x10, x11, w15, x28, x12
+    b .Lxmm_setup_finished
+
+.Lsetup_slow_path:
     // If the method is not static and there is one argument ('this'), we don't need to fetch the
     // shorty.
     tbnz w26, #ART_METHOD_IS_STATIC_FLAG_BIT, .Lsetup_with_shorty
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
index 4b0a70f..d7f1bf8 100644
--- a/runtime/interpreter/mterp/armng/main.S
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -1472,6 +1472,24 @@
     b 1b
 .endm
 
+.macro SETUP_REFERENCE_PARAMETER_IN_GPR gpr32, regs, refs, ins, arg_offset, finished
+    str \gpr32, [\regs, \arg_offset]    // Store gpr32 at regs + arg_offset.
+    subs \ins, \ins, #1                 // Decrement ins and set the flags for the beq below.
+    str \gpr32, [\refs, \arg_offset]    // Store the same value at refs + arg_offset.
+    add \arg_offset, \arg_offset, #4    // Step to the next 4-byte slot.
+    beq \finished                       // ins reached zero: branch to the finished label.
+.endm
+
+.macro SETUP_REFERENCE_PARAMETERS_IN_STACK regs, refs, ins, stack_ptr, arg_offset
+1:
+    ldr ip, [\stack_ptr, \arg_offset]   // Load the next argument; ip is used as temporary.
+    subs \ins, \ins, #1                 // Decrement ins and set the flags for the bne below.
+    str ip, [\regs, \arg_offset]        // Store it at regs + arg_offset...
+    str ip, [\refs, \arg_offset]        // ...and at refs + arg_offset.
+    add \arg_offset, \arg_offset, #4    // Step to the next 4-byte slot.
+    bne 1b                              // Loop while inputs remain (ins must start non-zero).
+.endm
+
 %def entry():
 /*
  * ArtMethod entry point.
@@ -1502,6 +1520,20 @@
     lsl rINST, rINST, #2 // rINST is now the offset for inputs into the registers array.
     mov rIBASE, ip // rIBASE contains the old stack pointer
 
+    tst r8, #ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG
+    beq .Lsetup_slow_path
+    // Setup pointer to inputs in FP and pointer to inputs in REFS
+    add lr, rFP, rINST
+    add r8, rREFS, rINST
+    mov r0, #0
+    SETUP_REFERENCE_PARAMETER_IN_GPR r1, lr, r8, r4, r0, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR r2, lr, r8, r4, r0, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR r3, lr, r8, r4, r0, .Lxmm_setup_finished
+    add rIBASE, rIBASE, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK
+    SETUP_REFERENCE_PARAMETERS_IN_STACK lr, r8, r4, rIBASE, r0
+    b .Lxmm_setup_finished
+
+.Lsetup_slow_path:
     // If the method is not static and there is one argument ('this'), we don't need to fetch the
     // shorty.
     tst r8, #ART_METHOD_IS_STATIC_FLAG
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 21214bb..0089429 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -1553,6 +1553,25 @@
    jmp 1b
 .endm
 
+.macro SETUP_REFERENCE_PARAMETER_IN_GPR gpr32, regs, refs, ins, arg_offset, finished
+    movl REG_VAR(gpr32), (REG_VAR(regs), REG_VAR(arg_offset))  // Store at regs + arg_offset.
+    movl REG_VAR(gpr32), (REG_VAR(refs), REG_VAR(arg_offset))  // Same value at refs + arg_offset.
+    addq MACRO_LITERAL(4), REG_VAR(arg_offset)  // Step to the next 4-byte slot.
+    subl MACRO_LITERAL(1), REG_VAR(ins)  // One fewer input left; sets ZF for the je below.
+    je \finished  // ins reached zero: jump to the finished label.
+.endm
+
+// Uses eax as temporary. The loop body runs before ins is tested, so ins must be non-zero.
+.macro SETUP_REFERENCE_PARAMETERS_IN_STACK regs, refs, ins, stack_ptr, arg_offset
+1:
+    movl OFFSET_TO_FIRST_ARGUMENT_IN_STACK(REG_VAR(stack_ptr), REG_VAR(arg_offset)), %eax
+    movl %eax, (REG_VAR(regs), REG_VAR(arg_offset))  // Store the loaded argument at regs...
+    movl %eax, (REG_VAR(refs), REG_VAR(arg_offset))  // ...and at refs, same offset.
+    addq MACRO_LITERAL(4), REG_VAR(arg_offset)  // Step to the next 4-byte slot.
+    subl MACRO_LITERAL(1), REG_VAR(ins)  // One fewer input left; sets ZF for the jne below.
+    jne 1b  // Loop while inputs remain.
+.endm
+
 %def entry():
 /*
  * ArtMethod entry point.
@@ -1581,6 +1600,21 @@
     subq %r14, %rbx
     salq $$2, %rbx // rbx is now the offset for inputs into the registers array.
 
+    testl $$ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
+    je .Lsetup_slow_path
+    leaq (rFP, %rbx, 1), %rdi
+    leaq (rREFS, %rbx, 1), %rbx
+    movq $$0, %r10
+
+    SETUP_REFERENCE_PARAMETER_IN_GPR esi, rdi, rbx, r14d, r10, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR edx, rdi, rbx, r14d, r10, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR ecx, rdi, rbx, r14d, r10, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR r8d, rdi, rbx, r14d, r10, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETER_IN_GPR r9d, rdi, rbx, r14d, r10, .Lxmm_setup_finished
+    SETUP_REFERENCE_PARAMETERS_IN_STACK rdi, rbx, r14d, r11, r10
+    jmp .Lxmm_setup_finished
+
+.Lsetup_slow_path:
     // If the method is not static and there is one argument ('this'), we don't need to fetch the
     // shorty.
     testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
diff --git a/tools/cpp-define-generator/art_method.def b/tools/cpp-define-generator/art_method.def
index c2e18b1..5e09565 100644
--- a/tools/cpp-define-generator/art_method.def
+++ b/tools/cpp-define-generator/art_method.def
@@ -23,10 +23,14 @@
            art::ArtMethod::AccessFlagsOffset().Int32Value())
 ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG,
            art::kAccStatic)
+ASM_DEFINE(ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG,
+           art::kAccNterpEntryPointFastPathFlag)
 ASM_DEFINE(ART_METHOD_IMT_MASK,
            art::ImTable::kSizeTruncToPowerOfTwo - 1)
 ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG_BIT,
            art::MostSignificantBit(art::kAccStatic))
+ASM_DEFINE(ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT,
+           art::MostSignificantBit(art::kAccNterpEntryPointFastPathFlag))
 ASM_DEFINE(ART_METHOD_DECLARING_CLASS_OFFSET,
            art::ArtMethod::DeclaringClassOffset().Int32Value())
 ASM_DEFINE(ART_METHOD_JNI_OFFSET_32,