Improve nterp -> compiled code transitions.

Use an unused bit in the access flags of an ArtMethod (0x00200000) to store
the information that a method only takes ints or references and returns an
int, a reference, or a long. This avoids the need to fetch the shorty in nterp
when doing a call.

Test: test.py
Test: 821-many-args
Bug: 112676029

Change-Id: Ie657ccf69c17c1097dc2a97f18e3093ef3be391b
diff --git a/libdexfile/dex/modifiers.h b/libdexfile/dex/modifiers.h
index 5fad46c..60141df 100644
--- a/libdexfile/dex/modifiers.h
+++ b/libdexfile/dex/modifiers.h
@@ -99,6 +99,9 @@
 
 // Whether nterp can take a fast path when entering this method (runtime; non-native)
 static constexpr uint32_t kAccNterpEntryPointFastPathFlag = 0x00100000;
+// Set by the class linker to mark that a method has no floating point or long
+// parameters and no floating point return type in its shorty.
+static constexpr uint32_t kAccNterpInvokeFastPathFlag     = 0x00200000;  // method (runtime)
 
 static constexpr uint32_t kAccPublicApi =             0x10000000;  // field, method
 static constexpr uint32_t kAccCorePlatformApi =       0x20000000;  // field, method
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 412da07..76a1cac 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -404,6 +404,10 @@
     AddAccessFlags(kAccNterpEntryPointFastPathFlag);
   }
 
+  void SetNterpInvokeFastPathFlag() REQUIRES_SHARED(Locks::mutator_lock_) {
+    AddAccessFlags(kAccNterpInvokeFastPathFlag);
+  }
+
   // Returns true if this method could be overridden by a default method.
   bool IsOverridableByDefaultMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 2040263..745e7cf 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3838,21 +3838,34 @@
     } else {
       dst->SetCodeItem(dst->GetDexFile()->GetCodeItem(method.GetCodeItemOffset()));
     }
-    bool has_all_references = true;
-    const char* shorty = dst->GetShorty();
-    for (size_t i = 1, e = strlen(shorty); i < e; ++i) {
-      if (shorty[i] != 'L') {
-        has_all_references = false;
-        break;
-      }
-    }
-    if (has_all_references) {
-      dst->SetNterpEntryPointFastPathFlag();
-    }
   } else {
     dst->SetDataPtrSize(nullptr, image_pointer_size_);
     DCHECK_EQ(method.GetCodeItemOffset(), 0u);
   }
+
+  // Set optimization flags related to the shorty.
+  const char* shorty = dst->GetShorty();
+  bool all_parameters_are_reference = true;
+  bool all_parameters_are_reference_or_int = true;
+  bool return_type_is_fp = (shorty[0] == 'F' || shorty[0] == 'D');
+
+  for (size_t i = 1, e = strlen(shorty); i < e; ++i) {
+    if (shorty[i] != 'L') {
+      all_parameters_are_reference = false;
+      if (shorty[i] == 'F' || shorty[i] == 'D' || shorty[i] == 'J') {
+        all_parameters_are_reference_or_int = false;
+        break;
+      }
+    }
+  }
+
+  if (!dst->IsNative() && all_parameters_are_reference) {
+    dst->SetNterpEntryPointFastPathFlag();
+  }
+
+  if (!return_type_is_fp && all_parameters_are_reference_or_int) {
+    dst->SetNterpInvokeFastPathFlag();
+  }
 }
 
 void ClassLinker::AppendToBootClassPath(Thread* self, const DexFile* dex_file) {
diff --git a/runtime/image.cc b/runtime/image.cc
index 7c4cc39..1e5ce6d 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,8 +29,8 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-// Last change: Add kAccNterpEntryPointFastPathFlag
-const uint8_t ImageHeader::kImageVersion[] = { '0', '9', '8', '\0' };
+// Last change: kAccNterpInvokeFastPathFlag in method modifiers.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '9', '9', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_reservation_size,
                          uint32_t component_count,
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S
index 27c4b7c..627bb04 100644
--- a/runtime/interpreter/mterp/arm64ng/main.S
+++ b/runtime/interpreter/mterp/arm64ng/main.S
@@ -876,6 +876,56 @@
    .elseif \is_string_init
    // No fast path for string.init.
    .else
+     ldr wip, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+     tbz wip, #ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, .Lfast_path_with_few_args_\suffix
+     FETCH_B wip2, 0, 1
+     asr ip, ip2, #4
+     .if \is_static
+     cbz ip, .Linvoke_fast_path_\suffix
+     .else
+     cmp ip, #1
+     b.eq .Linvoke_fast_path_\suffix
+     .endif
+     FETCH w8, 2
+     cmp ip, #2
+     .if \is_static
+     b.lt .Lone_arg_fast_path_\suffix
+     .endif
+     b.eq .Ltwo_args_fast_path_\suffix
+     cmp ip, #4
+     b.lt .Lthree_args_fast_path_\suffix
+     b.eq .Lfour_args_fast_path_\suffix
+
+     and         ip, ip2, #15
+     GET_VREG    w5, wip
+.Lfour_args_fast_path_\suffix:
+     asr         ip, x8, #12
+     GET_VREG    w4, wip
+.Lthree_args_fast_path_\suffix:
+     ubfx        ip, x8, #8, #4
+     GET_VREG    w3, wip
+.Ltwo_args_fast_path_\suffix:
+     ubfx        ip, x8, #4, #4
+     GET_VREG    w2, wip
+.Lone_arg_fast_path_\suffix:
+     .if \is_static
+     and         ip, x8, #0xf
+     GET_VREG    w1, wip
+     .else
+     // First argument already in w1.
+     .endif
+.Linvoke_fast_path_\suffix:
+     .if \is_interface
+     // Setup hidden argument.
+     mov ip2, x26
+     .endif
+     ldr lr, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
+     blr lr
+     FETCH_ADVANCE_INST 3
+     GET_INST_OPCODE ip
+     GOTO_OPCODE ip
+
+.Lfast_path_with_few_args_\suffix:
      // Fast path when we have zero or one argument (modulo 'this'). If there
      // is one argument, we can put it in both floating point and core register.
      FETCH_B w2, 0, 1
@@ -886,9 +936,9 @@
      .endif
      b.ge .Lget_shorty_\suffix
      .if \is_static
-     tbz w2, #4, .Lfast_path_\suffix
+     tbz w2, #4, .Linvoke_with_few_args_\suffix
      .else
-     tbnz w2, #4, .Lfast_path_\suffix
+     tbnz w2, #4, .Linvoke_with_few_args_\suffix
      .endif
      FETCH w2, 2
      .if \is_static
@@ -900,7 +950,7 @@
      GET_VREG w2, w2
      fmov s0, w2
      .endif
-.Lfast_path_\suffix:
+.Linvoke_with_few_args_\suffix:
      // Check if the next instruction is move-result or move-result-wide.
      // If it is, we fetch the shorty and jump to the regular invocation.
      FETCH w27, 3
@@ -1161,6 +1211,71 @@
    .elseif \is_string_init
    // No fast path for string.init.
    .else
+     ldr wip, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+     tbz wip, #ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, .Lfast_path_with_few_args_range_\suffix
+     FETCH_B wip2, 0, 1  // Number of arguments
+     .if \is_static
+     cbz ip2, .Linvoke_fast_path_range_\suffix
+     .else
+     cmp ip2, #1
+     b.eq .Linvoke_fast_path_range_\suffix
+     .endif
+     FETCH wip, 2  // dex register of first argument
+     add x8, xFP, wip, uxtw #2  // location of first dex register value
+     cmp ip2, #2
+     .if \is_static
+     b.lt .Lone_arg_fast_path_range_\suffix
+     .endif
+     b.eq .Ltwo_args_fast_path_range_\suffix
+     cmp ip2, #4
+     b.lt .Lthree_args_fast_path_range_\suffix
+     b.eq .Lfour_args_fast_path_range_\suffix
+     cmp ip2, #6
+     b.lt .Lfive_args_fast_path_range_\suffix
+     b.eq .Lsix_args_fast_path_range_\suffix
+     cmp ip2, #7
+     b.eq .Lseven_args_fast_path_range_\suffix
+     // Setup x9 to point to the stack location where the parameters that do
+     // not fit in registers are stored.
+     add x9, sp, #8  // Add space for the ArtMethod
+
+.Lloop_over_fast_path_range_\suffix:
+     sub ip2, ip2, #1
+     ldr wip, [x8, ip2, lsl #2]
+     str wip, [x9, ip2, lsl #2]
+     cmp ip2, #7
+     b.ne .Lloop_over_fast_path_range_\suffix
+
+.Lseven_args_fast_path_range_\suffix:
+     ldr w7, [x8, #24]
+.Lsix_args_fast_path_range_\suffix:
+     ldr w6, [x8, #20]
+.Lfive_args_fast_path_range_\suffix:
+     ldr w5, [x8, #16]
+.Lfour_args_fast_path_range_\suffix:
+     ldr w4, [x8, #12]
+.Lthree_args_fast_path_range_\suffix:
+     ldr w3, [x8, #8]
+.Ltwo_args_fast_path_range_\suffix:
+     ldr w2, [x8, #4]
+.Lone_arg_fast_path_range_\suffix:
+     .if \is_static
+     ldr w1, [x8, #0]
+     .else
+     // First argument already in w1.
+     .endif
+.Linvoke_fast_path_range_\suffix:
+     .if \is_interface
+     // Setup hidden argument.
+     mov ip2, x26
+     .endif
+     ldr lr, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
+     blr lr
+     FETCH_ADVANCE_INST 3
+     GET_INST_OPCODE ip
+     GOTO_OPCODE ip
+
+.Lfast_path_with_few_args_range_\suffix:
      // Fast path when we have zero or one argument (modulo 'this'). If there
      // is one argument, we can put it in both floating point and core register.
      FETCH_B w2, 0, 1 // number of arguments
@@ -1169,7 +1284,7 @@
      .else
      cmp w2, #2
      .endif
-     b.lt .Lfast_path_range_\suffix
+     b.lt .Linvoke_with_few_args_range_\suffix
      b.ne .Lget_shorty_range_\suffix
      FETCH w3, 2  // dex register of first argument
      .if \is_static
@@ -1180,7 +1295,7 @@
      GET_VREG w2, w3
      fmov s0, w2
      .endif
-.Lfast_path_range_\suffix:
+.Linvoke_with_few_args_range_\suffix:
      // Check if the next instruction is move-result or move-result-wide.
      // If it is, we fetch the shorty and jump to the regular invocation.
      FETCH w27, 3
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
index d7f1bf8..4427908 100644
--- a/runtime/interpreter/mterp/armng/main.S
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -972,13 +972,66 @@
    .elseif \is_string_init
    // No fast path for string.init.
    .else
+     ldr ip, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+     tst ip, #ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG
+     beq .Lfast_path_with_few_args_\suffix
+     FETCH_B rINST, 0, 1
+     .if \is_static
+     asrs lr, rINST, #4
+     beq .Linvoke_fast_path_\suffix
+     .else
+     asr lr, rINST, #4
+     cmp lr, #1
+     beq .Linvoke_fast_path_\suffix
+     .endif
+     FETCH ip, 2
+     cmp lr, #2
+     .if \is_static
+     blt .Lone_arg_fast_path_\suffix
+     .endif
+     beq .Ltwo_args_fast_path_\suffix
+     cmp lr, #4
+     blt .Lthree_args_fast_path_\suffix
+     beq .Lfour_args_fast_path_\suffix
+     and         rINST, rINST, #15
+     GET_VREG    rINST, rINST
+     str         rINST, [sp, #(4 + 4 * 4)]
+.Lfour_args_fast_path_\suffix:
+     asr         rINST, ip, #12
+     GET_VREG    rINST, rINST
+     str         rINST, [sp, #(4 + 3 * 4)]
+.Lthree_args_fast_path_\suffix:
+     ubfx        rINST, ip, #8, #4
+     GET_VREG    r3, rINST
+.Ltwo_args_fast_path_\suffix:
+     ubfx        rINST, ip, #4, #4
+     GET_VREG    r2, rINST
+.Lone_arg_fast_path_\suffix:
+     .if \is_static
+     and         rINST, ip, #0xf
+     GET_VREG    r1, rINST
+     .else
+     // First argument already in r1.
+     .endif
+.Linvoke_fast_path_\suffix:
+     .if \is_interface
+     // Setup hidden argument.
+     mov ip, r4
+     .endif
+     ldr lr, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
+     blx lr
+     FETCH_ADVANCE_INST 3
+     GET_INST_OPCODE ip
+     GOTO_OPCODE ip
+
+.Lfast_path_with_few_args_\suffix:
      // Fast path when we have zero or one argument (modulo 'this'). If there
      // is one argument, we can put it in both floating point and core register.
      FETCH_B r2, 0, 1
      asr r2, r2, #4  // number of arguments
      .if \is_static
      cmp r2, #1
-     blt .Lfast_path_\suffix
+     blt .Linvoke_with_few_args_\suffix
      bne .Lget_shorty_\suffix
      FETCH r2, 2
      and r2, r2, #0xf  // dex register of first argument
@@ -986,14 +1039,14 @@
      vmov s0, r1
      .else
      cmp r2, #2
-     blt .Lfast_path_\suffix
+     blt .Linvoke_with_few_args_\suffix
      bne .Lget_shorty_\suffix
      FETCH r2, 2
      ubfx r2, r2, #4, #4  // dex register of second argument
      GET_VREG r2, r2
      vmov s0, r2
      .endif
-.Lfast_path_\suffix:
+.Linvoke_with_few_args_\suffix:
      // Check if the next instruction is move-result or move-result-wide.
      // If it is, we fetch the shorty and jump to the regular invocation.
      FETCH r3, 3
@@ -1218,26 +1271,79 @@
    .elseif \is_string_init
    // No fast path for string.init.
    .else
+     ldr ip, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+     tst ip, #ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG
+     beq .Lfast_path_with_few_args_range_\suffix
+     FETCH_B ip, 0, 1  // Number of arguments
+     .if \is_static
+     cmp ip, #0
+     .else
+     cmp ip, #1
+     .endif
+     beq .Linvoke_fast_path_range_\suffix
+     FETCH lr, 2  // dex register of first argument
+     add lr, rFP, lr, lsl #2  // location of first dex register value.
+     .if \is_static
+     cmp ip, #2
+     blt .Lone_arg_fast_path_range_\suffix
+     beq .Ltwo_args_fast_path_range_\suffix
+     cmp ip, #3
+     .else
+     cmp ip, #3
+     blt .Ltwo_args_fast_path_range_\suffix
+     .endif
+     beq .Lthree_args_fast_path_range_\suffix
+     add rINST, sp, #4  // Add space for the ArtMethod
+
+.Lloop_over_fast_path_range_\suffix:
+     sub ip, ip, #1
+     ldr r3, [lr, ip, lsl #2]
+     str r3, [rINST, ip, lsl #2]
+     cmp ip, #3
+     bne .Lloop_over_fast_path_range_\suffix
+
+.Lthree_args_fast_path_range_\suffix:
+     ldr r3, [lr, #8]
+.Ltwo_args_fast_path_range_\suffix:
+     ldr r2, [lr, #4]
+.Lone_arg_fast_path_range_\suffix:
+     .if \is_static
+     ldr r1, [lr, #0]
+     .else
+     // First argument already in r1.
+     .endif
+.Linvoke_fast_path_range_\suffix:
+     .if \is_interface
+     // Setup hidden argument.
+     mov ip, r4
+     .endif
+     ldr lr, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
+     blx lr
+     FETCH_ADVANCE_INST 3
+     GET_INST_OPCODE ip
+     GOTO_OPCODE ip
+
+.Lfast_path_with_few_args_range_\suffix:
      // Fast path when we have zero or one argument (modulo 'this'). If there
      // is one argument, we can put it in both floating point and core register.
      FETCH_B r2, 0, 1 // number of arguments
      .if \is_static
      cmp r2, #1
-     blt .Lfast_path_range_\suffix
+     blt .Linvoke_with_few_args_range_\suffix
      bne .Lget_shorty_range_\suffix
      FETCH r3, 2  // dex register of first argument
      GET_VREG r1, r3
      vmov s0, r1
      .else
      cmp r2, #2
-     blt .Lfast_path_range_\suffix
+     blt .Linvoke_with_few_args_range_\suffix
      bne .Lget_shorty_range_\suffix
      FETCH r3, 2  // dex register of first argument
      add r3, r3, #1  // Add 1 for next argument
      GET_VREG r2, r3
      vmov s0, r2
      .endif
-.Lfast_path_range_\suffix:
+.Linvoke_with_few_args_range_\suffix:
      // Check if the next instruction is move-result or move-result-wide.
      // If it is, we fetch the shorty and jump to the regular invocation.
      FETCH r3, 3
diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S
index 0089429..71df9e1 100644
--- a/runtime/interpreter/mterp/x86_64ng/main.S
+++ b/runtime/interpreter/mterp/x86_64ng/main.S
@@ -1118,13 +1118,62 @@
    .elseif \is_string_init
    // No fast path for string.init.
    .else
+     testl $$ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
+     je .Lfast_path_with_few_args_\suffix
+     movzbl 1(rPC), %r9d
+     movl %r9d, %ebp
+     shrl MACRO_LITERAL(4), %ebp # Number of arguments
+     .if \is_static
+     jz .Linvoke_fast_path_\suffix  # shrl sets the Z flag
+     .else
+     cmpl MACRO_LITERAL(1), %ebp
+     je .Linvoke_fast_path_\suffix
+     .endif
+     movzwl 4(rPC), %r11d
+     cmpl MACRO_LITERAL(2), %ebp
+     .if \is_static
+     jl .Lone_arg_fast_path_\suffix
+     .endif
+     je .Ltwo_args_fast_path_\suffix
+     cmpl MACRO_LITERAL(4), %ebp
+     jl .Lthree_args_fast_path_\suffix
+     je .Lfour_args_fast_path_\suffix
+
+     andl        MACRO_LITERAL(0xf), %r9d
+     GET_VREG    %r9d, %r9
+.Lfour_args_fast_path_\suffix:
+     movl        %r11d, %r8d
+     shrl        MACRO_LITERAL(12), %r8d
+     GET_VREG    %r8d, %r8
+.Lthree_args_fast_path_\suffix:
+     movl        %r11d, %ecx
+     shrl        MACRO_LITERAL(8), %ecx
+     andl        MACRO_LITERAL(0xf), %ecx
+     GET_VREG    %ecx, %rcx
+.Ltwo_args_fast_path_\suffix:
+     movl        %r11d, %edx
+     shrl        MACRO_LITERAL(4), %edx
+     andl        MACRO_LITERAL(0xf), %edx
+     GET_VREG    %edx, %rdx
+.Lone_arg_fast_path_\suffix:
+     .if \is_static
+     andl        MACRO_LITERAL(0xf), %r11d
+     GET_VREG    %esi, %r11
+     .else
+     // First argument already in %esi.
+     .endif
+.Linvoke_fast_path_\suffix:
+     call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
+     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+.Lfast_path_with_few_args_\suffix:
      // Fast path when we have zero or one argument (modulo 'this'). If there
      // is one argument, we can put it in both floating point and core register.
      movzbl 1(rPC), %r9d
      shrl MACRO_LITERAL(4), %r9d # Number of arguments
      .if \is_static
      cmpl MACRO_LITERAL(1), %r9d
-     jl .Lfast_path_\suffix
+     jl .Linvoke_with_few_args_\suffix
      jne .Lget_shorty_\suffix
      movzwl 4(rPC), %r9d
      andl MACRO_LITERAL(0xf), %r9d  // dex register of first argument
@@ -1132,7 +1181,7 @@
      movd %esi, %xmm0
      .else
      cmpl MACRO_LITERAL(2), %r9d
-     jl .Lfast_path_\suffix
+     jl .Linvoke_with_few_args_\suffix
      jne .Lget_shorty_\suffix
      movzwl 4(rPC), %r9d
      shrl MACRO_LITERAL(4), %r9d
@@ -1140,7 +1189,7 @@
      GET_VREG %edx, %r9
      movd %edx, %xmm0
      .endif
-.Lfast_path_\suffix:
+.Linvoke_with_few_args_\suffix:
      // Check if the next instruction is move-result or move-result-wide.
      // If it is, we fetch the shorty and jump to the regular invocation.
      movzwq  6(rPC), %r9
@@ -1263,26 +1312,75 @@
    .elseif \is_string_init
    // No fast path for string.init.
    .else
+     testl $$ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi)
+     je .Lfast_path_with_few_args_range_\suffix
+     movzbl 1(rPC), %r9d  // number of arguments
+     .if \is_static
+     testl %r9d, %r9d
+     je .Linvoke_fast_path_range_\suffix
+     .else
+     cmpl MACRO_LITERAL(1), %r9d
+     je .Linvoke_fast_path_range_\suffix
+     .endif
+     movzwl 4(rPC), %r11d  // dex register of first argument
+     leaq (rFP, %r11, 4), %r11  // location of first dex register value
+     cmpl MACRO_LITERAL(2), %r9d
+     .if \is_static
+     jl .Lone_arg_fast_path_range_\suffix
+     .endif
+     je .Ltwo_args_fast_path_range_\suffix
+     cmp MACRO_LITERAL(4), %r9d
+     jl .Lthree_args_fast_path_range_\suffix
+     je .Lfour_args_fast_path_range_\suffix
+     cmp MACRO_LITERAL(5), %r9d
+     je .Lfive_args_fast_path_range_\suffix
+
+.Lloop_over_fast_path_range_\suffix:
+     subl MACRO_LITERAL(1), %r9d
+     movl (%r11, %r9, 4), %r8d
+     movl %r8d, 8(%rsp, %r9, 4)  // Add 8 for the ArtMethod
+     cmpl MACRO_LITERAL(5), %r9d
+     jne .Lloop_over_fast_path_range_\suffix
+
+.Lfive_args_fast_path_range_\suffix:
+     movl 16(%r11), %r9d
+.Lfour_args_fast_path_range_\suffix:
+     movl 12(%r11), %r8d
+.Lthree_args_fast_path_range_\suffix:
+     movl 8(%r11), %ecx
+.Ltwo_args_fast_path_range_\suffix:
+     movl 4(%r11), %edx
+.Lone_arg_fast_path_range_\suffix:
+     .if \is_static
+     movl 0(%r11), %esi
+     .else
+     // First argument already in %esi.
+     .endif
+.Linvoke_fast_path_range_\suffix:
+     call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
+     ADVANCE_PC_FETCH_AND_GOTO_NEXT 3
+
+.Lfast_path_with_few_args_range_\suffix:
      // Fast path when we have zero or one argument (modulo 'this'). If there
      // is one argument, we can put it in both floating point and core register.
      movzbl 1(rPC), %r9d # Number of arguments
      .if \is_static
      cmpl MACRO_LITERAL(1), %r9d
-     jl .Lfast_path_range_\suffix
+     jl .Linvoke_with_few_args_range_\suffix
      jne .Lget_shorty_range_\suffix
      movzwl 4(rPC), %r9d  // Dex register of first argument
      GET_VREG %esi, %r9
      movd %esi, %xmm0
      .else
      cmpl MACRO_LITERAL(2), %r9d
-     jl .Lfast_path_range_\suffix
+     jl .Linvoke_with_few_args_range_\suffix
      jne .Lget_shorty_range_\suffix
      movzwl 4(rPC), %r9d
      addl MACRO_LITERAL(1), %r9d  // dex register of second argument
      GET_VREG %edx, %r9
      movd %edx, %xmm0
      .endif
-.Lfast_path_range_\suffix:
+.Linvoke_with_few_args_range_\suffix:
      // Check if the next instruction is move-result or move-result-wide.
      // If it is, we fetch the shorty and jump to the regular invocation.
      movzwq  6(rPC), %r9
diff --git a/test/821-many-args/expected-stderr.txt b/test/821-many-args/expected-stderr.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/821-many-args/expected-stderr.txt
diff --git a/test/821-many-args/expected-stdout.txt b/test/821-many-args/expected-stdout.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/821-many-args/expected-stdout.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/821-many-args/info.txt b/test/821-many-args/info.txt
new file mode 100644
index 0000000..8d150aa
--- /dev/null
+++ b/test/821-many-args/info.txt
@@ -0,0 +1 @@
+Test for doing nterp -> compiled code transitions in the fast path.
diff --git a/test/821-many-args/src/Main.java b/test/821-many-args/src/Main.java
new file mode 100644
index 0000000..8818b90
--- /dev/null
+++ b/test/821-many-args/src/Main.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    ensureJitCompiled(Main.class, "staticMethod");
+    int a = staticMethod(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    assertEquals(42, a);
+
+    ensureJitCompiled(Main.class, "staticMethodNonRange");
+    a = staticMethodNonRange(1, 2, 3, 4, 5);
+    assertEquals(42, a);
+
+    staticMain = new Main();
+    ensureJitCompiled(Main.class, "instanceMethod");
+    a = staticMain.instanceMethod(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    assertEquals(42, a);
+
+    ensureJitCompiled(Main.class, "instanceMethodNonRange");
+    a = staticMain.instanceMethodNonRange(1, 2, 3, 4);
+    assertEquals(42, a);
+  }
+
+  public static int staticMethod(
+      int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) {
+    assertEquals(10, j);
+    assertEquals(9, i);
+    assertEquals(8, h);
+    assertEquals(7, g);
+    assertEquals(6, f);
+    assertEquals(5, e);
+    assertEquals(4, d);
+    assertEquals(3, c);
+    assertEquals(2, b);
+    assertEquals(1, a);
+    return 42;
+  }
+
+  public int instanceMethod(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j) {
+    assertEquals(10, j);
+    assertEquals(9, i);
+    assertEquals(8, h);
+    assertEquals(7, g);
+    assertEquals(6, f);
+    assertEquals(5, e);
+    assertEquals(4, d);
+    assertEquals(3, c);
+    assertEquals(2, b);
+    assertEquals(1, a);
+    assertEquals(staticMain, this);
+    return 42;
+  }
+
+  public static int staticMethodNonRange(int a, int b, int c, int d, int e) {
+    assertEquals(5, e);
+    assertEquals(4, d);
+    assertEquals(3, c);
+    assertEquals(2, b);
+    assertEquals(1, a);
+    return 42;
+  }
+
+  public int instanceMethodNonRange(int a, int b, int c, int d) {
+    assertEquals(4, d);
+    assertEquals(3, c);
+    assertEquals(2, b);
+    assertEquals(1, a);
+    assertEquals(staticMain, this);
+    return 42;
+  }
+
+  static Main staticMain;
+
+  public static void assertEquals(int expected, int actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static void assertEquals(Object expected, Object actual) {
+    if (expected != actual) {
+      throw new Error("Expected " + expected + ", got " + actual);
+    }
+  }
+
+  public static native void ensureJitCompiled(Class<?> cls, String methodName);
+}
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 1c5408d..a5aea43 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1137,6 +1137,7 @@
                   "811-checker-invoke-super-secondary",
                   "817-hiddenapi",
                   "820-vdex-multidex",
+                  "821-many-args",
                   "999-redefine-hiddenapi",
                   "1000-non-moving-space-stress",
                   "1001-app-image-regions",
diff --git a/tools/cpp-define-generator/art_method.def b/tools/cpp-define-generator/art_method.def
index 5e09565..a73bbed 100644
--- a/tools/cpp-define-generator/art_method.def
+++ b/tools/cpp-define-generator/art_method.def
@@ -23,14 +23,18 @@
            art::ArtMethod::AccessFlagsOffset().Int32Value())
 ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG,
            art::kAccStatic)
-ASM_DEFINE(ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG,
-           art::kAccNterpEntryPointFastPathFlag)
-ASM_DEFINE(ART_METHOD_IMT_MASK,
-           art::ImTable::kSizeTruncToPowerOfTwo - 1)
 ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG_BIT,
            art::MostSignificantBit(art::kAccStatic))
+ASM_DEFINE(ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG,
+           art::kAccNterpInvokeFastPathFlag)
+ASM_DEFINE(ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT,
+           art::MostSignificantBit(art::kAccNterpInvokeFastPathFlag))
+ASM_DEFINE(ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG,
+           art::kAccNterpEntryPointFastPathFlag)
 ASM_DEFINE(ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT,
            art::MostSignificantBit(art::kAccNterpEntryPointFastPathFlag))
+ASM_DEFINE(ART_METHOD_IMT_MASK,
+           art::ImTable::kSizeTruncToPowerOfTwo - 1)
 ASM_DEFINE(ART_METHOD_DECLARING_CLASS_OFFSET,
            art::ArtMethod::DeclaringClassOffset().Int32Value())
 ASM_DEFINE(ART_METHOD_JNI_OFFSET_32,