Direct calls to @CriticalNative methods.

Emit direct calls from compiled managed code to the native
code registered with the method, avoiding the JNI stub.

Golem results:
art-opt-cc                       x86 x86-64    arm  arm64
NativeDowncallStaticCritical  +12.5% +62.5% +75.9% +41.7%
NativeDowncallStaticCritical6 +55.6% +87.5% +72.1% +35.3%
art-opt                          x86 x86-64    arm  arm64
NativeDowncallStaticCritical  +28.6% +85.6% +76.4% +38.4%
NativeDowncallStaticCritical6 +44.6% +44.6% +74.6% +32.2%

Test: Covered by 178-app-image-native-method.
Test: m test-art-host-gtest
Test: testrunner.py --host --debuggable --ndebuggable \
          --optimizing --jit --jit-on-first-use
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Test: testrunner.py --target --debuggable --ndebuggable \
          --optimizing --jit --jit-on-first-use -t 178
Test: aosp_cf_x86_phone-userdebug boots.
Test: aosp_cf_x86_phone-userdebug/jitzygote boots.
Bug: 112189621
Change-Id: I8b37da51e8fe0b7bc513bb81b127fe0416068866
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index d07ab98..7afa8b1 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -420,7 +420,7 @@
   return RoundUp(total_size, kStackAlignment);
 }
 
-size_t ArmJniCallingConvention::OutArgSize() const {
+size_t ArmJniCallingConvention::OutFrameSize() const {
   // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
   size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
   // Account for arguments passed through r0-r3. (No FP args, AAPCS32 is soft-float.)
@@ -440,7 +440,7 @@
   }
   size_t out_args_size = RoundUp(size, kAapcsStackAlignment);
   if (UNLIKELY(IsCriticalNative())) {
-    DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+    DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
   }
   return out_args_size;
 }
@@ -512,9 +512,9 @@
   CHECK_GE(itr_slots_, kJniArgumentRegisterCount);
   size_t offset =
       displacement_.Int32Value()
-          - OutArgSize()
+          - OutFrameSize()
           + ((itr_slots_ - kJniArgumentRegisterCount) * kFramePointerSize);
-  CHECK_LT(offset, OutArgSize());
+  CHECK_LT(offset, OutFrameSize());
   return FrameOffset(offset);
 }
 
@@ -537,7 +537,7 @@
 // Whether to use tail call (used only for @CriticalNative).
 bool ArmJniCallingConvention::UseTailCall() const {
   CHECK(IsCriticalNative());
-  return OutArgSize() == 0u;
+  return OutFrameSize() == 0u;
 }
 
 }  // namespace arm
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 7896d64..38f7184 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -65,7 +65,7 @@
   // JNI calling convention
   void Next() override;  // Override default behavior for AAPCS
   size_t FrameSize() const override;
-  size_t OutArgSize() const override;
+  size_t OutFrameSize() const override;
   ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
   ManagedRegister ReturnScratchRegister() const override;
   uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 32da141..06796c1 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -265,20 +265,14 @@
   return RoundUp(total_size, kStackAlignment);
 }
 
-size_t Arm64JniCallingConvention::OutArgSize() const {
+size_t Arm64JniCallingConvention::OutFrameSize() const {
   // Count param args, including JNIEnv* and jclass*.
   size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
   size_t num_fp_args = NumFloatOrDoubleArgs();
   DCHECK_GE(all_args, num_fp_args);
   size_t num_non_fp_args = all_args - num_fp_args;
-  // Account for FP arguments passed through v0-v7.
-  size_t num_stack_fp_args =
-      num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
-  // Account for other (integer and pointer) arguments passed through GPR (x0-x7).
-  size_t num_stack_non_fp_args =
-      num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
   // The size of outgoing arguments.
-  size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+  size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
 
   // @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64.
   static_assert((kCoreCalleeSpillMask & ~kAapcs64CoreCalleeSpillMask) == 0u);
@@ -291,7 +285,7 @@
   }
   size_t out_args_size = RoundUp(size, kAapcs64StackAlignment);
   if (UNLIKELY(IsCriticalNative())) {
-    DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+    DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
   }
   return out_args_size;
 }
@@ -355,8 +349,8 @@
                              static_cast<size_t>(itr_float_and_doubles_))
                   - std::min(kMaxIntLikeRegisterArguments,
                              static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
-  size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
-  CHECK_LT(offset, OutArgSize());
+  size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+  CHECK_LT(offset, OutFrameSize());
   return FrameOffset(offset);
 }
 
@@ -378,7 +372,7 @@
 // Whether to use tail call (used only for @CriticalNative).
 bool Arm64JniCallingConvention::UseTailCall() const {
   CHECK(IsCriticalNative());
-  return OutArgSize() == 0u;
+  return OutFrameSize() == 0u;
 }
 
 }  // namespace arm64
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 7beca08..d381d9d 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -56,7 +56,7 @@
   ManagedRegister IntReturnRegister() override;
   // JNI calling convention
   size_t FrameSize() const override;
-  size_t OutArgSize() const override;
+  size_t OutFrameSize() const override;
   ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
   ManagedRegister ReturnScratchRegister() const override;
   uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index b4396f0..005ae91 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -303,9 +303,9 @@
   // always at the bottom of a frame, but this doesn't work for outgoing
   // native args). Includes alignment.
   virtual size_t FrameSize() const = 0;
-  // Size of outgoing arguments (stack portion), including alignment.
+  // Size of outgoing frame, i.e. stack arguments, @CriticalNative return PC if needed, alignment.
   // -- Arguments that are passed via registers are excluded from this size.
-  virtual size_t OutArgSize() const = 0;
+  virtual size_t OutFrameSize() const = 0;
   // Number of references in stack indirect reference table
   size_t ReferenceCount() const;
   // Location where the segment state of the local indirect reference table is saved
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 036cdbb..913a3ba 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -220,7 +220,7 @@
   // 1. Build the frame saving all callee saves, Method*, and PC return address.
   //    For @CriticalNative, this includes space for out args, otherwise just the managed frame.
   const size_t managed_frame_size = main_jni_conv->FrameSize();
-  const size_t main_out_arg_size = main_jni_conv->OutArgSize();
+  const size_t main_out_arg_size = main_jni_conv->OutFrameSize();
   size_t current_frame_size = is_critical_native ? main_out_arg_size : managed_frame_size;
   ManagedRegister method_register =
       is_critical_native ? ManagedRegister::NoRegister() : mr_conv->MethodRegister();
@@ -582,7 +582,7 @@
 
   if (LIKELY(!is_critical_native)) {
     // Increase frame size for out args if needed by the end_jni_conv.
-    const size_t end_out_arg_size = end_jni_conv->OutArgSize();
+    const size_t end_out_arg_size = end_jni_conv->OutFrameSize();
     if (end_out_arg_size > current_out_arg_size) {
       size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
       current_out_arg_size = end_out_arg_size;
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 6776f12..df45627 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -220,11 +220,10 @@
   return RoundUp(total_size, kStackAlignment);
 }
 
-size_t X86JniCallingConvention::OutArgSize() const {
-  // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
-  size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
-  // The size of outgoiong arguments.
-  size_t size = all_args * kFramePointerSize;
+size_t X86JniCallingConvention::OutFrameSize() const {
+  // The size of outgoing arguments.
+  size_t size = GetNativeOutArgsSize(/*num_args=*/ NumberOfExtraArgumentsForJni() + NumArgs(),
+                                     NumLongOrDoubleArgs());
 
   // @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS.
   static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
@@ -244,14 +243,16 @@
     if (return_type_ok && size == kFramePointerSize) {
       // Note: This is not aligned to kNativeStackAlignment but that's OK for tail call.
       static_assert(kFramePointerSize < kNativeStackAlignment);
-      DCHECK_EQ(kFramePointerSize, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+      // The stub frame size is considered 0 in the callee where the return PC is a part of
+      // the callee frame but it is kPointerSize in the compiled stub before the tail call.
+      DCHECK_EQ(0u, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
       return kFramePointerSize;
     }
   }
 
   size_t out_args_size = RoundUp(size, kNativeStackAlignment);
   if (UNLIKELY(IsCriticalNative())) {
-    DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+    DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
   }
   return out_args_size;
 }
@@ -279,7 +280,8 @@
 }
 
 FrameOffset X86JniCallingConvention::CurrentParamStackOffset() {
-  return FrameOffset(displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize));
+  return
+      FrameOffset(displacement_.Int32Value() - OutFrameSize() + (itr_slots_ * kFramePointerSize));
 }
 
 ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const {
@@ -295,7 +297,7 @@
 
 bool X86JniCallingConvention::UseTailCall() const {
   CHECK(IsCriticalNative());
-  return OutArgSize() == kFramePointerSize;
+  return OutFrameSize() == kFramePointerSize;
 }
 
 }  // namespace x86
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index 6f22c2b..81f617d 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -61,7 +61,7 @@
   ManagedRegister IntReturnRegister() override;
   // JNI calling convention
   size_t FrameSize() const override;
-  size_t OutArgSize() const override;
+  size_t OutFrameSize() const override;
   ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
   ManagedRegister ReturnScratchRegister() const override;
   uint32_t CoreSpillMask() const override;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index e97cab8..44ae8be 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -208,21 +208,14 @@
   return RoundUp(total_size, kStackAlignment);
 }
 
-size_t X86_64JniCallingConvention::OutArgSize() const {
+size_t X86_64JniCallingConvention::OutFrameSize() const {
   // Count param args, including JNIEnv* and jclass*.
   size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
   size_t num_fp_args = NumFloatOrDoubleArgs();
   DCHECK_GE(all_args, num_fp_args);
   size_t num_non_fp_args = all_args - num_fp_args;
-  // Account for FP arguments passed through Xmm0..Xmm7.
-  size_t num_stack_fp_args =
-      num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
-  // Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9).
-  size_t num_stack_non_fp_args =
-      num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
   // The size of outgoing arguments.
-  static_assert(kFramePointerSize == kMmxSpillSize);
-  size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+  size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args);
 
   if (UNLIKELY(IsCriticalNative())) {
     // We always need to spill xmm12-xmm15 as they are managed callee-saves
@@ -239,7 +232,7 @@
 
   size_t out_args_size = RoundUp(size, kNativeStackAlignment);
   if (UNLIKELY(IsCriticalNative())) {
-    DCHECK_EQ(out_args_size, GetCriticalNativeOutArgsSize(GetShorty(), NumArgs() + 1u));
+    DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u));
   }
   return out_args_size;
 }
@@ -297,8 +290,8 @@
       - std::min(kMaxIntLikeRegisterArguments,
                  static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
           // Integer arguments passed through GPR
-  size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
-  CHECK_LT(offset, OutArgSize());
+  size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize);
+  CHECK_LT(offset, OutFrameSize());
   return FrameOffset(offset);
 }
 
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index d043a3e..5bde766 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -56,7 +56,7 @@
   ManagedRegister IntReturnRegister() override;
   // JNI calling convention
   size_t FrameSize() const override;
-  size_t OutArgSize() const override;
+  size_t OutFrameSize() const override;
   ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
   ManagedRegister ReturnScratchRegister() const override;
   uint32_t CoreSpillMask() const override;