Rewrite GenericJNI frame setup.

Move the handle scope out of the managed frame, move the
register values to be loaded to the bottom of the reserved
area, and pass the hidden argument to @CriticalNative
methods, in preparation for implementing late lookup.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing --interp-ac
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing --interp-ac
Bug: 112189621
Change-Id: I4672176f9627bcbebafebb3dda0d02b8108e1329
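
For reference, the stack layout this change establishes, sketched from
the diff below (slot order follows the GetStartGprRegs/GetStartFprRegs/
GetHiddenArgSlot/GetOutArgsSpSlot helpers; the cookie and HandleScope
are absent for @CriticalNative):

    managed_sp ->   ArtMethod* (bottom of the managed callee-save frame)
                    ---- top of the reserved area ----
                    local reference cookie   \  not present for
                    HandleScope              /  @CriticalNative
                    stack args for the native call (kStackAlignment-aligned)
                    (possibly unused space)
                    out-args SP slot (points to the stack args above)
                    hidden arg slot (tagged ArtMethod* for @CriticalNative)
                    FPR argument values (kNumNativeFprArgs slots)
    reserved_area-> GPR argument values (kNumNativeGprArgs slots)
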
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 1304c0d..aab5ff5 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1560,7 +1560,7 @@
   static constexpr bool kAlignDoubleOnStack = true;
 #elif defined(__aarch64__)
   static constexpr bool kNativeSoftFloatAbi = false;  // This is a hard float ABI.
-  static constexpr size_t kNumNativeGprArgs = 8;  // 6 arguments passed in GPRs.
+  static constexpr size_t kNumNativeGprArgs = 8;  // 8 arguments passed in GPRs.
   static constexpr size_t kNumNativeFprArgs = 8;  // 8 arguments passed in FPRs.
 
   static constexpr size_t kRegistersNeededForLong = 1;
@@ -1598,8 +1598,8 @@
 #elif defined(__i386__)
   // TODO: Check these!
   static constexpr bool kNativeSoftFloatAbi = false;  // Not using int registers for fp
-  static constexpr size_t kNumNativeGprArgs = 0;  // 6 arguments passed in GPRs.
-  static constexpr size_t kNumNativeFprArgs = 0;  // 8 arguments passed in FPRs.
+  static constexpr size_t kNumNativeGprArgs = 0;  // 0 arguments passed in GPRs.
+  static constexpr size_t kNumNativeFprArgs = 0;  // 0 arguments passed in FPRs.
 
   static constexpr size_t kRegistersNeededForLong = 2;
   static constexpr size_t kRegistersNeededForDouble = 2;
@@ -1871,38 +1871,13 @@
     return num_stack_entries_ * sizeof(uintptr_t);
   }
 
-  uint8_t* LayoutCallStack(uint8_t* sp8) const {
+  uint8_t* LayoutStackArgs(uint8_t* sp8) const {
     sp8 -= GetStackSize();
-    // Align by kStackAlignment.
+    // Align by kStackAlignment; it is at least as strict as native stack alignment.
     sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
     return sp8;
   }
 
-  uint8_t* LayoutCallRegisterStacks(uint8_t* sp8, uintptr_t** start_gpr, uint32_t** start_fpr)
-      const {
-    // Assumption is OK right now, as we have soft-float arm
-    size_t fregs = BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeFprArgs;
-    sp8 -= fregs * sizeof(uintptr_t);
-    *start_fpr = reinterpret_cast<uint32_t*>(sp8);
-    size_t iregs = BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeGprArgs;
-    sp8 -= iregs * sizeof(uintptr_t);
-    *start_gpr = reinterpret_cast<uintptr_t*>(sp8);
-    return sp8;
-  }
-
-  uint8_t* LayoutNativeCall(uint8_t* sp8, uintptr_t** start_stack, uintptr_t** start_gpr,
-                            uint32_t** start_fpr) const {
-    // Native call stack.
-    sp8 = LayoutCallStack(sp8);
-    *start_stack = reinterpret_cast<uintptr_t*>(sp8);
-
-    // Put fprs and gprs below.
-    sp8 = LayoutCallRegisterStacks(sp8, start_gpr, start_fpr);
-
-    // Return the new bottom.
-    return sp8;
-  }
-
   virtual void WalkHeader(
       BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>* sm ATTRIBUTE_UNUSED)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1976,80 +1951,53 @@
   explicit ComputeGenericJniFrameSize(bool critical_native)
     : num_handle_scope_references_(0), critical_native_(critical_native) {}
 
-  // Lays out the callee-save frame. Assumes that the incorrect frame corresponding to RefsAndArgs
-  // is at *m = sp. Will update to point to the bottom of the save frame.
-  //
-  // Note: assumes ComputeAll() has been run before.
-  void LayoutCalleeSaveFrame(Thread* self, ArtMethod*** m, void* sp, HandleScope** handle_scope)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* method = **m;
-
+  uintptr_t* ComputeLayout(Thread* self,
+                           ArtMethod** managed_sp,
+                           const char* shorty,
+                           uint32_t shorty_len,
+                           HandleScope** handle_scope) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
 
-    uint8_t* sp8 = reinterpret_cast<uint8_t*>(sp);
-
-    // First, fix up the layout of the callee-save frame.
-    // We have to squeeze in the HandleScope, and relocate the method pointer.
-
-    // "Free" the slot for the method.
-    sp8 += sizeof(void*);  // In the callee-save frame we use a full pointer.
-
-    // Under the callee saves put handle scope and new method stack reference.
-    size_t handle_scope_size = HandleScope::SizeOf(num_handle_scope_references_);
-    size_t scope_and_method = handle_scope_size + sizeof(ArtMethod*);
-
-    sp8 -= scope_and_method;
-    // Align by kStackAlignment.
-    sp8 = reinterpret_cast<uint8_t*>(RoundDown(reinterpret_cast<uintptr_t>(sp8), kStackAlignment));
-
-    uint8_t* sp8_table = sp8 + sizeof(ArtMethod*);
-    *handle_scope = HandleScope::Create(sp8_table, self->GetTopHandleScope(),
-                                        num_handle_scope_references_);
-
-    // Add a slot for the method pointer, and fill it. Fix the pointer-pointer given to us.
-    uint8_t* method_pointer = sp8;
-    auto** new_method_ref = reinterpret_cast<ArtMethod**>(method_pointer);
-    *new_method_ref = method;
-    *m = new_method_ref;
-  }
-
-  // Adds space for the cookie. Note: may leave stack unaligned.
-  void LayoutCookie(uint8_t** sp) const {
-    // Reference cookie and padding
-    *sp -= 8;
-  }
-
-  // Re-layout the callee-save frame (insert a handle-scope). Then add space for the cookie.
-  // Returns the new bottom. Note: this may be unaligned.
-  uint8_t* LayoutJNISaveFrame(Thread* self, ArtMethod*** m, void* sp, HandleScope** handle_scope)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    // First, fix up the layout of the callee-save frame.
-    // We have to squeeze in the HandleScope, and relocate the method pointer.
-    LayoutCalleeSaveFrame(self, m, sp, handle_scope);
-
-    // The bottom of the callee-save frame is now where the method is, *m.
-    uint8_t* sp8 = reinterpret_cast<uint8_t*>(*m);
-
-    // Add space for cookie.
-    LayoutCookie(&sp8);
-
-    return sp8;
-  }
-
-  // WARNING: After this, *sp won't be pointing to the method anymore!
-  uint8_t* ComputeLayout(Thread* self, ArtMethod*** m, const char* shorty, uint32_t shorty_len,
-                         HandleScope** handle_scope, uintptr_t** start_stack, uintptr_t** start_gpr,
-                         uint32_t** start_fpr)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
     Walk(shorty, shorty_len);
 
-    // JNI part.
-    uint8_t* sp8 = LayoutJNISaveFrame(self, m, reinterpret_cast<void*>(*m), handle_scope);
+    // Place the HandleScope (below the cookie) in the reserved area.
+    void* storage = GetGenericJniHandleScope(managed_sp, num_handle_scope_references_);
+    DCHECK_ALIGNED(storage, sizeof(uintptr_t));
+    *handle_scope =
+        HandleScope::Create(storage, self->GetTopHandleScope(), num_handle_scope_references_);
+    DCHECK_EQ(*handle_scope, storage);
+    uint8_t* sp8 = reinterpret_cast<uint8_t*>(*handle_scope);
+    DCHECK_GE(static_cast<size_t>(reinterpret_cast<uint8_t*>(managed_sp) - sp8),
+              HandleScope::SizeOf(num_handle_scope_references_) + kJniCookieSize);
 
-    sp8 = LayoutNativeCall(sp8, start_stack, start_gpr, start_fpr);
+    // Lay out the stack arguments.
+    sp8 = LayoutStackArgs(sp8);
 
     // Return the new bottom.
-    return sp8;
+    DCHECK_ALIGNED(sp8, sizeof(uintptr_t));
+    return reinterpret_cast<uintptr_t*>(sp8);
+  }
+
+  static uintptr_t* GetStartGprRegs(uintptr_t* reserved_area) {
+    return reserved_area;
+  }
+
+  static uint32_t* GetStartFprRegs(uintptr_t* reserved_area) {
+    constexpr size_t num_gprs =
+        BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeGprArgs;
+    return reinterpret_cast<uint32_t*>(GetStartGprRegs(reserved_area) + num_gprs);
+  }
+
+  static uintptr_t* GetHiddenArgSlot(uintptr_t* reserved_area) {
+    // Note: `num_fprs` is 0 on architectures where sizeof(uintptr_t) does not match the
+    // FP register size (it is actually 0 on all supported 32-bit architectures).
+    constexpr size_t num_fprs =
+        BuildNativeCallFrameStateMachine<ComputeNativeCallFrameSize>::kNumNativeFprArgs;
+    return reinterpret_cast<uintptr_t*>(GetStartFprRegs(reserved_area)) + num_fprs;
+  }
+
+  static uintptr_t* GetOutArgsSpSlot(uintptr_t* reserved_area) {
+    return GetHiddenArgSlot(reserved_area) + 1;
   }
 
   uintptr_t PushHandle(mirror::Object* /* ptr */) override;
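
For a concrete instance of the slot arithmetic in the helpers above, take
the arm64 constants from this file (kNumNativeGprArgs == 8,
kNumNativeFprArgs == 8, 8-byte uintptr_t); offsets are from
`reserved_area`, in bytes (worked example only):

    // GPR arg values:  [  0,  64)   // 8 GPRs * sizeof(uintptr_t)
    // FPR arg values:  [ 64, 128)   // 8 FPRs * sizeof(uintptr_t)
    // hidden arg slot: [128, 136)
    // out-args SP:     [136, 144)   // 144 bytes of register/slot area total
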
@@ -2138,20 +2086,33 @@
                               bool critical_native,
                               const char* shorty,
                               uint32_t shorty_len,
-                              ArtMethod*** sp)
-     : QuickArgumentVisitor(*sp, is_static, shorty, shorty_len),
+                              ArtMethod** managed_sp,
+                              uintptr_t* reserved_area)
+     : QuickArgumentVisitor(managed_sp, is_static, shorty, shorty_len),
        jni_call_(nullptr, nullptr, nullptr, nullptr, critical_native),
        sm_(&jni_call_) {
-    ComputeGenericJniFrameSize fsc(critical_native);
-    uintptr_t* start_gpr_reg;
-    uint32_t* start_fpr_reg;
-    uintptr_t* start_stack_arg;
-    bottom_of_used_area_ = fsc.ComputeLayout(self, sp, shorty, shorty_len,
-                                             &handle_scope_,
-                                             &start_stack_arg,
-                                             &start_gpr_reg, &start_fpr_reg);
+    DCHECK_ALIGNED(managed_sp, kStackAlignment);
+    DCHECK_ALIGNED(reserved_area, sizeof(uintptr_t));
 
-    jni_call_.Reset(start_gpr_reg, start_fpr_reg, start_stack_arg, handle_scope_);
+    ComputeGenericJniFrameSize fsc(critical_native);
+    uintptr_t* out_args_sp =
+        fsc.ComputeLayout(self, managed_sp, shorty, shorty_len, &handle_scope_);
+
+    // Store hidden argument for @CriticalNative.
+    uintptr_t* hidden_arg_slot = fsc.GetHiddenArgSlot(reserved_area);
+    constexpr uintptr_t kGenericJniTag = 1u;
+    ArtMethod* method = *managed_sp;
+    *hidden_arg_slot = critical_native ? (reinterpret_cast<uintptr_t>(method) | kGenericJniTag)
+                                       : 0xebad6a89u;  // Bad value.
+
+    // Set out args SP.
+    uintptr_t* out_args_sp_slot = fsc.GetOutArgsSpSlot(reserved_area);
+    *out_args_sp_slot = reinterpret_cast<uintptr_t>(out_args_sp);
+
+    jni_call_.Reset(fsc.GetStartGprRegs(reserved_area),
+                    fsc.GetStartFprRegs(reserved_area),
+                    out_args_sp,
+                    handle_scope_);
 
     // First 2 parameters are always excluded for CriticalNative methods.
     if (LIKELY(!critical_native)) {
@@ -2159,7 +2120,7 @@
       sm_.AdvancePointer(self->GetJniEnv());
 
       if (is_static) {
-        sm_.AdvanceHandleScope((**sp)->GetDeclaringClass().Ptr());
+        sm_.AdvanceHandleScope(method->GetDeclaringClass().Ptr());
       }  // else "this" reference is already handled by QuickArgumentVisitor.
     }
   }
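
The kGenericJniTag in the constructor above relies on the alignment of
ArtMethod* leaving bit 0 free. A minimal sketch of how a consumer of the
hidden argument might test and strip the tag (the helper name is
hypothetical, not part of this change):

    // Hypothetical helper: recover the method pointer from the tagged
    // hidden argument written by BuildGenericJniFrameVisitor.
    static inline ArtMethod* UntagGenericJniHiddenArg(uintptr_t hidden_arg,
                                                      /*out*/ bool* is_generic_jni) {
      constexpr uintptr_t kGenericJniTag = 1u;  // Same value as above.
      *is_generic_jni = (hidden_arg & kGenericJniTag) != 0u;
      return reinterpret_cast<ArtMethod*>(hidden_arg & ~kGenericJniTag);
    }
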
@@ -2176,10 +2137,6 @@
     return handle_scope_->GetHandle(0).ToJObject();
   }
 
-  void* GetBottomOfUsedArea() const {
-    return bottom_of_used_area_;
-  }
-
  private:
   // A class to fill a JNI call. Adds reference/handle-scope management to FillNativeCall.
   class FillJniCall final : public FillNativeCall {
@@ -2224,7 +2181,6 @@
 
   HandleScope* handle_scope_;
   FillJniCall jni_call_;
-  void* bottom_of_used_area_;
 
   BuildNativeCallFrameStateMachine<FillJniCall> sm_;
 
@@ -2296,21 +2252,27 @@
 }
 
 /*
- * Initializes an alloca region assumed to be directly below sp for a native call:
- * Create a HandleScope and call stack and fill a mini stack with values to be pushed to registers.
- * The final element on the stack is a pointer to the native code.
+ * Initializes the reserved area assumed to be directly below `managed_sp` for a native call:
  *
- * On entry, the stack has a standard callee-save frame above sp, and an alloca below it.
- * We need to fix this, as the handle scope needs to go into the callee-save frame.
+ * On entry, the stack has a standard callee-save frame above `managed_sp`,
+ * and the reserved area below it. Starting below `managed_sp`, we reserve space
+ * for the local reference cookie (not present for @CriticalNative), the
+ * HandleScope (not present for @CriticalNative) and the stack args (if the
+ * args do not fit into registers). At the bottom of the reserved area there
+ * is space for the register arguments, the hidden arg (for @CriticalNative)
+ * and the SP for the native call (i.e. a pointer to the stack args area),
+ * which the calling stub shall load to perform the native call. We fill in
+ * all these fields, perform the class init check (for static methods) and/or
+ * locking (for synchronized methods) if needed, and return to the stub.
  *
- * The return of this function denotes:
- * 1) How many bytes of the alloca can be released, if the value is non-negative.
- * 2) An error, if the value is negative.
+ * The return value is the pointer to the native code, or null on failure.
  */
-extern "C" TwoWordReturn artQuickGenericJniTrampoline(Thread* self, ArtMethod** sp)
+extern "C" const void* artQuickGenericJniTrampoline(Thread* self,
+                                                    ArtMethod** managed_sp,
+                                                    uintptr_t* reserved_area)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Note: We cannot walk the stack properly until fixed up below.
-  ArtMethod* called = *sp;
+  ArtMethod* called = *managed_sp;
   DCHECK(called->IsNative()) << called->PrettyMethod(true);
   Runtime* runtime = Runtime::Current();
   uint32_t shorty_len = 0;
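
To illustrate the new contract, a C++ rendering of what the calling stub
is expected to do with the reserved area once this trampoline returns
(illustrative only; the real stubs are per-architecture assembly, and
CallNativeCode/DeliverPendingException are hypothetical names):

    const void* code = artQuickGenericJniTrampoline(self, managed_sp, reserved_area);
    if (code == nullptr) {
      DeliverPendingException();  // An exception is already set on `self`.
    } else {
      // Load the native argument registers from the bottom of the reserved
      // area, switch SP to the out-args area and call the native code; for
      // @CriticalNative, pass the tagged method pointer as the hidden arg.
      uintptr_t* gprs = ComputeGenericJniFrameSize::GetStartGprRegs(reserved_area);
      uint32_t* fprs = ComputeGenericJniFrameSize::GetStartFprRegs(reserved_area);
      uintptr_t hidden_arg = *ComputeGenericJniFrameSize::GetHiddenArgSlot(reserved_area);
      uintptr_t out_args_sp = *ComputeGenericJniFrameSize::GetOutArgsSpSlot(reserved_area);
      CallNativeCode(code, gprs, fprs, hidden_arg, out_args_sp);
    }
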
@@ -2325,7 +2287,8 @@
                                       critical_native,
                                       shorty,
                                       shorty_len,
-                                      &sp);
+                                      managed_sp,
+                                      reserved_area);
   {
     ScopedAssertNoThreadSuspension sants(__FUNCTION__);
     visitor.VisitArguments();
@@ -2334,7 +2297,7 @@
   }
 
   // Fix up managed-stack things in Thread. After this we can walk the stack.
-  self->SetTopOfStackTagged(sp);
+  self->SetTopOfStackTagged(managed_sp);
 
   self->VerifyStack();
 
@@ -2356,8 +2319,7 @@
       if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
         DCHECK(Thread::Current()->IsExceptionPending()) << called->PrettyMethod();
         self->PopHandleScope();
-        // A negative value denotes an error.
-        return GetTwoWordFailureValue();
+        return nullptr;  // Report error.
       }
     }
   }
@@ -2372,8 +2334,7 @@
       cookie = JniMethodStartSynchronized(visitor.GetFirstHandleScopeJObject(), self);
       if (self->IsExceptionPending()) {
         self->PopHandleScope();
-        // A negative value denotes an error.
-        return GetTwoWordFailureValue();
+        return nullptr;  // Report error.
       }
     } else {
       if (fast_native) {
@@ -2383,7 +2344,7 @@
         cookie = JniMethodStart(self);
       }
     }
-    sp32 = reinterpret_cast<uint32_t*>(sp);
+    sp32 = reinterpret_cast<uint32_t*>(managed_sp);
     *(sp32 - 1) = cookie;
   }
 
@@ -2433,15 +2394,17 @@
                         << " -> "
                         << std::hex << reinterpret_cast<uintptr_t>(nativeCode);
 
-  // Return native code addr(lo) and bottom of alloca address(hi).
-  return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(visitor.GetBottomOfUsedArea()),
-                                reinterpret_cast<uintptr_t>(nativeCode));
+  // Return native code.
+  return nativeCode;
 }
 
 // Defined in quick_jni_entrypoints.cc.
-extern uint64_t GenericJniMethodEnd(Thread* self, uint32_t saved_local_ref_cookie,
-                                    jvalue result, uint64_t result_f, ArtMethod* called,
-                                    HandleScope* handle_scope);
+extern uint64_t GenericJniMethodEnd(Thread* self,
+                                    uint32_t saved_local_ref_cookie,
+                                    jvalue result,
+                                    uint64_t result_f,
+                                    ArtMethod* called);
+
 /*
  * Is called after the native JNI code. Responsible for cleanup (handle scope, saved state) and
  * unlocking.
@@ -2458,8 +2421,15 @@
   uint32_t* sp32 = reinterpret_cast<uint32_t*>(sp);
   ArtMethod* called = *sp;
   uint32_t cookie = *(sp32 - 1);
-  HandleScope* table = reinterpret_cast<HandleScope*>(reinterpret_cast<uint8_t*>(sp) + sizeof(*sp));
-  return GenericJniMethodEnd(self, cookie, result, result_f, called, table);
+  if (kIsDebugBuild && !called->IsCriticalNative()) {
+    BaseHandleScope* handle_scope = self->GetTopHandleScope();
+    DCHECK(handle_scope != nullptr);
+    DCHECK(!handle_scope->IsVariableSized());
+    // Note: We do not hold mutator lock here for normal JNI, so we cannot use the method's shorty
+    // to determine the number of references. Instead rely on the value from the HandleScope.
+    DCHECK_EQ(handle_scope, GetGenericJniHandleScope(sp, handle_scope->NumberOfReferences()));
+  }
+  return GenericJniMethodEnd(self, cookie, result, result_f, called);
 }
 
 // We use TwoWordReturn to optimize scalar returns. We use the hi value for code, and the lo value
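
GetGenericJniHandleScope is used above (in ComputeLayout and in the debug
check in artQuickGenericJniEndTrampoline) but its definition is not part
of this diff. A plausible sketch, consistent with the DCHECKs in
ComputeLayout (pointer-aligned storage, at least cookie-plus-scope bytes
below `managed_sp`):

    // Sketch only; the real definition lives elsewhere in the tree.
    static HandleScope* GetGenericJniHandleScope(ArtMethod** managed_sp,
                                                 size_t num_handle_scope_references) {
      // The local reference cookie sits directly below the method pointer,
      // the HandleScope below the cookie; round down for pointer alignment.
      size_t scope_and_cookie =
          HandleScope::SizeOf(num_handle_scope_references) + kJniCookieSize;
      uintptr_t scope = reinterpret_cast<uintptr_t>(managed_sp) - scope_and_cookie;
      return reinterpret_cast<HandleScope*>(RoundDown(scope, sizeof(uintptr_t)));
    }
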