jni: Do not create a managed frame for @CriticalNative.

Omit the managed frame for @CriticalNative methods, do not
check for exceptions, and make a tail call when possible.
Pass the method pointer in a hidden argument to prepare for
implementing late binding for @CriticalNative methods.

This changes only the JNI compiler; Generic JNI shall be
updated in a separate change.

Performance improvements reported by Golem (art-opt-cc):
                                 x86 x86-64    arm  arm64
NativeDowncallStaticCritical6   +17%   +50%   +88%  +139%
NativeDowncallStaticCritical    +37%   +32%  +103%  +216%

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 112189621
Change-Id: I5758c8f478627f2eee8f615b4537a907c211b9f8
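
For reference, a minimal sketch (assuming an arm/arm64-like setup; not the
actual jni_compiler.cc code) of how the new JNIMacroAssembler::Jump() can be
used to emit such a tail-calling @CriticalNative stub. The register choices
and the jni_entrypoint_offset parameter are illustrative assumptions:

    // Emit a @CriticalNative stub with no managed frame that tail-calls the
    // native code through the hidden ArtMethod* argument.
    template <PointerSize kPointerSize>
    void EmitCriticalNativeTailCall(JNIMacroAssembler<kPointerSize>* jni_asm,
                                    ManagedRegister hidden_arg_reg,
                                    ManagedRegister scratch_reg,
                                    Offset jni_entrypoint_offset) {
      // No managed frame: no ArtMethod* spill, no callee saves, no entry spills.
      jni_asm->BuildFrame(/* frame_size= */ 0u,
                          ManagedRegister::NoRegister(),
                          ArrayRef<const ManagedRegister>(),
                          ManagedRegisterEntrySpills());
      // Arguments are already in their native locations; jump straight to the
      // native code held in the ArtMethod* passed as the hidden argument.
      // The native code returns directly to our caller, so no RemoveFrame().
      jni_asm->Jump(hidden_arg_reg, jni_entrypoint_offset, scratch_reg);
    }

Because the jump target is loaded from the ArtMethod* at call time rather
than baked into the stub, the same mechanism can support the planned late
binding for @CriticalNative methods.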
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 47a067b..ffb58ac 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -37,6 +37,10 @@
 #define ___   asm_.GetVIXLAssembler()->
 #endif
 
+// The AAPCS requires 8-byte alignment. This is not as strict as the Managed ABI stack alignment.
+static constexpr size_t kAapcsStackAlignment = 8u;
+static_assert(kAapcsStackAlignment < kStackAlignment);
+
 vixl::aarch32::Register AsVIXLRegister(ArmManagedRegister reg) {
   CHECK(reg.IsCoreRegister());
   return vixl::aarch32::Register(reg.RegId());
@@ -74,11 +78,16 @@
                                           ManagedRegister method_reg,
                                           ArrayRef<const ManagedRegister> callee_save_regs,
                                           const ManagedRegisterEntrySpills& entry_spills) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
+  // If we're creating an actual frame with the method, enforce managed stack alignment,
+  // otherwise only the native stack alignment.
+  if (method_reg.IsNoRegister()) {
+    CHECK_ALIGNED_PARAM(frame_size, kAapcsStackAlignment);
+  } else {
+    CHECK_ALIGNED_PARAM(frame_size, kStackAlignment);
+  }
 
   // Push callee saves and link register.
-  RegList core_spill_mask = 1 << LR;
+  RegList core_spill_mask = 0;
   uint32_t fp_spill_mask = 0;
   for (const ManagedRegister& reg : callee_save_regs) {
     if (reg.AsArm().IsCoreRegister()) {
@@ -87,9 +96,11 @@
       fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
     }
   }
-  ___ Push(RegisterList(core_spill_mask));
-  cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
-  cfi().RelOffsetForMany(DWARFReg(r0), 0, core_spill_mask, kFramePointerSize);
+  if (core_spill_mask != 0u) {
+    ___ Push(RegisterList(core_spill_mask));
+    cfi().AdjustCFAOffset(POPCOUNT(core_spill_mask) * kFramePointerSize);
+    cfi().RelOffsetForMany(DWARFReg(r0), 0, core_spill_mask, kFramePointerSize);
+  }
   if (fp_spill_mask != 0) {
     uint32_t first = CTZ(fp_spill_mask);
 
@@ -103,12 +114,15 @@
 
   // Increase frame to required size.
   int pushed_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  // Must at least have space for Method*.
-  CHECK_GT(frame_size, pushed_values * kFramePointerSize);
+  // Must at least have space for Method* if we're going to spill it.
+  CHECK_GE(frame_size, (pushed_values + (method_reg.IsRegister() ? 1u : 0u)) * kFramePointerSize);
   IncreaseFrameSize(frame_size - pushed_values * kFramePointerSize);  // handles CFI as well.
 
-  // Write out Method*.
-  asm_.StoreToOffset(kStoreWord, r0, sp, 0);
+  if (method_reg.IsRegister()) {
+    // Write out Method*.
+    CHECK(r0.Is(AsVIXLRegister(method_reg.AsArm())));
+    asm_.StoreToOffset(kStoreWord, r0, sp, 0);
+  }
 
   // Write out entry spills.
   int32_t offset = frame_size + kFramePointerSize;
@@ -133,27 +147,27 @@
 void ArmVIXLJNIMacroAssembler::RemoveFrame(size_t frame_size,
                                            ArrayRef<const ManagedRegister> callee_save_regs,
                                            bool may_suspend) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
+  CHECK_ALIGNED(frame_size, kAapcsStackAlignment);
   cfi().RememberState();
 
-  // Compute callee saves to pop and LR.
-  RegList core_spill_mask = 1 << LR;
-  uint32_t fp_spill_mask = 0;
+  // Compute callee saves to pop.
+  RegList core_spill_mask = 0u;
+  uint32_t fp_spill_mask = 0u;
   for (const ManagedRegister& reg : callee_save_regs) {
     if (reg.AsArm().IsCoreRegister()) {
-      core_spill_mask |= 1 << reg.AsArm().AsCoreRegister();
+      core_spill_mask |= 1u << reg.AsArm().AsCoreRegister();
     } else {
-      fp_spill_mask |= 1 << reg.AsArm().AsSRegister();
+      fp_spill_mask |= 1u << reg.AsArm().AsSRegister();
     }
   }
 
   // Decrease frame to start of callee saves.
-  int pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
-  CHECK_GT(frame_size, pop_values * kFramePointerSize);
+  size_t pop_values = POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask);
+  CHECK_GE(frame_size, pop_values * kFramePointerSize);
   DecreaseFrameSize(frame_size - (pop_values * kFramePointerSize));  // handles CFI as well.
 
   // Pop FP callee saves.
-  if (fp_spill_mask != 0) {
+  if (fp_spill_mask != 0u) {
     uint32_t first = CTZ(fp_spill_mask);
     // Check that list is contiguous.
      DCHECK_EQ(fp_spill_mask >> CTZ(fp_spill_mask), ~0u >> (32 - POPCOUNT(fp_spill_mask)));
@@ -164,7 +178,9 @@
   }
 
   // Pop core callee saves and LR.
-  ___ Pop(RegisterList(core_spill_mask));
+  if (core_spill_mask != 0u) {
+    ___ Pop(RegisterList(core_spill_mask));
+  }
 
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     if (may_suspend) {
@@ -173,11 +189,8 @@
     } else {
       // The method shall not be suspended; no need to refresh the Marking Register.
 
-      // Check that the Marking Register is a callee-save register,
-      // and thus has been preserved by native code following the
-      // AAPCS calling convention.
-      DCHECK_NE(core_spill_mask & (1 << MR), 0)
-          << "core_spill_mask should contain Marking Register R" << MR;
+      // The Marking Register is a callee-save register, and thus has been
+      // preserved by native code following the AAPCS calling convention.
 
       // The following condition is a compile-time one, so it does not have a run-time cost.
       if (kIsDebugBuild) {
@@ -206,13 +219,17 @@
 
 
 void ArmVIXLJNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
-  asm_.AddConstant(sp, -adjust);
-  cfi().AdjustCFAOffset(adjust);
+  if (adjust != 0u) {
+    asm_.AddConstant(sp, -adjust);
+    cfi().AdjustCFAOffset(adjust);
+  }
 }
 
 void ArmVIXLJNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
-  asm_.AddConstant(sp, adjust);
-  cfi().AdjustCFAOffset(-adjust);
+  if (adjust != 0u) {
+    asm_.AddConstant(sp, adjust);
+    cfi().AdjustCFAOffset(-adjust);
+  }
 }
 
 void ArmVIXLJNIMacroAssembler::Store(FrameOffset dest, ManagedRegister m_src, size_t size) {
@@ -562,6 +579,17 @@
   // TODO: not validating references.
 }
 
+void ArmVIXLJNIMacroAssembler::Jump(ManagedRegister mbase,
+                                    Offset offset,
+                                    ManagedRegister mscratch) {
+  vixl::aarch32::Register base = AsVIXLRegister(mbase.AsArm());
+  vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
+  UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
+  temps.Exclude(scratch);
+  asm_.LoadFromOffset(kLoadWord, scratch, base, offset.Int32Value());
+  ___ Bx(scratch);
+}
+
 void ArmVIXLJNIMacroAssembler::Call(ManagedRegister mbase,
                                     Offset offset,
                                     ManagedRegister mscratch) {
@@ -602,7 +630,7 @@
 }
 
 void ArmVIXLJNIMacroAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) {
-  CHECK_ALIGNED(stack_adjust, kStackAlignment);
+  CHECK_ALIGNED(stack_adjust, kAapcsStackAlignment);
   vixl::aarch32::Register scratch = AsVIXLRegister(mscratch.AsArm());
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   temps.Exclude(scratch);
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 0b1b6d2..1724671 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -181,6 +181,9 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) override;
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index 0eab49f..5b46971 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -37,6 +37,10 @@
 #define reg_d(D) Arm64Assembler::reg_d(D)
 #define reg_s(S) Arm64Assembler::reg_s(S)
 
+// The AAPCS64 requires 16-byte alignment. This is the same as the Managed ABI stack alignment.
+static constexpr size_t kAapcs64StackAlignment = 16u;
+static_assert(kAapcs64StackAlignment == kStackAlignment);
+
 Arm64JNIMacroAssembler::~Arm64JNIMacroAssembler() {
 }
 
@@ -57,16 +61,20 @@
 
 // See Arm64 PCS Section 5.2.2.1.
 void Arm64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, -adjust);
-  cfi().AdjustCFAOffset(adjust);
+  if (adjust != 0u) {
+    CHECK_ALIGNED(adjust, kStackAlignment);
+    AddConstant(SP, -adjust);
+    cfi().AdjustCFAOffset(adjust);
+  }
 }
 
 // See Arm64 PCS Section 5.2.2.1.
 void Arm64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  AddConstant(SP, adjust);
-  cfi().AdjustCFAOffset(-adjust);
+  if (adjust != 0u) {
+    CHECK_ALIGNED(adjust, kStackAlignment);
+    AddConstant(SP, adjust);
+    cfi().AdjustCFAOffset(-adjust);
+  }
 }
 
 void Arm64JNIMacroAssembler::AddConstant(XRegister rd, int32_t value, Condition cond) {
@@ -531,6 +539,15 @@
   // TODO: not validating references.
 }
 
+void Arm64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
+  Arm64ManagedRegister base = m_base.AsArm64();
+  Arm64ManagedRegister scratch = m_scratch.AsArm64();
+  CHECK(base.IsXRegister()) << base;
+  CHECK(scratch.IsXRegister()) << scratch;
+  LoadFromOffset(scratch.AsXRegister(), base.AsXRegister(), offs.Int32Value());
+  ___ Br(reg_x(scratch.AsXRegister()));
+}
+
 void Arm64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs, ManagedRegister m_scratch) {
   Arm64ManagedRegister base = m_base.AsArm64();
   Arm64ManagedRegister scratch = m_scratch.AsArm64();
@@ -704,18 +721,20 @@
 
   // Increase frame to required size.
   DCHECK_ALIGNED(frame_size, kStackAlignment);
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
+  // Must at least have space for Method* if we're going to spill it.
+  DCHECK_GE(frame_size,
+            core_reg_size + fp_reg_size + (method_reg.IsRegister() ? kXRegSizeInBytes : 0u));
   IncreaseFrameSize(frame_size);
 
   // Save callee-saves.
   asm_.SpillRegisters(core_reg_list, frame_size - core_reg_size);
   asm_.SpillRegisters(fp_reg_list, frame_size - core_reg_size - fp_reg_size);
 
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
-
-  // Write ArtMethod*
-  DCHECK(X0 == method_reg.AsArm64().AsXRegister());
-  StoreToOffset(X0, SP, 0);
+  if (method_reg.IsRegister()) {
+    // Write ArtMethod*
+    DCHECK(X0 == method_reg.AsArm64().AsXRegister());
+    StoreToOffset(X0, SP, 0);
+  }
 
   // Write out entry spills
   int32_t offset = frame_size + static_cast<size_t>(kArm64PointerSize);
@@ -760,10 +779,8 @@
 
   // For now we only check that the size of the frame is large enough to hold spills and method
   // reference.
-  DCHECK_GE(frame_size, core_reg_size + fp_reg_size + static_cast<size_t>(kArm64PointerSize));
-  DCHECK_ALIGNED(frame_size, kStackAlignment);
-
-  DCHECK(core_reg_list.IncludesAliasOf(reg_x(TR)));
+  DCHECK_GE(frame_size, core_reg_size + fp_reg_size);
+  DCHECK_ALIGNED(frame_size, kAapcs64StackAlignment);
 
   cfi().RememberState();
 
@@ -781,11 +798,8 @@
     } else {
       // The method shall not be suspended; no need to refresh the Marking Register.
 
-      // Check that the Marking Register is a callee-save register,
-      // and thus has been preserved by native code following the
-      // AAPCS64 calling convention.
-      DCHECK(core_reg_list.IncludesAliasOf(mr))
-          << "core_reg_list should contain Marking Register X" << mr.GetCode();
+      // The Marking Register is a callee-save register and thus has been
+      // preserved by native code following the AAPCS64 calling convention.
 
       // The following condition is a compile-time one, so it does not have a run-time cost.
       if (kIsDebugBuild) {
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index 45316ed..54592a3 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -162,6 +162,9 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) override;
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index e6130cf..bbe0f73 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -197,6 +197,9 @@
   virtual void VerifyObject(ManagedRegister src, bool could_be_null) = 0;
   virtual void VerifyObject(FrameOffset src, bool could_be_null) = 0;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  virtual void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
+
   // Call to address held at [base+offset]
   virtual void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) = 0;
   virtual void Call(FrameOffset base, Offset offset, ManagedRegister scratch) = 0;
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index db9c36c..fb41153 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -66,6 +66,10 @@
     return id_ == other.id_;
   }
 
+  constexpr bool IsRegister() const {
+    return id_ != kNoRegister;
+  }
+
   constexpr bool IsNoRegister() const {
     return id_ == kNoRegister;
   }
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index a9d1a25..6b73695 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -5191,6 +5191,17 @@
   // TODO: not validating references.
 }
 
+void MipsAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
+  MipsManagedRegister base = mbase.AsMips();
+  MipsManagedRegister scratch = mscratch.AsMips();
+  CHECK(base.IsCoreRegister()) << base;
+  CHECK(scratch.IsCoreRegister()) << scratch;
+  LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
+                 base.AsCoreRegister(), offset.Int32Value());
+  Jr(scratch.AsCoreRegister());
+  NopIfNoReordering();
+}
+
 void MipsAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
   MipsManagedRegister base = mbase.AsMips();
   MipsManagedRegister scratch = mscratch.AsMips();
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index a24071d..3a4e0ce 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -1359,6 +1359,9 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) override;
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) override;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) override;
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 70313ca..07d3716 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -4027,6 +4027,17 @@
   // TODO: not validating references
 }
 
+void Mips64Assembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
+  Mips64ManagedRegister base = mbase.AsMips64();
+  Mips64ManagedRegister scratch = mscratch.AsMips64();
+  CHECK(base.IsGpuRegister()) << base;
+  CHECK(scratch.IsGpuRegister()) << scratch;
+  LoadFromOffset(kLoadDoubleword, scratch.AsGpuRegister(),
+                 base.AsGpuRegister(), offset.Int32Value());
+  Jr(scratch.AsGpuRegister());
+  Nop();
+}
+
 void Mips64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister mscratch) {
   Mips64ManagedRegister base = mbase.AsMips64();
   Mips64ManagedRegister scratch = mscratch.AsMips64();
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index b331cee..03eae91 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -1424,6 +1424,9 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) override;
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+
   // Call to address held at [base+offset].
   void Call(ManagedRegister base, Offset offset, ManagedRegister mscratch) override;
   void Call(FrameOffset base, Offset offset, ManagedRegister mscratch) override;
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 540d72b..f4ea004 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -39,6 +39,9 @@
 
 constexpr size_t kFramePointerSize = 4;
 
+static constexpr size_t kNativeStackAlignment = 16;
+static_assert(kNativeStackAlignment == kStackAlignment);
+
 #define __ asm_.
 
 void X86JNIMacroAssembler::BuildFrame(size_t frame_size,
@@ -47,7 +50,15 @@
                                       const ManagedRegisterEntrySpills& entry_spills) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(4);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
+  if (frame_size == kFramePointerSize) {
+    // For @CriticalNative tail call.
+    CHECK(method_reg.IsNoRegister());
+    CHECK(spill_regs.empty());
+  } else if (method_reg.IsNoRegister()) {
+    CHECK_ALIGNED(frame_size, kNativeStackAlignment);
+  } else {
+    CHECK_ALIGNED(frame_size, kStackAlignment);
+  }
   int gpr_count = 0;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
     Register spill = spill_regs[i].AsX86().AsCpuRegister();
@@ -59,12 +70,16 @@
 
   // return address then method on stack.
   int32_t adjust = frame_size - gpr_count * kFramePointerSize -
-      kFramePointerSize /*method*/ -
-      kFramePointerSize /*return address*/;
-  __ addl(ESP, Immediate(-adjust));
-  cfi().AdjustCFAOffset(adjust);
-  __ pushl(method_reg.AsX86().AsCpuRegister());
-  cfi().AdjustCFAOffset(kFramePointerSize);
+      kFramePointerSize /*return address*/ -
+      (method_reg.IsRegister() ? kFramePointerSize /*method*/ : 0u);
+  if (adjust != 0) {
+    __ addl(ESP, Immediate(-adjust));
+    cfi().AdjustCFAOffset(adjust);
+  }
+  if (method_reg.IsRegister()) {
+    __ pushl(method_reg.AsX86().AsCpuRegister());
+    cfi().AdjustCFAOffset(kFramePointerSize);
+  }
   DCHECK_EQ(static_cast<size_t>(cfi().GetCurrentCFAOffset()), frame_size);
 
   for (const ManagedRegisterSpill& spill : entry_spills) {
@@ -86,12 +101,14 @@
 void X86JNIMacroAssembler::RemoveFrame(size_t frame_size,
                                        ArrayRef<const ManagedRegister> spill_regs,
                                        bool may_suspend ATTRIBUTE_UNUSED) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
+  CHECK_ALIGNED(frame_size, kNativeStackAlignment);
   cfi().RememberState();
   // -kFramePointerSize for ArtMethod*.
   int adjust = frame_size - spill_regs.size() * kFramePointerSize - kFramePointerSize;
-  __ addl(ESP, Immediate(adjust));
-  cfi().AdjustCFAOffset(-adjust);
+  if (adjust != 0) {
+    __ addl(ESP, Immediate(adjust));
+    cfi().AdjustCFAOffset(-adjust);
+  }
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     Register spill = spill_regs[i].AsX86().AsCpuRegister();
     __ popl(spill);
@@ -105,15 +122,19 @@
 }
 
 void X86JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  __ addl(ESP, Immediate(-adjust));
-  cfi().AdjustCFAOffset(adjust);
+  if (adjust != 0u) {
+    CHECK_ALIGNED(adjust, kNativeStackAlignment);
+    __ addl(ESP, Immediate(-adjust));
+    cfi().AdjustCFAOffset(adjust);
+  }
 }
 
 static void DecreaseFrameSizeImpl(X86Assembler* assembler, size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  assembler->addl(ESP, Immediate(adjust));
-  assembler->cfi().AdjustCFAOffset(-adjust);
+  if (adjust != 0u) {
+    CHECK_ALIGNED(adjust, kNativeStackAlignment);
+    assembler->addl(ESP, Immediate(adjust));
+    assembler->cfi().AdjustCFAOffset(-adjust);
+  }
 }
 
 void X86JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
@@ -301,7 +322,7 @@
       __ movl(dest.AsCpuRegister(), src.AsCpuRegister());
     } else if (src.IsX87Register() && dest.IsXmmRegister()) {
       // Pass via stack and pop X87 register
-      __ subl(ESP, Immediate(16));
+      IncreaseFrameSize(16);
       if (size == 4) {
         CHECK_EQ(src.AsX87Register(), ST0);
         __ fstps(Address(ESP, 0));
@@ -311,7 +332,7 @@
         __ fstpl(Address(ESP, 0));
         __ movsd(dest.AsXmmRegister(), Address(ESP, 0));
       }
-      __ addl(ESP, Immediate(16));
+      DecreaseFrameSize(16);
     } else {
       // TODO: x87, SSE
       UNIMPLEMENTED(FATAL) << ": Move " << dest << ", " << src;
@@ -487,6 +508,12 @@
   // TODO: not validating references
 }
 
+void X86JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86ManagedRegister base = mbase.AsX86();
+  CHECK(base.IsCpuRegister());
+  __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
+}
+
 void X86JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
   X86ManagedRegister base = mbase.AsX86();
   CHECK(base.IsCpuRegister());
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index a701080..7bf2f98 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -146,6 +146,9 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) override;
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+
   // Call to address held at [base+offset]
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 3921c4a..993cf95 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -2410,7 +2410,7 @@
 
   // Construct assembly text counterpart.
   std::ostringstream str;
-  str << "addq $0, %rsp\n";
+  // Increase by 0 is a no-op and is ignored by the assembler.
   str << "addq $-" << kStackAlignment << ", %rsp\n";
   str << "addq $-" << 10 * kStackAlignment << ", %rsp\n";
 
@@ -2430,7 +2430,7 @@
 
   // Construct assembly text counterpart.
   std::ostringstream str;
-  str << "addq $0, %rsp\n";
+  // Decrease by 0 is a no-op and is ignored by the assembler.
   str << "addq $" << kStackAlignment << ", %rsp\n";
   str << "addq $" << 10 * kStackAlignment << ", %rsp\n";
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 5924a8b..ffe9020 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -33,6 +33,9 @@
 
 constexpr size_t kFramePointerSize = 8;
 
+static constexpr size_t kNativeStackAlignment = 16;
+static_assert(kNativeStackAlignment == kStackAlignment);
+
 #define __ asm_.
 
 void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size,
@@ -41,8 +44,13 @@
                                          const ManagedRegisterEntrySpills& entry_spills) {
   DCHECK_EQ(CodeSize(), 0U);  // Nothing emitted yet.
   cfi().SetCurrentCFAOffset(8);  // Return address on stack.
-  CHECK_ALIGNED(frame_size, kStackAlignment);
-  int gpr_count = 0;
+  // Note: @CriticalNative tail call is not used (would have frame_size == kFramePointerSize).
+  if (method_reg.IsNoRegister()) {
+    CHECK_ALIGNED(frame_size, kNativeStackAlignment);
+  } else {
+    CHECK_ALIGNED(frame_size, kStackAlignment);
+  }
+  size_t gpr_count = 0u;
   for (int i = spill_regs.size() - 1; i >= 0; --i) {
     x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsCpuRegister()) {
@@ -56,8 +64,10 @@
   int64_t rest_of_frame = static_cast<int64_t>(frame_size)
                           - (gpr_count * kFramePointerSize)
                           - kFramePointerSize /*return address*/;
-  __ subq(CpuRegister(RSP), Immediate(rest_of_frame));
-  cfi().AdjustCFAOffset(rest_of_frame);
+  if (rest_of_frame != 0) {
+    __ subq(CpuRegister(RSP), Immediate(rest_of_frame));
+    cfi().AdjustCFAOffset(rest_of_frame);
+  }
 
   // spill xmms
   int64_t offset = rest_of_frame;
@@ -73,7 +83,9 @@
   static_assert(static_cast<size_t>(kX86_64PointerSize) == kFramePointerSize,
                 "Unexpected frame pointer size.");
 
-  __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
+  if (method_reg.IsRegister()) {
+    __ movq(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());
+  }
 
   for (const ManagedRegisterSpill& spill : entry_spills) {
     if (spill.AsX86_64().IsCpuRegister()) {
@@ -101,26 +113,29 @@
 void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size,
                                           ArrayRef<const ManagedRegister> spill_regs,
                                           bool may_suspend ATTRIBUTE_UNUSED) {
-  CHECK_ALIGNED(frame_size, kStackAlignment);
+  CHECK_ALIGNED(frame_size, kNativeStackAlignment);
   cfi().RememberState();
   int gpr_count = 0;
   // unspill xmms
   int64_t offset = static_cast<int64_t>(frame_size)
       - (spill_regs.size() * kFramePointerSize)
-      - 2 * kFramePointerSize;
+      - kFramePointerSize;
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsXmmRegister()) {
-      offset += sizeof(double);
       __ movsd(spill.AsXmmRegister(), Address(CpuRegister(RSP), offset));
       cfi().Restore(DWARFReg(spill.AsXmmRegister().AsFloatRegister()));
+      offset += sizeof(double);
     } else {
       gpr_count++;
     }
   }
-  int adjust = static_cast<int>(frame_size) - (gpr_count * kFramePointerSize) - kFramePointerSize;
-  __ addq(CpuRegister(RSP), Immediate(adjust));
-  cfi().AdjustCFAOffset(-adjust);
+  DCHECK_EQ(static_cast<size_t>(offset),
+            frame_size - (gpr_count * kFramePointerSize) - kFramePointerSize);
+  if (offset != 0) {
+    __ addq(CpuRegister(RSP), Immediate(offset));
+    cfi().AdjustCFAOffset(-offset);
+  }
   for (size_t i = 0; i < spill_regs.size(); ++i) {
     x86_64::X86_64ManagedRegister spill = spill_regs[i].AsX86_64();
     if (spill.IsCpuRegister()) {
@@ -136,15 +151,19 @@
 }
 
 void X86_64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  __ addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
-  cfi().AdjustCFAOffset(adjust);
+  if (adjust != 0u) {
+    CHECK_ALIGNED(adjust, kNativeStackAlignment);
+    __ addq(CpuRegister(RSP), Immediate(-static_cast<int64_t>(adjust)));
+    cfi().AdjustCFAOffset(adjust);
+  }
 }
 
 static void DecreaseFrameSizeImpl(size_t adjust, X86_64Assembler* assembler) {
-  CHECK_ALIGNED(adjust, kStackAlignment);
-  assembler->addq(CpuRegister(RSP), Immediate(adjust));
-  assembler->cfi().AdjustCFAOffset(-adjust);
+  if (adjust != 0u) {
+    CHECK_ALIGNED(adjust, kNativeStackAlignment);
+    assembler->addq(CpuRegister(RSP), Immediate(adjust));
+    assembler->cfi().AdjustCFAOffset(-adjust);
+  }
 }
 
 void X86_64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) {
@@ -544,6 +563,12 @@
   // TODO: not validating references
 }
 
+void X86_64JNIMacroAssembler::Jump(ManagedRegister mbase, Offset offset, ManagedRegister) {
+  X86_64ManagedRegister base = mbase.AsX86_64();
+  CHECK(base.IsCpuRegister());
+  __ jmp(Address(base.AsCpuRegister(), offset.Int32Value()));
+}
+
 void X86_64JNIMacroAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister) {
   X86_64ManagedRegister base = mbase.AsX86_64();
   CHECK(base.IsCpuRegister());
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index 4c2fd8f..d3f1fce 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -172,6 +172,9 @@
   void VerifyObject(ManagedRegister src, bool could_be_null) override;
   void VerifyObject(FrameOffset src, bool could_be_null) override;
 
+  // Jump to address held at [base+offset] (used for tail calls).
+  void Jump(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
+
   // Call to address held at [base+offset]
   void Call(ManagedRegister base, Offset offset, ManagedRegister scratch) override;
   void Call(FrameOffset base, Offset offset, ManagedRegister scratch) override;