jni: Do not create a managed frame for @CriticalNative.
Omit the managed frame for @CriticalNative methods, do not check
for exceptions and make a tail call when possible.
Pass the method pointer in a hidden argument to prepare for
implementing late binding for @CriticalNative methods.
This changes only the JNI compiler; Generic JNI shall be
updated in a separate change.
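For illustration only (hypothetical class and method names), a
@CriticalNative method is declared on the Java side as sketched
below; such methods must be static, non-synchronized and may use
only primitive argument and return types, so the native function
receives neither JNIEnv* nor jclass:

    import dalvik.annotation.optimization.CriticalNative;

    public final class NativeMath {
        // Corresponding native function: jint add(jint a, jint b),
        // with no JNIEnv* and no jclass parameters.
        @CriticalNative
        static native int add(int a, int b);
    }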
Performance improvements reported by Golem (art-opt-cc):
                                 x86   x86-64    arm    arm64
NativeDowncallStaticCritical6   +17%     +50%   +88%    +139%
NativeDowncallStaticCritical    +37%     +32%  +103%    +216%
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 112189621
Change-Id: I5758c8f478627f2eee8f615b4537a907c211b9f8
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 42a4603..2072302 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -74,6 +74,7 @@
ArmManagedRegister::FromCoreRegister(R8),
ArmManagedRegister::FromCoreRegister(R10),
ArmManagedRegister::FromCoreRegister(R11),
+ ArmManagedRegister::FromCoreRegister(LR),
// Hard float registers.
ArmManagedRegister::FromSRegister(S16),
ArmManagedRegister::FromSRegister(S17),
@@ -93,37 +94,79 @@
ArmManagedRegister::FromSRegister(S31)
};
-static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+template <size_t size>
+static constexpr uint32_t CalculateCoreCalleeSpillMask(
+ const ManagedRegister (&callee_saves)[size]) {
- // LR is a special callee save which is not reported by CalleeSaveRegisters().
- uint32_t result = 1 << LR;
- for (auto&& r : kCalleeSaveRegisters) {
+ uint32_t result = 0u;
+ for (auto&& r : callee_saves) {
if (r.AsArm().IsCoreRegister()) {
- result |= (1 << r.AsArm().AsCoreRegister());
+ result |= (1u << r.AsArm().AsCoreRegister());
}
}
return result;
}
-static constexpr uint32_t CalculateFpCalleeSpillMask() {
- uint32_t result = 0;
- for (auto&& r : kCalleeSaveRegisters) {
+template <size_t size>
+static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&callee_saves)[size]) {
+ uint32_t result = 0u;
+ for (auto&& r : callee_saves) {
if (r.AsArm().IsSRegister()) {
- result |= (1 << r.AsArm().AsSRegister());
+ result |= (1u << r.AsArm().AsSRegister());
}
}
return result;
}
-static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
-static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters);
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters);
+
+// The AAPCS requires 8-byte alignment. This is not as strict as the Managed ABI stack alignment.
+static constexpr size_t kAapcsStackAlignment = 8u;
+static_assert(kAapcsStackAlignment < kStackAlignment);
+
+static constexpr ManagedRegister kAapcsCalleeSaveRegisters[] = {
+ // Core registers.
+ ArmManagedRegister::FromCoreRegister(R4),
+ ArmManagedRegister::FromCoreRegister(R5),
+ ArmManagedRegister::FromCoreRegister(R6),
+ ArmManagedRegister::FromCoreRegister(R7),
+ ArmManagedRegister::FromCoreRegister(R8),
+ ArmManagedRegister::FromCoreRegister(R9), // The platform register is callee-save on Android.
+ ArmManagedRegister::FromCoreRegister(R10),
+ ArmManagedRegister::FromCoreRegister(R11),
+ ArmManagedRegister::FromCoreRegister(LR),
+ // Hard float registers.
+ ArmManagedRegister::FromSRegister(S16),
+ ArmManagedRegister::FromSRegister(S17),
+ ArmManagedRegister::FromSRegister(S18),
+ ArmManagedRegister::FromSRegister(S19),
+ ArmManagedRegister::FromSRegister(S20),
+ ArmManagedRegister::FromSRegister(S21),
+ ArmManagedRegister::FromSRegister(S22),
+ ArmManagedRegister::FromSRegister(S23),
+ ArmManagedRegister::FromSRegister(S24),
+ ArmManagedRegister::FromSRegister(S25),
+ ArmManagedRegister::FromSRegister(S26),
+ ArmManagedRegister::FromSRegister(S27),
+ ArmManagedRegister::FromSRegister(S28),
+ ArmManagedRegister::FromSRegister(S29),
+ ArmManagedRegister::FromSRegister(S30),
+ ArmManagedRegister::FromSRegister(S31)
+};
+
+static constexpr uint32_t kAapcsCoreCalleeSpillMask =
+ CalculateCoreCalleeSpillMask(kAapcsCalleeSaveRegisters);
+static constexpr uint32_t kAapcsFpCalleeSpillMask =
+ CalculateFpCalleeSpillMask(kAapcsCalleeSaveRegisters);
// Calling convention
-ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister ArmManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
return ArmManagedRegister::FromCoreRegister(IP); // R12
}
-ManagedRegister ArmJniCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister ArmJniCallingConvention::InterproceduralScratchRegister() const {
return ArmManagedRegister::FromCoreRegister(IP); // R12
}
@@ -179,11 +222,9 @@
FrameOffset ArmManagedRuntimeCallingConvention::CurrentParamStackOffset() {
CHECK(IsCurrentParamOnStack());
- FrameOffset result =
- FrameOffset(displacement_.Int32Value() + // displacement
- kFramePointerSize + // Method*
- (itr_slots_ * kFramePointerSize)); // offset into in args
- return result;
+ return FrameOffset(displacement_.Int32Value() + // displacement
+ kFramePointerSize + // Method*
+ (itr_slots_ * kFramePointerSize)); // offset into in args
}
const ManagedRegisterEntrySpills& ArmManagedRuntimeCallingConvention::EntrySpills() {
@@ -252,6 +293,7 @@
}
return entry_spills_;
}
+
// JNI calling convention
ArmJniCallingConvention::ArmJniCallingConvention(bool is_static,
@@ -321,7 +363,7 @@
}
}
- if (cur_reg < kJniArgumentRegisterCount) {
+ if (cur_reg <= kJniArgumentRegisterCount) {
// As a special case when, as a result of shifting (or not) there are no arguments on the stack,
// we actually have 0 stack padding.
//
@@ -347,53 +389,84 @@
uint32_t ArmJniCallingConvention::CoreSpillMask() const {
// Compute spill mask to agree with callee saves initialized in the constructor
- return kCoreCalleeSpillMask;
+ return is_critical_native_ ? 0u : kCoreCalleeSpillMask;
}
uint32_t ArmJniCallingConvention::FpSpillMask() const {
- return kFpCalleeSpillMask;
+ return is_critical_native_ ? 0u : kFpCalleeSpillMask;
}
ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const {
return ArmManagedRegister::FromCoreRegister(R2);
}
-size_t ArmJniCallingConvention::FrameSize() {
- // Method*, LR and callee save area size, local reference segment state
+size_t ArmJniCallingConvention::FrameSize() const {
+ if (UNLIKELY(is_critical_native_)) {
+ CHECK(!SpillsMethod());
+ CHECK(!HasLocalReferenceSegmentState());
+ CHECK(!HasHandleScope());
+ CHECK(!SpillsReturnValue());
+ return 0u; // There is no managed frame for @CriticalNative.
+ }
+
+ // Method*, callee save area size, local reference segment state
+ CHECK(SpillsMethod());
const size_t method_ptr_size = static_cast<size_t>(kArmPointerSize);
- const size_t lr_return_addr_size = kFramePointerSize;
const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
- size_t frame_data_size = method_ptr_size + lr_return_addr_size + callee_save_area_size;
+ size_t total_size = method_ptr_size + callee_save_area_size;
- if (LIKELY(HasLocalReferenceSegmentState())) {
- // local reference segment state
- frame_data_size += kFramePointerSize;
- // TODO: Probably better to use sizeof(IRTSegmentState) here...
- }
+ CHECK(HasLocalReferenceSegmentState());
+ // local reference segment state
+ total_size += kFramePointerSize;
+ // TODO: Probably better to use sizeof(IRTSegmentState) here...
- // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
- const size_t handle_scope_size = HandleScope::SizeOf(kArmPointerSize, ReferenceCount());
-
- size_t total_size = frame_data_size;
- if (LIKELY(HasHandleScope())) {
- // HandleScope is sometimes excluded.
- total_size += handle_scope_size; // handle scope size
- }
+ CHECK(HasHandleScope());
+ total_size += HandleScope::SizeOf(kArmPointerSize, ReferenceCount());
// Plus return value spill area size
+ CHECK(SpillsReturnValue());
total_size += SizeOfReturnValue();
return RoundUp(total_size, kStackAlignment);
}
-size_t ArmJniCallingConvention::OutArgSize() {
- // TODO: Identical to x86_64 except for also adding additional padding.
- return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_,
- kStackAlignment);
+size_t ArmJniCallingConvention::OutArgSize() const {
+ // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
+ size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
+ // Account for arguments passed through r0-r3. (No FP args, AAPCS32 is soft-float.)
+ size_t stack_args = all_args - std::min(kJniArgumentRegisterCount, all_args);
+ // The size of outgoing arguments.
+ size_t size = stack_args * kFramePointerSize + padding_;
+
+ // @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS.
+ static_assert((kCoreCalleeSpillMask & ~kAapcsCoreCalleeSpillMask) == 0u);
+ static_assert((kFpCalleeSpillMask & ~kAapcsFpCalleeSpillMask) == 0u);
+
+ // For @CriticalNative, we can make a tail call if there are no stack args and the
+ // return type is not an FP type (otherwise we need to move the result to FP register).
+ DCHECK(!RequiresSmallResultTypeExtension());
+ if (is_critical_native_ && (size != 0u || GetShorty()[0] == 'F' || GetShorty()[0] == 'D')) {
+ size += kFramePointerSize; // We need to spill LR with the args.
+ }
+ return RoundUp(size, kAapcsStackAlignment);
}
ArrayRef<const ManagedRegister> ArmJniCallingConvention::CalleeSaveRegisters() const {
- return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ if (UNLIKELY(IsCriticalNative())) {
+ if (UseTailCall()) {
+ return ArrayRef<const ManagedRegister>(); // Do not spill anything.
+ } else {
+ // Spill LR with out args.
+ static_assert((kCoreCalleeSpillMask >> LR) == 1u); // Contains LR as the highest bit.
+ constexpr size_t lr_index = POPCOUNT(kCoreCalleeSpillMask) - 1u;
+ static_assert(kCalleeSaveRegisters[lr_index].Equals(
+ ArmManagedRegister::FromCoreRegister(LR)));
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(
+ /*pos*/ lr_index, /*length=*/ 1u);
+ }
+ } else {
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ }
}
// JniCallingConvention ABI follows AAPCS where longs and doubles must occur
@@ -451,18 +524,27 @@
return FrameOffset(offset);
}
-size_t ArmJniCallingConvention::NumberOfOutgoingStackArgs() {
- size_t static_args = HasSelfClass() ? 1 : 0; // count jclass
- // regular argument parameters and this
- size_t param_args = NumArgs() + NumLongOrDoubleArgs(); // twice count 8-byte args
- // XX: Why is the long/ordouble counted twice but not JNIEnv* ???
- // count JNIEnv* less arguments in registers
- size_t internal_args = (HasJniEnv() ? 1 : 0 /* jni env */);
- size_t total_args = static_args + param_args + internal_args;
+ManagedRegister ArmJniCallingConvention::HiddenArgumentRegister() const {
+ CHECK(IsCriticalNative());
+ // R4 is neither managed callee-save, nor argument register, nor scratch register.
+ // (It is native callee-save but the value coming from managed code can be clobbered.)
+ // TODO: Change to static_assert; std::none_of should be constexpr since C++20.
+ DCHECK(std::none_of(kCalleeSaveRegisters,
+ kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
+ [](ManagedRegister callee_save) constexpr {
+ return callee_save.Equals(ArmManagedRegister::FromCoreRegister(R4));
+ }));
+ DCHECK(std::none_of(kJniArgumentRegisters,
+ kJniArgumentRegisters + std::size(kJniArgumentRegisters),
+ [](Register reg) { return reg == R4; }));
+ DCHECK(!InterproceduralScratchRegister().Equals(ArmManagedRegister::FromCoreRegister(R4)));
+ return ArmManagedRegister::FromCoreRegister(R4);
+}
- return total_args - std::min(kJniArgumentRegisterCount, static_cast<size_t>(total_args));
-
- // TODO: Very similar to x86_64 except for the return pc.
+// Whether to use tail call (used only for @CriticalNative).
+bool ArmJniCallingConvention::UseTailCall() const {
+ CHECK(IsCriticalNative());
+ return OutArgSize() == 0u;
}
} // namespace arm
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index b327898..04ad00b 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -35,7 +35,7 @@
~ArmManagedRuntimeCallingConvention() override {}
// Calling convention
ManagedRegister ReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
bool IsCurrentParamInRegister() override;
@@ -60,11 +60,11 @@
// Calling convention
ManagedRegister ReturnRegister() override;
ManagedRegister IntReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// JNI calling convention
void Next() override; // Override default behavior for AAPCS
- size_t FrameSize() override;
- size_t OutArgSize() override;
+ size_t FrameSize() const override;
+ size_t OutArgSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
@@ -79,8 +79,11 @@
return false;
}
- protected:
- size_t NumberOfOutgoingStackArgs() override;
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
private:
// Padding to ensure longs and doubles are not split in AAPCS
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 4a6a754..44218ef 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -59,6 +59,8 @@
// Jni function is the native function which the java code wants to call.
// Jni method is the method that is compiled by jni compiler.
// Call chain: managed code(java) --> jni method --> jni function.
+ // This does not apply to @CriticalNative methods.
+
// Thread register(X19) is saved on stack.
Arm64ManagedRegister::FromXRegister(X19),
Arm64ManagedRegister::FromXRegister(X20),
@@ -86,58 +88,73 @@
Arm64ManagedRegister::FromDRegister(D15),
};
-static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+template <size_t size>
+static constexpr uint32_t CalculateCoreCalleeSpillMask(
+ const ManagedRegister (&callee_saves)[size]) {
uint32_t result = 0u;
- for (auto&& r : kCalleeSaveRegisters) {
+ for (auto&& r : callee_saves) {
if (r.AsArm64().IsXRegister()) {
- result |= (1 << r.AsArm64().AsXRegister());
+ result |= (1u << r.AsArm64().AsXRegister());
}
}
return result;
}
-static constexpr uint32_t CalculateFpCalleeSpillMask() {
- uint32_t result = 0;
- for (auto&& r : kCalleeSaveRegisters) {
+template <size_t size>
+static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&callee_saves)[size]) {
+ uint32_t result = 0u;
+ for (auto&& r : callee_saves) {
if (r.AsArm64().IsDRegister()) {
- result |= (1 << r.AsArm64().AsDRegister());
+ result |= (1u << r.AsArm64().AsDRegister());
}
}
return result;
}
-static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
-static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters);
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters);
+
+// The AAPCS64 requires 16-byte alignment. This is the same as the Managed ABI stack alignment.
+static constexpr size_t kAapcs64StackAlignment = 16u;
+static_assert(kAapcs64StackAlignment == kStackAlignment);
+
+static constexpr ManagedRegister kAapcs64CalleeSaveRegisters[] = {
+ // Core registers.
+ Arm64ManagedRegister::FromXRegister(X19),
+ Arm64ManagedRegister::FromXRegister(X20),
+ Arm64ManagedRegister::FromXRegister(X21),
+ Arm64ManagedRegister::FromXRegister(X22),
+ Arm64ManagedRegister::FromXRegister(X23),
+ Arm64ManagedRegister::FromXRegister(X24),
+ Arm64ManagedRegister::FromXRegister(X25),
+ Arm64ManagedRegister::FromXRegister(X26),
+ Arm64ManagedRegister::FromXRegister(X27),
+ Arm64ManagedRegister::FromXRegister(X28),
+ Arm64ManagedRegister::FromXRegister(X29),
+ Arm64ManagedRegister::FromXRegister(LR),
+ // Hard float registers.
+ Arm64ManagedRegister::FromDRegister(D8),
+ Arm64ManagedRegister::FromDRegister(D9),
+ Arm64ManagedRegister::FromDRegister(D10),
+ Arm64ManagedRegister::FromDRegister(D11),
+ Arm64ManagedRegister::FromDRegister(D12),
+ Arm64ManagedRegister::FromDRegister(D13),
+ Arm64ManagedRegister::FromDRegister(D14),
+ Arm64ManagedRegister::FromDRegister(D15),
+};
+
+static constexpr uint32_t kAapcs64CoreCalleeSpillMask =
+ CalculateCoreCalleeSpillMask(kAapcs64CalleeSaveRegisters);
+static constexpr uint32_t kAapcs64FpCalleeSpillMask =
+ CalculateFpCalleeSpillMask(kAapcs64CalleeSaveRegisters);
// Calling convention
-ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
- // X20 is safe to use as a scratch register:
- // - with Baker read barriers (in the case of a non-critical native
- // method), it is reserved as Marking Register, and thus does not
- // actually need to be saved/restored; it is refreshed on exit
- // (see Arm64JNIMacroAssembler::RemoveFrame);
- // - in other cases, it is saved on entry (in
- // Arm64JNIMacroAssembler::BuildFrame) and restored on exit (in
- // Arm64JNIMacroAssembler::RemoveFrame). This is also expected in
- // the case of a critical native method in the Baker read barrier
- // configuration, where the value of MR must be preserved across
- // the JNI call (as there is no MR refresh in that case).
- return Arm64ManagedRegister::FromXRegister(X20);
+ManagedRegister Arm64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
+ return Arm64ManagedRegister::FromXRegister(IP0); // X16
}
-ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() {
- // X20 is safe to use as a scratch register:
- // - with Baker read barriers (in the case of a non-critical native
- // method), it is reserved as Marking Register, and thus does not
- // actually need to be saved/restored; it is refreshed on exit
- // (see Arm64JNIMacroAssembler::RemoveFrame);
- // - in other cases, it is saved on entry (in
- // Arm64JNIMacroAssembler::BuildFrame) and restored on exit (in
- // Arm64JNIMacroAssembler::RemoveFrame). This is also expected in
- // the case of a critical native method in the Baker read barrier
- // configuration, where the value of MR must be preserved across
- // the JNI call (as there is no MR refresh in that case).
- return Arm64ManagedRegister::FromXRegister(X20);
+ManagedRegister Arm64JniCallingConvention::InterproceduralScratchRegister() const {
+ return Arm64ManagedRegister::FromXRegister(IP0); // X16
}
static ManagedRegister ReturnRegisterForShorty(const char* shorty) {
@@ -187,11 +204,9 @@
FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
CHECK(IsCurrentParamOnStack());
- FrameOffset result =
- FrameOffset(displacement_.Int32Value() + // displacement
- kFramePointerSize + // Method ref
- (itr_slots_ * sizeof(uint32_t))); // offset into in args
- return result;
+ return FrameOffset(displacement_.Int32Value() + // displacement
+ kFramePointerSize + // Method ref
+ (itr_slots_ * sizeof(uint32_t))); // offset into in args
}
const ManagedRegisterEntrySpills& Arm64ManagedRuntimeCallingConvention::EntrySpills() {
@@ -243,6 +258,7 @@
}
// JNI calling convention
+
Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static,
bool is_synchronized,
bool is_critical_native,
@@ -255,52 +271,88 @@
}
uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
- return kCoreCalleeSpillMask;
+ return is_critical_native_ ? 0u : kCoreCalleeSpillMask;
}
uint32_t Arm64JniCallingConvention::FpSpillMask() const {
- return kFpCalleeSpillMask;
+ return is_critical_native_ ? 0u : kFpCalleeSpillMask;
}
ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const {
return ManagedRegister::NoRegister();
}
-size_t Arm64JniCallingConvention::FrameSize() {
+size_t Arm64JniCallingConvention::FrameSize() const {
+ if (is_critical_native_) {
+ CHECK(!SpillsMethod());
+ CHECK(!HasLocalReferenceSegmentState());
+ CHECK(!HasHandleScope());
+ CHECK(!SpillsReturnValue());
+ return 0u; // There is no managed frame for @CriticalNative.
+ }
+
// Method*, callee save area size, local reference segment state
- //
- // (Unlike x86_64, do not include return address, and the segment state is uint32
- // instead of pointer).
+ CHECK(SpillsMethod());
size_t method_ptr_size = static_cast<size_t>(kFramePointerSize);
size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+ size_t total_size = method_ptr_size + callee_save_area_size;
- size_t frame_data_size = method_ptr_size + callee_save_area_size;
- if (LIKELY(HasLocalReferenceSegmentState())) {
- frame_data_size += sizeof(uint32_t);
- }
- // References plus 2 words for HandleScope header
- size_t handle_scope_size = HandleScope::SizeOf(kArm64PointerSize, ReferenceCount());
+ CHECK(HasLocalReferenceSegmentState());
+ total_size += sizeof(uint32_t);
- size_t total_size = frame_data_size;
- if (LIKELY(HasHandleScope())) {
- // HandleScope is sometimes excluded.
- total_size += handle_scope_size; // handle scope size
- }
+ CHECK(HasHandleScope());
+ total_size += HandleScope::SizeOf(kArm64PointerSize, ReferenceCount());
// Plus return value spill area size
+ CHECK(SpillsReturnValue());
total_size += SizeOfReturnValue();
return RoundUp(total_size, kStackAlignment);
}
-size_t Arm64JniCallingConvention::OutArgSize() {
- // Same as X86_64
- return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
+size_t Arm64JniCallingConvention::OutArgSize() const {
+ // Count param args, including JNIEnv* and jclass*.
+ size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
+ size_t num_fp_args = NumFloatOrDoubleArgs();
+ DCHECK_GE(all_args, num_fp_args);
+ size_t num_non_fp_args = all_args - num_fp_args;
+ // Account for FP arguments passed through v0-v7.
+ size_t num_stack_fp_args =
+ num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
+ // Account for other (integer and pointer) arguments passed through GPR (x0-x7).
+ size_t num_stack_non_fp_args =
+ num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
+ // The size of outgoing arguments.
+ size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+
+ // @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64.
+ static_assert((kCoreCalleeSpillMask & ~kAapcs64CoreCalleeSpillMask) == 0u);
+ static_assert((kFpCalleeSpillMask & ~kAapcs64FpCalleeSpillMask) == 0u);
+
+ // For @CriticalNative, we can make a tail call if there are no stack args and
+ // we do not need to extend the result. Otherwise, add space for return PC.
+ if (is_critical_native_ && (size != 0u || RequiresSmallResultTypeExtension())) {
+ size += kFramePointerSize; // We need to spill LR with the args.
+ }
+ return RoundUp(size, kStackAlignment);
}
ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() const {
- // Same as X86_64
- return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ if (UNLIKELY(IsCriticalNative())) {
+ if (UseTailCall()) {
+ return ArrayRef<const ManagedRegister>(); // Do not spill anything.
+ } else {
+ // Spill LR with out args.
+ static_assert((kCoreCalleeSpillMask >> LR) == 1u); // Contains LR as the highest bit.
+ constexpr size_t lr_index = POPCOUNT(kCoreCalleeSpillMask) - 1u;
+ static_assert(kCalleeSaveRegisters[lr_index].Equals(
+ Arm64ManagedRegister::FromXRegister(LR)));
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(
+ /*pos*/ lr_index, /*length=*/ 1u);
+ }
+ } else {
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ }
}
bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
@@ -347,25 +399,28 @@
size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
CHECK_LT(offset, OutArgSize());
return FrameOffset(offset);
- // TODO: Seems identical to X86_64 code.
}
-size_t Arm64JniCallingConvention::NumberOfOutgoingStackArgs() {
- // all arguments including JNI args
- size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
+ManagedRegister Arm64JniCallingConvention::HiddenArgumentRegister() const {
+ CHECK(IsCriticalNative());
+ // X15 is neither managed callee-save, nor argument register, nor scratch register.
+ // TODO: Change to static_assert; std::none_of should be constexpr since C++20.
+ DCHECK(std::none_of(kCalleeSaveRegisters,
+ kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
+ [](ManagedRegister callee_save) constexpr {
+ return callee_save.Equals(Arm64ManagedRegister::FromXRegister(X15));
+ }));
+ DCHECK(std::none_of(kXArgumentRegisters,
+ kXArgumentRegisters + std::size(kXArgumentRegisters),
+ [](XRegister reg) { return reg == X15; }));
+ DCHECK(!InterproceduralScratchRegister().Equals(Arm64ManagedRegister::FromXRegister(X15)));
+ return Arm64ManagedRegister::FromXRegister(X15);
+}
- DCHECK_GE(all_args, NumFloatOrDoubleArgs());
-
- size_t all_stack_args =
- all_args
- - std::min(kMaxFloatOrDoubleRegisterArguments,
- static_cast<size_t>(NumFloatOrDoubleArgs()))
- - std::min(kMaxIntLikeRegisterArguments,
- static_cast<size_t>((all_args - NumFloatOrDoubleArgs())));
-
- // TODO: Seems similar to X86_64 code except it doesn't count return pc.
-
- return all_stack_args;
+// Whether to use tail call (used only for @CriticalNative).
+bool Arm64JniCallingConvention::UseTailCall() const {
+ CHECK(IsCriticalNative());
+ return OutArgSize() == 0u;
}
} // namespace arm64
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index ed0ddeb..f4148c7 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -35,7 +35,7 @@
~Arm64ManagedRuntimeCallingConvention() override {}
// Calling convention
ManagedRegister ReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
bool IsCurrentParamInRegister() override;
@@ -60,10 +60,10 @@
// Calling convention
ManagedRegister ReturnRegister() override;
ManagedRegister IntReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// JNI calling convention
- size_t FrameSize() override;
- size_t OutArgSize() override;
+ size_t FrameSize() const override;
+ size_t OutArgSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
@@ -75,11 +75,14 @@
// aarch64 calling convention leaves upper bits undefined.
bool RequiresSmallResultTypeExtension() const override {
- return true;
+ return HasSmallReturnType();
}
- protected:
- size_t NumberOfOutgoingStackArgs() override;
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
private:
DISALLOW_COPY_AND_ASSIGN(Arm64JniCallingConvention);
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index f031b9b..15af248 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -347,21 +347,6 @@
}
}
-bool JniCallingConvention::HasHandleScope() const {
- // Exclude HandleScope for @CriticalNative methods for optimization speed.
- return is_critical_native_ == false;
-}
-
-bool JniCallingConvention::HasLocalReferenceSegmentState() const {
- // Exclude local reference segment states for @CriticalNative methods for optimization speed.
- return is_critical_native_ == false;
-}
-
-bool JniCallingConvention::HasJniEnv() const {
- // Exclude "JNIEnv*" parameter for @CriticalNative methods.
- return HasExtraArgumentsForJni();
-}
-
bool JniCallingConvention::HasSelfClass() const {
if (!IsStatic()) {
// Virtual functions: There is never an implicit jclass parameter.
@@ -372,11 +357,6 @@
}
}
-bool JniCallingConvention::HasExtraArgumentsForJni() const {
- // @CriticalNative jni implementations exclude both JNIEnv* and the jclass/jobject parameters.
- return is_critical_native_ == false;
-}
-
unsigned int JniCallingConvention::GetIteratorPositionWithinShorty() const {
// We need to subtract out the extra JNI arguments if we want to use this iterator position
// with the inherited CallingConvention member functions, which rely on scanning the shorty.
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 77a5d59..3d4cefe 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -49,12 +49,7 @@
// Register that holds result of this method invocation.
virtual ManagedRegister ReturnRegister() = 0;
// Register reserved for scratch usage during procedure calls.
- virtual ManagedRegister InterproceduralScratchRegister() = 0;
-
- // Offset of Method within the frame.
- FrameOffset MethodStackOffset() {
- return displacement_;
- }
+ virtual ManagedRegister InterproceduralScratchRegister() const = 0;
// Iterator interface
@@ -70,6 +65,14 @@
itr_float_and_doubles_ = 0;
}
+ FrameOffset GetDisplacement() const {
+ return displacement_;
+ }
+
+ PointerSize GetFramePointerSize() const {
+ return frame_pointer_size_;
+ }
+
virtual ~CallingConvention() {}
protected:
@@ -239,6 +242,11 @@
const char* shorty,
InstructionSet instruction_set);
+ // Offset of Method within the managed frame.
+ FrameOffset MethodStackOffset() {
+ return FrameOffset(0u);
+ }
+
// Register that holds the incoming method argument
virtual ManagedRegister MethodRegister() = 0;
@@ -296,10 +304,10 @@
// Size of frame excluding space for outgoing args (its assumed Method* is
// always at the bottom of a frame, but this doesn't work for outgoing
// native args). Includes alignment.
- virtual size_t FrameSize() = 0;
+ virtual size_t FrameSize() const = 0;
// Size of outgoing arguments (stack portion), including alignment.
// -- Arguments that are passed via registers are excluded from this size.
- virtual size_t OutArgSize() = 0;
+ virtual size_t OutArgSize() const = 0;
// Number of references in stack indirect reference table
size_t ReferenceCount() const;
// Location where the segment state of the local indirect reference table is saved
@@ -365,6 +373,32 @@
virtual ~JniCallingConvention() {}
+ bool IsCriticalNative() const {
+ return is_critical_native_;
+ }
+
+ // Does the transition have a method pointer in the stack frame?
+ bool SpillsMethod() const {
+ // Exclude method pointer for @CriticalNative methods for optimization speed.
+ return !IsCriticalNative();
+ }
+
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ virtual ManagedRegister HiddenArgumentRegister() const = 0;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ virtual bool UseTailCall() const = 0;
+
+ // Whether the return type is small. Used for RequiresSmallResultTypeExtension()
+ // on architectures that require the sign/zero extension.
+ bool HasSmallReturnType() const {
+ Primitive::Type return_type = GetReturnType();
+ return return_type == Primitive::kPrimByte ||
+ return_type == Primitive::kPrimShort ||
+ return_type == Primitive::kPrimBoolean ||
+ return_type == Primitive::kPrimChar;
+ }
+
protected:
// Named iterator positions
enum IteratorPos {
@@ -380,24 +414,41 @@
: CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size),
is_critical_native_(is_critical_native) {}
- // Number of stack slots for outgoing arguments, above which the handle scope is
- // located
- virtual size_t NumberOfOutgoingStackArgs() = 0;
-
protected:
size_t NumberOfExtraArgumentsForJni() const;
// Does the transition have a StackHandleScope?
- bool HasHandleScope() const;
+ bool HasHandleScope() const {
+ // Exclude HandleScope for @CriticalNative methods for optimization speed.
+ return !IsCriticalNative();
+ }
+
// Does the transition have a local reference segment state?
- bool HasLocalReferenceSegmentState() const;
- // Has a JNIEnv* parameter implicitly?
- bool HasJniEnv() const;
- // Has a 'jclass' parameter implicitly?
- bool HasSelfClass() const;
+ bool HasLocalReferenceSegmentState() const {
+ // Exclude local reference segment states for @CriticalNative methods for optimization speed.
+ return !IsCriticalNative();
+ }
+
+ // Does the transition back spill the return value in the stack frame?
+ bool SpillsReturnValue() const {
+ // Exclude return value for @CriticalNative methods for optimization speed.
+ return !IsCriticalNative();
+ }
// Are there extra JNI arguments (JNIEnv* and maybe jclass)?
- bool HasExtraArgumentsForJni() const;
+ bool HasExtraArgumentsForJni() const {
+ // @CriticalNative jni implementations exclude both JNIEnv* and the jclass/jobject parameters.
+ return !IsCriticalNative();
+ }
+
+ // Has a JNIEnv* parameter implicitly?
+ bool HasJniEnv() const {
+ // Exclude "JNIEnv*" parameter for @CriticalNative methods.
+ return HasExtraArgumentsForJni();
+ }
+
+ // Has a 'jclass' parameter implicitly?
+ bool HasSelfClass() const;
// Returns the position of itr_args_, fixed up by removing the offset of extra JNI arguments.
unsigned int GetIteratorPositionWithinShorty() const;
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 7054078..363e646 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -54,8 +54,7 @@
template <PointerSize kPointerSize>
static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
ManagedRuntimeCallingConvention* mr_conv,
- JniCallingConvention* jni_conv,
- size_t frame_size, size_t out_arg_size);
+ JniCallingConvention* jni_conv);
template <PointerSize kPointerSize>
static void SetNativeParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
JniCallingConvention* jni_conv,
@@ -131,7 +130,7 @@
const bool is_fast_native = (access_flags & kAccFastNative) != 0u;
// i.e. if the method was annotated with @CriticalNative
- bool is_critical_native = (access_flags & kAccCriticalNative) != 0u;
+ const bool is_critical_native = (access_flags & kAccCriticalNative) != 0u;
VLOG(jni) << "JniCompile: Method :: "
<< dex_file.PrettyMethod(method_idx, /* with signature */ true)
@@ -220,17 +219,22 @@
jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode());
// 1. Build the frame saving all callee saves, Method*, and PC return address.
- const size_t frame_size(main_jni_conv->FrameSize()); // Excludes outgoing args.
+ // For @CriticalNative, this includes space for out args, otherwise just the managed frame.
+ const size_t managed_frame_size = main_jni_conv->FrameSize();
+ const size_t main_out_arg_size = main_jni_conv->OutArgSize();
+ size_t current_frame_size = is_critical_native ? main_out_arg_size : managed_frame_size;
+ ManagedRegister method_register =
+ is_critical_native ? ManagedRegister::NoRegister() : mr_conv->MethodRegister();
ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
- __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
- DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
+ __ BuildFrame(current_frame_size, method_register, callee_save_regs, mr_conv->EntrySpills());
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
if (LIKELY(!is_critical_native)) {
// NOTE: @CriticalNative methods don't have a HandleScope
// because they can't have any reference parameters or return values.
// 2. Set up the HandleScope
- mr_conv->ResetIterator(FrameOffset(frame_size));
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
main_jni_conv->ResetIterator(FrameOffset(0));
__ StoreImmediateToFrame(main_jni_conv->HandleScopeNumRefsOffset(),
main_jni_conv->ReferenceCount(),
@@ -249,7 +253,7 @@
if (is_static) {
FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
// Check handle scope offset is within frame
- CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
+ CHECK_LT(handle_scope_offset.Uint32Value(), current_frame_size);
// Note this LoadRef() doesn't need heap unpoisoning since it's from the ArtMethod.
// Note this LoadRef() does not include read barrier. It will be handled below.
//
@@ -272,7 +276,7 @@
// must be null.
FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
// Check handle scope offset is within frame and doesn't run into the saved segment state.
- CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
+ CHECK_LT(handle_scope_offset.Uint32Value(), current_frame_size);
CHECK_NE(handle_scope_offset.Uint32Value(),
main_jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
bool input_in_reg = mr_conv->IsCurrentParamInRegister();
@@ -304,9 +308,17 @@
} // if (!is_critical_native)
// 5. Move frame down to allow space for out going args.
- const size_t main_out_arg_size = main_jni_conv->OutArgSize();
size_t current_out_arg_size = main_out_arg_size;
- __ IncreaseFrameSize(main_out_arg_size);
+ if (UNLIKELY(is_critical_native)) {
+ DCHECK_EQ(main_out_arg_size, current_frame_size);
+ // Move the method pointer to the hidden argument register.
+ __ Move(main_jni_conv->HiddenArgumentRegister(),
+ mr_conv->MethodRegister(),
+ static_cast<size_t>(main_jni_conv->GetFramePointerSize()));
+ } else {
+ __ IncreaseFrameSize(main_out_arg_size);
+ current_frame_size += main_out_arg_size;
+ }
// Call the read barrier for the declaring class loaded from the method for a static call.
// Skip this for @CriticalNative because we didn't build a HandleScope to begin with.
@@ -376,6 +388,8 @@
// abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
// arguments.
FrameOffset locked_object_handle_scope_offset(0xBEEFDEAD);
+ FrameOffset saved_cookie_offset(
+ FrameOffset(0xDEADBEEFu)); // @CriticalNative - use obviously bad value for debugging
if (LIKELY(!is_critical_native)) {
// Skip this for @CriticalNative methods. They do not call JniMethodStart.
ThreadOffset<kPointerSize> jni_start(
@@ -414,12 +428,8 @@
if (is_synchronized) { // Check for exceptions from monitor enter.
__ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
}
- }
- // Store into stack_frame[saved_cookie_offset] the return value of JniMethodStart.
- FrameOffset saved_cookie_offset(
- FrameOffset(0xDEADBEEFu)); // @CriticalNative - use obviously bad value for debugging
- if (LIKELY(!is_critical_native)) {
+ // Store into stack_frame[saved_cookie_offset] the return value of JniMethodStart.
saved_cookie_offset = main_jni_conv->SavedLocalReferenceCookieOffset();
__ Store(saved_cookie_offset, main_jni_conv->IntReturnRegister(), 4 /* sizeof cookie */);
}
@@ -430,7 +440,7 @@
// null (which must be encoded as null).
// Note: we do this prior to materializing the JNIEnv* and static's jclass to
// give as many free registers for the shuffle as possible.
- mr_conv->ResetIterator(FrameOffset(frame_size + main_out_arg_size));
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
uint32_t args_count = 0;
while (mr_conv->HasNext()) {
args_count++;
@@ -440,8 +450,12 @@
// Do a backward pass over arguments, so that the generated code will be "mov
// R2, R3; mov R1, R2" instead of "mov R1, R2; mov R2, R3."
// TODO: A reverse iterator to improve readability.
+ // TODO: This is currently useless as all archs spill args when building the frame.
+ // To avoid the full spilling, we would have to do one pass before the BuildFrame()
+ // to determine which arg registers are clobbered before they are needed.
+ // TODO: For @CriticalNative, do a forward pass because there are no JNIEnv* and jclass* args.
for (uint32_t i = 0; i < args_count; ++i) {
- mr_conv->ResetIterator(FrameOffset(frame_size + main_out_arg_size));
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
// Skip the extra JNI parameters for now.
@@ -456,11 +470,11 @@
mr_conv->Next();
main_jni_conv->Next();
}
- CopyParameter(jni_asm.get(), mr_conv.get(), main_jni_conv.get(), frame_size, main_out_arg_size);
+ CopyParameter(jni_asm.get(), mr_conv.get(), main_jni_conv.get());
}
if (is_static && !is_critical_native) {
// Create argument for Class
- mr_conv->ResetIterator(FrameOffset(frame_size + main_out_arg_size));
+ mr_conv->ResetIterator(FrameOffset(current_frame_size));
main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
main_jni_conv->Next(); // Skip JNIEnv*
FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
@@ -496,20 +510,33 @@
// 9. Plant call to native code associated with method.
MemberOffset jni_entrypoint_offset =
ArtMethod::EntryPointFromJniOffset(InstructionSetPointerSize(instruction_set));
- // FIXME: Not sure if MethodStackOffset will work here. What does it even do?
- __ Call(main_jni_conv->MethodStackOffset(),
- jni_entrypoint_offset,
- // XX: Why not the jni conv scratch register?
- mr_conv->InterproceduralScratchRegister());
+ if (UNLIKELY(is_critical_native)) {
+ if (main_jni_conv->UseTailCall()) {
+ __ Jump(main_jni_conv->HiddenArgumentRegister(),
+ jni_entrypoint_offset,
+ main_jni_conv->InterproceduralScratchRegister());
+ } else {
+ __ Call(main_jni_conv->HiddenArgumentRegister(),
+ jni_entrypoint_offset,
+ main_jni_conv->InterproceduralScratchRegister());
+ }
+ } else {
+ __ Call(FrameOffset(main_out_arg_size + mr_conv->MethodStackOffset().SizeValue()),
+ jni_entrypoint_offset,
+ main_jni_conv->InterproceduralScratchRegister());
+ }
// 10. Fix differences in result widths.
if (main_jni_conv->RequiresSmallResultTypeExtension()) {
+ DCHECK(main_jni_conv->HasSmallReturnType());
+ CHECK(!is_critical_native || !main_jni_conv->UseTailCall());
if (main_jni_conv->GetReturnType() == Primitive::kPrimByte ||
main_jni_conv->GetReturnType() == Primitive::kPrimShort) {
__ SignExtend(main_jni_conv->ReturnRegister(),
Primitive::ComponentSize(main_jni_conv->GetReturnType()));
- } else if (main_jni_conv->GetReturnType() == Primitive::kPrimBoolean ||
- main_jni_conv->GetReturnType() == Primitive::kPrimChar) {
+ } else {
+ CHECK(main_jni_conv->GetReturnType() == Primitive::kPrimBoolean ||
+ main_jni_conv->GetReturnType() == Primitive::kPrimChar);
__ ZeroExtend(main_jni_conv->ReturnRegister(),
Primitive::ComponentSize(main_jni_conv->GetReturnType()));
}
@@ -531,7 +558,7 @@
// TODO: refactor this into the JniCallingConvention code
// as a return value alignment requirement.
}
- CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
+ CHECK_LT(return_save_location.Uint32Value(), current_frame_size);
__ Store(return_save_location,
main_jni_conv->ReturnRegister(),
main_jni_conv->SizeOfReturnValue());
@@ -545,6 +572,7 @@
// If they differ, only then do we have to do anything about it.
// Otherwise the return value is already in the right place when we return.
if (!jni_return_reg.Equals(mr_return_reg)) {
+ CHECK(!main_jni_conv->UseTailCall());
// This is typically only necessary on ARM32 due to native being softfloat
// while managed is hardfloat.
// -- For example VMOV {r0, r1} -> D0; VMOV r0 -> S0.
@@ -557,23 +585,21 @@
}
}
- // Increase frame size for out args if needed by the end_jni_conv.
- const size_t end_out_arg_size = end_jni_conv->OutArgSize();
- if (end_out_arg_size > current_out_arg_size) {
- size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
- current_out_arg_size = end_out_arg_size;
- // TODO: This is redundant for @CriticalNative but we need to
- // conditionally do __DecreaseFrameSize below.
- __ IncreaseFrameSize(out_arg_size_diff);
- saved_cookie_offset = FrameOffset(saved_cookie_offset.SizeValue() + out_arg_size_diff);
- locked_object_handle_scope_offset =
- FrameOffset(locked_object_handle_scope_offset.SizeValue() + out_arg_size_diff);
- return_save_location = FrameOffset(return_save_location.SizeValue() + out_arg_size_diff);
- }
- // thread.
- end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
-
if (LIKELY(!is_critical_native)) {
+ // Increase frame size for out args if needed by the end_jni_conv.
+ const size_t end_out_arg_size = end_jni_conv->OutArgSize();
+ if (end_out_arg_size > current_out_arg_size) {
+ size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
+ current_out_arg_size = end_out_arg_size;
+ __ IncreaseFrameSize(out_arg_size_diff);
+ current_frame_size += out_arg_size_diff;
+ saved_cookie_offset = FrameOffset(saved_cookie_offset.SizeValue() + out_arg_size_diff);
+ locked_object_handle_scope_offset =
+ FrameOffset(locked_object_handle_scope_offset.SizeValue() + out_arg_size_diff);
+ return_save_location = FrameOffset(return_save_location.SizeValue() + out_arg_size_diff);
+ }
+ end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
+
// 12. Call JniMethodEnd
ThreadOffset<kPointerSize> jni_end(
GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd,
@@ -629,19 +655,28 @@
} // if (!is_critical_native)
// 14. Move frame up now we're done with the out arg space.
- __ DecreaseFrameSize(current_out_arg_size);
+ // For @CriticalNative, the out args are removed together with the frame in RemoveFrame().
+ if (LIKELY(!is_critical_native)) {
+ __ DecreaseFrameSize(current_out_arg_size);
+ current_frame_size -= current_out_arg_size;
+ }
// 15. Process pending exceptions from JNI call or monitor exit.
- __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0 /* stack_adjust= */);
+ // @CriticalNative methods do not need exception poll in the stub.
+ if (LIKELY(!is_critical_native)) {
+ __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0 /* stack_adjust= */);
+ }
// 16. Remove activation - need to restore callee save registers since the GC may have changed
// them.
- DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
- // We expect the compiled method to possibly be suspended during its
- // execution, except in the case of a CriticalNative method.
- bool may_suspend = !is_critical_native;
- __ RemoveFrame(frame_size, callee_save_regs, may_suspend);
- DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
+ if (LIKELY(!is_critical_native) || !main_jni_conv->UseTailCall()) {
+ // We expect the compiled method to possibly be suspended during its
+ // execution, except in the case of a CriticalNative method.
+ bool may_suspend = !is_critical_native;
+ __ RemoveFrame(current_frame_size, callee_save_regs, may_suspend);
+ DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(current_frame_size));
+ }
// 17. Finalize code generation
__ FinalizeCode();
@@ -652,7 +687,7 @@
return JniCompiledMethod(instruction_set,
std::move(managed_code),
- frame_size,
+ managed_frame_size,
main_jni_conv->CoreSpillMask(),
main_jni_conv->FpSpillMask(),
ArrayRef<const uint8_t>(*jni_asm->cfi().data()));
@@ -662,9 +697,7 @@
template <PointerSize kPointerSize>
static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
ManagedRuntimeCallingConvention* mr_conv,
- JniCallingConvention* jni_conv,
- size_t frame_size,
- size_t out_arg_size) {
+ JniCallingConvention* jni_conv) {
bool input_in_reg = mr_conv->IsCurrentParamInRegister();
bool output_in_reg = jni_conv->IsCurrentParamInRegister();
FrameOffset handle_scope_offset(0);
@@ -686,7 +719,7 @@
// as with regular references).
handle_scope_offset = jni_conv->CurrentParamHandleScopeEntryOffset();
// Check handle scope offset is within frame.
- CHECK_LT(handle_scope_offset.Uint32Value(), (frame_size + out_arg_size));
+ CHECK_LT(handle_scope_offset.Uint32Value(), mr_conv->GetDisplacement().Uint32Value());
}
if (input_in_reg && output_in_reg) {
ManagedRegister in_reg = mr_conv->CurrentParamRegister();
@@ -716,7 +749,7 @@
FrameOffset in_off = mr_conv->CurrentParamStackOffset();
ManagedRegister out_reg = jni_conv->CurrentParamRegister();
// Check that incoming stack arguments are above the current stack frame.
- CHECK_GT(in_off.Uint32Value(), frame_size);
+ CHECK_GT(in_off.Uint32Value(), mr_conv->GetDisplacement().Uint32Value());
if (ref_param) {
__ CreateHandleScopeEntry(out_reg, handle_scope_offset, ManagedRegister::NoRegister(), null_allowed);
} else {
@@ -728,8 +761,8 @@
CHECK(input_in_reg && !output_in_reg);
ManagedRegister in_reg = mr_conv->CurrentParamRegister();
FrameOffset out_off = jni_conv->CurrentParamStackOffset();
- // Check outgoing argument is within frame
- CHECK_LT(out_off.Uint32Value(), frame_size);
+ // Check outgoing argument is within frame part dedicated to out args.
+ CHECK_LT(out_off.Uint32Value(), jni_conv->GetDisplacement().Uint32Value());
if (ref_param) {
// TODO: recycle value in in_reg rather than reload from handle scope
__ CreateHandleScopeEntry(out_off, handle_scope_offset, mr_conv->InterproceduralScratchRegister(),
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index c69854d..cbb692e 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -75,11 +75,11 @@
static constexpr uint32_t kFpCalleeSpillMask = 0u;
// Calling convention
-ManagedRegister MipsManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister MipsManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
return MipsManagedRegister::FromCoreRegister(T9);
}
-ManagedRegister MipsJniCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister MipsJniCallingConvention::InterproceduralScratchRegister() const {
return MipsManagedRegister::FromCoreRegister(T9);
}
@@ -334,7 +334,7 @@
return MipsManagedRegister::FromCoreRegister(AT);
}
-size_t MipsJniCallingConvention::FrameSize() {
+size_t MipsJniCallingConvention::FrameSize() const {
// ArtMethod*, RA and callee save area size, local reference segment state.
const size_t method_ptr_size = static_cast<size_t>(kMipsPointerSize);
const size_t ra_return_addr_size = kFramePointerSize;
@@ -362,7 +362,7 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t MipsJniCallingConvention::OutArgSize() {
+size_t MipsJniCallingConvention::OutArgSize() const {
// Argument Passing (3-17):
// "Despite the fact that some or all of the arguments to a function are passed in registers,
// always allocate space on the stack for all arguments. This stack space should be a structure
@@ -371,8 +371,19 @@
// for arguments are called the home locations."
//
// Allocate 16 bytes for home locations + space needed for stack arguments.
+
+ size_t static_args = HasSelfClass() ? 1 : 0; // Count jclass.
+ // Regular argument parameters and this.
+ size_t param_args = NumArgs() + NumLongOrDoubleArgs(); // Twice count 8-byte args.
+ // Count JNIEnv* less arguments in registers.
+ size_t internal_args = (HasJniEnv() ? 1 : 0);
+ size_t total_args = static_args + param_args + internal_args;
+
+ size_t stack_args =
+ total_args - std::min(kMaxIntLikeRegisterArguments, static_cast<size_t>(total_args));
+
return RoundUp(
- (kMaxIntLikeRegisterArguments + NumberOfOutgoingStackArgs()) * kFramePointerSize + padding_,
+ (kMaxIntLikeRegisterArguments + stack_args) * kFramePointerSize + padding_,
kStackAlignment);
}
@@ -446,15 +457,14 @@
return FrameOffset(offset);
}
-size_t MipsJniCallingConvention::NumberOfOutgoingStackArgs() {
- size_t static_args = HasSelfClass() ? 1 : 0; // Count jclass.
- // Regular argument parameters and this.
- size_t param_args = NumArgs() + NumLongOrDoubleArgs(); // Twice count 8-byte args.
- // Count JNIEnv* less arguments in registers.
- size_t internal_args = (HasJniEnv() ? 1 : 0);
- size_t total_args = static_args + param_args + internal_args;
+ManagedRegister MipsJniCallingConvention::HiddenArgumentRegister() const {
+ UNIMPLEMENTED(FATAL);
+ UNREACHABLE();
+}
- return total_args - std::min(kMaxIntLikeRegisterArguments, static_cast<size_t>(total_args));
+bool MipsJniCallingConvention::UseTailCall() const {
+ UNIMPLEMENTED(FATAL);
+ UNREACHABLE();
}
} // namespace mips
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index 8b395a0..af27dc8 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -37,7 +37,7 @@
~MipsManagedRuntimeCallingConvention() override {}
// Calling convention
ManagedRegister ReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
bool IsCurrentParamInRegister() override;
@@ -62,11 +62,11 @@
// Calling convention
ManagedRegister ReturnRegister() override;
ManagedRegister IntReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// JNI calling convention
void Next() override; // Override default behavior for o32.
- size_t FrameSize() override;
- size_t OutArgSize() override;
+ size_t FrameSize() const override;
+ size_t OutArgSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
@@ -81,8 +81,11 @@
return false;
}
- protected:
- size_t NumberOfOutgoingStackArgs() override;
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
private:
// Padding to ensure longs and doubles are not split in o32.
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 2c297b3..e65ad83 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -64,11 +64,11 @@
static constexpr uint32_t kFpCalleeSpillMask = 0u;
// Calling convention
-ManagedRegister Mips64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister Mips64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
return Mips64ManagedRegister::FromGpuRegister(T9);
}
-ManagedRegister Mips64JniCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister Mips64JniCallingConvention::InterproceduralScratchRegister() const {
return Mips64ManagedRegister::FromGpuRegister(T9);
}
@@ -178,7 +178,7 @@
return Mips64ManagedRegister::FromGpuRegister(AT);
}
-size_t Mips64JniCallingConvention::FrameSize() {
+size_t Mips64JniCallingConvention::FrameSize() const {
// ArtMethod*, RA and callee save area size, local reference segment state.
size_t method_ptr_size = static_cast<size_t>(kFramePointerSize);
size_t ra_and_callee_save_area_size = (CalleeSaveRegisters().size() + 1) * kFramePointerSize;
@@ -203,8 +203,14 @@
return RoundUp(total_size, kStackAlignment);
}
-size_t Mips64JniCallingConvention::OutArgSize() {
- return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
+size_t Mips64JniCallingConvention::OutArgSize() const {
+ // all arguments including JNI args
+ size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
+
+ // Nothing on the stack unless there are more than 8 arguments
+ size_t stack_args = (all_args > kMaxRegisterArguments) ? all_args - kMaxRegisterArguments : 0;
+
+ return RoundUp(stack_args * kFramePointerSize, kStackAlignment);
}
ArrayRef<const ManagedRegister> Mips64JniCallingConvention::CalleeSaveRegisters() const {
@@ -236,12 +242,15 @@
return FrameOffset(offset);
}
-size_t Mips64JniCallingConvention::NumberOfOutgoingStackArgs() {
- // all arguments including JNI args
- size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
-
- // Nothing on the stack unless there are more than 8 arguments
- return (all_args > kMaxRegisterArguments) ? all_args - kMaxRegisterArguments : 0;
+ManagedRegister Mips64JniCallingConvention::HiddenArgumentRegister() const {
+ UNIMPLEMENTED(FATAL);
+ UNREACHABLE();
}
+
+bool Mips64JniCallingConvention::UseTailCall() const {
+ UNIMPLEMENTED(FATAL);
+ UNREACHABLE();
+}
+
} // namespace mips64
} // namespace art
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index d87f73a..e9a42a4 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -37,7 +37,7 @@
~Mips64ManagedRuntimeCallingConvention() override {}
// Calling convention
ManagedRegister ReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
bool IsCurrentParamInRegister() override;
@@ -62,10 +62,10 @@
// Calling convention
ManagedRegister ReturnRegister() override;
ManagedRegister IntReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// JNI calling convention
- size_t FrameSize() override;
- size_t OutArgSize() override;
+ size_t FrameSize() const override;
+ size_t OutArgSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
@@ -80,8 +80,11 @@
return false;
}
- protected:
- size_t NumberOfOutgoingStackArgs() override;
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
private:
DISALLOW_COPY_AND_ASSIGN(Mips64JniCallingConvention);
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 1f255e2..d12eb9b 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -26,7 +26,6 @@
namespace x86 {
static_assert(kX86PointerSize == PointerSize::k32, "Unexpected x86 pointer size");
-static_assert(kStackAlignment >= 16u, "IA-32 cdecl requires at least 16 byte stack alignment");
static constexpr ManagedRegister kCalleeSaveRegisters[] = {
// Core registers.
@@ -36,10 +35,12 @@
// No hard float callee saves.
};
-static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+template <size_t size>
+static constexpr uint32_t CalculateCoreCalleeSpillMask(
+ const ManagedRegister (&callee_saves)[size]) {
// The spilled PC gets a special marker.
uint32_t result = 1 << kNumberOfCpuRegisters;
- for (auto&& r : kCalleeSaveRegisters) {
+ for (auto&& r : callee_saves) {
if (r.AsX86().IsCpuRegister()) {
result |= (1 << r.AsX86().AsCpuRegister());
}
@@ -47,16 +48,32 @@
return result;
}
-static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters);
static constexpr uint32_t kFpCalleeSpillMask = 0u;
+static constexpr size_t kNativeStackAlignment = 16; // IA-32 cdecl requires 16 byte alignment.
+static_assert(kNativeStackAlignment == kStackAlignment);
+
+static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = {
+ // Core registers.
+ X86ManagedRegister::FromCpuRegister(EBX),
+ X86ManagedRegister::FromCpuRegister(EBP),
+ X86ManagedRegister::FromCpuRegister(ESI),
+ X86ManagedRegister::FromCpuRegister(EDI),
+ // No hard float callee saves.
+};
+
+static constexpr uint32_t kNativeCoreCalleeSpillMask =
+ CalculateCoreCalleeSpillMask(kNativeCalleeSaveRegisters);
+static constexpr uint32_t kNativeFpCalleeSpillMask = 0u;
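A sketch of what these masks work out to, assuming ART's x86 register encoding (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7, kNumberOfCpuRegisters=8) and that the managed kCalleeSaveRegisters in this file are EBP, ESI and EDI; the names below are illustrative, not ART code.

  #include <cstdint>

  constexpr uint32_t kPcMarkerBit = 1u << 8;  // kNumberOfCpuRegisters == 8 (assumed)
  // Native (cdecl) callee-saves: EBX(3), EBP(5), ESI(6), EDI(7).
  constexpr uint32_t kNativeMask =
      kPcMarkerBit | (1u << 3) | (1u << 5) | (1u << 6) | (1u << 7);
  static_assert(kNativeMask == 0x1E8u, "");
  // Managed callee-saves (EBP, ESI, EDI assumed): a strict subset of the native set,
  // which is what the static_asserts in OutArgSize() below rely on.
  constexpr uint32_t kManagedMask = kPcMarkerBit | (1u << 5) | (1u << 6) | (1u << 7);
  static_assert((kManagedMask & ~kNativeMask) == 0u, "");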
+
// Calling convention
-ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister X86ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
return X86ManagedRegister::FromCpuRegister(ECX);
}
-ManagedRegister X86JniCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister X86JniCallingConvention::InterproceduralScratchRegister() const {
return X86ManagedRegister::FromCpuRegister(ECX);
}
@@ -205,47 +222,81 @@
}
uint32_t X86JniCallingConvention::CoreSpillMask() const {
- return kCoreCalleeSpillMask;
+ return is_critical_native_ ? 0u : kCoreCalleeSpillMask;
}
uint32_t X86JniCallingConvention::FpSpillMask() const {
- return kFpCalleeSpillMask;
+ return is_critical_native_ ? 0u : kFpCalleeSpillMask;
}
-size_t X86JniCallingConvention::FrameSize() {
+size_t X86JniCallingConvention::FrameSize() const {
+ if (is_critical_native_) {
+ CHECK(!SpillsMethod());
+ CHECK(!HasLocalReferenceSegmentState());
+ CHECK(!HasHandleScope());
+ CHECK(!SpillsReturnValue());
+ return 0u; // There is no managed frame for @CriticalNative.
+ }
+
// Method*, PC return address and callee save area size, local reference segment state
+ CHECK(SpillsMethod());
const size_t method_ptr_size = static_cast<size_t>(kX86PointerSize);
const size_t pc_return_addr_size = kFramePointerSize;
const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
- size_t frame_data_size = method_ptr_size + pc_return_addr_size + callee_save_area_size;
+ size_t total_size = method_ptr_size + pc_return_addr_size + callee_save_area_size;
- if (LIKELY(HasLocalReferenceSegmentState())) { // local ref. segment state
- // Local reference segment state is sometimes excluded.
- frame_data_size += kFramePointerSize;
- }
+ CHECK(HasLocalReferenceSegmentState());
+ total_size += kFramePointerSize;
- // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
- const size_t handle_scope_size = HandleScope::SizeOf(kX86PointerSize, ReferenceCount());
-
- size_t total_size = frame_data_size;
- if (LIKELY(HasHandleScope())) {
- // HandleScope is sometimes excluded.
- total_size += handle_scope_size; // handle scope size
- }
+ CHECK(HasHandleScope());
+ total_size += HandleScope::SizeOf(kX86PointerSize, ReferenceCount());
// Plus return value spill area size
+ CHECK(SpillsReturnValue());
total_size += SizeOfReturnValue();
return RoundUp(total_size, kStackAlignment);
- // TODO: Same thing as x64 except using different pointer size. Refactor?
}
-size_t X86JniCallingConvention::OutArgSize() {
- return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
+size_t X86JniCallingConvention::OutArgSize() const {
+ // Count param args, including JNIEnv* and jclass*; count 8-byte args twice.
+ size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs() + NumLongOrDoubleArgs();
+ // The size of outgoing arguments.
+ size_t size = all_args * kFramePointerSize;
+
+ // @CriticalNative can use a tail call as all managed callee-saves are preserved by the native (IA-32 cdecl) ABI.
+ static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
+ static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) == 0u);
+
+ if (is_critical_native_) {
+ // Add return address size for @CriticalNative
+ // For normal native the return PC is part of the managed stack frame instead of out args.
+ size += kFramePointerSize;
+ // For @CriticalNative, we can make a tail call if there are no stack args
+ // and the return type is not FP type (needs moving from ST0 to MMX0) and
+ // we do not need to extend the result.
+ bool return_type_ok = GetShorty()[0] == 'I' || GetShorty()[0] == 'J' || GetShorty()[0] == 'V';
+ DCHECK_EQ(
+ return_type_ok,
+ GetShorty()[0] != 'F' && GetShorty()[0] != 'D' && !RequiresSmallResultTypeExtension());
+ if (return_type_ok && size == kFramePointerSize) {
+ // Note: This is not aligned to kNativeStackAlignment but that's OK for tail call.
+ DCHECK_EQ(size, kFramePointerSize);
+ static_assert(kFramePointerSize < kNativeStackAlignment);
+ return kFramePointerSize;
+ }
+ }
+
+ return RoundUp(size, kNativeStackAlignment);
}
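The tail-call test above reduces to "no stack arguments and a return type that needs no fix-up". A minimal sketch of that condition, assuming every x86 argument is stack-passed, kFramePointerSize == 4 and that @CriticalNative passes neither JNIEnv* nor jclass; the function name is illustrative, not ART code.

  #include <cstddef>

  constexpr bool CanTailCall(char return_shorty, size_t num_arg_slots) {
    // int, long and void need no conversion; float/double would need an ST0->XMM0
    // move and small types would need sign/zero extension.
    const bool return_type_ok =
        return_shorty == 'I' || return_shorty == 'J' || return_shorty == 'V';
    // With no register arguments on x86, only a no-arg method leaves just the
    // return PC (one slot) in the out-args area.
    return return_type_ok && num_arg_slots == 0u;
  }

  static_assert(CanTailCall('V', 0u), "no-arg void @CriticalNative qualifies");
  static_assert(!CanTailCall('F', 0u), "FP return rules out the tail call");
  static_assert(!CanTailCall('I', 2u), "stack arguments rule out the tail call");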
ArrayRef<const ManagedRegister> X86JniCallingConvention::CalleeSaveRegisters() const {
- return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ if (UNLIKELY(IsCriticalNative())) {
+ // Do not spill anything, whether tail call or not (return PC is already on the stack).
+ return ArrayRef<const ManagedRegister>();
+ } else {
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ }
}
bool X86JniCallingConvention::IsCurrentParamInRegister() {
@@ -265,15 +316,21 @@
return FrameOffset(displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize));
}
-size_t X86JniCallingConvention::NumberOfOutgoingStackArgs() {
- size_t static_args = HasSelfClass() ? 1 : 0; // count jclass
- // regular argument parameters and this
- size_t param_args = NumArgs() + NumLongOrDoubleArgs();
- // count JNIEnv* and return pc (pushed after Method*)
- size_t internal_args = 1 /* return pc */ + (HasJniEnv() ? 1 : 0 /* jni env */);
- // No register args.
- size_t total_args = static_args + param_args + internal_args;
- return total_args;
+ManagedRegister X86JniCallingConvention::HiddenArgumentRegister() const {
+ CHECK(IsCriticalNative());
+ // EAX is neither managed callee-save, nor argument register, nor scratch register.
+ DCHECK(std::none_of(kCalleeSaveRegisters,
+ kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
+ [](ManagedRegister callee_save) constexpr {
+ return callee_save.Equals(X86ManagedRegister::FromCpuRegister(EAX));
+ }));
+ DCHECK(!InterproceduralScratchRegister().Equals(X86ManagedRegister::FromCpuRegister(EAX)));
+ return X86ManagedRegister::FromCpuRegister(EAX);
+}
+
+bool X86JniCallingConvention::UseTailCall() const {
+ CHECK(IsCriticalNative());
+ return OutArgSize() == kFramePointerSize;
}
} // namespace x86
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index d0c6198..4d65fc3 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -36,7 +36,7 @@
~X86ManagedRuntimeCallingConvention() override {}
// Calling convention
ManagedRegister ReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
bool IsCurrentParamInRegister() override;
@@ -63,10 +63,10 @@
// Calling convention
ManagedRegister ReturnRegister() override;
ManagedRegister IntReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// JNI calling convention
- size_t FrameSize() override;
- size_t OutArgSize() override;
+ size_t FrameSize() const override;
+ size_t OutArgSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
@@ -78,11 +78,14 @@
// x86 needs to extend small return types.
bool RequiresSmallResultTypeExtension() const override {
- return true;
+ return HasSmallReturnType();
}
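HasSmallReturnType() is assumed here to flag return types narrower than 32 bits, which the x86 stubs must sign- or zero-extend. A sketch of that predicate under this assumption, not the actual helper:

  // Sketch only: shorty characters treated as "small" returns (byte, char, short, boolean).
  constexpr bool IsSmallReturnType(char shorty) {
    return shorty == 'B' || shorty == 'C' || shorty == 'S' || shorty == 'Z';
  }
  static_assert(IsSmallReturnType('B') && IsSmallReturnType('Z'), "");
  static_assert(!IsSmallReturnType('I') && !IsSmallReturnType('L'), "");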
- protected:
- size_t NumberOfOutgoingStackArgs() override;
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
private:
DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index 9e77d6b..b15d904 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -28,7 +28,8 @@
constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
static_assert(kX86_64PointerSize == PointerSize::k64, "Unexpected x86_64 pointer size");
-static_assert(kStackAlignment >= 16u, "System V AMD64 ABI requires at least 16 byte stack alignment");
+
+constexpr size_t kMmxSpillSize = 8u;
// XMM0..XMM7 can be used to pass the first 8 floating args. The rest must go on the stack.
// -- Managed and JNI calling conventions.
@@ -53,37 +54,59 @@
X86_64ManagedRegister::FromXmmRegister(XMM15),
};
-static constexpr uint32_t CalculateCoreCalleeSpillMask() {
+template <size_t size>
+static constexpr uint32_t CalculateCoreCalleeSpillMask(
+ const ManagedRegister (&callee_saves)[size]) {
// The spilled PC gets a special marker.
- uint32_t result = 1 << kNumberOfCpuRegisters;
- for (auto&& r : kCalleeSaveRegisters) {
+ uint32_t result = 1u << kNumberOfCpuRegisters;
+ for (auto&& r : callee_saves) {
if (r.AsX86_64().IsCpuRegister()) {
- result |= (1 << r.AsX86_64().AsCpuRegister().AsRegister());
+ result |= (1u << r.AsX86_64().AsCpuRegister().AsRegister());
}
}
return result;
}
-static constexpr uint32_t CalculateFpCalleeSpillMask() {
- uint32_t result = 0;
- for (auto&& r : kCalleeSaveRegisters) {
+template <size_t size>
+static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&callee_saves)[size]) {
+ uint32_t result = 0u;
+ for (auto&& r : callee_saves) {
if (r.AsX86_64().IsXmmRegister()) {
- result |= (1 << r.AsX86_64().AsXmmRegister().AsFloatRegister());
+ result |= (1u << r.AsX86_64().AsXmmRegister().AsFloatRegister());
}
}
return result;
}
-static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask();
-static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask();
+static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters);
+static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters);
+
+static constexpr size_t kNativeStackAlignment = 16;
+static_assert(kNativeStackAlignment == kStackAlignment);
+
+static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = {
+ // Core registers.
+ X86_64ManagedRegister::FromCpuRegister(RBX),
+ X86_64ManagedRegister::FromCpuRegister(RBP),
+ X86_64ManagedRegister::FromCpuRegister(R12),
+ X86_64ManagedRegister::FromCpuRegister(R13),
+ X86_64ManagedRegister::FromCpuRegister(R14),
+ X86_64ManagedRegister::FromCpuRegister(R15),
+ // No callee-save float registers.
+};
+
+static constexpr uint32_t kNativeCoreCalleeSpillMask =
+ CalculateCoreCalleeSpillMask(kNativeCalleeSaveRegisters);
+static constexpr uint32_t kNativeFpCalleeSpillMask =
+ CalculateFpCalleeSpillMask(kNativeCalleeSaveRegisters);
// Calling convention
-ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister X86_64ManagedRuntimeCallingConvention::InterproceduralScratchRegister() const {
return X86_64ManagedRegister::FromCpuRegister(RAX);
}
-ManagedRegister X86_64JniCallingConvention::InterproceduralScratchRegister() {
+ManagedRegister X86_64JniCallingConvention::InterproceduralScratchRegister() const {
return X86_64ManagedRegister::FromCpuRegister(RAX);
}
@@ -149,6 +172,7 @@
}
FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() {
+ CHECK(IsCurrentParamOnStack());
return FrameOffset(displacement_.Int32Value() + // displacement
static_cast<size_t>(kX86_64PointerSize) + // Method ref
itr_slots_ * sizeof(uint32_t)); // offset into in args
@@ -187,46 +211,86 @@
}
uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
- return kCoreCalleeSpillMask;
+ return is_critical_native_ ? 0u : kCoreCalleeSpillMask;
}
uint32_t X86_64JniCallingConvention::FpSpillMask() const {
- return kFpCalleeSpillMask;
+ return is_critical_native_ ? 0u : kFpCalleeSpillMask;
}
-size_t X86_64JniCallingConvention::FrameSize() {
+size_t X86_64JniCallingConvention::FrameSize() const {
+ if (is_critical_native_) {
+ CHECK(!SpillsMethod());
+ CHECK(!HasLocalReferenceSegmentState());
+ CHECK(!HasHandleScope());
+ CHECK(!SpillsReturnValue());
+ return 0u; // There is no managed frame for @CriticalNative.
+ }
+
// Method*, PC return address and callee save area size, local reference segment state
+ CHECK(SpillsMethod());
const size_t method_ptr_size = static_cast<size_t>(kX86_64PointerSize);
const size_t pc_return_addr_size = kFramePointerSize;
const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
- size_t frame_data_size = method_ptr_size + pc_return_addr_size + callee_save_area_size;
+ size_t total_size = method_ptr_size + pc_return_addr_size + callee_save_area_size;
- if (LIKELY(HasLocalReferenceSegmentState())) { // local ref. segment state
- // Local reference segment state is sometimes excluded.
- frame_data_size += kFramePointerSize;
- }
+ CHECK(HasLocalReferenceSegmentState());
+ total_size += kFramePointerSize;
- // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
- const size_t handle_scope_size = HandleScope::SizeOf(kX86_64PointerSize, ReferenceCount());
-
- size_t total_size = frame_data_size;
- if (LIKELY(HasHandleScope())) {
- // HandleScope is sometimes excluded.
- total_size += handle_scope_size; // handle scope size
- }
+ CHECK(HasHandleScope());
+ total_size += HandleScope::SizeOf(kX86_64PointerSize, ReferenceCount());
// Plus return value spill area size
+ CHECK(SpillsReturnValue());
total_size += SizeOfReturnValue();
return RoundUp(total_size, kStackAlignment);
}
-size_t X86_64JniCallingConvention::OutArgSize() {
- return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
+size_t X86_64JniCallingConvention::OutArgSize() const {
+ // Count param args, including JNIEnv* and jclass*.
+ size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs();
+ size_t num_fp_args = NumFloatOrDoubleArgs();
+ DCHECK_GE(all_args, num_fp_args);
+ size_t num_non_fp_args = all_args - num_fp_args;
+ // Account for FP arguments passed through Xmm0..Xmm7.
+ size_t num_stack_fp_args =
+ num_fp_args - std::min(kMaxFloatOrDoubleRegisterArguments, num_fp_args);
+ // Account for other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9).
+ size_t num_stack_non_fp_args =
+ num_non_fp_args - std::min(kMaxIntLikeRegisterArguments, num_non_fp_args);
+ // The size of outgoing arguments.
+ static_assert(kFramePointerSize == kMmxSpillSize);
+ size_t size = (num_stack_fp_args + num_stack_non_fp_args) * kFramePointerSize;
+
+ if (UNLIKELY(IsCriticalNative())) {
+ // We always need to spill xmm12-xmm15 as they are managed callee-saves
+ // but not native callee-saves.
+ static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u);
+ static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) != 0u);
+ size += POPCOUNT(kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) * kMmxSpillSize;
+ // Add return address size for @CriticalNative
+ // For normal native the return PC is part of the managed stack frame instead of out args.
+ size += kFramePointerSize;
+ }
+
+ return RoundUp(size, kNativeStackAlignment);
}
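A worked instance of the @CriticalNative branch above, assuming kFramePointerSize == kMmxSpillSize == 8, kNativeStackAlignment == 16 and exactly four managed-only FP callee-saves (XMM12-XMM15); a sketch, not ART code.

  #include <cstddef>

  constexpr size_t RoundUpTo(size_t x, size_t n) { return ((x + n - 1u) / n) * n; }

  // A @CriticalNative whose arguments all fit in registers: no stack args,
  // 4 * 8 bytes to spill XMM12-XMM15, plus 8 bytes for the return PC.
  constexpr size_t kCriticalOutArgs = RoundUpTo(0u * 8u + 4u * 8u + 8u, 16u);
  static_assert(kCriticalOutArgs == 48u, "40 bytes rounded up to the 16-byte native alignment");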
ArrayRef<const ManagedRegister> X86_64JniCallingConvention::CalleeSaveRegisters() const {
- return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ if (UNLIKELY(IsCriticalNative())) {
+ DCHECK(!UseTailCall());
+ static_assert(std::size(kCalleeSaveRegisters) > std::size(kNativeCalleeSaveRegisters));
+ // TODO: Change to static_assert; std::equal should be constexpr since C++20.
+ DCHECK(std::equal(kCalleeSaveRegisters,
+ kCalleeSaveRegisters + std::size(kNativeCalleeSaveRegisters),
+ kNativeCalleeSaveRegisters,
+ [](ManagedRegister lhs, ManagedRegister rhs) { return lhs.Equals(rhs); }));
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(
+ /*pos=*/ std::size(kNativeCalleeSaveRegisters));
+ } else {
+ return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
+ }
}
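The SubArray() above is only valid because the managed callee-save array is laid out as the native callee-saves followed by XMM12-XMM15, which the DCHECK verifies at runtime. A small sketch of that layout assumption (register lists abbreviated, not ART code):

  #include <array>
  #include <cstddef>

  enum class Reg { RBX, RBP, R12, R13, R14, R15, XMM12, XMM13, XMM14, XMM15 };

  // Managed callee-saves: native callee-saves first, managed-only FP registers last.
  constexpr std::array<Reg, 10> kManaged = {Reg::RBX, Reg::RBP, Reg::R12, Reg::R13, Reg::R14,
                                            Reg::R15, Reg::XMM12, Reg::XMM13, Reg::XMM14,
                                            Reg::XMM15};
  constexpr size_t kNativeCount = 6u;  // RBX, RBP, R12-R15

  // Skipping the native prefix leaves exactly the registers @CriticalNative must still save.
  static_assert(kManaged[kNativeCount] == Reg::XMM12, "");
  static_assert(kManaged.size() - kNativeCount == 4u, "");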
bool X86_64JniCallingConvention::IsCurrentParamInRegister() {
@@ -271,24 +335,24 @@
return FrameOffset(offset);
}
-// TODO: Calling this "NumberArgs" is misleading.
-// It's really more like NumberSlots (like itr_slots_)
-// because doubles/longs get counted twice.
-size_t X86_64JniCallingConvention::NumberOfOutgoingStackArgs() {
- size_t static_args = HasSelfClass() ? 1 : 0; // count jclass
- // regular argument parameters and this
- size_t param_args = NumArgs() + NumLongOrDoubleArgs();
- // count JNIEnv* and return pc (pushed after Method*)
- size_t internal_args = 1 /* return pc */ + (HasJniEnv() ? 1 : 0 /* jni env */);
- size_t total_args = static_args + param_args + internal_args;
+ManagedRegister X86_64JniCallingConvention::HiddenArgumentRegister() const {
+ CHECK(IsCriticalNative());
+ // R11 is neither managed callee-save, nor argument register, nor scratch register.
+ DCHECK(std::none_of(kCalleeSaveRegisters,
+ kCalleeSaveRegisters + std::size(kCalleeSaveRegisters),
+ [](ManagedRegister callee_save) constexpr {
+ return callee_save.Equals(X86_64ManagedRegister::FromCpuRegister(R11));
+ }));
+ DCHECK(!InterproceduralScratchRegister().Equals(X86_64ManagedRegister::FromCpuRegister(R11)));
+ return X86_64ManagedRegister::FromCpuRegister(R11);
+}
- // Float arguments passed through Xmm0..Xmm7
- // Other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9)
- size_t total_stack_args = total_args
- - std::min(kMaxFloatOrDoubleRegisterArguments, static_cast<size_t>(NumFloatOrDoubleArgs()))
- - std::min(kMaxIntLikeRegisterArguments, static_cast<size_t>(NumArgs() - NumFloatOrDoubleArgs()));
-
- return total_stack_args;
+// Whether to use tail call (used only for @CriticalNative).
+bool X86_64JniCallingConvention::UseTailCall() const {
+ CHECK(IsCriticalNative());
+ // We always need to spill xmm12-xmm15 as they are managed callee-saves
+ // but not native callee-saves, so we can never use a tail call.
+ return false;
}
} // namespace x86_64
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index dfab41b..37b5978 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -33,7 +33,7 @@
~X86_64ManagedRuntimeCallingConvention() override {}
// Calling convention
ManagedRegister ReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// Managed runtime calling convention
ManagedRegister MethodRegister() override;
bool IsCurrentParamInRegister() override;
@@ -56,10 +56,10 @@
// Calling convention
ManagedRegister ReturnRegister() override;
ManagedRegister IntReturnRegister() override;
- ManagedRegister InterproceduralScratchRegister() override;
+ ManagedRegister InterproceduralScratchRegister() const override;
// JNI calling convention
- size_t FrameSize() override;
- size_t OutArgSize() override;
+ size_t FrameSize() const override;
+ size_t OutArgSize() const override;
ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override;
ManagedRegister ReturnScratchRegister() const override;
uint32_t CoreSpillMask() const override;
@@ -71,11 +71,14 @@
// x86-64 needs to extend small return types.
bool RequiresSmallResultTypeExtension() const override {
- return true;
+ return HasSmallReturnType();
}
- protected:
- size_t NumberOfOutgoingStackArgs() override;
+ // Hidden argument register, used to pass the method pointer for @CriticalNative call.
+ ManagedRegister HiddenArgumentRegister() const override;
+
+ // Whether to use tail call (used only for @CriticalNative).
+ bool UseTailCall() const override;
private:
DISALLOW_COPY_AND_ASSIGN(X86_64JniCallingConvention);