JNI: Inline fast-path for `JniMethodEnd()`.
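The JNI stub previously always called the `JniMethodEnd()` entrypoint when
returning from native code. This change emits the common case inline and
calls into the runtime only on the slow path. A minimal C++ sketch of the
control flow (hypothetical helper names and simplified types; the real stub
emits this logic as machine code through the per-architecture assemblers):

    #include <atomic>
    #include <cstdint>

    struct Thread {
      // Simplified stand-in for ART's packed state-and-flags word.
      std::atomic<uint32_t> state_and_flags{kNative};
      static constexpr uint32_t kRunnable = 0u;
      static constexpr uint32_t kNative = 2u;

      bool TryTransitionOutOfNative() {
        // Fast path: CAS back to runnable iff no flags (e.g. a suspend
        // request) are pending alongside the native state.
        uint32_t expected = kNative;
        return state_and_flags.compare_exchange_strong(expected, kRunnable);
      }
    };

    void JniMethodEndSlowPath(Thread* self);  // Runtime call, rarely taken.

    void JniStubEnd(Thread* self) {
      if (self->TryTransitionOutOfNative()) {
        return;  // Inlined fast path: no runtime call on the hot return.
      }
      JniMethodEndSlowPath(self);  // Flags were set: defer to the runtime.
    }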
Golem results for art-opt-cc (higher is better):
linux-ia32 before after
NativeDowncallStaticNormal 46.766 51.016 (+9.086%)
NativeDowncallStaticNormal6 42.268 45.748 (+8.235%)
NativeDowncallStaticNormalRefs6 41.355 44.776 (+8.272%)
NativeDowncallVirtualNormal 46.361 52.527 (+13.30%)
NativeDowncallVirtualNormal6 41.812 45.206 (+8.118%)
NativeDowncallVirtualNormalRefs6 40.500 44.169 (+9.059%)
(The NativeDowncallVirtualNormal result for x86 is skewed
by one unusually good run, as Golem reports the best result
in the summary. Using the second-best and most frequent
result, 50.5, the improvement is only around 8.9%.)
linux-x64 before after
NativeDowncallStaticNormal 44.169 47.976 (+8.620%)
NativeDowncallStaticNormal6 43.198 46.836 (+8.423%)
NativeDowncallStaticNormalRefs6 38.481 44.687 (+16.13%)
NativeDowncallVirtualNormal 43.672 47.405 (+8.547%)
NativeDowncallVirtualNormal6 42.268 45.726 (+8.182%)
NativeDowncallVirtualNormalRefs6 41.355 44.687 (+8.057%)
(The NativeDowncallStaticNormalRefs6 result for x86-64 is
a bit inflated because recent results jump between ~38.5
and ~40.5. Taking the latter as the baseline, the
improvement is only around 10.3%.)
linux-armv7 before after
NativeDowncallStaticNormal 10.659 14.620 (+37.16%)
NativeDowncallStaticNormal6 9.8377 13.120 (+33.36%)
NativeDowncallStaticNormalRefs6 8.8714 11.454 (+29.11%)
NativeDowncallVirtualNormal 10.511 14.349 (+36.51%)
NativeDowncallVirtualNormal6 9.9701 13.347 (+33.87%)
NativeDowncallVirtualNormalRefs6 8.9241 11.454 (+28.35%)
linux-armv8 before after
NativeDowncallStaticNormal 10.608 16.329 (+53.93%)
NativeDowncallStaticNormal6 10.179 15.347 (+50.76%)
NativeDowncallStaticNormalRefs6 9.2457 13.705 (+48.23%)
NativeDowncallVirtualNormal 9.9850 14.903 (+49.25%)
NativeDowncallVirtualNormal6 9.9206 14.757 (+48.75%)
NativeDowncallVirtualNormalRefs6 8.8235 12.789 (+44.94%)
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: Ie144bc4f7f82be95790ea7d3123b81a3b6bfa603
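The x86 diff below makes the calling-convention changes the inlined fast
path needs: the managed argument registers become `ManagedRegister`
constants backing a new `ArgumentScratchRegisters()` accessor (argument
registers the stub may clobber, guaranteed not to overlap the return
register), the return-register getters become `const`, and the return-value
spill slot is dropped from the managed frame since the fast path no longer
makes a runtime call while the return value is live. A rough consumer-side
sketch (hypothetical names; a fragment, not ART code):

    // Pick temps for the inlined JniMethodEnd() fast path; by construction
    // none of them overlaps the return register, so the native return value
    // stays live across the inlined code.
    ArrayRef<const ManagedRegister> scratch = jni_conv->ArgumentScratchRegisters();
    ManagedRegister temp = scratch[0];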
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 2fb063f..65be92c 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -25,8 +25,11 @@
namespace art {
namespace x86 {
-static constexpr Register kManagedCoreArgumentRegisters[] = {
- EAX, ECX, EDX, EBX
+static constexpr ManagedRegister kManagedCoreArgumentRegisters[] = {
+ X86ManagedRegister::FromCpuRegister(EAX),
+ X86ManagedRegister::FromCpuRegister(ECX),
+ X86ManagedRegister::FromCpuRegister(EDX),
+ X86ManagedRegister::FromCpuRegister(EBX),
};
static constexpr size_t kManagedCoreArgumentRegistersCount =
arraysize(kManagedCoreArgumentRegisters);
@@ -79,6 +82,33 @@
return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
}
+ArrayRef<const ManagedRegister> X86JniCallingConvention::ArgumentScratchRegisters() const {
+ DCHECK(!IsCriticalNative());
+ // Exclude EAX or EAX/EDX if they are used as return registers.
+ // Due to the odd ordering of argument registers, use a re-ordered array (pull EDX forward).
+ static constexpr ManagedRegister kArgumentRegisters[] = {
+ X86ManagedRegister::FromCpuRegister(EAX),
+ X86ManagedRegister::FromCpuRegister(EDX),
+ X86ManagedRegister::FromCpuRegister(ECX),
+ X86ManagedRegister::FromCpuRegister(EBX),
+ };
+ static_assert(arraysize(kArgumentRegisters) == kManagedCoreArgumentRegistersCount);
+ static_assert(kManagedCoreArgumentRegisters[0].Equals(kArgumentRegisters[0]));
+ static_assert(kManagedCoreArgumentRegisters[1].Equals(kArgumentRegisters[2]));
+ static_assert(kManagedCoreArgumentRegisters[2].Equals(kArgumentRegisters[1]));
+ static_assert(kManagedCoreArgumentRegisters[3].Equals(kArgumentRegisters[3]));
+ ArrayRef<const ManagedRegister> scratch_regs(kArgumentRegisters);
+ X86ManagedRegister return_reg = ReturnRegister().AsX86();
+ auto return_reg_overlaps = [return_reg](ManagedRegister reg) {
+ return return_reg.Overlaps(reg.AsX86());
+ };
+ if (return_reg_overlaps(scratch_regs[0])) {
+ scratch_regs = scratch_regs.SubArray(/*pos=*/ return_reg_overlaps(scratch_regs[1]) ? 2u : 1u);
+ }
+ DCHECK(std::none_of(scratch_regs.begin(), scratch_regs.end(), return_reg_overlaps));
+ return scratch_regs;
+}
+
static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni) {
if (shorty[0] == 'F' || shorty[0] == 'D') {
if (jni) {
@@ -95,15 +125,15 @@
}
}
-ManagedRegister X86ManagedRuntimeCallingConvention::ReturnRegister() {
+ManagedRegister X86ManagedRuntimeCallingConvention::ReturnRegister() const {
return ReturnRegisterForShorty(GetShorty(), false);
}
-ManagedRegister X86JniCallingConvention::ReturnRegister() {
+ManagedRegister X86JniCallingConvention::ReturnRegister() const {
return ReturnRegisterForShorty(GetShorty(), true);
}
-ManagedRegister X86JniCallingConvention::IntReturnRegister() {
+ManagedRegister X86JniCallingConvention::IntReturnRegister() const {
return X86ManagedRegister::FromCpuRegister(EAX);
}
@@ -149,20 +179,19 @@
if (IsCurrentParamALong()) {
switch (gpr_arg_count_) {
case 1:
- static_assert(kManagedCoreArgumentRegisters[1] == ECX);
- static_assert(kManagedCoreArgumentRegisters[2] == EDX);
+ static_assert(kManagedCoreArgumentRegisters[1].AsX86().AsCpuRegister() == ECX);
+ static_assert(kManagedCoreArgumentRegisters[2].AsX86().AsCpuRegister() == EDX);
return X86ManagedRegister::FromRegisterPair(ECX_EDX);
case 2:
- static_assert(kManagedCoreArgumentRegisters[2] == EDX);
- static_assert(kManagedCoreArgumentRegisters[3] == EBX);
+ static_assert(kManagedCoreArgumentRegisters[2].AsX86().AsCpuRegister() == EDX);
+ static_assert(kManagedCoreArgumentRegisters[3].AsX86().AsCpuRegister() == EBX);
return X86ManagedRegister::FromRegisterPair(EDX_EBX);
default:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
} else {
- Register core_reg = kManagedCoreArgumentRegisters[gpr_arg_count_];
- return X86ManagedRegister::FromCpuRegister(core_reg);
+ return kManagedCoreArgumentRegisters[gpr_arg_count_];
}
}
}
@@ -200,7 +229,6 @@
if (is_critical_native_) {
CHECK(!SpillsMethod());
CHECK(!HasLocalReferenceSegmentState());
- CHECK(!SpillsReturnValue());
return 0u; // There is no managed frame for @CriticalNative.
}
@@ -214,19 +242,6 @@
DCHECK(HasLocalReferenceSegmentState());
// Cookie is saved in one of the spilled registers.
- // Plus return value spill area size
- if (SpillsReturnValue()) {
- // For 64-bit return values there shall be a 4B alignment gap between
- // the method pointer and the saved return value.
- size_t padding = ReturnValueSaveLocation().SizeValue() - method_ptr_size;
- DCHECK_EQ(padding,
- (GetReturnType() == Primitive::kPrimLong || GetReturnType() == Primitive::kPrimDouble)
- ? 4u
- : 0u);
- total_size += padding;
- total_size += SizeOfReturnValue();
- }
-
return RoundUp(total_size, kStackAlignment);
}
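For reference, the spill accounting removed in the last hunk works out as
follows on x86 (a self-contained sketch mirroring the deleted lines, using a
local RoundUp helper rather than ART's):

    #include <cstddef>

    constexpr size_t RoundUp(size_t x, size_t n) { return (x + n - 1u) / n * n; }

    constexpr size_t method_ptr_size = 4u;    // 32-bit ArtMethod* slot.
    constexpr size_t return_value_size = 8u;  // jlong/jdouble return.
    // A 64-bit return value is stored 8-byte aligned after the method
    // pointer, leaving the 4B gap the deleted DCHECK asserted.
    constexpr size_t save_location = RoundUp(method_ptr_size, 8u);
    constexpr size_t padding = save_location - method_ptr_size;
    static_assert(padding == 4u);
    // 12 bytes no longer reserved (before the kStackAlignment round-up).
    static_assert(padding + return_value_size == 12u);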