JNI: Move args in registers for @FastNative.
Golem results for art-opt-cc (higher is better):
linux-ia32 before after
NativeDowncallStaticFast 222.00 222.17 (+0.0751%)
NativeDowncallStaticFast6 139.86 161.00 (+15.11%)
NativeDowncallStaticFastRefs6 131.00 137.86 (+5.238%)
NativeDowncallVirtualFast 211.79 217.17 (+2.543%)
NativeDowncallVirtualFast6 137.36 150.55 (+9.599%)
NativeDowncallVirtualFastRefs6 131.50 132.60 (+0.8382%)
linux-x64 before after
NativeDowncallStaticFast 173.15 173.24 (+0.0499%)
NativeDowncallStaticFast6 135.50 157.61 (+16.31%)
NativeDowncallStaticFastRefs6 127.06 134.87 (+6.147%)
NativeDowncallVirtualFast 163.67 165.83 (+1.321%)
NativeDowncallVirtualFast6 128.18 147.35 (+14.96%)
NativeDowncallVirtualFastRefs6 123.44 130.74 (+5.914%)
linux-armv7 before after
NativeDowncallStaticFast 21.622 21.622 (0%)
NativeDowncallStaticFast6 17.250 18.719 (+8.518%)
NativeDowncallStaticFastRefs6 14.757 15.663 (+6.145%)
NativeDowncallVirtualFast 21.027 21.319 (+1.388%)
NativeDowncallVirtualFast6 17.439 18.953 (+8.680%)
NativeDowncallVirtualFastRefs6 14.764 15.992 (+8.319%)
linux-armv8 before after
NativeDowncallStaticFast 23.244 23.610 (+1.575%)
NativeDowncallStaticFast6 18.719 21.622 (+15.50%)
NativeDowncallStaticFastRefs6 14.757 18.491 (+25.30%)
NativeDowncallVirtualFast 20.197 21.319 (+5.554%)
NativeDowncallVirtualFast6 18.272 21.027 (+15.08%)
NativeDowncallVirtualFastRefs6 13.951 16.865 (+20.89%)
(The arm64 NativeDowncallVirtualFast reference value is very
low, resulting in an unexpected +5.554% improvement. As the
previous results seem to jump between 20.197 and 20.741,
the actual improvement is probably just around 2.5%.)
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 172332525
Change-Id: I2b596414458b48a758826eafc223529e9f2fe059
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 2e7f23d..d0afa72 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -317,35 +317,57 @@
}
void X86JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests,
- ArrayRef<ArgumentLocation> srcs) {
- DCHECK_EQ(dests.size(), srcs.size());
+ ArrayRef<ArgumentLocation> srcs,
+ ArrayRef<FrameOffset> refs) {
+ size_t arg_count = dests.size();
+ DCHECK_EQ(arg_count, srcs.size());
+ DCHECK_EQ(arg_count, refs.size());
+
+ // Store register args to stack slots. Convert processed references to `jobject`.
bool found_hidden_arg = false;
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
+ const FrameOffset ref = refs[i];
+ DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
if (src.IsRegister()) {
if (UNLIKELY(dest.IsRegister())) {
// Native ABI has only stack arguments but we may pass one "hidden arg" in register.
CHECK(!found_hidden_arg);
found_hidden_arg = true;
+ DCHECK_EQ(ref, kInvalidReferenceOffset);
DCHECK(
!dest.GetRegister().Equals(X86ManagedRegister::FromCpuRegister(GetScratchRegister())));
Move(dest.GetRegister(), src.GetRegister(), dest.GetSize());
} else {
+ if (ref != kInvalidReferenceOffset) {
+ Store(ref, srcs[i].GetRegister(), kObjectReferenceSize);
+ // Note: We can clobber `src` here as the register cannot hold more than one argument.
+ // This overload of `CreateJObject()` currently does not use the scratch
+ // register ECX, so this shall not clobber another argument.
+ CreateJObject(src.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u);
+ }
Store(dest.GetFrameOffset(), src.GetRegister(), dest.GetSize());
}
} else {
// Delay copying until we have spilled all registers, including the scratch register ECX.
}
}
- for (size_t i = 0, arg_count = srcs.size(); i != arg_count; ++i) {
+
+ // Copy incoming stack args. Convert processed references to `jobject`.
+ for (size_t i = 0; i != arg_count; ++i) {
const ArgumentLocation& src = srcs[i];
const ArgumentLocation& dest = dests[i];
- DCHECK_EQ(src.GetSize(), dest.GetSize());
+ const FrameOffset ref = refs[i];
+ DCHECK_EQ(src.GetSize(), dest.GetSize()); // Even for references.
if (!src.IsRegister()) {
DCHECK(!dest.IsRegister());
- Copy(dest.GetFrameOffset(), src.GetFrameOffset(), dest.GetSize());
+ if (ref != kInvalidReferenceOffset) {
+ DCHECK_EQ(srcs[i].GetFrameOffset(), refs[i]);
+ CreateJObject(dest.GetFrameOffset(), ref, /*null_allowed=*/ i != 0u);
+ } else {
+ Copy(dest.GetFrameOffset(), src.GetFrameOffset(), dest.GetSize());
+ }
}
}
}