Move @CriticalNative arguments in registers.
And spill stack arguments directly to the right location.
Do not spill to the reserved space in the caller's frame.
Preliminary Golem results for art-opt-cc:
x86 x86-64 arm arm64
NativeDowncallCritical6: n/a +14.3% +17.2% +26.1%
(x86 currently seems to be producing results that are worse
than the interpreter's, so something is not working.)
Test: Additional tests in 178-app-image-native-method test.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: aosp_taimen-userdebug boots.
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Bug: 112189621
Change-Id: I709c52ab2585a8f5f441f53ad2bf4a01d2b25dca
diff --git a/test/178-app-image-native-method/src/Main.java b/test/178-app-image-native-method/src/Main.java
index 07990cb..9043081 100644
--- a/test/178-app-image-native-method/src/Main.java
+++ b/test/178-app-image-native-method/src/Main.java
@@ -29,6 +29,7 @@
new TestMissing();
new TestMissingFast();
new TestMissingCritical();
+ new CriticalSignatures();
makeVisiblyInitialized(); // Make sure they are visibly initialized.
test();
@@ -37,6 +38,8 @@
testMissing();
testMissingFast();
testMissingCritical();
+
+ testCriticalSignatures();
}
static void test() {
@@ -165,6 +168,194 @@
} catch (LinkageError expected) {}
}
+ static void testCriticalSignatures() {
+ System.out.println("testCriticalSignatures");
+ long l = 0xf00000000L;
+ assertEquals(42, CriticalSignatures.nativeILFFFFD(1, l + 2L, 3.0f, 4.0f, 5.0f, 6.0f, 7.0));
+ assertEquals(42, CriticalSignatures.nativeLIFFFFD(l + 7L, 6, 5.0f, 4.0f, 3.0f, 2.0f, 1.0));
+ assertEquals(42, CriticalSignatures.nativeFLIFFFD(1.0f, l + 2L, 3, 4.0f, 5.0f, 6.0f, 7.0));
+ assertEquals(42, CriticalSignatures.nativeDDIIIIII(8.0, 7.0, 6, 5, 4, 3, 2, 1));
+ assertEquals(42, CriticalSignatures.nativeDFFILIII(1.0, 2.0f, 3.0f, 4, l + 5L, 6, 7, 8));
+ assertEquals(42, CriticalSignatures.nativeDDFILIII(8.0, 7.0, 6.0f, 5, l + 4L, 3, 2, 1));
+ assertEquals(42, CriticalSignatures.nativeDDIFII(1.0, 2.0, 3, 4.0f, 5, 6));
+ assertEquals(42, CriticalSignatures.nativeFullArgs(
+ // Generated by script (then modified to close argument list):
+ // for i in {0..84}; \
+ // do echo " 0xf00000000L + $((i*3))L,"; \
+ // echo " $((i*3+2)),"; \
+ // done
+ 0xf00000000L + 0L,
+ 2,
+ 0xf00000000L + 3L,
+ 5,
+ 0xf00000000L + 6L,
+ 8,
+ 0xf00000000L + 9L,
+ 11,
+ 0xf00000000L + 12L,
+ 14,
+ 0xf00000000L + 15L,
+ 17,
+ 0xf00000000L + 18L,
+ 20,
+ 0xf00000000L + 21L,
+ 23,
+ 0xf00000000L + 24L,
+ 26,
+ 0xf00000000L + 27L,
+ 29,
+ 0xf00000000L + 30L,
+ 32,
+ 0xf00000000L + 33L,
+ 35,
+ 0xf00000000L + 36L,
+ 38,
+ 0xf00000000L + 39L,
+ 41,
+ 0xf00000000L + 42L,
+ 44,
+ 0xf00000000L + 45L,
+ 47,
+ 0xf00000000L + 48L,
+ 50,
+ 0xf00000000L + 51L,
+ 53,
+ 0xf00000000L + 54L,
+ 56,
+ 0xf00000000L + 57L,
+ 59,
+ 0xf00000000L + 60L,
+ 62,
+ 0xf00000000L + 63L,
+ 65,
+ 0xf00000000L + 66L,
+ 68,
+ 0xf00000000L + 69L,
+ 71,
+ 0xf00000000L + 72L,
+ 74,
+ 0xf00000000L + 75L,
+ 77,
+ 0xf00000000L + 78L,
+ 80,
+ 0xf00000000L + 81L,
+ 83,
+ 0xf00000000L + 84L,
+ 86,
+ 0xf00000000L + 87L,
+ 89,
+ 0xf00000000L + 90L,
+ 92,
+ 0xf00000000L + 93L,
+ 95,
+ 0xf00000000L + 96L,
+ 98,
+ 0xf00000000L + 99L,
+ 101,
+ 0xf00000000L + 102L,
+ 104,
+ 0xf00000000L + 105L,
+ 107,
+ 0xf00000000L + 108L,
+ 110,
+ 0xf00000000L + 111L,
+ 113,
+ 0xf00000000L + 114L,
+ 116,
+ 0xf00000000L + 117L,
+ 119,
+ 0xf00000000L + 120L,
+ 122,
+ 0xf00000000L + 123L,
+ 125,
+ 0xf00000000L + 126L,
+ 128,
+ 0xf00000000L + 129L,
+ 131,
+ 0xf00000000L + 132L,
+ 134,
+ 0xf00000000L + 135L,
+ 137,
+ 0xf00000000L + 138L,
+ 140,
+ 0xf00000000L + 141L,
+ 143,
+ 0xf00000000L + 144L,
+ 146,
+ 0xf00000000L + 147L,
+ 149,
+ 0xf00000000L + 150L,
+ 152,
+ 0xf00000000L + 153L,
+ 155,
+ 0xf00000000L + 156L,
+ 158,
+ 0xf00000000L + 159L,
+ 161,
+ 0xf00000000L + 162L,
+ 164,
+ 0xf00000000L + 165L,
+ 167,
+ 0xf00000000L + 168L,
+ 170,
+ 0xf00000000L + 171L,
+ 173,
+ 0xf00000000L + 174L,
+ 176,
+ 0xf00000000L + 177L,
+ 179,
+ 0xf00000000L + 180L,
+ 182,
+ 0xf00000000L + 183L,
+ 185,
+ 0xf00000000L + 186L,
+ 188,
+ 0xf00000000L + 189L,
+ 191,
+ 0xf00000000L + 192L,
+ 194,
+ 0xf00000000L + 195L,
+ 197,
+ 0xf00000000L + 198L,
+ 200,
+ 0xf00000000L + 201L,
+ 203,
+ 0xf00000000L + 204L,
+ 206,
+ 0xf00000000L + 207L,
+ 209,
+ 0xf00000000L + 210L,
+ 212,
+ 0xf00000000L + 213L,
+ 215,
+ 0xf00000000L + 216L,
+ 218,
+ 0xf00000000L + 219L,
+ 221,
+ 0xf00000000L + 222L,
+ 224,
+ 0xf00000000L + 225L,
+ 227,
+ 0xf00000000L + 228L,
+ 230,
+ 0xf00000000L + 231L,
+ 233,
+ 0xf00000000L + 234L,
+ 236,
+ 0xf00000000L + 237L,
+ 239,
+ 0xf00000000L + 240L,
+ 242,
+ 0xf00000000L + 243L,
+ 245,
+ 0xf00000000L + 246L,
+ 248,
+ 0xf00000000L + 249L,
+ 251,
+ 0xf00000000L + 252L,
+ 254));
+ }
+
static void assertEquals(int expected, int actual) {
if (expected != actual) {
throw new AssertionError("Expected " + expected + " got " + actual);
@@ -281,3 +472,280 @@
int i7, long l7, float f7, double d7,
int i8, long l8, float f8, double d8);
}
+
+class CriticalSignatures {
+ // The following signatures exercise ARM argument moving and serve
+ // as an example of the optimizations performed by the assembler.
+ // Moving arguments is a lot simpler for other architectures.
+
+ // JNI compiler does not emit the CFG, so we cannot CHECK the "disassembly (after)".
+
+ // vstm sp, {d0-d2} # f1, f2, f3, f4, d -- store floats as D regs together with double
+ // mov r4, r0 # hidden arg
+ // mov r0, r1 # i
+ // # l stays in r2-r3
+ @CriticalNative
+ public static native int nativeILFFFFD(
+ int i, long l, float f1, float f2, float f3, float f4, double d);
+
+ // vstm sp, {s1-s3} # f2, f3, f4 -- store floats up to alignment gap
+ // vstr d2, [sp, #16] # d
+ // mov r4, r0 # hidden arg
+ // mov r0, r2 # low(l)
+ // mov r1, r3 # high(l)
+ // ldr r2, [sp, #...] # i
+ // vmov r3, s0 # f1
+ @CriticalNative
+ public static native int nativeLIFFFFD(
+ long l, int i, float f1, float f2, float f3, float f4, double d);
+
+ // ldr ip, [sp, #...] # i
+ // str ip, [sp] # i
+ // add ip, sp, #4 # Spilling multiple floats at an offset from SP
+ // vstm ip, {s1-s5} # f2, f3, f4, d
+ // mov r4, r0 # hidden arg
+ // vmov r0, s0 # f1
+ // # l stays in r2-r3
+ @CriticalNative
+ public static native int nativeFLIFFFD(
+ float f1, long l, int i, float f2, float f3, float f4, double d);
+
+ // stm sp, {r1,r2,r3} # i1, i2, i3 -- store ints together
+ // ldrd r1, ip, [sp, #...] # i4, i5
+ // strd r1, ip, [sp, #12] # i4, i5
+ // ldr ip, [sp, #72] # i6
+ // str ip, [sp, #20] # i6
+ // mov r4, r0 # hidden arg
+ // vmov r0, r1, d0 # d1
+ // vmov r2, r3, d1 # d2
+ @CriticalNative
+ public static native int nativeDDIIIIII(
+ double d1, double d2, int i1, int i2, int i3, int i4, int i5, int i6);
+
+ // str r1, [sp] # i1 -- cannot store with l due to alignment gap
+ // strd r2, r3, [sp, #8] # l
+ // ldrd r1, ip, [sp, #...] # i2, i3
+ // strd r1, ip, [sp, #16] # i2, i3
+ // ldr ip, [sp, #...] # i4
+ // str ip, [sp, #24] # i4
+ // mov r4, r0 # hidden arg
+ // vmov r0, r1, d0 # d
+ // vmov r2, r3, d1 # f1, f2 -- move both floats together as double
+ @CriticalNative
+ public static native int nativeDFFILIII(
+ double d, float f1, float f2, int i1, long l, int i2, int i3, int i4);
+
+ // vstr s4, [sp] # f
+ // add ip, sp, #4 # Spilling multiple core registers at an offset from SP
+ // stm ip, {r1,r2,r3} # i1, l -- store int together with long
+ // ldrd r1, ip, [sp, #...] # i2, i3
+ // strd r1, ip, [sp, #16] # i2, i3
+ // ldr ip, [sp, #...] # i4
+ // str ip, [sp, #24] # i4
+ // mov r4, r0 # hidden arg
+ // vmov r0, r1, d0 # d1
+ // vmov r2, r3, d1 # d2
+ @CriticalNative
+ public static native int nativeDDFILIII(
+ double d1, double d2, float f, int i1, long l, int i2, int i3, int i4);
+
+ // str r1, [sp] # i1
+ // vstr s4, [sp, #4] # f
+ // strd r2, r3, [sp, #8] # i2, i3 -- store ints together with STRD
+ // mov r4, r0 # hidden arg
+ // vmov r0, r1, d0 # d1
+ // vmov r2, r3, d1 # d2
+ @CriticalNative
+ public static native int nativeDDIFII(
+ double d1, double d2, int i1, float f, int i2, int i3);
+
+ // ...
+ // ldr ip, [sp, #2112] # int
+ // str ip, [sp, #1000] # int
+ // add r1, sp, #2048 # Prepare to use LDRD for loading long from a large offset
+ // ldrd r1, ip, [r1, #68] # long
+ // strd r1, ip, [sp, #1008] # long
+ // ldr ip, [sp, #2124] # int
+ // str ip, [sp, #1016] # int
+ // ldr ip, [sp, #2128] # low(long) -- copy the next long as two words because the offset
+ // str ip, [sp, #1024] # low(long) -- is too large for STRD and we only use 2 temps (r1, ip)
+ // ldr ip, [sp, #2132] # high(long)
+ // str ip, [sp, #1028] # high(long)
+ // ...
+ @CriticalNative
+ public static native int nativeFullArgs(
+ // Note: Numbered by dalvik registers, 0-254 (max 255 regs for invoke-*-range)
+ //
+ // Generated by script (then modified to close the argument list):
+ // for i in {0..84}; do echo " long l$((i*3)),"; echo " int i$(($i*3+2)),"; done
+ long l0,
+ int i2,
+ long l3,
+ int i5,
+ long l6,
+ int i8,
+ long l9,
+ int i11,
+ long l12,
+ int i14,
+ long l15,
+ int i17,
+ long l18,
+ int i20,
+ long l21,
+ int i23,
+ long l24,
+ int i26,
+ long l27,
+ int i29,
+ long l30,
+ int i32,
+ long l33,
+ int i35,
+ long l36,
+ int i38,
+ long l39,
+ int i41,
+ long l42,
+ int i44,
+ long l45,
+ int i47,
+ long l48,
+ int i50,
+ long l51,
+ int i53,
+ long l54,
+ int i56,
+ long l57,
+ int i59,
+ long l60,
+ int i62,
+ long l63,
+ int i65,
+ long l66,
+ int i68,
+ long l69,
+ int i71,
+ long l72,
+ int i74,
+ long l75,
+ int i77,
+ long l78,
+ int i80,
+ long l81,
+ int i83,
+ long l84,
+ int i86,
+ long l87,
+ int i89,
+ long l90,
+ int i92,
+ long l93,
+ int i95,
+ long l96,
+ int i98,
+ long l99,
+ int i101,
+ long l102,
+ int i104,
+ long l105,
+ int i107,
+ long l108,
+ int i110,
+ long l111,
+ int i113,
+ long l114,
+ int i116,
+ long l117,
+ int i119,
+ long l120,
+ int i122,
+ long l123,
+ int i125,
+ long l126,
+ int i128,
+ long l129,
+ int i131,
+ long l132,
+ int i134,
+ long l135,
+ int i137,
+ long l138,
+ int i140,
+ long l141,
+ int i143,
+ long l144,
+ int i146,
+ long l147,
+ int i149,
+ long l150,
+ int i152,
+ long l153,
+ int i155,
+ long l156,
+ int i158,
+ long l159,
+ int i161,
+ long l162,
+ int i164,
+ long l165,
+ int i167,
+ long l168,
+ int i170,
+ long l171,
+ int i173,
+ long l174,
+ int i176,
+ long l177,
+ int i179,
+ long l180,
+ int i182,
+ long l183,
+ int i185,
+ long l186,
+ int i188,
+ long l189,
+ int i191,
+ long l192,
+ int i194,
+ long l195,
+ int i197,
+ long l198,
+ int i200,
+ long l201,
+ int i203,
+ long l204,
+ int i206,
+ long l207,
+ int i209,
+ long l210,
+ int i212,
+ long l213,
+ int i215,
+ long l216,
+ int i218,
+ long l219,
+ int i221,
+ long l222,
+ int i224,
+ long l225,
+ int i227,
+ long l228,
+ int i230,
+ long l231,
+ int i233,
+ long l234,
+ int i236,
+ long l237,
+ int i239,
+ long l240,
+ int i242,
+ long l243,
+ int i245,
+ long l246,
+ int i248,
+ long l249,
+ int i251,
+ long l252,
+ int i254);
+}