diff options
| -rw-r--r-- | runtime/arch/stub_test.cc | 149 |
1 files changed, 71 insertions, 78 deletions
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 44edd4b076..0b7f268c5e 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -125,27 +125,24 @@ class StubTest : public CommonRuntimeTest { : ); // clobber. #elif defined(__aarch64__) __asm__ __volatile__( - // Spill space for d8 - d15 + // Spill x0-x7 which we say we don't clobber. May contain args. "sub sp, sp, #64\n\t" ".cfi_adjust_cfa_offset 64\n\t" - "stp d8, d9, [sp]\n\t" - "stp d10, d11, [sp, #16]\n\t" - "stp d12, d13, [sp, #32]\n\t" - "stp d14, d15, [sp, #48]\n\t" + "stp x0, x1, [sp]\n\t" + "stp x2, x3, [sp, #16]\n\t" + "stp x4, x5, [sp, #32]\n\t" + "stp x6, x7, [sp, #48]\n\t" - "sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned - ".cfi_adjust_cfa_offset 48\n\t" - "stp %[referrer], x1, [sp]\n\t"// referrer, x1 - "stp x2, x3, [sp, #16]\n\t" // Save x2, x3 - "stp x18, x30, [sp, #32]\n\t" // Save x18(xSELF), xLR + "sub sp, sp, #16\n\t" // Reserve stack space, 16B aligned + ".cfi_adjust_cfa_offset 16\n\t" + "str %[referrer], [sp]\n\t" // referrer // Push everything on the stack, so we don't rely on the order. What a mess. :-( "sub sp, sp, #48\n\t" ".cfi_adjust_cfa_offset 48\n\t" - "str %[arg0], [sp]\n\t" - "str %[arg1], [sp, #8]\n\t" - "str %[arg2], [sp, #16]\n\t" - "str %[code], [sp, #24]\n\t" + // All things are "r" constraints, so direct str/stp should work. + "stp %[arg0], %[arg1], [sp]\n\t" + "stp %[arg2], %[code], [sp, #16]\n\t" "str %[self], [sp, #32]\n\t" // Now we definitely have x0-x3 free, use it to garble d8 - d15 @@ -169,17 +166,18 @@ class StubTest : public CommonRuntimeTest { "add x0, x0, 1\n\t" "fmov d15, x0\n\t" - // Load call params - "ldr x0, [sp]\n\t" - "ldr x1, [sp, #8]\n\t" - "ldr x2, [sp, #16]\n\t" - "ldr x3, [sp, #24]\n\t" + // Load call params into the right registers. + "ldp x0, x1, [sp]\n\t" + "ldp x2, x3, [sp, #16]\n\t" "ldr x18, [sp, #32]\n\t" "add sp, sp, #48\n\t" ".cfi_adjust_cfa_offset -48\n\t" "blr x3\n\t" // Call the stub + "mov x8, x0\n\t" // Store result + "add sp, sp, #16\n\t" // Drop the quick "frame" + ".cfi_adjust_cfa_offset -16\n\t" // Test d8 - d15. We can use x1 and x2. "movk x1, #0xfad0\n\t" @@ -225,31 +223,25 @@ class StubTest : public CommonRuntimeTest { "cmp x1, x2\n\t" "b.ne 1f\n\t" - "mov x2, #0\n\t" - "str x2, %[fpr_result]\n\t" + "mov x9, #0\n\t" // Use x9 as flag, in clobber list // Finish up. "2:\n\t" - "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2 - "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF - "ldr x30, [sp, #40]\n\t" // Restore xLR - "add sp, sp, #48\n\t" // Free stack space - ".cfi_adjust_cfa_offset -48\n\t" - "mov %[result], x0\n\t" // Save the result - - "ldp d8, d9, [sp]\n\t" // Restore d8 - d15 - "ldp d10, d11, [sp, #16]\n\t" - "ldp d12, d13, [sp, #32]\n\t" - "ldp d14, d15, [sp, #48]\n\t" - "add sp, sp, #64\n\t" + "ldp x0, x1, [sp]\n\t" // Restore stuff not named clobbered, may contain fpr_result + "ldp x2, x3, [sp, #16]\n\t" + "ldp x4, x5, [sp, #32]\n\t" + "ldp x6, x7, [sp, #48]\n\t" + "add sp, sp, #64\n\t" // Free stack space, now sp as on entry ".cfi_adjust_cfa_offset -64\n\t" + "str x9, %[fpr_result]\n\t" // Store the FPR comparison result + "mov %[result], x8\n\t" // Store the call result + "b 3f\n\t" // Goto end // Failed fpr verification. "1:\n\t" - "mov x2, #1\n\t" - "str x2, %[fpr_result]\n\t" + "mov x9, #1\n\t" "b 2b\n\t" // Goto finish-up // End @@ -258,7 +250,12 @@ class StubTest : public CommonRuntimeTest { // Use the result from r0 : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), [referrer] "r"(referrer), [fpr_result] "m" (fpr_result) - : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"); // clobber. + : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); // clobber. #elif defined(__x86_64__) // Note: Uses the native convention // TODO: Set the thread? @@ -351,29 +348,25 @@ class StubTest : public CommonRuntimeTest { : ); // clobber. #elif defined(__aarch64__) __asm__ __volatile__( - // Spill space for d8 - d15 + // Spill x0-x7 which we say we don't clobber. May contain args. "sub sp, sp, #64\n\t" ".cfi_adjust_cfa_offset 64\n\t" - "stp d8, d9, [sp]\n\t" - "stp d10, d11, [sp, #16]\n\t" - "stp d12, d13, [sp, #32]\n\t" - "stp d14, d15, [sp, #48]\n\t" + "stp x0, x1, [sp]\n\t" + "stp x2, x3, [sp, #16]\n\t" + "stp x4, x5, [sp, #32]\n\t" + "stp x6, x7, [sp, #48]\n\t" - "sub sp, sp, #48\n\t" // Reserve stack space, 16B aligned - ".cfi_adjust_cfa_offset 48\n\t" - "stp %[referrer], x1, [sp]\n\t"// referrer, x1 - "stp x2, x3, [sp, #16]\n\t" // Save x2, x3 - "stp x18, x30, [sp, #32]\n\t" // Save x18(xSELF), xLR + "sub sp, sp, #16\n\t" // Reserve stack space, 16B aligned + ".cfi_adjust_cfa_offset 16\n\t" + "str %[referrer], [sp]\n\t" // referrer // Push everything on the stack, so we don't rely on the order. What a mess. :-( "sub sp, sp, #48\n\t" ".cfi_adjust_cfa_offset 48\n\t" - "str %[arg0], [sp]\n\t" - "str %[arg1], [sp, #8]\n\t" - "str %[arg2], [sp, #16]\n\t" - "str %[code], [sp, #24]\n\t" - "str %[self], [sp, #32]\n\t" - "str %[hidden], [sp, #40]\n\t" + // All things are "r" constraints, so direct str/stp should work. + "stp %[arg0], %[arg1], [sp]\n\t" + "stp %[arg2], %[code], [sp, #16]\n\t" + "stp %[self], %[hidden], [sp, #32]\n\t" // Now we definitely have x0-x3 free, use it to garble d8 - d15 "movk x0, #0xfad0\n\t" @@ -396,18 +389,17 @@ class StubTest : public CommonRuntimeTest { "add x0, x0, 1\n\t" "fmov d15, x0\n\t" - // Load call params - "ldr x0, [sp]\n\t" - "ldr x1, [sp, #8]\n\t" - "ldr x2, [sp, #16]\n\t" - "ldr x3, [sp, #24]\n\t" - "ldr x18, [sp, #32]\n\t" - "ldr x12, [sp, #40]\n\t" + // Load call params into the right registers. + "ldp x0, x1, [sp]\n\t" + "ldp x2, x3, [sp, #16]\n\t" + "ldp x18, x12, [sp, #32]\n\t" "add sp, sp, #48\n\t" ".cfi_adjust_cfa_offset -48\n\t" - "blr x3\n\t" // Call the stub + "mov x8, x0\n\t" // Store result + "add sp, sp, #16\n\t" // Drop the quick "frame" + ".cfi_adjust_cfa_offset -16\n\t" // Test d8 - d15. We can use x1 and x2. "movk x1, #0xfad0\n\t" @@ -453,38 +445,39 @@ class StubTest : public CommonRuntimeTest { "cmp x1, x2\n\t" "b.ne 1f\n\t" - "mov %[fpr_result], #0\n\t" + "mov x9, #0\n\t" // Use x9 as flag, in clobber list // Finish up. "2:\n\t" - "ldp x1, x2, [sp, #8]\n\t" // Restore x1, x2 - "ldp x3, x18, [sp, #24]\n\t" // Restore x3, xSELF - "ldr x30, [sp, #40]\n\t" // Restore xLR - "add sp, sp, #48\n\t" // Free stack space - ".cfi_adjust_cfa_offset -48\n\t" - "mov %[result], x0\n\t" // Save the result - - "ldp d8, d9, [sp]\n\t" // Restore d8 - d15 - "ldp d10, d11, [sp, #16]\n\t" - "ldp d12, d13, [sp, #32]\n\t" - "ldp d14, d15, [sp, #48]\n\t" - "add sp, sp, #64\n\t" + "ldp x0, x1, [sp]\n\t" // Restore stuff not named clobbered, may contain fpr_result + "ldp x2, x3, [sp, #16]\n\t" + "ldp x4, x5, [sp, #32]\n\t" + "ldp x6, x7, [sp, #48]\n\t" + "add sp, sp, #64\n\t" // Free stack space, now sp as on entry ".cfi_adjust_cfa_offset -64\n\t" + "str x9, %[fpr_result]\n\t" // Store the FPR comparison result + "mov %[result], x8\n\t" // Store the call result + "b 3f\n\t" // Goto end // Failed fpr verification. "1:\n\t" - "mov %[fpr_result], #1\n\t" + "mov x9, #1\n\t" "b 2b\n\t" // Goto finish-up // End "3:\n\t" - : [result] "=r" (result), [fpr_result] "=r" (fpr_result) - // Use the result from r0 + : [result] "=r" (result) + // Use the result from r0 : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self), - [referrer] "r"(referrer), [hidden] "r"(hidden) - : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17"); // clobber. + [referrer] "r"(referrer), [hidden] "r"(hidden), [fpr_result] "m" (fpr_result) + : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18", "x19", "x20", + "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x30", + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); // clobber. #elif defined(__x86_64__) // Note: Uses the native convention // TODO: Set the thread? |