diff options
author | 2021-01-08 18:09:36 +0000 | |
---|---|---|
committer | 2021-01-25 09:22:46 +0000 | |
commit | e585964df42e9fd2fab6f209810cb03e1b261ab1 (patch) | |
tree | 35edc923e92389e4fcb0518289aacb72339f1247 | |
parent | 1bcd7cd823826e95827ea2d666291f56fcccb368 (diff) |
When entering nterp, take a fast path for instance calls with 1 argument.
Such methods only take 'this' as an argument and we don't need to fetch
the shorty.
As a follow-up, the same optimization can be applied to nterp->compiled calls,
by checking that the next instruction after the invoke is not
move-result(-wide).
Test: test.py
Bug: 112676029
Change-Id: Ibc7b4d4ca1c636f4ad6572484e0990ccdbd63293
-rw-r--r-- | runtime/interpreter/mterp/arm64ng/main.S | 86 |
-rw-r--r-- | runtime/interpreter/mterp/armng/main.S | 90 |
-rw-r--r-- | runtime/interpreter/mterp/x86_64ng/main.S | 73 |
-rw-r--r-- | tools/cpp-define-generator/art_method.def | 2 |
4 files changed, 121 insertions, 130 deletions
diff --git a/runtime/interpreter/mterp/arm64ng/main.S b/runtime/interpreter/mterp/arm64ng/main.S index b6d9db696f..a977a90db8 100644 --- a/runtime/interpreter/mterp/arm64ng/main.S +++ b/runtime/interpreter/mterp/arm64ng/main.S @@ -383,7 +383,7 @@ END \name // // Outputs // - ip contains the dex registers size -// - x13 contains the old stack pointer. +// - x28 contains the old stack pointer. // - \code_item is replaced with a pointer to the instructions // - if load_ins is 1, w15 contains the ins // @@ -410,10 +410,10 @@ END \name add \fp, \refs, ip, lsl #2 // Now setup the stack pointer. - mov x13, sp - .cfi_def_cfa_register x13 + mov x28, sp + .cfi_def_cfa_register x28 mov sp, x14 - str x13, [\refs, #-8] + str x28, [\refs, #-8] CFI_DEF_CFA_BREG_PLUS_UCONST \cfi_refs, -8, CALLEE_SAVES_SIZE // Put nulls in reference frame. @@ -483,14 +483,10 @@ END \name .endm .macro SPILL_ALL_ARGUMENTS - INCREASE_FRAME 128 - // GP arguments. - SAVE_TWO_REGS x0, x1, 0 - SAVE_TWO_REGS x2, x3, 16 - SAVE_TWO_REGS x4, x5, 32 - SAVE_TWO_REGS x6, x7, 48 - - // FP arguments + stp x0, x1, [sp, #-128]! + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] stp d0, d1, [sp, #64] stp d2, d3, [sp, #80] stp d4, d5, [sp, #96] @@ -498,18 +494,14 @@ END \name .endm .macro RESTORE_ALL_ARGUMENTS - // GP arguments. - RESTORE_TWO_REGS x0, x1, 0 - RESTORE_TWO_REGS x2, x3, 16 - RESTORE_TWO_REGS x4, x5, 32 - RESTORE_TWO_REGS x6, x7, 48 - - // FP arguments + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] ldp d0, d1, [sp, #64] ldp d2, d3, [sp, #80] ldp d4, d5, [sp, #96] ldp d6, d7, [sp, #112] - DECREASE_FRAME 128 + ldp x0, x1, [sp], #128 .endm // Helper to setup the stack after doing a nterp to nterp call. 
This will setup: @@ -1356,16 +1348,7 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl /* Spill callee save regs */ SPILL_ALL_CALLEE_SAVES - // TODO: Get shorty in a better way and remove below - SPILL_ALL_ARGUMENTS - - bl NterpGetShorty - // Save shorty in callee-save xIBASE. - mov xIBASE, x0 - - RESTORE_ALL_ARGUMENTS ldr xPC, [x0, #ART_METHOD_DATA_OFFSET_64] - // Setup the stack for executing the method. SETUP_STACK_FRAME xPC, xREFS, xFP, CFI_REFS, load_ins=1 @@ -1373,24 +1356,37 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl cbz w15, .Lxmm_setup_finished sub ip2, ip, x15 - lsl x8, ip2, #2 // x8 is now the offset for inputs into the registers array. + ldr w26, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET] + lsl x21, ip2, #2 // x21 is now the offset for inputs into the registers array. + + // If the method is not static and there is one argument ('this'), we don't need to fetch the + // shorty. + tbnz w26, #ART_METHOD_IS_STATIC_FLAG_BIT, .Lsetup_with_shorty + str w1, [xFP, x21] + str w1, [xREFS, x21] + cmp w15, #1 + b.eq .Lxmm_setup_finished + +.Lsetup_with_shorty: + // TODO: Get shorty in a better way and remove below + SPILL_ALL_ARGUMENTS + bl NterpGetShorty + // Save shorty in callee-save xIBASE. + mov xIBASE, x0 + RESTORE_ALL_ARGUMENTS - // Setup shorty, pointer to inputs in FP and pointer to inputs in REFS - add x9, xIBASE, #1 // shorty + 1 ; ie skip return arg character - add x10, xFP, x8 - add x11, xREFS, x8 - - ldr wip, [x0, #ART_METHOD_ACCESS_FLAGS_OFFSET] - // TODO: could be TBNZ but we'd need a constant for log2(ART_METHOD_IS_STATIC_FLAG). 
- tst wip, #ART_METHOD_IS_STATIC_FLAG - b.ne .Lhandle_static_method - str w1, [x10], #4 - str w1, [x11], #4 - add x13, x13, #4 + // Setup pointer to inputs in FP and pointer to inputs in REFS + add x10, xFP, x21 + add x11, xREFS, x21 mov x12, #0 + + add x9, xIBASE, #1 // shorty + 1 ; ie skip return arg character + tbnz w26, #ART_METHOD_IS_STATIC_FLAG_BIT, .Lhandle_static_method + add x10, x10, #4 + add x11, x11, #4 + add x28, x28, #4 b .Lcontinue_setup_gprs .Lhandle_static_method: - mov x12, #0 LOOP_OVER_SHORTY_STORING_GPRS x1, w1, x9, x12, x10, x11, .Lgpr_setup_finished .Lcontinue_setup_gprs: LOOP_OVER_SHORTY_STORING_GPRS x2, w2, x9, x12, x10, x11, .Lgpr_setup_finished @@ -1399,7 +1395,7 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl LOOP_OVER_SHORTY_STORING_GPRS x5, w5, x9, x12, x10, x11, .Lgpr_setup_finished LOOP_OVER_SHORTY_STORING_GPRS x6, w6, x9, x12, x10, x11, .Lgpr_setup_finished LOOP_OVER_SHORTY_STORING_GPRS x7, w7, x9, x12, x10, x11, .Lgpr_setup_finished - LOOP_OVER_INTs x9, x12, x10, x11, x13, .Lgpr_setup_finished + LOOP_OVER_INTs x9, x12, x10, x11, x28, .Lgpr_setup_finished .Lgpr_setup_finished: add x9, xIBASE, #1 // shorty + 1 ; ie skip return arg character mov x12, #0 // reset counter @@ -1411,7 +1407,7 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl LOOP_OVER_SHORTY_STORING_FPS d5, s5, x9, x12, x10, .Lxmm_setup_finished LOOP_OVER_SHORTY_STORING_FPS d6, s6, x9, x12, x10, .Lxmm_setup_finished LOOP_OVER_SHORTY_STORING_FPS d7, s7, x9, x12, x10, .Lxmm_setup_finished - LOOP_OVER_FPs x9, x12, x10, x13, .Lxmm_setup_finished + LOOP_OVER_FPs x9, x12, x10, x28, .Lxmm_setup_finished .Lxmm_setup_finished: CFI_DEFINE_DEX_PC_WITH_OFFSET(CFI_TMP, CFI_DEX, 0) diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S index 7095f58e53..d2ca06f37d 100644 --- a/runtime/interpreter/mterp/armng/main.S +++ b/runtime/interpreter/mterp/armng/main.S @@ -505,31 +505,6 @@ END \name .endif .endm -.macro SPILL_ALL_ARGUMENTS - // We spill 
r4 for stack alignment. - push {r0-r4} - .cfi_adjust_cfa_offset 20 - .cfi_rel_offset r0, 0 - .cfi_rel_offset r1, 4 - .cfi_rel_offset r2, 8 - .cfi_rel_offset r3, 12 - .cfi_rel_offset r4, 16 - vpush {s0-s15} - .cfi_adjust_cfa_offset 64 -.endm - -.macro RESTORE_ALL_ARGUMENTS - vpop {s0-s15} - .cfi_adjust_cfa_offset -64 - pop {r0-r4} - .cfi_restore r0 - .cfi_restore r1 - .cfi_restore r2 - .cfi_restore r3 - .cfi_restore r4 - .cfi_adjust_cfa_offset -20 -.endm - // Helper to setup the stack after doing a nterp to nterp call. This will setup: // - rNEW_FP: the new pointer to dex registers // - rNEW_REFS: the new pointer to references @@ -1396,15 +1371,6 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl /* Spill callee save regs */ SPILL_ALL_CALLEE_SAVES - // TODO: Get shorty in a better way and remove below - SPILL_ALL_ARGUMENTS - - bl NterpGetShorty - // Save shorty in callee-save rIBASE. - mov rIBASE, r0 - - RESTORE_ALL_ARGUMENTS - ldr rPC, [r0, #ART_METHOD_DATA_OFFSET_32] // Setup the stack for executing the method. @@ -1414,36 +1380,54 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl cmp r4, #0 beq .Lxmm_setup_finished - sub r4, rINST, r4 - lsl r4, r4, #2 // r4 is now the offset for inputs into the registers array. + sub rINST, rINST, r4 + ldr r8, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET] + lsl rINST, rINST, #2 // rINST is now the offset for inputs into the registers array. + mov rIBASE, ip // rIBASE contains the old stack pointer + + // If the method is not static and there is one argument ('this'), we don't need to fetch the + // shorty. + tst r8, #ART_METHOD_IS_STATIC_FLAG + bne .Lsetup_with_shorty + str r1, [rFP, rINST] + str r1, [rREFS, rINST] + cmp r4, #1 + beq .Lxmm_setup_finished - mov lr, ip // lr contains the old stack pointer +.Lsetup_with_shorty: + // Save arguments that were passed before calling into the runtime. + // No need to save r0 (ArtMethod) as we're not using it later in this code. + // Save r4 for stack aligment. 
+ // TODO: Get shorty in a better way and remove below + push {r1-r4} + vpush {s0-s15} + bl NterpGetShorty + vpop {s0-s15} + pop {r1-r4} - ldr ip, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET] - // r0 is now available. + mov ip, r8 + add r8, rREFS, rINST + add r7, rFP, rINST + mov r4, #0 // Setup shorty, pointer to inputs in FP and pointer to inputs in REFS - add r0, rIBASE, #1 // shorty + 1 ; ie skip return arg character - add r7, rFP, r4 - add r8, rREFS, r4 + add lr, r0, #1 // shorty + 1 ; ie skip return arg character tst ip, #ART_METHOD_IS_STATIC_FLAG bne .Lhandle_static_method - str r1, [r7], #4 - str r1, [r8], #4 - add lr, lr, #4 - mov r4, #0 + add r7, r7, #4 + add r8, r8, #4 + add rIBASE, rIBASE, #4 b .Lcontinue_setup_gprs .Lhandle_static_method: - mov r4, #0 - LOOP_OVER_SHORTY_STORING_GPRS r1, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0 + LOOP_OVER_SHORTY_STORING_GPRS r1, lr, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0 .Lcontinue_setup_gprs: - LOOP_OVER_SHORTY_STORING_GPRS r2, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0 - LOOP_OVER_SHORTY_STORING_GPRS r3, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=1 + LOOP_OVER_SHORTY_STORING_GPRS r2, lr, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0 + LOOP_OVER_SHORTY_STORING_GPRS r3, lr, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=1 .Lif_long: - LOOP_OVER_INTs r0, r4, r7, r8, lr, ip, r1, .Lgpr_setup_finished + LOOP_OVER_INTs lr, r4, r7, r8, rIBASE, ip, r1, .Lgpr_setup_finished .Lgpr_setup_finished: - add r0, rIBASE, #1 // shorty + 1 ; ie skip return arg character + add r0, r0, #1 // shorty + 1 ; ie skip return arg character mov r1, r7 - add r2, lr, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK + add r2, rIBASE, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK vpush {s0-s15} mov r3, sp bl NterpStoreArm32Fprs diff --git a/runtime/interpreter/mterp/x86_64ng/main.S b/runtime/interpreter/mterp/x86_64ng/main.S index 20dc760ab3..02f0c5ad79 100644 --- a/runtime/interpreter/mterp/x86_64ng/main.S +++ 
b/runtime/interpreter/mterp/x86_64ng/main.S @@ -1450,17 +1450,40 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl /* Spill callee save regs */ SPILL_ALL_CALLEE_SAVES + movq ART_METHOD_DATA_OFFSET_64(%rdi), rPC + + // Setup the stack for executing the method. + SETUP_STACK_FRAME rPC, rREFS, rREFS32, rFP, CFI_REFS, load_ins=1 + + // Setup the parameters + testl %r14d, %r14d + je .Lxmm_setup_finished + + subq %r14, %rbx + salq $$2, %rbx // rbx is now the offset for inputs into the registers array. + + // If the method is not static and there is one argument ('this'), we don't need to fetch the + // shorty. + testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi) + jne .Lsetup_with_shorty + + movl %esi, (rFP, %rbx) + movl %esi, (rREFS, %rbx) + + cmpl $$1, %r14d + je .Lxmm_setup_finished + +.Lsetup_with_shorty: // TODO: Get shorty in a better way and remove below - PUSH rdi - PUSH rsi - PUSH rdx - PUSH rcx - PUSH r8 - PUSH r9 + push %rdi + push %rsi + push %rdx + push %rcx + push %r8 + push %r9 // Save xmm registers + alignment. subq MACRO_LITERAL(8 * 8 + 8), %rsp - CFI_ADJUST_CFA_OFFSET(8 * 8 + 8) movq %xmm0, 0(%rsp) movq %xmm1, 8(%rsp) movq %xmm2, 16(%rsp) @@ -1484,45 +1507,31 @@ OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl movq 48(%rsp), %xmm6 movq 56(%rsp), %xmm7 addq MACRO_LITERAL(8 * 8 + 8), %rsp - CFI_ADJUST_CFA_OFFSET(-8 * 8 - 8) - - POP r9 - POP r8 - POP rcx - POP rdx - POP rsi - POP rdi - // TODO: Get shorty in a better way and remove above - - movq ART_METHOD_DATA_OFFSET_64(%rdi), rPC - - // Setup the stack for executing the method. - SETUP_STACK_FRAME rPC, rREFS, rREFS32, rFP, CFI_REFS, load_ins=1 - - // Setup the parameters - testl %r14d, %r14d - je .Lxmm_setup_finished - subq %r14, %rbx - salq $$2, %rbx // rbx is now the offset for inputs into the registers array. + pop %r9 + pop %r8 + pop %rcx + pop %rdx + pop %rsi + pop %rdi + // Reload the old stack pointer, which used to be stored in %r11, which is not callee-saved. 
+ movq -8(rREFS), %r11 + // TODO: Get shorty in a better way and remove above + movq $$0, %r14 testl $$ART_METHOD_IS_STATIC_FLAG, ART_METHOD_ACCESS_FLAGS_OFFSET(%rdi) - // Available: rdi, r10, r14 + // Available: rdi, r10 // Note the leaq below don't change the flags. leaq 1(%rbp), %r10 // shorty + 1 ; ie skip return arg character leaq (rFP, %rbx, 1), %rdi leaq (rREFS, %rbx, 1), %rbx jne .Lhandle_static_method - movl %esi, (%rdi) - movl %esi, (%rbx) addq $$4, %rdi addq $$4, %rbx addq $$4, %r11 - movq $$0, %r14 jmp .Lcontinue_setup_gprs .Lhandle_static_method: - movq $$0, %r14 LOOP_OVER_SHORTY_STORING_GPRS rsi, esi, r10, r14, rdi, rbx, .Lgpr_setup_finished .Lcontinue_setup_gprs: LOOP_OVER_SHORTY_STORING_GPRS rdx, edx, r10, r14, rdi, rbx, .Lgpr_setup_finished diff --git a/tools/cpp-define-generator/art_method.def b/tools/cpp-define-generator/art_method.def index 7b5606f935..c2e18b1913 100644 --- a/tools/cpp-define-generator/art_method.def +++ b/tools/cpp-define-generator/art_method.def @@ -25,6 +25,8 @@ ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG, art::kAccStatic) ASM_DEFINE(ART_METHOD_IMT_MASK, art::ImTable::kSizeTruncToPowerOfTwo - 1) +ASM_DEFINE(ART_METHOD_IS_STATIC_FLAG_BIT, + art::MostSignificantBit(art::kAccStatic)) ASM_DEFINE(ART_METHOD_DECLARING_CLASS_OFFSET, art::ArtMethod::DeclaringClassOffset().Int32Value()) ASM_DEFINE(ART_METHOD_JNI_OFFSET_32, |