| // Theory of operation. These invoke-X opcodes bounce to code labels in main.S which attempt a |
| // variety of fast paths; the full asm doesn't fit in the per-opcode handler's size limit. |
| // |
| // Calling convention. There are three argument transfer types. |
// (A) Managed ABI -> Nterp. ExecuteNterpImpl handles this case. We set up a fresh nterp frame
| // and move arguments from machine arg registers (and sometimes stack) into the frame. |
| // (B) Nterp -> Nterp. An invoke op's fast path handles this case. If we can stay in nterp, then |
| // we set up a fresh nterp frame, and copy the register slots from caller to callee. |
// (C) Nterp -> Managed ABI. An invoke op's remaining cases handle this. To leave nterp, we read out arguments from
| // the caller's nterp frame and place them into machine arg registers (and sometimes stack). |
| // Doing so requires obtaining and deciphering the method's shorty for arg type, width, and |
| // order info. |
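//
// Worked example for (C), illustrative only: a method `double f(int, Object, long)` has the
// shorty "DILJ" (return type first, then arg types). Scanning it left to right, the arg setup
// loads the int with LW (sign-extended), the ref with a zero-extending load, and the long as a
// full 64-bit GPR value; the double return arrives in fa0 and is copied to a0 for
// move-result-wide.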
| // |
| // Fast path structure. |
| // (0) If the next method's "quick code" is nterp, then set up a fresh nterp frame and perform a |
| // vreg->vreg transfer. Jump to handler for the next method's first opcode. |
| // - The following paths leave nterp. - |
// (1) If the next method's args are guaranteed to be only object refs, then the managed ABI is very simple:
| // just place all arguments in the native arg registers using LWU. Call the quick code. |
| // (2) The next method might have an arg/return shape that can avoid the shorty, or at least avoid |
| // most complications of the managed ABI arg setup. |
| // (2.1) If the next method has 0 args, then peek ahead in dex: if no scalar return, then call the |
| // quick code. (Even when the next opcode is move-result-object, nterp will expect the |
| // reference at a0, matching where the managed ABI leaves it after the call.) |
| // (2.2) If the next method has 0 args and scalar return, or has 1 arg, then obtain the shorty. |
| // (2.2.1) Post-shorty: if 0 args, call the quick code. (After the call, a returned float must be |
| // copied from fa0 into a0.) |
| // (2.2.2) Post-shorty: check the arg's shorty type. If 'L', we must load it with LWU. Otherwise, we |
| // load it with LW and store a copy into FA0 (to avoid another branch). Call the quick code. |
| // - The fully pessimistic case. - |
| // (3) The next method has 2+ arguments with a mix of float/double/long, OR it is polymorphic OR |
| // custom. Obtain the shorty and perform the full setup for managed ABI. Polymorphic and |
| // custom invokes are specially shunted to the runtime. Otherwise we call the quick code. |
| // |
// Code organization. These functions are organized in a three-tier structure to aid readability.
| // (P) The "front end" is an opcode handler, such as op_invoke_virtual(). They are defined in |
| // invoke.S. Since all the invoke code cannot fit in the allotted handler region, every invoke |
| // handler has code extending into a "back end". |
| // (Q) The opcode handler calls a "back end" label that is located in main.S. The code for that |
// label is defined in invoke.S. As a convention, op_invoke_virtual()'s label in main.S is
// NterpInvokeVirtual, and its code in invoke.S is nterp_invoke_virtual().
| // (R) For the Nterp to Nterp fast path case, the back end calls a label located in main.S, the code |
| // for which is defined in invoke.S. As a convention, the label in main.S is |
| // NterpToNterpInstance, and the code in invoke.S is nterp_to_nterp_instance(). |
| // Helpers for each tier are placed just after the functions of each tier. |
| |
| // |
| // invoke-kind {vC, vD, vE, vF, vG}, meth@BBBB |
| // Format 35c: A|G|op BBBB F|E|D|C |
| // |
| |
| // invoke-virtual {vC, vD, vE, vF, vG}, meth@BBBB |
| // Format 35c: A|G|6e BBBB F|E|D|C |
| // |
| // Note: invoke-virtual is used to invoke a normal virtual method (a method that is not private, |
| // static, or final, and is also not a constructor). |
| %def op_invoke_virtual(range=""): |
| EXPORT_PC |
| FETCH s7, count=2 // s7 := F|E|D|C or CCCC (range) |
| FETCH_FROM_THREAD_CACHE a0, /*slow path*/2f, t0, t1 |
| // a0 := method idx of resolved virtual method |
| 1: |
| % fetch_receiver(reg="a1", vreg="s7", range=range) |
| // a1 := fp[C] (this) |
| // Note: null case handled by SEGV handler. |
| lwu t0, MIRROR_OBJECT_CLASS_OFFSET(a1) |
| // t0 := klass object (32-bit addr) |
| UNPOISON_HEAP_REF t0 |
| // Entry address = entry's byte offset in vtable + vtable's byte offset in klass object. |
sh3add a0, a0, t0 // a0 := entry's address, before vtable offset adjustment
| ld a0, MIRROR_CLASS_VTABLE_OFFSET_64(a0) |
| // a0 := ArtMethod* |
| tail NterpInvokeVirtual${range} // args a0, a1, s7 |
| 2: |
| % resolve_method_into_a0() |
| j 1b |
| |
| |
| // invoke-super {vC, vD, vE, vF, vG}, meth@BBBB |
| // Format 35c: A|G|6f BBBB F|E|D|C |
| // |
| // Note: When the method_id references a method of a non-interface class, invoke-super is used to |
| // invoke the closest superclass's virtual method (as opposed to the one with the same method_id in |
| // the calling class). |
| // Note: In Dex files version 037 or later, if the method_id refers to an interface method, |
| // invoke-super is used to invoke the most specific, non-overridden version of that method defined |
| // on that interface. The same method restrictions hold as for invoke-virtual. In Dex files prior to |
| // version 037, having an interface method_id is illegal and undefined. |
| %def op_invoke_super(range=""): |
| EXPORT_PC |
| FETCH s7, count=2 // s7 := F|E|D|C or CCCC (range) |
| FETCH_FROM_THREAD_CACHE a0, /*slow path*/2f, t0, t1 |
| // a0 := ArtMethod* |
| 1: |
| % fetch_receiver(reg="a1", vreg="s7", range=range) |
| // a1 := fp[C] (this) |
| beqz a1, 3f // throw if null |
| tail NterpInvokeSuper${range} // args a0, a1, s7 |
| 2: |
| % resolve_method_into_a0() |
| j 1b |
| 3: |
| tail common_errNullObject |
| |
| |
| // invoke-direct {vC, vD, vE, vF, vG}, meth@BBBB |
| // Format 35c: A|G|70 BBBB F|E|D|C |
| // |
| // Note: invoke-direct is used to invoke a non-static direct method (that is, an instance method |
| // that is by its nature non-overridable, namely either a private instance method or a constructor). |
| // |
| // For additional context on string init, see b/28555675. The object reference is replaced after |
| // the string factory call, so we disable thread-caching the resolution of string init, and skip |
| // fast paths out to managed ABI calls. |
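//
// A sketch of the tagging convention assumed from the code below: resolution returns
// (uintptr_t)method | 1 for String.<init>, so the slow path tests bit #0, clears it, and routes
// to NterpInvokeStringInit instead of the regular invoke path.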
| %def op_invoke_direct(range=""): |
| EXPORT_PC |
| FETCH s7, count=2 // s7 := F|E|D|C or CCCC (range) |
| FETCH_FROM_THREAD_CACHE a0, /*slow path*/2f, t0, t1 |
| // a0 := ArtMethod*, never String.<init> |
| 1: |
| % fetch_receiver(reg="a1", vreg="s7", range=range) |
| // a1 := fp[C] (this) |
| beqz a1, 3f // throw if null |
| tail NterpInvokeDirect${range} // args a0, a1, s7 |
| 2: |
% resolve_method_into_a0() # a0 := ArtMethod*, LSB #0 set if String.<init>
| and t0, a0, 0x1 // t0 := string-init bit |
| beqz t0, 1b // not string init |
| and a0, a0, ~0x1 // clear string-init bit |
| tail NterpInvokeStringInit${range} // args a0, s7 |
| 3: |
| tail common_errNullObject |
| |
| |
| // invoke-static {vC, vD, vE, vF, vG}, meth@BBBB |
| // Format 35c: A|G|71 BBBB F|E|D|C |
| // |
| // Note: invoke-static is used to invoke a static method (which is always considered a direct |
| // method). |
| %def op_invoke_static(range=""): |
| EXPORT_PC |
| // TODO: Unnecessary if A=0, and unnecessary if nterp-to-nterp. |
| FETCH s7, count=2 // s7 := F|E|D|C or CCCC (range) |
| FETCH_FROM_THREAD_CACHE a0, /*slow path*/1f, t0, t1 |
| // a0 := ArtMethod* |
tail NterpInvokeStatic${range} // args a0, s7
| 1: |
| % resolve_method_into_a0() |
tail NterpInvokeStatic${range} // args a0, s7
| |
| |
| // invoke-interface {vC, vD, vE, vF, vG}, meth@BBBB |
| // Format 35c: A|G|72 BBBB F|E|D|C |
| // |
| // Note: invoke-interface is used to invoke an interface method, that is, on an object whose |
| // concrete class isn't known, using a method_id that refers to an interface. |
| %def op_invoke_interface(range=""): |
| EXPORT_PC |
| FETCH s7, count=2 // s7 := F|E|D|C or CCCC (range) |
| // T0 is eventually used to carry the "hidden argument" in the managed ABI. |
| // This handler is tight on space, so we cache this arg in A0 and move it to T0 later. |
| // Here, A0 is one of |
| // (1) ArtMethod* |
| // (2) ArtMethod* with LSB #1 set (default method) |
| // (3) method index << 16 with LSB #0 set (j.l.Object method) |
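// For example (illustrative), a j.l.Object method with index 5 arrives as (5 << 16) | 1 =
// 0x50001. The `slliw t1, a0, 30` below moves LSB #1 into the sign bit and LSB #0 into bit 30,
// which is what lets bltz detect case (2) and bgtz detect case (3).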
| FETCH_FROM_THREAD_CACHE a0, /*slow path*/5f, t0, t1 |
| 1: |
| % fetch_receiver(reg="a1", vreg="s7", range=range) |
| // a1 := fp[C] (this) |
| // Note: null case handled by SEGV handler. |
| lwu t0, MIRROR_OBJECT_CLASS_OFFSET(a1) |
| // t0 := klass object (32-bit addr) |
| UNPOISON_HEAP_REF t0 |
| slliw t1, a0, 30 // test LSB #0 and #1 |
| bltz t1, 3f // LSB #1 is set; handle default method |
| bgtz t1, 4f // LSB #0 is set; handle object method |
| // no signal bits; it is a clean ArtMethod* |
| lhu t1, ART_METHOD_IMT_INDEX_OFFSET(a0) |
| // t1 := idx into interface method table (16-bit value) |
| 2: |
| ld t0, MIRROR_CLASS_IMT_PTR_OFFSET_64(t0) |
| // t0 := base address of imt |
| sh3add t0, t1, t0 // t0 := entry's address in imt |
| ld a2, (t0) // a2 := ArtMethod* |
| tail NterpInvokeInterface${range} // a0 (hidden arg), a1 (this), a2 (ArtMethod*), s7 (vregs) |
| 3: |
| andi a0, a0, ~2 // a0 := default ArtMethod*, LSB #1 cleared |
| lhu t1, ART_METHOD_METHOD_INDEX_OFFSET(a0) |
| // t1 := method_index_ (16-bit value) |
| // Default methods have a contract with art::IMTable. |
| andi t1, t1, ART_METHOD_IMT_MASK |
| // t1 := idx into interface method table |
| j 2b |
| 4: |
| // Interface methods on j.l.Object have a contract with NterpGetMethod. |
srliw t1, a0, 16 // t1 := method index
| sh3add t0, t1, t0 // t0 := entry's byte offset, before vtable offset adjustment |
| ld a0, MIRROR_CLASS_VTABLE_OFFSET_64(t0) |
| tail NterpInvokeDirect${range} // args a0, a1, s7 |
| 5: |
| % resolve_method_into_a0() |
| j 1b |
| |
| |
| // |
| // invoke-kind/range {vCCCC .. vNNNN}, meth@BBBB |
| // Format 3rc: AA|op BBBB CCCC |
// where NNNN = CCCC + AA - 1, that is AA determines the count 0..255, and CCCC determines the
// first register.
| // |
| |
| // invoke-virtual/range {vCCCC .. vNNNN}, meth@BBBB |
| // Format 3rc: AA|74 BBBB CCCC |
| // |
| // Note: invoke-virtual/range is used to invoke a normal virtual method (a method that is not |
| // private, static, or final, and is also not a constructor). |
| %def op_invoke_virtual_range(): |
| % op_invoke_virtual(range="Range") |
| |
| |
| // invoke-super/range {vCCCC .. vNNNN}, meth@BBBB |
| // Format 3rc: AA|75 BBBB CCCC |
| // |
| // Note: When the method_id references a method of a non-interface class, invoke-super/range is used |
| // to invoke the closest superclass's virtual method (as opposed to the one with the same method_id |
| // in the calling class). |
| // Note: In Dex files version 037 or later, if the method_id refers to an interface method, |
| // invoke-super/range is used to invoke the most specific, non-overridden version of that method |
| // defined on that interface. In Dex files prior to version 037, having an interface method_id is |
| // illegal and undefined. |
| %def op_invoke_super_range(): |
| % op_invoke_super(range="Range") |
| |
| |
| // invoke-direct/range {vCCCC .. vNNNN}, meth@BBBB |
| // Format 3rc: AA|76 BBBB CCCC |
| // |
| // Note: invoke-direct/range is used to invoke a non-static direct method (that is, an instance |
| // method that is by its nature non-overridable, namely either a private instance method or a |
| // constructor). |
| %def op_invoke_direct_range(): |
| % op_invoke_direct(range="Range") |
| |
| |
| // invoke-static/range {vCCCC .. vNNNN}, meth@BBBB |
| // Format 3rc: AA|77 BBBB CCCC |
| // |
| // Note: invoke-static/range is used to invoke a static method (which is always considered a direct |
| // method). |
| %def op_invoke_static_range(): |
| % op_invoke_static(range="Range") |
| |
| |
| // invoke-interface/range {vCCCC .. vNNNN}, meth@BBBB |
| // Format 3rc: AA|78 BBBB CCCC |
| // |
| // Note: invoke-interface/range is used to invoke an interface method, that is, on an object whose |
| // concrete class isn't known, using a method_id that refers to an interface. |
| %def op_invoke_interface_range(): |
| % op_invoke_interface(range="Range") |
| |
| |
| // invoke-polymorphic {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH |
| // Format 45cc: A|G|fa BBBB F|E|D|C HHHH |
| // |
| // Note: Invoke the indicated signature polymorphic method. The result (if any) may be stored with |
| // an appropriate move-result* variant as the immediately subsequent instruction. |
| // |
| // The method reference must be to a signature polymorphic method, such as |
| // java.lang.invoke.MethodHandle.invoke or java.lang.invoke.MethodHandle.invokeExact. |
| // |
| // The receiver must be an object supporting the signature polymorphic method being invoked. |
| // |
| // The prototype reference describes the argument types provided and the expected return type. |
| // |
| // The invoke-polymorphic bytecode may raise exceptions when it executes. The exceptions are |
| // described in the API documentation for the signature polymorphic method being invoked. |
| // |
| // Present in Dex files from version 038 onwards. |
| %def op_invoke_polymorphic(range=""): |
| EXPORT_PC |
| FETCH s7, count=2 // s7 := F|E|D|C or CCCC (range) |
| // No need to fetch the target method; the runtime handles it. |
| % fetch_receiver(reg="s8", vreg="s7", range=range) |
| beqz s8, 1f // throw if null |
| |
| ld a0, (sp) // a0 := caller ArtMethod* |
| mv a1, xPC |
| call NterpGetShortyFromInvokePolymorphic // args a0, a1 |
| mv a1, s8 |
| tail NterpInvokePolymorphic${range} // args a0 (shorty), a1 (this), s7 (vregs) |
| 1: |
| tail common_errNullObject |
| |
| |
| // invoke-polymorphic/range {vCCCC .. vNNNN}, meth@BBBB, proto@HHHH |
| // Format 4rcc: AA|fb BBBB CCCC HHHH |
// where NNNN = CCCC + AA - 1, that is AA determines the count 0..255, and CCCC determines the
// first register.
| // |
| // Note: Invoke the indicated method handle. See the invoke-polymorphic description above for |
| // details. |
| // |
| // Present in Dex files from version 038 onwards. |
| %def op_invoke_polymorphic_range(): |
| % op_invoke_polymorphic(range="Range") |
| |
| |
| // invoke-custom {vC, vD, vE, vF, vG}, call_site@BBBB |
| // Format 35c: A|G|fc BBBB F|E|D|C |
| // |
| // Note: Resolves and invokes the indicated call site. The result from the invocation (if any) may |
| // be stored with an appropriate move-result* variant as the immediately subsequent instruction. |
| // |
| // This instruction executes in two phases: call site resolution and call site invocation. |
| // |
| // Call site resolution checks whether the indicated call site has an associated |
| // java.lang.invoke.CallSite instance. If not, the bootstrap linker method for the indicated call |
| // site is invoked using arguments present in the DEX file (see call_site_item). The bootstrap |
| // linker method returns a java.lang.invoke.CallSite instance that will then be associated with the |
| // indicated call site if no association exists. Another thread may have already made the |
| // association first, and if so execution of the instruction continues with the first associated |
| // java.lang.invoke.CallSite instance. |
| // |
| // Call site invocation is made on the java.lang.invoke.MethodHandle target of the resolved |
| // java.lang.invoke.CallSite instance. The target is invoked as if executing invoke-polymorphic |
| // (described above) using the method handle and arguments to the invoke-custom instruction as the |
| // arguments to an exact method handle invocation. |
| // |
| // Exceptions raised by the bootstrap linker method are wrapped in a java.lang.BootstrapMethodError. |
| // A BootstrapMethodError is also raised if: |
| // - the bootstrap linker method fails to return a java.lang.invoke.CallSite instance. |
| // - the returned java.lang.invoke.CallSite has a null method handle target. |
| // - the method handle target is not of the requested type. |
| // |
| // Present in Dex files from version 038 onwards. |
| %def op_invoke_custom(range=""): |
| EXPORT_PC |
| ld a0, (sp) // a0 := caller ArtMethod* |
| mv a1, xPC |
| call NterpGetShortyFromInvokeCustom // args a0, a1 |
| mv s7, a0 // s7 := shorty |
| FETCH a0, 1 // a0 := BBBB |
| FETCH s8, 2 // s8 := F|E|D|C or CCCC (range) |
| tail NterpInvokeCustom${range} // args a0 (BBBB), s7 (shorty), s8 (vregs) |
| |
| |
| // invoke-custom/range {vCCCC .. vNNNN}, call_site@BBBB |
| // Format 3rc: AA|fd BBBB CCCC |
// where NNNN = CCCC + AA - 1, that is AA determines the count 0..255, and CCCC determines the
// first register.
| // |
| // Note: Resolve and invoke a call site. See the invoke-custom description above for details. |
| // |
| // Present in Dex files from version 038 onwards. |
| %def op_invoke_custom_range(): |
| % op_invoke_custom(range="Range") |
| |
| |
| // handler helpers |
| |
| %def resolve_method_into_a0(): |
| mv a0, xSELF |
ld a1, (sp) // a1 := caller ArtMethod*; a0 can't be relied on to hold it here.
| mv a2, xPC |
| call nterp_get_method |
| |
| |
| %def fetch_receiver(reg="", vreg="", range=""): |
| % if range == 'Range': |
| GET_VREG_OBJECT $reg, $vreg // reg := refs[CCCC] |
| % else: |
| andi $reg, $vreg, 0xF // reg := C |
| GET_VREG_OBJECT $reg, $reg // reg := refs[C] |
| |
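// For example: with F|E|D|C = 0x0054 (C = 4), the non-range path masks out C and loads refs[4];
// the range path uses CCCC directly as the index into the refs array.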
| |
| // |
| // These asm blocks are positioned in main.S for visibility to stack walking. |
| // |
| |
| |
| // NterpInvokeVirtual |
| // a0: ArtMethod* |
| // a1: this |
| // s7: vreg ids F|E|D|C |
| %def nterp_invoke_virtual(): |
| % nterp_invoke_direct(uniq="invoke_virtual") |
| |
| |
| // NterpInvokeSuper |
| // a0: ArtMethod* |
| // a1: this |
| // s7: vreg ids F|E|D|C |
| %def nterp_invoke_super(): |
| % nterp_invoke_direct(uniq="invoke_super") |
| |
| |
| // NterpInvokeDirect |
| // a0: ArtMethod* |
| // a1: this |
| // s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC |
| %def nterp_invoke_direct(uniq="invoke_direct", range=""): |
| ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0) |
| // s8 := quick code |
| % try_nterp(quick="s8", z0="t0", skip=f".L{uniq}_simple") |
| call NterpToNterpInstance${range} // args a0, a1 |
| j .L${uniq}_next_op |
| |
| .L${uniq}_simple: |
| % if range == 'Range': |
| % try_simple_args_range(vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", skip=f".L{uniq}_01", uniq=uniq) |
| % else: |
| % try_simple_args(v_fedc="s7", z0="t0", z1="t1", skip=f".L{uniq}_01", uniq=uniq) |
| %#: |
| jalr s8 // (regular) args a0 - a5, (range) args a0 - a7 and stack |
| j .L${uniq}_next_op |
| |
| .L${uniq}_01: |
| mv s9, zero // initialize shorty reg |
| % try_01_args(vreg="s7", shorty="s9", z0="t0", z1="t1", z2="t2", y0="s10", y1="s11", y2="s0", skip=f".L{uniq}_slow", call=f".L{uniq}_01_call", uniq=uniq, range=range) |
| // if s9 := shorty, then maybe (a2, fa0) := fp[D] or fp[CCCC + 1] |
| .L${uniq}_01_call: |
| jalr s8 // args a0, a1, and maybe a2, fa0 |
| beqz s9, .L${uniq}_next_op // no shorty, no scalar return |
| % maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_0") |
| // a0 := fa0 if float return |
| j .L${uniq}_next_op |
| |
| .L${uniq}_slow: |
| % get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11") |
| % if range == 'Range': |
| % slow_setup_args_range(shorty="s9", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s10", uniq=uniq) |
| % else: |
| % slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", uniq=uniq) |
| %#: |
| jalr s8 // args in a0-a5, fa0-fa4 |
| % maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1") |
| // a0 := fa0 if float return |
| .L${uniq}_next_op: |
| FETCH_ADVANCE_INST 3 |
| GET_INST_OPCODE t0 |
| GOTO_OPCODE t0 |
| |
| |
| // NterpInvokeStringInit |
| // a0: ArtMethod* |
| // s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC |
| %def nterp_invoke_string_init(uniq="invoke_string_init", range=""): |
| ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0) |
| // s8 := quick code |
| % try_nterp(quick="s8", z0="t0", skip=f".L{uniq}_slow") |
| call NterpToNterpStringInit${range} // arg a0 |
| j .L${uniq}_next_op |
| |
| .L${uniq}_slow: |
| % get_shorty_save_a0_a1(shorty="s9", y0="s10", y1="s11") |
| % if range == 'Range': |
| % slow_setup_args_string_init_range(shorty="s9", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", uniq=uniq) |
| % else: |
| % slow_setup_args_string_init(shorty="s9", v_fedc="s7", z0="t0", z1="t1", z2="t2", uniq=uniq) |
| %#: |
jalr s8 // args a0 - a4, both regular and range
| |
| .L${uniq}_next_op: |
| % fetch_receiver(reg="t0", vreg="s7", range=range) |
| // t0 := fp[C] (this) |
| % subst_vreg_references(old="t0", new="a0", z0="t1", z1="t2", z2="t3", uniq=uniq) |
| FETCH_ADVANCE_INST 3 |
| GET_INST_OPCODE t0 |
| GOTO_OPCODE t0 |
| |
| |
| // NterpInvokeStatic |
| // a0: ArtMethod* |
| // s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC |
| %def nterp_invoke_static(uniq="invoke_static", range=""): |
| ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0) |
| // s8 := quick code |
| % try_nterp(quick="s8", z0="t0", skip=f".L{uniq}_simple") |
| call NterpToNterpStatic${range} // arg a0 |
| j .L${uniq}_next_op |
| |
| .L${uniq}_simple: |
| % if range == 'Range': |
| % try_simple_args_range(vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", arg_start="0", skip=f".L{uniq}_01", uniq=uniq) |
| % else: |
| % try_simple_args(v_fedc="s7", z0="t0", z1="t1", arg_start="0", skip=f".L{uniq}_01", uniq=uniq) |
| %#: |
| jalr s8 // args (regular) a0 - a5, (range) a0 - a7 and maybe stack |
| j .L${uniq}_next_op |
| |
| .L${uniq}_01: |
| mv s9, zero // initialize shorty reg |
| % try_01_args_static(vreg="s7", shorty="s9", z0="t0", z1="t1", z2="t2", y0="s10", y1="s11", skip=f".L{uniq}_slow", call=f".L{uniq}_01_call", uniq=uniq, range=range) |
| // if s9 := shorty, then maybe (a2, fa0) := fp[C] or fp[CCCC] |
| .L${uniq}_01_call: |
| jalr s8 // args a0, and maybe a1, fa0 |
| beqz s9, .L${uniq}_next_op // no shorty, no scalar return |
| % maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_0") |
| // a0 := fa0 if float return |
| j .L${uniq}_next_op |
| |
| .L${uniq}_slow: |
| % get_shorty_save_a0(shorty="s9", y0="s10") |
| % if range == 'Range': |
| % slow_setup_args_range(shorty="s9", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s10", arg_start="0", uniq=uniq) |
| % else: |
| % slow_setup_args(shorty="s9", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", arg_start="0", uniq=uniq) |
| %#: |
| jalr s8 // args (regular) a0 - a5 and fa0 - fa4, (range) a0 - a7 and fa0 - fa7 and maybe stack |
| % maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1") |
| // a0 := fa0 if float return |
| .L${uniq}_next_op: |
| FETCH_ADVANCE_INST 3 |
| GET_INST_OPCODE t0 |
| GOTO_OPCODE t0 |
| |
| |
| // NterpInvokeInterface |
| // a0: the target interface method |
| // - ignored in nterp-to-nterp transfer |
| // - preserved through shorty calls |
| // - side-loaded in T0 as a "hidden argument" in managed ABI transfer |
| // a1: this |
| // a2: ArtMethod* |
| // s7: vreg ids F|E|D|C |
| %def nterp_invoke_interface(uniq="invoke_interface", range=""): |
| // We immediately adjust the incoming arguments to suit the rest of the invoke. |
| mv t0, a0 // t0 := hidden arg, preserve until quick call |
| mv a0, a2 // a0 := ArtMethod* |
| |
| ld s8, ART_METHOD_QUICK_CODE_OFFSET_64(a0) |
| // s8 := quick code |
| % try_nterp(quick="s8", z0="t1", skip=f".L{uniq}_simple") |
| call NterpToNterpInstance${range} // args a0, a1 |
| j .L${uniq}_next_op |
| |
| .L${uniq}_simple: |
| % if range == 'Range': |
| % try_simple_args_range(vC="s7", z0="t1", z1="t2", z2="t3", z3="t4", z4="t5", skip=f".L{uniq}_01", uniq=uniq) |
| % else: |
| % try_simple_args(v_fedc="s7", z0="t1", z1="t2", skip=f".L{uniq}_01", uniq=uniq) |
| %#: |
| jalr s8 // args (regular) a0 - a5 and t0, (range) a0 - a7 and t0 and maybe stack |
| j .L${uniq}_next_op |
| |
| .L${uniq}_01: |
| mv s9, zero // initialize shorty reg |
| % try_01_args(vreg="s7", shorty="s9", z0="t1", z1="t2", z2="t3", y0="s10", y1="s11", y2="s0", interface=True, skip=f".L{uniq}_slow", call=f".L{uniq}_01_call", uniq=uniq, range=range) |
| // if s9 := shorty, then maybe (a2, fa0) := fp[D] or fp[CCCC + 1] |
| // (xINST clobbered, if taking this fast path) |
| .L${uniq}_01_call: |
| jalr s8 // args a0, a1, and t0, and maybe a2, fa0 |
| beqz s9, .L${uniq}_next_op // no shorty, no scalar return |
| % maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_0") |
| // a0 := fa0 if float return |
| j .L${uniq}_next_op |
| |
| .L${uniq}_slow: |
| % get_shorty_for_interface_save_a0_a1_t0(shorty="s9", y0="s10", y1="s11", y2="s0") |
| % if range == 'Range': |
| % slow_setup_args_range(shorty="s9", vC="s7", z0="s10", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s11", uniq=uniq) |
| % else: |
| % slow_setup_args(shorty="s9", vregs="s7", z0="s10", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", uniq=uniq) |
| %#: |
| jalr s8 // args (regular) a0 - a5, fa0 - fa4, t0, (range) a0 - a7, fa0 - fa7, t0 |
| % maybe_float_returned(shorty="s9", z0="t0", z1="t1", uniq=f"{uniq}_1") |
| // a0 := fa0 if float return |
| .L${uniq}_next_op: |
| FETCH_ADVANCE_INST 3 |
| GET_INST_OPCODE t0 |
| GOTO_OPCODE t0 |
| |
| |
| // NterpInvokePolymorphic |
| // a0: shorty |
| // a1: receiver this |
| // s7: (regular) vreg ids F|E|D|C, (range) vreg id CCCC |
| %def nterp_invoke_polymorphic(uniq="invoke_polymorphic", range=""): |
| % if range == "Range": |
| % slow_setup_args_range(shorty="a0", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s8", uniq=uniq) |
| % else: |
| % slow_setup_args(shorty="a0", vregs="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", uniq=uniq) |
| %#: |
| // Managed ABI argument regs get spilled to stack and consumed by artInvokePolymorphic. |
| call art_quick_invoke_polymorphic // args a1 - a7, fa0 - fa7, and maybe stack |
| // Note: If float return, artInvokePolymorphic will place the value in A0, as Nterp expects. |
| FETCH_ADVANCE_INST 4 |
| GET_INST_OPCODE t0 |
| GOTO_OPCODE t0 |
| |
| |
| // NterpInvokeCustom |
| // a0: BBBB |
| // s7: shorty |
| // s8: (regular) vreg ids F|E|D|C, (range) vreg id CCCC |
| %def nterp_invoke_custom(uniq="invoke_custom", range=""): |
| % if range == "Range": |
| % slow_setup_args_range(shorty="s7", vC="s8", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", z7="s9", arg_start="0", uniq=uniq) |
| % else: |
| % slow_setup_args(shorty="s7", vregs="s8", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", z6="t6", arg_start="0", uniq=uniq) |
| %#: |
| // Managed ABI argument regs get spilled to stack and consumed by artInvokeCustom. |
| call art_quick_invoke_custom // args a0 - a7, fa0 - fa7, and maybe stack |
| // Note: If float return, artInvokeCustom will place the value in A0, as Nterp expects. |
| FETCH_ADVANCE_INST 3 |
| GET_INST_OPCODE t0 |
| GOTO_OPCODE t0 |
| |
| |
| // NterpInvokeVirtualRange |
| // a0: ArtMethod* |
| // a1: this |
| // s7: vreg id CCCC |
| %def nterp_invoke_virtual_range(): |
| % nterp_invoke_direct(uniq="invoke_virtual_range", range="Range") |
| |
| |
| // NterpInvokeSuperRange |
| // a0: ArtMethod* |
| // a1: this |
| // s7: vreg id CCCC |
| %def nterp_invoke_super_range(): |
| % nterp_invoke_direct(uniq="invoke_super_range", range="Range") |
| |
| |
| // NterpInvokeDirectRange |
| // Hardcoded: |
| // a0: ArtMethod* |
| // a1: this |
| // s7: vreg id CCCC |
| %def nterp_invoke_direct_range(): |
| % nterp_invoke_direct(uniq="invoke_direct_range", range="Range") |
| |
| |
| // NterpInvokeStringInitRange |
| // a0: ArtMethod* |
| // s7: vreg id CCCC |
| %def nterp_invoke_string_init_range(): |
| % nterp_invoke_string_init(uniq="invoke_string_init_range", range="Range") |
| |
| |
| // NterpInvokeStaticRange |
| // a0: ArtMethod* |
| // s7: vreg id CCCC |
| %def nterp_invoke_static_range(): |
| % nterp_invoke_static(uniq="invoke_static_range", range="Range") |
| |
| |
| // NterpInvokeInterfaceRange |
| // a0: the target interface method |
| // - ignored in nterp-to-nterp transfer |
| // - preserved through shorty calls |
| // - side-loaded in T0 as a "hidden argument" in managed ABI transfer |
| // a1: this |
| // a2: ArtMethod* |
| // s7: vreg id CCCC |
| %def nterp_invoke_interface_range(): |
| % nterp_invoke_interface(uniq="invoke_interface_range", range="Range") |
| |
| |
| // NterpInvokePolymorphicRange |
| %def nterp_invoke_polymorphic_range(): |
| % nterp_invoke_polymorphic(uniq="invoke_polymorphic_range", range="Range") |
| |
| |
| // NterpInvokeCustomRange |
| %def nterp_invoke_custom_range(): |
| % nterp_invoke_custom(uniq="invoke_custom_range", range="Range") |
| |
| |
| // fast path and slow path helpers |
| |
| |
| // Input |
| // - quick: quick code ptr |
| // Temporaries: z0 |
| %def try_nterp(quick="", z0="", skip=""): |
| lla $z0, ExecuteNterpImpl |
| bne $z0, $quick, $skip |
| |
| |
| // Hardcoded |
| // - a0: ArtMethod* |
| // - xINST |
| // Input |
| // - v_fedc: vreg ids F|E|D|C |
| // Temporaries: z0, z1 |
| %def try_simple_args(v_fedc="", z0="", z1="", arg_start="1", skip="", uniq=""): |
| lwu $z0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0) |
| // The meaning of nterp-invoke-fast-path-flag for RISC-V diverges from other ISAs. |
| BRANCH_IF_BIT_CLEAR $z0, $z0, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, $skip |
| |
| srliw $z0, xINST, 12 // z0 := A |
| % if arg_start == "0": |
| beqz $z0, .L${uniq}_simple_done // A = 0: no further args. |
| %#: |
| li $z1, 2 |
| blt $z0, $z1, .L${uniq}_simple_1 // A = 1 |
| beq $z0, $z1, .L${uniq}_simple_2 // A = 2 |
| li $z1, 4 |
| blt $z0, $z1, .L${uniq}_simple_3 // A = 3 |
| beq $z0, $z1, .L${uniq}_simple_4 // A = 4 |
| // A = 5 |
| srliw $z1, xINST, 8 // z1 := A|G |
| andi $z1, $z1, 0xF // z1 := G |
| GET_VREG_OBJECT a5, $z1 |
| .L${uniq}_simple_4: |
| srliw $z1, $v_fedc, 12 // z1 := F |
| GET_VREG_OBJECT a4, $z1 |
| .L${uniq}_simple_3: |
| srliw $z1, $v_fedc, 8 // z1 := F|E |
| andi $z1, $z1, 0xF // z1 := E |
| GET_VREG_OBJECT a3, $z1 |
| .L${uniq}_simple_2: |
| srliw $z1, $v_fedc, 4 // z1 := F|E|D |
| andi $z1, $z1, 0xF // z1 := D |
| GET_VREG_OBJECT a2, $z1 |
| .L${uniq}_simple_1: |
| % if arg_start == "0": |
| andi $z1, $v_fedc, 0xF // z1 := C |
| GET_VREG_OBJECT a1, $z1 |
| // instance: a1 already set to "this" |
| .L${uniq}_simple_done: |
| |
| |
| // Range variant. |
| %def try_simple_args_range(vC="", z0="", z1="", z2="", z3="", z4="", skip="", arg_start="1", uniq=""): |
| lwu $z0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0) |
| // The meaning of nterp-invoke-fast-path-flag for RISC-V diverges from other ISAs. |
| BRANCH_IF_BIT_CLEAR $z0, $z0, ART_METHOD_NTERP_INVOKE_FAST_PATH_FLAG_BIT, $skip |
| |
| srliw $z0, xINST, 8 // z0 := AA |
| % if arg_start == "0": # static: |
| beqz $z0, .L${uniq}_simple_done // AA = 0: no further args. |
| sh2add $z1, $vC, xFP // z1 := &FP[CCCC] |
| li $z2, 2 |
| blt $z0, $z2, .L${uniq}_simple_1 // AA = 1 |
| % else: # instance: |
| li $z2, 2 |
| blt $z0, $z2, .L${uniq}_simple_done // AA = 1, and a1 already loaded. |
| sh2add $z1, $vC, xFP // z1 := &FP[CCCC] |
| %#: |
| // Here: z0, z1, z2 same values for static vs instance. |
| beq $z0, $z2, .L${uniq}_simple_2 // AA = 2 |
| li $z2, 4 |
| blt $z0, $z2, .L${uniq}_simple_3 // AA = 3 |
| beq $z0, $z2, .L${uniq}_simple_4 // AA = 4 |
| li $z2, 6 |
| blt $z0, $z2, .L${uniq}_simple_5 // AA = 5 |
| beq $z0, $z2, .L${uniq}_simple_6 // AA = 6 |
| li $z2, 7 |
| beq $z0, $z2, .L${uniq}_simple_7 // AA = 7 |
| |
| // AA >= 8: store in stack. Load/store from FP[CCCC + 7] upwards. |
| slli $z2, $z0, 63 // z2 := negative if z0 bit #0 is set (odd) |
sh2add $z0, $z0, $z1 // z0 := &FP[CCCC + AA], loop guard
| addi $z3, $z1, 7*4 // z3 := &FP[CCCC + 7] |
| addi $z4, sp, __SIZEOF_POINTER__ + 7*4 |
| // z4 := &OUT[CCCC + 7] |
| bltz $z2, .L${uniq}_simple_loop_wide |
| // if AA odd, branch to wide-copy |
| lwu $z2, ($z3) |
| sw $z2, ($z4) |
| addi $z3, $z3, 4 |
| addi $z4, $z4, 4 |
| |
| .L${uniq}_simple_loop_wide: |
| // TODO: Consider ensuring 64-bit stores are aligned. |
| beq $z3, $z0, .L${uniq}_simple_7 |
| ld $z2, ($z3) |
| sd $z2, ($z4) |
| addi $z3, $z3, 8 |
| addi $z4, $z4, 8 |
| j .L${uniq}_simple_loop_wide |
| |
| // Bottom 7 slots of OUT array never written; first args are passed with a1-a7. |
| .L${uniq}_simple_7: |
| lwu a7, 6*4($z1) |
| .L${uniq}_simple_6: |
| lwu a6, 5*4($z1) |
| .L${uniq}_simple_5: |
| lwu a5, 4*4($z1) |
| .L${uniq}_simple_4: |
| lwu a4, 3*4($z1) |
| .L${uniq}_simple_3: |
| lwu a3, 2*4($z1) |
| .L${uniq}_simple_2: |
| lwu a2, 1*4($z1) |
| .L${uniq}_simple_1: |
| % if arg_start == "0": # static: |
| lwu a1, 0*4($z1) |
| %#: |
| .L${uniq}_simple_done: |
| |
| |
| // Check if a 0/1 arg invoke form is possible, set up a2 and fa0 if needed. |
| // If a return value expected, move possible float return to a0. |
| // Hardcoded: xINST, xPC, xFP, a0, a1, t0, fa0 |
| // NOTE xINST clobbered if interface=True and we're taking the fast path. |
| // zN are temporaries, yN are callee-save |
| %def try_01_args(vreg="", shorty="", z0="", z1="", z2="", y0="", y1="", y2="", interface=False, skip="", call="", uniq="", range=""): |
| % if range == 'Range': |
| srliw $y0, xINST, 8 // y0 := AA |
| % else: |
| srliw $y0, xINST, 12 // y0 := A |
| %#: |
| addi $y0, $y0, -2 // y0 := A - 2 or (range) AA - 2 |
| bgtz $y0, $skip // 2+ args: slow path |
| beqz $y0, .L${uniq}_01_shorty // this and 1 arg: determine arg type with shorty |
| // 0 args |
| % try_01_args_peek_next(z0=z0) # z0 is zero if invoke has scalar return |
| bnez $z0, $call // Non-scalar return, 0 args: make the call. |
| // Scalar return, 0 args: determine return type with shorty |
| |
| .L${uniq}_01_shorty: |
| // Get shorty, stash in callee-save to be available on return. |
| // When getting shorty, stash this fast path's A0 and A1, then restore. |
| % if interface: |
| // xINST is a regular callee save. Safe: orig xINST value unused before FETCH_ADVANCE_INST. |
| % get_shorty_for_interface_save_a0_a1_t0(shorty=shorty, y0=y1, y1=y2, y2="xINST") |
| % else: |
| % get_shorty_save_a0_a1(shorty=shorty, y0=y1, y1=y2) |
| %#: |
| // shorty assigned |
| bltz $y0, $call // Scalar return, 0 args: make the call. |
| // ins = 2: this and 1 arg. Load arg type. |
| lb $z0, 1($shorty) // z0 := first arg |
| li $z1, 'L' // ref type |
| % if range == 'Range': |
| sh2add $z2, $vreg, xFP // z2 := &fp[CCCC] |
| lwu a2, 4($z2) // a2 := fp[CCCC + 1], zext |
| % else: |
| srliw $z2, $vreg, 4 // z2 := F|E|D |
| andi $z2, $z2, 0xF // z2 := D |
| sh2add $z2, $z2, xFP // z2 := &fp[D] |
| lwu a2, ($z2) // a2 := fp[D], zext |
| %#: |
| beq $z0, $z1, $call // ref type: LWU into a2 |
| // non-'L' type |
| fmv.w.x fa0, a2 // overload of managed ABI, for one arg |
| sext.w a2, a2 // scalar type: LW into a2 |
| // immediately followed by call |
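// E.g. (illustrative): with shorty "VI" or "VF" the single arg is loaded once into a2, mirrored
// raw into fa0, and sign-extended in a2; an 'F' callee reads fa0, an 'I' callee reads a2, and
// the wrong-type register is simply ignored.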
| |
| |
| // Static variant. |
| %def try_01_args_static(vreg="", shorty="", z0="", z1="", z2="", y0="", y1="", skip="", call="", uniq="", range=""): |
| % if range == 'Range': |
| srliw $y0, xINST, 8 // y0 := AA |
| % else: |
| srliw $y0, xINST, 12 // y0 := A |
| %#: |
| addi $y0, $y0, -1 // y0 := A - 1 or (range) AA - 1 |
| bgtz $y0, $skip // 2+ args: slow path |
| beqz $y0, .L${uniq}_01_shorty // 1 arg: determine arg type with shorty |
| // 0 args |
| % try_01_args_peek_next(z0=z0) # z0 is zero if invoke has scalar return |
| bnez $z0, $call // Non-scalar return, 0 args: make the call. |
| // Scalar return, 0 args: determine return type with shorty. |
| |
| .L${uniq}_01_shorty: |
| // Get shorty, stash in callee-save to be available on return. |
| // When getting shorty, stash this fast path's A0 then restore. |
| % get_shorty_save_a0(shorty=shorty, y0=y1) |
| // shorty assigned |
| bltz $y0, $call // Scalar return, 0 args: make the call. |
| // ins = 1: load arg type |
| lb $z0, 1($shorty) // z0 := first arg |
| li $z1, 'L' // ref type |
| % if range == 'Range': |
| sh2add $z2, $vreg, xFP // z2 := &fp[CCCC] |
| % else: |
| andi $z2, $vreg, 0xF // z2 := C |
| sh2add $z2, $z2, xFP // z2 := &fp[C] |
| %#: |
| lwu a1, ($z2) // a1 := fp[C] or (range) fp[CCCC], zext |
| beq $z0, $z1, $call // ref type: LWU into a1 |
| // non-'L' type |
| fmv.w.x fa0, a1 // overload of managed ABI, for one arg |
| sext.w a1, a1 // scalar type: LW into a1 |
| // immediately followed by call |
| |
| |
| %def try_01_args_peek_next(z0=""): |
| FETCH $z0, count=3, width=8, byte=0 |
| // z0 := next op |
| andi $z0, $z0, ~1 // clear bit #0 |
| addi $z0, $z0, -0x0A // z0 := zero if op is 0x0A or 0x0B |
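// E.g.: 0x0a (move-result) and 0x0b (move-result-wide) both yield z0 = 0; 0x0c
// (move-result-object) yields z0 = 2, and any non-move-result opcode is likewise nonzero.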
| |
| |
| // The invoked method might return in FA0, via managed ABI. |
| // The next opcode, MOVE-RESULT{-WIDE}, expects the value in A0. |
| %def maybe_float_returned(shorty="", z0="", z1="", uniq=""): |
| lb $z0, ($shorty) // z0 := first byte of shorty; type of return |
| li $z1, 'F' // |
| beq $z0, $z1, .L${uniq}_float_return_move |
| li $z1, 'D' // |
| bne $z0, $z1, .L${uniq}_float_return_done |
| .L${uniq}_float_return_move: |
| // If fa0 carries a 32-bit float, the hi bits of fa0 will contain all 1's (NaN boxing). |
| // The use of fmv.x.d will transfer those hi bits into a0, and that's okay, because the next |
| // opcode, move-result, will only read the lo 32-bits of a0 - the box bits are correctly ignored. |
| // If fa0 carries a 64-bit float, then fmv.x.d works as expected. |
| fmv.x.d a0, fa0 |
| .L${uniq}_float_return_done: |
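// E.g. (illustrative): 1.0f comes back in fa0 as the NaN-boxed bits 0xffffffff3f800000; the
// FMV.X.D above copies all 64 bits into a0, and move-result consumes only the low word
// 0x3f800000.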
| |
| |
| // Hardcoded: |
| // - a0: ArtMethod* |
| // - a1: this |
| // Callee-saves: y0, y1 |
| %def get_shorty_save_a0_a1(shorty="", y0="", y1=""): |
| mv $y1, a1 |
| mv $y0, a0 |
| call NterpGetShorty // arg a0 |
| mv $shorty, a0 |
| mv a0, $y0 |
| mv a1, $y1 |
| |
| |
| // Static variant. |
| // Hardcoded: |
| // - a0: ArtMethod* |
| // Callee-saves: y0 |
| %def get_shorty_save_a0(shorty="", y0=""): |
| mv $y0, a0 |
| call NterpGetShorty // arg a0 |
| mv $shorty, a0 |
| mv a0, $y0 |
| |
| |
| // Interface variant. |
| // Hardcoded: |
| // - a0: ArtMethod* |
| // - a1: this |
| // - t0: "hidden argument" |
| // Callee-saves: y0, y1, y2 |
| %def get_shorty_for_interface_save_a0_a1_t0(shorty="", y0="", y1="", y2=""): |
| mv $y2, t0 |
| mv $y1, a1 |
| mv $y0, a0 |
| ld a0, (sp) // a0 := caller ArtMethod* |
| FETCH reg=a1, count=1 // a1 := BBBB method idx |
| call NterpGetShortyFromMethodId |
| mv $shorty, a0 |
| mv a0, $y0 |
| mv a1, $y1 |
| mv t0, $y2 |
| |
| |
| // Hardcoded: xFP, xREFS |
| // Starting with vreg index 0, replace any old reference with new reference. |
| %def subst_vreg_references(old="", new="", z0="", z1="", z2="", uniq=""): |
| mv $z0, xFP // z0 := &fp[0] |
| mv $z1, xREFS // z1 := &refs[0] |
| .L${uniq}_subst_try: |
| lwu $z2, ($z1) |
| bne $z2, $old, .L${uniq}_subst_next |
| sw $new, ($z0) |
| sw $new, ($z1) |
| .L${uniq}_subst_next: |
| addi $z0, $z0, 4 |
| addi $z1, $z1, 4 |
| bne $z1, xFP, .L${uniq}_subst_try |
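// E.g.: if v0 and v2 both held the receiver placeholder before the string factory call, both
// fp/refs slot pairs are rewritten to the factory-returned string.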
| |
| |
| // Hardcoded |
| // - a0: ArtMethod* |
| // - a1: this |
| // Input |
| // - vregs: F|E|D|C from dex |
| %def slow_setup_args(shorty="", vregs="", z0="", z1="", z2="", z3="", z4="", z5="", z6="", arg_start="1", uniq=""): |
| srliw $z0, xINST, 12 // z0 := A |
| li $z1, 5 |
| blt $z0, $z1, .L${uniq}_slow_gpr |
| // A = 5: need vreg G |
| srliw $z1, xINST, 8 // z1 := A|G |
| andi $z1, $z1, 0xF // z1 := G |
| slliw $z1, $z1, 16 // z1 := G0000 |
| add $vregs, $z1, $vregs // vregs := G|F|E|D|C |
| |
| .L${uniq}_slow_gpr: |
| addi $z0, $shorty, 1 // z0 := first arg of shorty |
| srliw $z1, $vregs, 4*$arg_start |
| // z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C |
| li $z2, 'D' // double |
| li $z3, 'F' // float |
| li $z4, 'J' // long |
| li $z5, 'L' // ref |
| // linear scan through shorty: extract non-float vregs |
| % if arg_start == "0": # static can place vC into a1; instance already loaded "this" into a1. |
| % load_vreg_in_gpr(gpr="a1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_0") |
| % load_vreg_in_gpr(gpr="a2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1") |
| % load_vreg_in_gpr(gpr="a3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2") |
| % load_vreg_in_gpr(gpr="a4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3") |
| % load_vreg_in_gpr(gpr="a5", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, L=z5, z0=z6, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4") |
| |
| .L${uniq}_slow_fpr: |
| addi $z0, $shorty, 1 // z0 := first arg of shorty |
| srliw $z1, $vregs, 4*$arg_start |
| // z1 := (instance) F|E|D or G|F|E|D, (static) F|E|D|C or G|F|E|D|C |
| // linear scan through shorty: extract float/double vregs |
| % load_vreg_in_fpr(fpr="fa0", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_0") |
| % load_vreg_in_fpr(fpr="fa1", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_1") |
| % load_vreg_in_fpr(fpr="fa2", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_2") |
| % load_vreg_in_fpr(fpr="fa3", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_3") |
| % if arg_start == "0": # static can place G into fa4; instance has only 4 args. |
| % load_vreg_in_fpr(fpr="fa4", shorty=z0, vregs=z1, D=z2, F=z3, J=z4, z0=z5, done=f".L{uniq}_slow_done", uniq=f"{uniq}_4") |
| %#: |
| .L${uniq}_slow_done: |
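// Worked example, illustrative only: invoke-virtual {v1, v2, v3, v4} with shorty "DLIF"
// (args: ref, int, float). The GPR scan starts after "this" (already in a1): 'L' -> a2
// (zero-extended), 'I' -> a3 (sign-extended), 'F' skipped. The FPR scan then skips the 'L' and
// 'I' vregs and loads 'F' -> fa0. The 'D' return is fixed up after the call by
// maybe_float_returned.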
| |
| |
| // String-init variant: up to 4 args, no long/double/float args. |
| // Ref args ('L') loaded with LW *must* apply ZEXT.W to avoid subtle address bugs. |
| %def slow_setup_args_string_init(shorty="", v_fedc="", z0="", z1="", z2="", uniq=""): |
| srliw $z0, xINST, 12 // z0 := A; possible values 1-5 |
| li $z1, 2 |
| blt $z0, $z1, .L${uniq}_slow_1 // A = 1 |
| li $z2, 'L' // z2 := ref type |
| beq $z0, $z1, .L${uniq}_slow_2 // A = 2 |
| li $z1, 4 |
| blt $z0, $z1, .L${uniq}_slow_3 // A = 3 |
| beq $z0, $z1, .L${uniq}_slow_4 // A = 4 |
| |
| // A = 5 |
| srliw $z0, xINST, 8 // z0 := A|G |
| andi $z0, $z0, 0xF // z0 := G |
| % get_vreg("a4", z0) |
| lb $z1, 4($shorty) // shorty RDEFG |
| bne $z1, $z2, .L${uniq}_slow_4 |
| zext.w a4, a4 |
| .L${uniq}_slow_4: |
| srliw $z1, $v_fedc, 12 // z1 := F |
| % get_vreg("a3", z1) |
| lb $z1, 3($shorty) // shorty RDEF |
| bne $z1, $z2, .L${uniq}_slow_3 |
| zext.w a3, a3 |
| .L${uniq}_slow_3: |
| srliw $z1, $v_fedc, 8 // z1 := F|E |
| andi $z1, $z1, 0xF // z1 := E |
| % get_vreg("a2", z1) |
| lb $z1, 2($shorty) // shorty RDE |
| bne $z1, $z2, .L${uniq}_slow_2 |
| zext.w a2, a2 |
| .L${uniq}_slow_2: |
| srliw $z1, $v_fedc, 4 // z1 := F|E|D |
| andi $z1, $z1, 0xF // z1 := D |
| % get_vreg("a1", z1) |
| lb $z1, 1($shorty) // shorty RD |
| bne $z1, $z2, .L${uniq}_slow_1 |
| zext.w a1, a1 |
| .L${uniq}_slow_1: |
| // "this" never read in string-init |
| |
| |
| // Range and static-range variant. |
| // Hardcoded |
| // - (caller) xPC, xINST, xFP |
| // - (callee) sp |
| // Input |
| // - vC: CCCC from dex |
| %def slow_setup_args_range(shorty="", vC="", z0="", z1="", z2="", z3="", z4="", z5="", z6="", z7="", arg_start="1", uniq=""): |
| addi $z0, $shorty, 1 // z0 := first arg of shorty |
| addi $z1, $vC, $arg_start // z1 := (instance) CCCC+1, (static) CCCC |
| mv $z2, zero // z2 := is_out_stack_needed false |
| li $z3, 'D' // double |
| li $z4, 'F' // float |
| li $z5, 'J' // long |
| li $z6, 'L' // ref |
| |
| // linear scan through shorty: extract non-float vregs |
| % if arg_start == "0": # static can place vCCCC into a1; instance already loaded "this" into a1. |
| % load_vreg_in_gpr_range(gpr="a1", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_1") |
| % load_vreg_in_gpr_range(gpr="a2", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_2") |
| % load_vreg_in_gpr_range(gpr="a3", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_3") |
| % load_vreg_in_gpr_range(gpr="a4", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_4") |
| % load_vreg_in_gpr_range(gpr="a5", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_5") |
| % load_vreg_in_gpr_range(gpr="a6", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_6") |
| % load_vreg_in_gpr_range(gpr="a7", shorty=z0, idx=z1, D=z3, F=z4, J=z5, L=z6, z0=z7, done=f".L{uniq}_slow_fpr", uniq=f"{uniq}_7") |
| % is_out_stack_needed(needed=z2, shorty=z0, D=z3, F=z4, z0=z1, uniq=uniq) |
| |
| .L${uniq}_slow_fpr: |
| addi $z0, $shorty, 1 // z0 := first arg of shorty |
| addi $z1, $vC, $arg_start // z1 := (instance) CCCC+1, (static) CCCC |
| // linear scan through shorty: extract float/double vregs |
| % load_vreg_in_fpr_range(fpr="fa0", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_0") |
| % load_vreg_in_fpr_range(fpr="fa1", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_1") |
| % load_vreg_in_fpr_range(fpr="fa2", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_2") |
| % load_vreg_in_fpr_range(fpr="fa3", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_3") |
| % load_vreg_in_fpr_range(fpr="fa4", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_4") |
| % load_vreg_in_fpr_range(fpr="fa5", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_5") |
| % load_vreg_in_fpr_range(fpr="fa6", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_6") |
| % load_vreg_in_fpr_range(fpr="fa7", shorty=z0, idx=z1, D=z3, F=z4, J=z5, z0=z6, done=f".L{uniq}_slow_stack", uniq=f"{uniq}_7") |
| % is_out_stack_needed_float(needed=z2, shorty=z0, D=z3, F=z4, z0=z1, uniq=uniq) |
| |
| .L${uniq}_slow_stack: |
beqz $z2, .L${uniq}_slow_done // No outs on the stack: skip. Otherwise copy all the vregs out with LD/SD.
| addi $z0, sp, 8 // z0 := base addr of out array |
| sh2add $z1, $vC, xFP // z1 := base addr of FP[CCCC] |
| srliw $z2, xINST, 8 // z2 := AA, vreg count |
| sh2add $z2, $z2, $z1 // z2 := loop guard, addr of one slot past top of xFP array |
| % copy_vregs_to_out(out=z0, fp=z1, fp_top=z2, z0=z3, uniq=uniq) |
| .L${uniq}_slow_done: |
| |
| |
| // String-init variant: up to 4 args, no long/float/double args. |
| // Ref args ('L') loaded with LW *must* apply ZEXT.W to avoid subtle address bugs. |
| %def slow_setup_args_string_init_range(shorty="", vC="", z0="", z1="", z2="", z3="", uniq=""): |
| srliw $z0, xINST, 8 // z0 := AA; possible values 1-5 |
| li $z1, 2 |
blt $z0, $z1, .L${uniq}_slow_1 // AA = 1
sh2add $z2, $vC, xFP // z2 := &fp[CCCC]
li $z3, 'L' // z3 := ref type
beq $z0, $z1, .L${uniq}_slow_2 // AA = 2
li $z1, 4
blt $z0, $z1, .L${uniq}_slow_3 // AA = 3
beq $z0, $z1, .L${uniq}_slow_4 // AA = 4
// AA = 5
| lw a4, 4*4($z2) |
| lb $z1, 4($shorty) |
| bne $z1, $z3, .L${uniq}_slow_4 |
| zext.w a4, a4 |
| .L${uniq}_slow_4: |
| lw a3, 3*4($z2) |
| lb $z1, 3($shorty) |
| bne $z1, $z3, .L${uniq}_slow_3 |
| zext.w a3, a3 |
| .L${uniq}_slow_3: |
| lw a2, 2*4($z2) |
| lb $z1, 2($shorty) |
| bne $z1, $z3, .L${uniq}_slow_2 |
| zext.w a2, a2 |
| .L${uniq}_slow_2: |
| lw a1, 1*4($z2) |
| lb $z1, 1($shorty) |
| bne $z1, $z3, .L${uniq}_slow_1 |
| zext.w a1, a1 |
| .L${uniq}_slow_1: |
| // "this" never read in string-init |
| |
| |
| // Iterate through 4-bit vreg ids in the "vregs" register, load a non-FP value |
| // into one argument register. |
| %def load_vreg_in_gpr(gpr="", shorty="", vregs="", D="", F="", J="", L="", z0="", done="", uniq=""): |
| .L${uniq}_gpr_find: |
| lb $z0, ($shorty) // z0 := next shorty arg spec |
| addi $shorty, $shorty, 1 // increment char ptr |
| beqz $z0, $done // z0 == \0 |
| beq $z0, $F, .L${uniq}_gpr_skip_4_bytes |
| beq $z0, $D, .L${uniq}_gpr_skip_8_bytes |
| |
| andi $gpr, $vregs, 0xF // gpr := vreg id |
| beq $z0, $J, .L${uniq}_gpr_load_8_bytes |
| % get_vreg(gpr, gpr) # gpr := 32-bit load |
| bne $z0, $L, .L${uniq}_gpr_load_common |
| zext.w $gpr, $gpr |
| .L${uniq}_gpr_load_common: |
| srliw $vregs, $vregs, 4 // shift out the processed arg, one vreg |
| j .L${uniq}_gpr_set // and exit |
| .L${uniq}_gpr_load_8_bytes: |
| GET_VREG_WIDE $gpr, $gpr // gpr := 64-bit load |
| srliw $vregs, $vregs, 8 // shift out the processed arg, a vreg pair |
| j .L${uniq}_gpr_set // and exit |
| |
| .L${uniq}_gpr_skip_8_bytes: |
| srliw $vregs, $vregs, 4 // shift out a skipped arg |
| .L${uniq}_gpr_skip_4_bytes: |
| srliw $vregs, $vregs, 4 // shift out a skipped arg |
| j .L${uniq}_gpr_find |
| .L${uniq}_gpr_set: |
| |
| |
| // Iterate through 4-bit vreg ids in the "vregs" register, load a float or double |
| // value into one floating point argument register. |
| %def load_vreg_in_fpr(fpr="", shorty="", vregs="", D="", F="", J="", z0="", done="", uniq=""): |
| .L${uniq}_fpr_find: |
| lb $z0, ($shorty) // z0 := next shorty arg spec |
| addi $shorty, $shorty, 1 // increment char ptr |
| beqz $z0, $done // z0 == \0 |
| beq $z0, $F, .L${uniq}_fpr_load_4_bytes |
| beq $z0, $D, .L${uniq}_fpr_load_8_bytes |
| |
| srliw $vregs, $vregs, 4 // shift out a skipped arg, one vreg |
| bne $z0, $J, .L${uniq}_fpr_find |
| srliw $vregs, $vregs, 4 // shift out one more skipped arg, for J |
| j .L${uniq}_fpr_find |
| |
| .L${uniq}_fpr_load_4_bytes: |
| andi $z0, $vregs, 0xF |
| % get_vreg_float(fpr, z0) |
| srliw $vregs, $vregs, 4 // shift out the processed arg, one vreg |
| j .L${uniq}_fpr_set |
| .L${uniq}_fpr_load_8_bytes: |
| andi $z0, $vregs, 0xF |
| GET_VREG_DOUBLE $fpr, $z0 |
| srliw $vregs, $vregs, 8 // shift out the processed arg, a vreg pair |
| .L${uniq}_fpr_set: |
| |
| |
| // Range variant |
| %def load_vreg_in_gpr_range(gpr="", shorty="", idx="", D="", F="", J="", L="", z0="", done="", uniq=""): |
| .L${uniq}_gpr_range_find: |
| lb $z0, ($shorty) // z0 := next shorty arg |
| addi $shorty, $shorty, 1 // increment char ptr |
| beqz $z0, $done // z0 == \0 |
| beq $z0, $F, .L${uniq}_gpr_range_skip_1_vreg |
| beq $z0, $D, .L${uniq}_gpr_range_skip_2_vreg |
| |
| beq $z0, $J, .L${uniq}_gpr_range_load_2_vreg |
| % get_vreg(gpr, idx) |
| bne $z0, $L, .L${uniq}_gpr_range_load_common |
| zext.w $gpr, $gpr |
| .L${uniq}_gpr_range_load_common: |
| addi $idx, $idx, 1 |
| j .L${uniq}_gpr_range_done |
| .L${uniq}_gpr_range_load_2_vreg: |
| GET_VREG_WIDE $gpr, $idx |
| addi $idx, $idx, 2 |
| j .L${uniq}_gpr_range_done |
| |
| .L${uniq}_gpr_range_skip_2_vreg: |
| addi $idx, $idx, 1 |
| .L${uniq}_gpr_range_skip_1_vreg: |
| addi $idx, $idx, 1 |
| j .L${uniq}_gpr_range_find |
| .L${uniq}_gpr_range_done: |
| |
| |
| // Range variant. |
| %def load_vreg_in_fpr_range(fpr="", shorty="", idx="", D="", F="", J="", z0="", done="", uniq=""): |
| .L${uniq}_fpr_range_find: |
| lb $z0, ($shorty) // z0 := next shorty arg |
| addi $shorty, $shorty, 1 // increment char ptr |
| beqz $z0, $done // z0 == \0 |
| beq $z0, $F, .L${uniq}_fpr_range_load_4_bytes |
| beq $z0, $D, .L${uniq}_fpr_range_load_8_bytes |
| |
| addi $idx, $idx, 1 // increment idx |
| bne $z0, $J, .L${uniq}_fpr_range_find |
| addi $idx, $idx, 1 // increment once more for J |
| j .L${uniq}_fpr_range_find |
| |
| .L${uniq}_fpr_range_load_4_bytes: |
| mv $z0, $idx |
| % get_vreg_float(fpr, z0) |
| addi $idx, $idx, 1 |
| j .L${uniq}_fpr_range_set |
| .L${uniq}_fpr_range_load_8_bytes: |
| mv $z0, $idx |
| GET_VREG_DOUBLE $fpr, $z0 |
| addi $idx, $idx, 2 |
| .L${uniq}_fpr_range_set: |
| |
| |
| %def is_out_stack_needed(needed="", shorty="", D="", F="", z0="", uniq=""): |
| .L${uniq}_scan_arg: |
| lb $z0, ($shorty) |
| addi $shorty, $shorty, 1 |
| beqz $z0, .L${uniq}_scan_done |
| beq $z0, $F, .L${uniq}_scan_arg |
| beq $z0, $D, .L${uniq}_scan_arg |
| li $needed, 1 |
| .L${uniq}_scan_done: |
| |
| |
| %def is_out_stack_needed_float(needed="", shorty="", D="", F="", z0="", uniq=""): |
| bnez $needed, .L${uniq}_scan_float_done |
| .L${uniq}_scan_float_arg: |
| lb $z0, ($shorty) |
| addi $shorty, $shorty, 1 |
| beqz $z0, .L${uniq}_scan_float_done |
| beq $z0, $F, .L${uniq}_scan_float_found |
| beq $z0, $D, .L${uniq}_scan_float_found |
| j .L${uniq}_scan_float_arg |
| .L${uniq}_scan_float_found: |
| li $needed, 1 |
| .L${uniq}_scan_float_done: |
| |
| |
| %def copy_vregs_to_out(out="", fp="", fp_top="", z0="", uniq=""): |
| sub $z0, $fp_top, $fp // z0 := byte range |
BRANCH_IF_BIT_CLEAR $z0, $z0, 2, .L${uniq}_copy_wide
// branch if even count of slots
| lwu $z0, ($fp) |
| sw $z0, ($out) |
| addi $fp, $fp, 4 |
| addi $out, $out, 4 |
| .L${uniq}_copy_wide: |
| beq $fp, $fp_top, .L${uniq}_copy_done |
| ld $z0, ($fp) |
| sd $z0, ($out) |
| addi $fp, $fp, 8 |
| addi $out, $out, 8 |
| j .L${uniq}_copy_wide |
| .L${uniq}_copy_done: |
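// E.g.: 5 slots span 20 bytes, so bit #2 is set and we fall through to one 4-byte copy; the
// loop then moves the remaining 16 bytes as two 8-byte copies.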
| |
| |
| // NterpToNterpInstance |
| // a0: ArtMethod* |
| // a1: this |
| %def nterp_to_nterp_instance(): |
| % nterp_to_nterp(how_vC="in_a1", uniq="n2n_instance") |
| |
| |
| // NterpToNterpStringInit |
| // a0: ArtMethod* |
| %def nterp_to_nterp_string_init(): |
| % nterp_to_nterp(how_vC="skip", uniq="n2n_string_init") |
| |
| |
| // NterpToNterpStatic |
| // a0: ArtMethod* |
| %def nterp_to_nterp_static(): |
| % nterp_to_nterp(a1_instance=False, how_vC="load", uniq="n2n_static") |
| |
| |
| // NterpToNterpInstanceRange |
| %def nterp_to_nterp_instance_range(): |
| % nterp_to_nterp(how_vC="in_a1", uniq="n2n_instance_range", range="Range") |
| |
| |
| // NterpToNterpStringInitRange |
| %def nterp_to_nterp_string_init_range(): |
| % nterp_to_nterp(how_vC="skip", uniq="n2n_string_init_range", range="Range") |
| |
| |
| // NterpToNterpStaticRange |
| %def nterp_to_nterp_static_range(): |
| % nterp_to_nterp(a1_instance=False, how_vC="load", uniq="n2n_static_range", range="Range") |
| |
| |
| // helpers |
| |
| |
| %def nterp_to_nterp(a1_instance=True, how_vC="", uniq="", range=""): |
| .cfi_startproc |
| % setup_nterp_frame(cfi_refs="23", refs="s8", fp="s9", pc="s10", regs="s11", spills_sp="t0", z0="t1", z1="t2", z2="t3", z3="t4", uniq=uniq) |
| // s8 := callee xREFS |
| // s9 := callee xFP |
| // s10 := callee xPC |
| // s11 := fp/refs vreg count |
| // t0 := post-spills pre-frame sp (unused here) |
| // sp := post-frame callee sp |
| % if range == 'Range': |
| % n2n_arg_move_range(refs="s8", fp="s9", regs="s11", vC="s7", z0="t0", z1="t1", z2="t2", z3="t3", z4="t4", z5="t5", a1_instance=a1_instance, how_vC=how_vC, uniq=uniq) |
| % else: |
| % n2n_arg_move(refs="s8", fp="s9", pc="s10", regs="s11", v_fedc="s7", z0="t0", z1="t1", z2="t2", z3="t3", a1_instance=a1_instance, how_vC=how_vC, uniq=uniq) |
| %#: |
| mv xREFS, s8 |
| mv xFP, s9 |
| mv xPC, s10 |
| CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0) |
| |
| START_EXECUTING_INSTRUCTIONS |
| .cfi_endproc |
| |
| |
| // See runtime/nterp_helpers.cc for a diagram of the setup. |
| // Hardcoded |
| // - a0 - ArtMethod* |
| // Input |
| // - \cfi_refs: dwarf register number of \refs, for CFI |
| // - \uniq: string to ensure unique symbolic labels between instantiations |
| // Output |
| // - sp: adjusted downward for callee-saves and nterp frame |
| // - \refs: callee xREFS |
| // - \fp: callee xFP |
| // - \pc: callee xPC |
| // - \regs: register count in \refs |
| // - \ins: in count |
| // - \spills_sp: stack pointer after reg spills |
| %def setup_nterp_frame(cfi_refs="", refs="", fp="", pc="", regs="", ins="zero", spills_sp="", z0="", z1="", z2="", z3="", uniq=""): |
| // Check guard page for stack overflow. |
| li $z0, -STACK_OVERFLOW_RESERVED_BYTES |
| add $z0, $z0, sp |
| ld zero, ($z0) |
| |
| INCREASE_FRAME NTERP_SIZE_SAVE_CALLEE_SAVES |
| // sp := sp + callee-saves |
| SETUP_NTERP_SAVE_CALLEE_SAVES |
| |
| ld $pc, ART_METHOD_DATA_OFFSET_64(a0) |
| FETCH_CODE_ITEM_INFO code_item=$pc, regs=$regs, outs=$z0, ins=$ins |
| // pc := callee dex array |
| // regs := vreg count for fp array and refs array |
| // z0 := vreg count for outs array |
| // ins := vreg count for ins array |
| |
| // Compute required frame size: ((2 * \regs) + \z0) * 4 + 24 |
| // - The register array and reference array each have \regs number of slots. |
| // - The out array has \z0 slots. |
| // - Each register slot is 4 bytes. |
| // - Additional 24 bytes for 3 fields: saved frame pointer, dex pc, and ArtMethod*. |
| sh1add $z1, $regs, $z0 |
| slli $z1, $z1, 2 |
| addi $z1, $z1, 24 // z1 := frame size, without alignment padding |
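// E.g. (illustrative): regs = 10 and z0 = 3 outs give (2*10 + 3)*4 + 24 = 116 bytes, which the
// 16-byte alignment below rounds up to 128 when the incoming sp is 16-byte aligned.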
| |
| // compute new stack pointer |
| sub $z1, sp, $z1 |
| // 16-byte alignment. |
| andi $z1, $z1, ~0xF // z1 := new sp |
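| // Illustrative sizing: \regs = 5 and 2 outs give ((2 * 5) + 2) * 4 + 24 = 72 bytes; the
| // andi then rounds the candidate sp down to a 16-byte boundary, adding up to 15 bytes of
| // padding to the frame.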
| |
| // Set \refs to base of reference array. Align to pointer size for the frame pointer and dex pc |
| // pointer, below the reference array. |
| sh2add $z0, $z0, $z1 // z0 := new sp + out array size in bytes
| addi $z0, $z0, 28 // + 24 bytes for 3 fields, plus 4 for alignment slack. |
| andi $refs, $z0, -__SIZEOF_POINTER__ |
| // refs := refs array base |
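| // Illustrative arithmetic: sp is 16-byte aligned, so sp + outs*4 + 28 is 8-byte aligned
| // only when the out count is odd; for an even count the andi drops the 4 slack bytes.
| // Either way \refs lands 8-byte aligned just above the 3 fields.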
| |
| // Set \fp to base of register array, above the reference array. This region is already aligned. |
| sh2add $fp, $regs, $refs |
| // fp := fp array base |
| |
| // Set up the stack pointer. |
| mv $spills_sp, sp // spills_sp := old sp |
| .cfi_def_cfa_register $spills_sp |
| mv sp, $z1 // sp := new sp |
| sd $spills_sp, -8($refs) |
| // The CFA rule is now a dwarf expression, because the nterp frame offset for SP is a dynamic |
| // value, and thus SP cannot help compute CFA. For the duration of the nterp frame, CFI |
| // directives cannot adjust this CFA rule, but may still capture CFI for register spills as |
| // "register + offset" with a dwarf expression. |
| CFI_DEF_CFA_BREG_PLUS_UCONST $cfi_refs, -8, NTERP_SIZE_SAVE_CALLEE_SAVES |
| |
| // Put nulls in reference array. |
| beqz $regs, .L${uniq}_ref_zero_done |
| mv $z0, $refs // z0 := address iterator |
| .L${uniq}_ref_zero: |
| // Write in 8-byte increments, so fp[0] gets zeroed too, if \regs is odd.
| sd zero, ($z0) |
| addi $z0, $z0, 8 |
| bltu $z0, $fp, .L${uniq}_ref_zero |
| .L${uniq}_ref_zero_done: |
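| // E.g. \regs = 3 (illustrative): the refs array is 12 bytes, so the loop stores at
| // refs+0 and refs+8, zeroing refs[0..2] and also fp[0].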
| // Save the ArtMethod*. |
| sd a0, (sp) |
| |
| |
| // Hardcoded |
| // - (caller) xINST, xFP, xREFS, xPC |
| // - a0: ArtMethod* |
| // - a1: this, for instance invoke |
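| // Input
| // - \v_fedc: the invoke's second code unit, holding vreg index nibbles F|E|D|C
| // Note: for this format (35c) xINST holds A|G|op; the arg count A is in bits 15:12 and
| // the fifth vreg index G is in bits 11:8.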
| %def n2n_arg_move(refs="", fp="", regs="", pc="", v_fedc="", z0="", z1="", z2="", z3="", a1_instance=True, how_vC="", uniq=""): |
| srliw $z0, xINST, 12 // z0 := A (arg count) |
| |
| % if not a1_instance: |
| beqz $z0, .L${uniq}_arg_done |
| %#: |
| // A >= 1, decide and branch |
| li $z1, 2 |
| sub $z2, $regs, $z0 // z2 := regs - A; vC's index in fp |
| sh2add $z3, $z2, $fp // z3 := addr of fp[C] |
| sh2add $z2, $z2, $refs // z2 := addr of refs[C] |
| blt $z0, $z1, .L${uniq}_arg_1 |
| beq $z0, $z1, .L${uniq}_arg_2 |
| li $z1, 4 |
| blt $z0, $z1, .L${uniq}_arg_3 |
| beq $z0, $z1, .L${uniq}_arg_4 |
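| // The cases fall through in descending order: e.g. A = 3 branches to _arg_3, then falls
| // through _arg_2 and _arg_1, copying vE, vD, and finally vC into the callee's last A
| // vreg slots (fp[\regs - A] upward), with refs updated in parallel.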
| |
| // A = 5 |
| srliw $z0, xINST, 8 |
| andi $z0, $z0, 0xF // z0 := G |
| % get_vreg(z1, z0) # z1 := xFP[G] |
| sw $z1, (4*4)($z3) // fp[G] := z1 |
| GET_VREG_OBJECT $z0, $z0 // z0 := xREFS[G] |
| sw $z0, (4*4)($z2) // refs[G] := z0 |
| .L${uniq}_arg_4: |
| srliw $z0, $v_fedc, 12 // z0 := F |
| % get_vreg(z1, z0) # z1 := xFP[F] |
| sw $z1, (3*4)($z3) // fp[F] := z1 |
| GET_VREG_OBJECT $z0, $z0 // z0 := xREFS[F] |
| sw $z0, (3*4)($z2) // refs[F] := z0 |
| .L${uniq}_arg_3: |
| srliw $z0, $v_fedc, 8 // z0 := F|E |
| andi $z0, $z0, 0xF // z0 := E |
| % get_vreg(z1, z0) # z1 := xFP[E] |
| sw $z1, (2*4)($z3) // fp[E] := z1 |
| GET_VREG_OBJECT $z0, $z0 // z0 := xREFS[E] |
| sw $z0, (2*4)($z2) // refs[E] := z0 |
| .L${uniq}_arg_2: |
| srliw $z0, $v_fedc, 4 // z0 := F|E|D |
| andi $z0, $z0, 0xF // z0 := D |
| % get_vreg(z1, z0) # z1 := xFP[D] |
| sw $z1, (1*4)($z3) // fp[D] := z1 |
| GET_VREG_OBJECT $z0, $z0 // z0 := xREFS[D] |
| sw $z0, (1*4)($z2) // refs[D] := z0 |
| .L${uniq}_arg_1: |
| % if how_vC == "in_a1": |
| // a1 = xFP[C] from earlier stage of instance invoke |
| sw a1, (0*4)($z3) // fp[C] := a1 |
| sw a1, (0*4)($z2) // refs[C] := a1 |
| % elif how_vC == "skip": |
| // string init doesn't read "this" |
| % elif how_vC == "load": |
| // static method loads vC just like other vregs |
| andi $z0, $v_fedc, 0xF // z0 := C |
| % get_vreg(z1, z0) # z1 := xFP[C] |
| sw $z1, (0*4)($z3) // fp[C] := z1 |
| GET_VREG_OBJECT $z0, $z0 // z0 := xREFS[C] |
| sw $z0, (0*4)($z2) // refs[C] := z0 |
| %#: |
| .L${uniq}_arg_done: |
| |
| |
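| // Hardcoded
| // - (caller) xINST, xFP, xREFS
| // - a0: ArtMethod*
| // - a1: this, for instance invoke
| // Input
| // - \vC: first argument's vreg index CCCC, from the invoke's second code unit
| // Note: for this format (3rc) xINST holds AA|op; the arg count AA is in bits 15:8.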
| %def n2n_arg_move_range(refs="", fp="", regs="", vC="", z0="", z1="", z2="", z3="", z4="", z5="", a1_instance=True, how_vC="", uniq=""): |
| srliw $z0, xINST, 8 // z0 := AA (arg count) |
| |
| % if not a1_instance: |
| beqz $z0, .L${uniq}_arg_range_done |
| %#: |
| // AA >= 1, iterator setup |
| sub $z4, $regs, $z0 // z4 := regs - AA; starting idx in fp and refs |
| sh2add $z1, $vC, xREFS // z1 := addr of xREFS[CCCC] |
| sh2add $z2, $vC, xFP // z2 := addr of xFP[CCCC] |
| sh2add $z3, $z4, $refs // z3 := addr of refs[z4] |
| sh2add $z4, $z4, $fp // z4 := addr of fp[z4] |
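| // Worked example (illustrative): AA = 3, \regs = 8. The args occupy fp[5..7] and
| // refs[5..7]. AA is odd, so slot 5 (vC) transfers singly; the wide loop then moves
| // slots 6-7 as one LD/SD pair and stops once the refs iterator reaches the fp base.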
| |
| BRANCH_IF_BIT_CLEAR $z0, $z0, 0, .L${uniq}_arg_range_copy_wide
| // branch if AA is even; every slot then copies in 8-byte pairs
| // AA is odd: transfer the first (unpaired) slot now, specialized on how vC is obtained.
| % if how_vC == "in_a1": |
| sw a1, ($z3) |
| sw a1, ($z4) |
| % elif how_vC == "skip": |
| // string init doesn't read "this" |
| % elif how_vC == "load": |
| lw $z0, ($z1) |
| lw $z5, ($z2) |
| sw $z0, ($z3) |
| sw $z5, ($z4) |
| %#: |
| addi $z1, $z1, 4 |
| addi $z2, $z2, 4 |
| addi $z3, $z3, 4 |
| addi $z4, $z4, 4 |
| .L${uniq}_arg_range_copy_wide: |
| // An even count of vreg slots remains; copy two at a time with LD/SD.
| beq $z3, $fp, .L${uniq}_arg_range_done // done when refs iterator reaches fp array base
| ld $z0, ($z1) |
| ld $z5, ($z2) |
| sd $z0, ($z3) |
| sd $z5, ($z4) |
| addi $z1, $z1, 8 |
| addi $z2, $z2, 8 |
| addi $z3, $z3, 8 |
| addi $z4, $z4, 8 |
| j .L${uniq}_arg_range_copy_wide |
| .L${uniq}_arg_range_done: |
| |
| |
| // |
| // Nterp entry point helpers |
| // |
| |
| |
| // Hardcoded: |
| // - a0: ArtMethod* |
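| // Input
| // - \fp, \refs: iterators over the callee vreg arrays, advanced in lockstep
| // - \refs_end: end of the refs array; reaching it jumps to \done
| // - \spills_sp: post-spills, pre-frame stack pointer (see setup_nterp_frame)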
| %def setup_ref_args_and_go(fp="", refs="", refs_end="", spills_sp="", z0="", z1="", done=""): |
| // Store managed-ABI register args into fp/refs arrays. |
| % store_ref_to_vreg(gpr="a1", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| % store_ref_to_vreg(gpr="a2", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| % store_ref_to_vreg(gpr="a3", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| % store_ref_to_vreg(gpr="a4", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| % store_ref_to_vreg(gpr="a5", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| % store_ref_to_vreg(gpr="a6", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| % store_ref_to_vreg(gpr="a7", fp=fp, refs=refs, refs_end=refs_end, done=done) |
| // All seven GPR arg registers (a1-a7) are drained, so continue from the caller's stack.
| // Every ref arg is 4 bytes, so the stack continuation offset is fixed.
| addi $z0, $spills_sp, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8 + 7*4) |
| // z0 := out array base addr + 7 vreg slots |
| .Lentry_ref_stack: |
| lwu $z1, ($z0) |
| sw $z1, ($fp) |
| sw $z1, ($refs) |
| addi $z0, $z0, 4 |
| addi $fp, $fp, 4 |
| addi $refs, $refs, 4 |
| bne $refs, $refs_end, .Lentry_ref_stack |
| |
| j $done |
| |
| |
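| // Copies one reference from a managed-ABI arg register into the parallel fp/refs slots,
| // advancing both iterators; jumps to \done once the refs array is full.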
| %def store_ref_to_vreg(gpr="", fp="", refs="", refs_end="", done=""): |
| sw $gpr, ($fp) |
| sw $gpr, ($refs) |
| addi $fp, $fp, 4 |
| addi $refs, $refs, 4 |
| beq $refs, $refs_end, $done |
| |
| |
| // \fp and \refs are used as array base addrs, unmodified. |
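| // \D, \F, \J, \L: registers preloaded with the ASCII shorty chars 'D', 'F', 'J', 'L'.
| // Scans \shorty forward, skipping float/double args (those arrive in FPRs), and stores
| // \gpr into the vreg slot of the first GPR-destined arg; an 'L' ref is mirrored into
| // \refs. \offset advances in 4-byte vreg units over everything scanned.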
| %def store_gpr_to_vreg(gpr="", offset="", shorty="", fp="", refs="", z0="", z1="", D="", F="", J="", L="", next=""): |
| .Lentry_arg_${gpr}: |
| lb $z0, ($shorty) // z0 := shorty type |
| addi $shorty, $shorty, 1 // Increment char ptr. |
| beqz $z0, $next // z0 = \0: finished shorty pass |
| beq $z0, $D, .Lentry_arg_skip_double_${gpr} |
| beq $z0, $F, .Lentry_arg_skip_float_${gpr} |
| |
| add $z1, $offset, $fp |
| beq $z0, $J, .Lentry_arg_long_${gpr} |
| sw $gpr, ($z1) |
| bne $z0, $L, .Lentry_arg_finish_${gpr} |
| add $z1, $offset, $refs |
| sw $gpr, ($z1) |
| j .Lentry_arg_finish_${gpr} |
| .Lentry_arg_skip_double_${gpr}: |
| addi $offset, $offset, 4 |
| .Lentry_arg_skip_float_${gpr}: |
| addi $offset, $offset, 4 |
| j .Lentry_arg_${gpr} |
| .Lentry_arg_long_${gpr}: |
| sd $gpr, ($z1) |
| addi $offset, $offset, 4 |
| .Lentry_arg_finish_${gpr}: |
| addi $offset, $offset, 4 |
| |
| |
| // \fp is used as array base addr, unmodified. |
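| // \D, \F, \J: registers preloaded with the ASCII shorty chars 'D', 'F', 'J'.
| // Mirror of store_gpr_to_vreg: skips GPR-destined args (4 bytes each, 8 for 'J') and
| // stores \fpr into the vreg slot of the first float ('F', fsw) or double ('D', fsd).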
| %def store_fpr_to_vreg(fpr="", offset="", shorty="", fp="", z0="", z1="", D="", F="", J="", next=""): |
| .Lentry_farg_${fpr}: |
| lb $z0, ($shorty) // z0 := shorty type |
| addi $shorty, $shorty, 1 // Increment char ptr. |
| beqz $z0, $next // z0 = \0: finished shorty pass |
| beq $z0, $D, .Lentry_farg_double_${fpr} |
| beq $z0, $F, .Lentry_farg_float_${fpr} |
| addi $offset, $offset, 4 |
| bne $z0, $J, .Lentry_farg_${fpr} |
| addi $offset, $offset, 4 |
| j .Lentry_farg_${fpr} |
| |
| .Lentry_farg_float_${fpr}: |
| add $z1, $offset, $fp |
| fsw $fpr, ($z1) |
| j .Lentry_farg_finish_${fpr} |
| .Lentry_farg_double_${fpr}: |
| add $z1, $offset, $fp |
| fsd $fpr, ($z1) |
| addi $offset, $offset, 4 |
| .Lentry_farg_finish_${fpr}: |
| addi $offset, $offset, 4 |
| |
| |
| // \outs, \fp, \refs are used as iterators, modified. |
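| // \D, \F, \J, \L: registers preloaded with the ASCII shorty chars.
| // Copies the caller's stack-passed GPR-typed args into the vreg arrays; float/double
| // slots are only skipped here, as the companion pass store_float_outs_to_vregs handles
| // them. All three iterators advance in lockstep so refs stays parallel to fp.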
| %def store_outs_to_vregs(outs="", shorty="", fp="", refs="", z0="", z1="", D="", F="", J="", L="", next=""): |
| .Lentry_stack: |
| lb $z0, ($shorty) // z0 := next shorty arg spec |
| addi $shorty, $shorty, 1 // Increment char ptr. |
| beqz $z0, $next // z0 == \0 |
| beq $z0, $F, .Lentry_stack_next_4 |
| beq $z0, $D, .Lentry_stack_next_8 |
| beq $z0, $J, .Lentry_stack_long |
| // 32-bit arg |
| lwu $z1, ($outs) |
| sw $z1, ($fp) |
| bne $z0, $L, .Lentry_stack_next_4 |
| // and also a ref |
| sw $z1, ($refs) |
| .Lentry_stack_next_4: |
| addi $outs, $outs, 4 |
| addi $fp, $fp, 4 |
| addi $refs, $refs, 4 |
| j .Lentry_stack |
| .Lentry_stack_long: |
| ld $z1, ($outs) |
| sd $z1, ($fp) |
| .Lentry_stack_next_8: |
| addi $outs, $outs, 8 |
| addi $fp, $fp, 8 |
| addi $refs, $refs, 8 |
| j .Lentry_stack |
| |
| |
| // \outs, \fp are used as iterators, modified. |
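| // \D, \F, \J: registers preloaded with the ASCII shorty chars.
| // Companion pass to store_outs_to_vregs: copies only float ('F') and double ('D') stack
| // args into \fp, skipping GPR-typed slots; refs needs no update since floats are never
| // references.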
| %def store_float_outs_to_vregs(outs="", shorty="", fp="", z0="", D="", F="", J="", next=""): |
| .Lentry_fstack: |
| lb $z0, ($shorty) // z0 := next shorty arg spec |
| addi $shorty, $shorty, 1 // Increment char ptr. |
| beqz $z0, $next // z0 == \0 |
| beq $z0, $F, .Lentry_fstack_float |
| beq $z0, $D, .Lentry_fstack_double |
| beq $z0, $J, .Lentry_fstack_next_8 |
| // 32-bit arg |
| addi $outs, $outs, 4 |
| addi $fp, $fp, 4 |
| j .Lentry_fstack |
| .Lentry_fstack_float: |
| lwu $z0, ($outs) |
| sw $z0, ($fp) |
| addi $outs, $outs, 4 |
| addi $fp, $fp, 4 |
| j .Lentry_fstack |
| .Lentry_fstack_double: |
| ld $z0, ($outs) |
| sd $z0, ($fp) |
| .Lentry_fstack_next_8: |
| addi $outs, $outs, 8 |
| addi $fp, $fp, 8 |
| j .Lentry_fstack |
| |