| /* |
| * Copyright (C) 2023 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "asm_support_riscv64.S" |
| #include "interpreter/cfi_asm_support.h" |
| |
| #include "arch/quick_alloc_entrypoints.S" |
| #include "arch/quick_field_entrypoints.S" |
| |
| |
| // Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding. |
| // Argument 0: a0: The context pointer for ExecuteSwitchImpl. |
| // Argument 1: a1: Pointer to the templated ExecuteSwitchImpl to call. |
| // Argument 2: a2: The value of DEX PC (memory address of the methods bytecode). |
| ENTRY ExecuteSwitchImplAsm |
| INCREASE_FRAME 16 |
| SAVE_GPR s1, 0 |
| SAVE_GPR ra, 8 |
| |
| mv s1, a2 // s1 = DEX PC |
| CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* a0 */, 9 /* s1, a.k.a. x9 */, 0) |
| jalr a1 // Call the wrapped method. |
| |
| RESTORE_GPR s1, 0 |
| RESTORE_GPR ra, 8 |
| DECREASE_FRAME 16 |
| ret |
| END ExecuteSwitchImplAsm |
| |
| |
| .macro INVOKE_STUB_CREATE_FRAME |
| // Save RA, FP, xSELF (current thread), A4, A5 (they will be needed in the invoke stub return). |
| INCREASE_FRAME 48 |
| // Slot (8*0) is used for `ArtMethod*` (if no args), args or padding, see below. |
| SAVE_GPR xSELF, (8*1) |
| SAVE_GPR a4, (8*2) |
| SAVE_GPR a5, (8*3) |
| SAVE_GPR fp, (8*4) // Store FP just under the return address. |
| SAVE_GPR ra, (8*5) |
| |
| // Make the new FP point to the location where we stored the old FP. |
| // Some stack-walking tools may rely on this simply-linked list of saved FPs. |
| addi fp, sp, (8*4) // save frame pointer |
| .cfi_def_cfa fp, 48 - (8*4) |
| |
| // We already have space for `ArtMethod*` on the stack but we need space for args above |
| // the `ArtMethod*`, so add sufficient space now, pushing the `ArtMethod*` slot down. |
| addi t0, a2, 0xf // Reserve space for arguments and |
| andi t0, t0, ~0xf // round up for 16-byte stack alignment. |
| sub sp, sp, t0 |
| |
| mv xSELF, a3 |
| |
| // Copy arguments on stack (4 bytes per slot): |
| // A1: source address |
| // A2: arguments length |
| // T0: destination address if there are any args. |
| |
| beqz a2, 2f // loop through 4-byte arguments from the last to the first |
| addi t0, sp, 8 // destination address is bottom of the stack + 8 bytes for ArtMethod* (null) |
| 1: |
| addi a2, a2, -4 |
| add t1, a1, a2 // T1 is the source address of the next copied argument |
| lw t2, (t1) // T2 is the 4 bytes at address T1 |
| add t1, t0, a2 // T1 is the destination address of the next copied argument |
| sw t2, (t1) // save T2 at the destination address T1 |
| bnez a2, 1b |
| 2: |
| sd zero, (sp) // Store null into ArtMethod* at bottom of frame. |
| .endm |
| |
| |
| .macro INVOKE_STUB_CALL_AND_RETURN |
| // Call the method. |
| ld t0, ART_METHOD_QUICK_CODE_OFFSET_64(a0) |
| jalr t0 |
| |
| addi sp, fp, -(8*4) // restore SP (see `INVOKE_STUB_CREATE_FRAME`) |
| .cfi_def_cfa sp, 48 |
| |
| // Restore ra, fp, xSELF (current thread) a4 (shorty), a5 (result pointer) from stack. |
| RESTORE_GPR xSELF, (8*1) |
| RESTORE_GPR a4, (8*2) |
| RESTORE_GPR a5, (8*3) |
| RESTORE_GPR fp, (8*4) |
| RESTORE_GPR ra, (8*5) |
| DECREASE_FRAME 48 |
| |
| // Load result type (1-byte symbol) from a5. |
| // Check result type and store the correct register into the jvalue in memory at a4 address. |
| lbu t0, (a5) |
| |
| li t1, 'V' // void (do not store result at all) |
| beq t1, t0, 1f |
| |
| li t1, 'D' // double |
| beq t1, t0, 2f |
| |
| li t1, 'F' // float |
| beq t1, t0, 3f |
| |
| // Otherwise, result is in a0 (either 8 or 4 bytes, but it is fine to store 8 bytes as the |
| // upper bytes in a0 in that case are zero, and jvalue has enough space). |
| sd a0, (a4) |
| 1: |
| ret |
| |
| 2: // double: result in fa0 (8 bytes) |
| fsd fa0, (a4) |
| ret |
| |
| 3: // float: result in fa0 (4 bytes) |
| fsw fa0, (a4) |
| ret |
| .endm |
| |
| |
| ENTRY art_deliver_pending_exception |
| DELIVER_PENDING_EXCEPTION |
| END art_deliver_pending_exception |
| |
| |
| // The size of the handler emitted by `INVOKE_STUB_LOAD_REG` below. |
| #define INVOKE_STUB_LOAD_REG_SIZE 8 |
| |
| // The offset within `INVOKE_STUB_LOAD_REG` for skipping arguments. |
| #define INVOKE_STUB_LOAD_REG_SKIP_OFFSET 6 |
| |
| // Macro for loading an argument into a register. |
| // load - instruction used for loading, |
| // reg - the register to load, |
| // args - pointer to next argument, |
| // size - the size of the register - 4 or 8 bytes, used as an offset for the load, |
| // handler_reg - the register with the address of the handler (points to this handler on entry), |
| // handler_diff - the difference in bytes from the current to the next handler, |
| // cont - the base name of the label for continuing the shorty processing loop, |
| // sfx - suffix added to all labels to make labels unique for different users. |
| .macro INVOKE_STUB_LOAD_REG load, reg, args, size, handler_reg, handler_diff, cont, sfx |
| .Linvoke_stub_\load\reg\sfx: |
| \load \reg, -\size(\args) |
| c.addi \handler_reg, \handler_diff |
| .org .Linvoke_stub_\load\reg\sfx + INVOKE_STUB_LOAD_REG_SKIP_OFFSET // Enforce skip offset. |
| c.j \cont\sfx |
| .org .Linvoke_stub_\load\reg\sfx + INVOKE_STUB_LOAD_REG_SIZE // Enforce handler size. |
| .endm |
| |
| |
| // Fill registers a1 to a7 and fa0 to fa7 with parameters. |
| // Parse the passed shorty to determine which register to load. |
| // a5 - shorty, |
| // t0 - points to arguments on the stack if any (undefined for static method without args), |
| // sfx - suffix added to all labels to make labels unique for different users. |
| .macro INVOKE_STUB_LOAD_ALL_ARGS sfx |
| addi t1, a5, 1 // Load shorty address, plus one to skip the return type. |
| |
| // Load this (if instance method) and record the number of GPRs to fill. |
| .ifc \sfx, _instance |
| lwu a1, (t0) // Load "this" parameter, |
| addi t0, t0, 4 // and increment arg pointer. |
| .equ NUM_GPRS_TO_FILL, 6 |
| .else |
| .equ NUM_GPRS_TO_FILL, 7 |
| .endif |
| .equ NUM_FPRS_TO_FILL, 8 |
| |
| // Load addresses for routines that load argument GPRs and FPRs. |
| lla t4, .Lreg_handlers_start\sfx // First handler for non-FP args. |
| addi t5, t4, (3 * NUM_GPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE) // First handler for FP args. |
| |
| // Loop to fill registers. |
| .Lfill_regs\sfx: |
| lb t2, (t1) // Load next character in signature, |
| addi t1, t1, 1 // and increment. |
| beqz t2, .Lcall_method\sfx // Exit at end of signature. Shorty 0 terminated. |
| |
| li t3, 'L' |
| beq t2, t3, .Lload_reference\sfx // Is this a reference? |
| |
| li t3, 'J' |
| beq t2, t3, .Lload_long\sfx // Is this a long? |
| |
| li t3, 'F' |
| beq t2, t3, .Lload_float\sfx // Is this a float? |
| |
| li t3, 'D' |
| beq t2, t3, .Lload_double\sfx // Is this a double? |
| |
| // Everything else uses a 4-byte value sign-extened to a 64 bit GPR. |
| addi t0, t0, 4 |
| jalr x0, 0(t4) |
| |
| .Lload_reference\sfx: |
| addi t0, t0, 4 |
| jalr x0, (NUM_GPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)(t4) |
| |
| .Lload_long\sfx: |
| addi t0, t0, 8 |
| jalr x0, (2 * NUM_GPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)(t4) |
| |
| .Lload_float\sfx: |
| addi t0, t0, 4 |
| jalr x0, 0(t5) |
| |
| .Lload_double\sfx: |
| addi t0, t0, 8 |
| jalr x0, (NUM_FPRS_TO_FILL * INVOKE_STUB_LOAD_REG_SIZE)(t5) |
| |
| .Lreg_handlers_start\sfx: |
| |
| // Handlers for loading other args (not reference/long/float/double) into GPRs. |
| .ifnc \sfx, _instance |
| INVOKE_STUB_LOAD_REG lw, a1, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| .endif |
| INVOKE_STUB_LOAD_REG lw, a2, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lw, a3, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lw, a4, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lw, a5, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lw, a6, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lw, a7, t0, 4, t4, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx |
| |
| // Handlers for loading reference args into GPRs. |
| .ifnc \sfx, _instance |
| INVOKE_STUB_LOAD_REG lwu, a1, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| .endif |
| INVOKE_STUB_LOAD_REG lwu, a2, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lwu, a3, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lwu, a4, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lwu, a5, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lwu, a6, t0, 4, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG lwu, a7, t0, 4, t4, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx |
| |
| // Handlers for loading long args into GPRs. |
| .ifnc \sfx, _instance |
| INVOKE_STUB_LOAD_REG ld, a1, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| .endif |
| INVOKE_STUB_LOAD_REG ld, a2, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG ld, a3, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG ld, a4, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG ld, a5, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG ld, a6, t0, 8, t4, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG ld, a7, t0, 8, t4, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx |
| |
| // Handlers for loading floats into FPRs. |
| INVOKE_STUB_LOAD_REG flw, fa0, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa1, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa2, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa3, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa4, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa5, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa6, t0, 4, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG flw, fa7, t0, 4, t5, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx |
| |
| // Handlers for loading doubles into FPRs. |
| INVOKE_STUB_LOAD_REG fld, fa0, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa1, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa2, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa3, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa4, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa5, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa6, t0, 8, t5, INVOKE_STUB_LOAD_REG_SIZE, .Lfill_regs, \sfx |
| INVOKE_STUB_LOAD_REG fld, fa7, t0, 8, t5, INVOKE_STUB_LOAD_REG_SKIP_OFFSET, .Lfill_regs, \sfx |
| |
| .Lcall_method\sfx: |
| .endm |
| |
| |
| // void art_quick_invoke_stub(ArtMethod* method, // a0 |
| // uint32_t* args, // a1 |
| // uint32_t argsize, // a2 |
| // Thread* self, // a3 |
| // JValue* result, // a4 |
| // char* shorty) // a5 |
| ENTRY art_quick_invoke_stub |
| INVOKE_STUB_CREATE_FRAME |
| |
| // Load args into registers. |
| INVOKE_STUB_LOAD_ALL_ARGS _instance |
| |
| // Call the method and return. |
| INVOKE_STUB_CALL_AND_RETURN |
| END art_quick_invoke_stub |
| |
| |
| // void art_quick_invoke_static_stub(ArtMethod* method, // a0 |
| // uint32_t* args, // a1 |
| // uint32_t argsize, // a2 |
| // Thread* self, // a3 |
| // JValue* result, // a4 |
| // char* shorty) // a5 |
| ENTRY art_quick_invoke_static_stub |
| INVOKE_STUB_CREATE_FRAME |
| |
| // Load args into registers. |
| INVOKE_STUB_LOAD_ALL_ARGS _static |
| |
| // Call the method and return. |
| INVOKE_STUB_CALL_AND_RETURN |
| END art_quick_invoke_static_stub |
| |
| |
| ENTRY art_quick_generic_jni_trampoline |
| SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0 |
| |
| // Save sp, so we can have static CFI info. |
| mv fp, sp |
| .cfi_def_cfa_register fp |
| |
| li t0, GENERIC_JNI_TRAMPOLINE_RESERVED_AREA |
| sub sp, sp, t0 |
| |
| mv a0, xSELF // Thread* |
| mv a1, fp // SP for the managed frame. |
| mv a2, sp // reserved area for arguments and other saved data (up to managed frame) |
| call artQuickGenericJniTrampoline |
| |
| // Check for error (class init check or locking for synchronized native method can throw). |
| beqz a0, .Lexception_in_native |
| |
| mv t2, a0 // save pointer to native method code into temporary |
| |
| // Load argument GPRs from stack (saved there by artQuickGenericJniTrampoline). |
| ld a0, 8*0(sp) // JniEnv* for the native method |
| ld a1, 8*1(sp) |
| ld a2, 8*2(sp) |
| ld a3, 8*3(sp) |
| ld a4, 8*4(sp) |
| ld a5, 8*5(sp) |
| ld a6, 8*6(sp) |
| ld a7, 8*7(sp) |
| |
| // Load argument FPRs from stack (saved there by artQuickGenericJniTrampoline). |
| fld fa0, 8*8(sp) |
| fld fa1, 8*9(sp) |
| fld fa2, 8*10(sp) |
| fld fa3, 8*11(sp) |
| fld fa4, 8*12(sp) |
| fld fa5, 8*13(sp) |
| fld fa6, 8*14(sp) |
| fld fa7, 8*15(sp) |
| |
| ld t0, 8*16(sp) // @CriticalNative arg, used by art_jni_dlsym_lookup_critical_stub |
| |
| ld t1, 8*17(sp) // restore stack |
| mv sp, t1 |
| |
| jalr t2 // call native method |
| |
| // result sign extension is handled in C code, prepare for artQuickGenericJniEndTrampoline call: |
| // uint64_t artQuickGenericJniEndTrampoline(Thread* self, // a0 |
| // jvalue result, // a1 (need to move from a0) |
| // uint64_t result_f) // a2 (need to move from fa0) |
| mv a1, a0 |
| mv a0, xSELF |
| fmv.x.d a2, fa0 |
| call artQuickGenericJniEndTrampoline |
| |
| // Pending exceptions possible. |
| ld t0, THREAD_EXCEPTION_OFFSET(xSELF) |
| bnez t0, .Lexception_in_native |
| |
| // Tear down the alloca. |
| mv sp, fp |
| CFI_REMEMBER_STATE |
| .cfi_def_cfa_register sp |
| |
| LOAD_RUNTIME_INSTANCE a1 |
| lb a1, RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE(a1) |
| bnez a1, .Lcall_method_exit_hook |
| |
| .Lcall_method_exit_hook_done: |
| // This does not clobber the result register a0. a1 is not used for result as the managed code |
| // does not have a 128-bit type. Alternatively we could restore a subset of these registers. |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| fmv.d.x fa0, a0 |
| ret |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS |
| |
| .Lcall_method_exit_hook: |
| fmv.d.x fa0, a0 |
| li a4, FRAME_SIZE_SAVE_REFS_AND_ARGS |
| call art_quick_method_exit_hook |
| j .Lcall_method_exit_hook_done |
| |
| .Lexception_in_native: |
| // Move to a1 then sp to please assembler. |
| ld a1, THREAD_TOP_QUICK_FRAME_OFFSET(xSELF) |
| addi sp, a1, -1 // Remove the GenericJNI tag. |
| call art_deliver_pending_exception |
| END art_quick_generic_jni_trampoline |
| |
| |
| ENTRY art_quick_to_interpreter_bridge |
| SETUP_SAVE_REFS_AND_ARGS_FRAME |
| |
| // uint64_t artQuickToInterpreterBridge(ArtMethod* method, Thread* self, ArtMethod** sp) |
| // a0 will contain ArtMethod* |
| mv a1, xSELF |
| mv a2, sp |
| call artQuickToInterpreterBridge |
| |
| // TODO: no need to restore arguments in this case. |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| |
| fmv.d.x fa0, a0 // copy the result to FP result register |
| |
| RETURN_OR_DELIVER_PENDING_EXCEPTION_REG t0 |
| END art_quick_to_interpreter_bridge |
| |
| |
| .extern artMethodEntryHook |
| ENTRY art_quick_method_entry_hook |
| SETUP_SAVE_EVERYTHING_FRAME |
| |
| ld a0, FRAME_SIZE_SAVE_EVERYTHING(sp) // Pass ArtMethod*. |
| mv a1, xSELF // Pass Thread::Current(). |
| mv a2, sp // pass SP |
| call artMethodEntryHook // (ArtMethod*, Thread*, SP) |
| |
| RESTORE_SAVE_EVERYTHING_FRAME |
| ret |
| END art_quick_method_entry_hook |
| |
| |
| .extern artMethodExitHook |
| ENTRY art_quick_method_exit_hook |
| SETUP_SAVE_EVERYTHING_FRAME |
| |
| // frame_size is passed in A4 from JITed code and `art_quick_generic_jni_trampoline`. |
| addi a3, sp, SAVE_EVERYTHING_FRAME_OFFSET_FA0 // FP result ptr in kSaveEverything frame |
| addi a2, sp, SAVE_EVERYTHING_FRAME_OFFSET_A0 // integer result ptr in kSaveEverything frame |
| addi a1, sp, FRAME_SIZE_SAVE_EVERYTHING // ArtMethod** |
| mv a0, xSELF // Thread::Current |
| call artMethodExitHook // (Thread*, ArtMethod**, gpr_res*, fpr_res*, |
| // frame_size) |
| |
| // Normal return. |
| RESTORE_SAVE_EVERYTHING_FRAME |
| ret |
| END art_quick_method_exit_hook |
| |
| |
| // On entry a0 is uintptr_t* gprs_ and a1 is uint64_t* fprs_. |
| // Both must reside on the stack, between current sp and target sp. |
| ENTRY art_quick_do_long_jump |
| // Load FPRs |
| fld ft0, 8*0(a1) // f0 |
| fld ft1, 8*1(a1) // f1 |
| fld ft2, 8*2(a1) // f2 |
| fld ft3, 8*3(a1) // f3 |
| fld ft4, 8*4(a1) // f4 |
| fld ft5, 8*5(a1) // f5 |
| fld ft6, 8*6(a1) // f6 |
| fld ft7, 8*7(a1) // f7 |
| fld fs0, 8*8(a1) // f8 |
| fld fs1, 8*9(a1) // f9 |
| fld fa0, 8*10(a1) // f10 |
| fld fa1, 8*11(a1) // f11 |
| fld fa2, 8*12(a1) // f12 |
| fld fa3, 8*13(a1) // f13 |
| fld fa4, 8*14(a1) // f14 |
| fld fa5, 8*15(a1) // f15 |
| fld fa6, 8*16(a1) // f16 |
| fld fa7, 8*17(a1) // f17 |
| fld fs2, 8*18(a1) // f18 |
| fld fs3, 8*19(a1) // f19 |
| fld fs4, 8*20(a1) // f20 |
| fld fs5, 8*21(a1) // f21 |
| fld fs6, 8*22(a1) // f22 |
| fld fs7, 8*23(a1) // f23 |
| fld fs8, 8*24(a1) // f24 |
| fld fs9, 8*25(a1) // f25 |
| fld fs10, 8*26(a1) // f26 |
| fld fs11, 8*27(a1) // f27 |
| fld ft8, 8*28(a1) // f28 |
| fld ft9, 8*29(a1) // f29 |
| fld ft10, 8*30(a1) // f30 |
| fld ft11, 8*31(a1) // f31 |
| |
| // Load GPRs. |
| // Skip slot 8*0(a0) for zero/x0 as it is hard-wired zero. |
| ld ra, 8*1(a0) // x1 |
| // Skip slot 8*2(a0) for sp/x2 as it is set below. |
| // Skip slot 8*3(a0) for platform-specific thread pointer gp/x3. |
| // Skip slot 8*4(a0) for platform-specific global pointer tp/x4. |
| // Skip slot 8*5(a0) for t0/x5 as it is clobbered below. |
| // Skip slot 8*6(a0) for t1/x6 as it is clobbered below. |
| ld t2, 8*7(a0) // x7 |
| ld s0, 8*8(a0) // x8 |
| ld s1, 8*9(a0) // x9 |
| // Delay loading a0 as the base is in a0. |
| ld a1, 8*11(a0) // x11 |
| ld a2, 8*12(a0) // x12 |
| ld a3, 8*13(a0) // x13 |
| ld a4, 8*14(a0) // x14 |
| ld a5, 8*15(a0) // x15 |
| ld a6, 8*16(a0) // x16 |
| ld a7, 8*17(a0) // x17 |
| ld s2, 8*18(a0) // x18 |
| ld s3, 8*19(a0) // x19 |
| ld s4, 8*20(a0) // x20 |
| ld s5, 8*21(a0) // x21 |
| ld s6, 8*22(a0) // x22 |
| ld s7, 8*23(a0) // x23 |
| ld s8, 8*24(a0) // x24 |
| ld s9, 8*25(a0) // x25 |
| ld s10, 8*26(a0) // x26 |
| ld s11, 8*27(a0) // x27 |
| ld t3, 8*28(a0) // x28 |
| ld t4, 8*29(a0) // x29 |
| ld t5, 8*30(a0) // x30 |
| ld t6, 8*31(a0) // x31 |
| |
| // Load sp to t0. |
| ld t0, 8*2(a0) |
| |
| // Load PC to t1, it is in the last stack slot. |
| ld t1, 8*32(a0) |
| |
| // Now load a0. |
| ld a0, 8*10(a0) // x10 |
| |
| // Set sp. Do not access fprs_ and gprs_ from now, they are below sp. |
| mv sp, t0 |
| |
| jr t1 |
| END art_quick_do_long_jump |
| |
| |
| .macro DEOPT_OR_RETURN temp, is_ref = 0 |
| lwu \temp, THREAD_DEOPT_CHECK_REQUIRED_OFFSET(xSELF) |
| bnez \temp, 2f |
| ret |
| 2: |
| SETUP_SAVE_EVERYTHING_FRAME |
| li a2, \is_ref // pass if result is a reference |
| mv a1, a0 // pass the result |
| mv a0, xSELF // pass Thread::Current |
| call artDeoptimizeIfNeeded // (Thread*, uintptr_t, bool) |
| RESTORE_SAVE_EVERYTHING_FRAME |
| ret |
| .endm |
| |
| |
| .macro RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER |
| beqz a0, 1f |
| DEOPT_OR_RETURN a1, /*is_ref=*/ 1 |
| 1: |
| DELIVER_PENDING_EXCEPTION |
| .endm |
| |
| |
| .macro RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER |
| bnez a0, 1f |
| DEOPT_OR_RETURN a1 |
| 1: |
| DELIVER_PENDING_EXCEPTION |
| .endm |
| |
| |
| .macro RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION is_ref = 0 |
| lwu a1, THREAD_EXCEPTION_OFFSET(xSELF) // Get exception field. |
| bnez a1, 1f |
| DEOPT_OR_RETURN a1, \is_ref // Check if deopt is required. |
| 1: |
| DELIVER_PENDING_EXCEPTION // Deliver exception on current thread. |
| .endm |
| |
| |
| .macro RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION |
| RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /* is_ref= */ 1 |
| .endm |
| |
| |
| .macro DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_A0 temp, is_ref |
| ld \temp, THREAD_DEOPT_CHECK_REQUIRED_OFFSET(xSELF) |
| CFI_REMEMBER_STATE |
| bnez \temp, 2f |
| RESTORE_SAVE_EVERYTHING_FRAME /* load_a0= */ 0 |
| ret |
| 2: |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING |
| sd a0, SAVE_EVERYTHING_FRAME_OFFSET_A0(sp) // update result in the frame |
| li a2, \is_ref // pass if result is a reference |
| mv a1, a0 // pass the result |
| mv a0, xSELF // Thread::Current |
| call artDeoptimizeIfNeeded |
| CFI_REMEMBER_STATE |
| RESTORE_SAVE_EVERYTHING_FRAME |
| ret |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING |
| .endm |
| |
| |
| // Entry from managed code that tries to lock the object in a fast path and |
| // calls `artLockObjectFromCode()` for the difficult cases, may block for GC. |
| // A0 holds the possibly null object to lock. |
| ENTRY art_quick_lock_object |
| LOCK_OBJECT_FAST_PATH a0, art_quick_lock_object_no_inline, /*can_be_null*/ 1 |
| END art_quick_lock_object |
| |
| |
| // Entry from managed code that calls `artLockObjectFromCode()`, may block for GC. |
| // A0 holds the possibly null object to lock. |
| .extern artLockObjectFromCode |
| ENTRY art_quick_lock_object_no_inline |
| // This is also the slow path for `art_quick_lock_object`. |
| SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case we block |
| mv a1, xSELF // pass Thread::Current |
| call artLockObjectFromCode // (Object*, Thread*) |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER |
| END art_quick_lock_object_no_inline |
| |
| |
| // Entry from managed code that tries to unlock the object in a fast path and calls |
| // `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure. |
| // A0 holds the possibly null object to unlock. |
| ENTRY art_quick_unlock_object |
| UNLOCK_OBJECT_FAST_PATH a0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1 |
| END art_quick_unlock_object |
| |
| |
| // Entry from managed code that calls `artUnlockObjectFromCode()` |
| // and delivers exception on failure. |
| // A0 holds the possibly null object to unlock. |
| .extern artUnlockObjectFromCode |
| ENTRY art_quick_unlock_object_no_inline |
| // This is also the slow path for `art_quick_unlock_object`. |
| SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case exception allocation triggers GC |
| mv a1, xSELF // pass Thread::Current |
| call artUnlockObjectFromCode // (Object*, Thread*) |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER |
| END art_quick_unlock_object_no_inline |
| |
| |
| // Called by managed code that is attempting to call a method on a proxy class. On entry a0 holds |
| // the proxy method and a1 holds the receiver. The frame size of the invoked proxy method agrees |
| // with kSaveRefsAndArgs frame. |
| .extern artQuickProxyInvokeHandler |
| ENTRY art_quick_proxy_invoke_handler |
| SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_A0 |
| |
| // uint64_t artQuickProxyInvokeHandler(ArtMethod* proxy_method, // a0 |
| // mirror::Object* receiver, // a1 |
| // Thread* self, // a2 |
| // ArtMethod** sp) // a3 |
| mv a2, xSELF // pass Thread::Current |
| mv a3, sp // pass sp |
| call artQuickProxyInvokeHandler // (Method* proxy method, receiver, Thread*, sp) |
| |
| ld a2, THREAD_EXCEPTION_OFFSET(xSELF) |
| bnez a2, .Lexception_in_proxy // success if no exception is pending |
| CFI_REMEMBER_STATE |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME // Restore frame |
| fmv.d.x fa0, a0 // Store result in fa0 in case it was float or double |
| ret // return on success |
| |
| .Lexception_in_proxy: |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| DELIVER_PENDING_EXCEPTION |
| END art_quick_proxy_invoke_handler |
| |
| |
| // Compiled code has requested that we deoptimize into the interpreter. The deoptimization |
| // will long jump to the upcall with a special exception of -1. |
| .extern artDeoptimizeFromCompiledCode |
| ENTRY art_quick_deoptimize_from_compiled_code |
| SETUP_SAVE_EVERYTHING_FRAME |
| mv a1, xSELF // Pass Thread::Current(). |
| call artDeoptimizeFromCompiledCode // (DeoptimizationKind, Thread*) |
| unimp |
| END art_quick_deoptimize_from_compiled_code |
| |
| |
| .extern artStringBuilderAppend |
| ENTRY art_quick_string_builder_append |
| SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. |
| addi a1, sp, (FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__) // Pass args. |
| mv a2, xSELF // Pass Thread::Current(). |
| call artStringBuilderAppend // (uint32_t, const unit32_t*, Thread*) |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER |
| END art_quick_string_builder_append |
| |
| |
| // Entry from managed code that calls artInstanceOfFromCode and on failure calls |
| // artThrowClassCastExceptionForObject. |
| .extern artInstanceOfFromCode |
| .extern artThrowClassCastExceptionForObject |
| ENTRY art_quick_check_instance_of |
| // Type check using the bit string passes null as the target class. In that case just throw. |
| beqz a1, .Lthrow_class_cast_exception_for_bitstring_check |
| |
| // Store arguments and return address register. |
| // Stack needs to be 16B aligned on calls. |
| INCREASE_FRAME 32 |
| sd a0, 0*8(sp) |
| sd a1, 1*8(sp) |
| SAVE_GPR ra, 3*8 |
| |
| // Call runtime code. |
| call artInstanceOfFromCode |
| |
| // Restore RA. |
| RESTORE_GPR ra, 3*8 |
| |
| // Check for exception. |
| CFI_REMEMBER_STATE |
| beqz a0, .Lthrow_class_cast_exception |
| |
| // Remove spill area and return (no need to restore A0 and A1). |
| DECREASE_FRAME 32 |
| ret |
| |
| .Lthrow_class_cast_exception: |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, 32 |
| // Restore A0 and remove spill area. |
| ld a0, 0*8(sp) |
| ld a1, 1*8(sp) |
| DECREASE_FRAME 32 |
| |
| .Lthrow_class_cast_exception_for_bitstring_check: |
| SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // Save all registers as basis for long jump context. |
| mv a2, xSELF // Pass Thread::Current(). |
| call artThrowClassCastExceptionForObject // (Object*, Class*, Thread*) |
| unimp // We should not return here... |
| END art_quick_check_instance_of |
| |
| |
| .macro N_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING n, c_name, cxx_name |
| .extern \cxx_name |
| ENTRY \c_name |
| SETUP_SAVE_EVERYTHING_FRAME // save all registers as basis for long jump context. |
| mv a\n, xSELF // pass Thread::Current. |
| call \cxx_name // \cxx_name(args..., Thread*). |
| unimp |
| END \c_name |
| .endm |
| |
| |
| .macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name |
| N_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING 0, \c_name, \cxx_name |
| .endm |
| |
| |
| .macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name |
| N_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING 2, \c_name, \cxx_name |
| .endm |
| |
| |
| .macro N_ARG_RUNTIME_EXCEPTION n, c_name, cxx_name |
| .extern \cxx_name |
| ENTRY \c_name |
| SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context. |
| mv a\n, xSELF // pass Thread::Current. |
| call \cxx_name // \cxx_name(args..., Thread*). |
| unimp |
| END \c_name |
| .endm |
| |
| .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name |
| N_ARG_RUNTIME_EXCEPTION 0, \c_name, \cxx_name |
| .endm |
| |
| |
| .macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name |
| N_ARG_RUNTIME_EXCEPTION 1, \c_name, \cxx_name |
| .endm |
| |
| |
| // Called by managed code to create and deliver a NullPointerException. |
| NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \ |
| art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode |
| |
| |
| // Call installed by a signal handler to create and deliver a NullPointerException. |
| .extern artThrowNullPointerExceptionFromSignal |
| ENTRY art_quick_throw_null_pointer_exception_from_signal |
| // The fault handler pushes the gc map address, i.e. "return address", to stack |
| // and passes the fault address in RA. So we need to set up the CFI info accordingly. |
| .cfi_def_cfa_offset __SIZEOF_POINTER__ |
| .cfi_rel_offset ra, 0 |
| // Save all registers as basis for long jump context. |
| INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__) |
| SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_RA |
| mv a0, ra // pass the fault address stored in RA by the fault handler. |
| mv a1, xSELF // pass Thread::Current. |
| call artThrowNullPointerExceptionFromSignal // (arg, Thread*). |
| unimp |
| END art_quick_throw_null_pointer_exception_from_signal |
| |
| |
| // Called by managed code to deliver an ArithmeticException. |
| NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode |
| |
| |
| // Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. |
| // Arg0 holds index, arg1 holds limit. |
| TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode |
| |
| |
| // Called by managed code to create and deliver a StringIndexOutOfBoundsException |
| // as if thrown from a call to String.charAt(). Arg0 holds index, arg1 holds limit. |
| TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \ |
| art_quick_throw_string_bounds, artThrowStringBoundsFromCode |
| |
| // Called by managed code to create and deliver a StackOverflowError. |
| NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode |
| |
| // Called by managed code to deliver an exception. |
| ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode |
| |
| |
| // Called to attempt to execute an obsolete method. |
| ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod |
| |
| |
| ENTRY art_quick_resolution_trampoline |
| SETUP_SAVE_REFS_AND_ARGS_FRAME |
| |
| // const void* artQuickResolutionTrampoline(ArtMethod* called, // a0 |
| // mirror::Object* receiver, // a1 |
| // Thread* self, // a2 |
| // ArtMethod** sp) // a3 |
| mv a2, xSELF |
| mv a3, sp |
| call artQuickResolutionTrampoline |
| CFI_REMEMBER_STATE |
| beqz a0, 1f |
| mv t0, a0 // Remember returned code pointer in t0. |
| ld a0, (sp) // artQuickResolutionTrampoline puts called method in *sp. |
| |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| jr t0 |
| 1: |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| DELIVER_PENDING_EXCEPTION |
| END art_quick_resolution_trampoline |
| |
| |
| ENTRY art_quick_test_suspend |
| SETUP_SAVE_EVERYTHING_FRAME \ |
| RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET |
| mv a0, xSELF |
| call artTestSuspendFromCode |
| RESTORE_SAVE_EVERYTHING_FRAME |
| ret |
| END art_quick_test_suspend |
| |
| |
| ENTRY art_quick_compile_optimized |
| SETUP_SAVE_EVERYTHING_FRAME |
| ld a0, FRAME_SIZE_SAVE_EVERYTHING(sp) // pass ArtMethod |
| mv a1, xSELF // pass Thread::Current |
| call artCompileOptimized // (ArtMethod*, Thread*) |
| RESTORE_SAVE_EVERYTHING_FRAME |
| // Note: If we implement implicit suspend checks or a marking register for GC, we don't need |
| // to restore such registers here, as artCompileOptimized doesn't allow thread suspension. |
| ret |
| END art_quick_compile_optimized |
| |
| |
| /* extern"C" void art_quick_osr_stub(void* stack, A0 |
| * size_t stack_size_in_bytes, A1 |
| * const uint8_t* native_pc, A2 |
| * JValue* result, A3 |
| * char* shorty, A4 |
| * Thread* self) A5 |
| */ |
| ENTRY art_quick_osr_stub |
| // Save all callee-save registers (we do not fill the spill area in the OSR frame, so we |
| // need to preserve them here) and A3 (it will be needed after the OSR method returns). |
| // Also add space for the `ArtMethod*` slot (null to indicate transition) and padding. |
| SAVE_SIZE=(12 + 12 + /* RA */ 1 + /* A3 */ 1 + /* ArtMethod* */ 1 + /* padding */ 1) * 8 |
| INCREASE_FRAME SAVE_SIZE |
| sd zero, 0*8(sp) // Store null to the `ArtMethod*` slot to indicate transition. |
| // Skip padding. |
| SAVE_GPR a3, 2*8 // Save `result`. |
| SAVE_FPR fs0, 3*8 |
| SAVE_FPR fs1, 4*8 |
| SAVE_FPR fs2, 5*8 |
| SAVE_FPR fs3, 6*8 |
| SAVE_FPR fs4, 7*8 |
| SAVE_FPR fs5, 8*8 |
| SAVE_FPR fs6, 9*8 |
| SAVE_FPR fs7, 10*8 |
| SAVE_FPR fs8, 11*8 |
| SAVE_FPR fs9, 12*8 |
| SAVE_FPR fs10, 13*8 |
| SAVE_FPR fs11, 14*8 |
| SAVE_GPR s2, 15*8 |
| SAVE_GPR s3, 16*8 |
| SAVE_GPR s4, 17*8 |
| SAVE_GPR s5, 18*8 |
| SAVE_GPR s6, 19*8 |
| SAVE_GPR s7, 20*8 |
| SAVE_GPR s8, 21*8 |
| SAVE_GPR s9, 22*8 |
| SAVE_GPR s10, 23*8 |
| SAVE_GPR s11, 24*8 |
| SAVE_GPR xSELF, 25*8 // Save xSELF/S1. |
| SAVE_GPR fp, 26*8 // Save FP/S0. |
| SAVE_GPR ra, 27*8 // Save return address. |
| |
| // Make the new FP point to the location where we stored the old FP. |
| // Some stack-walking tools may rely on this simply-linked list of saved FPs. |
| addi fp, sp, (26*8) // save frame pointer |
| .cfi_def_cfa fp, SAVE_SIZE - (26*8) |
| |
| mv xSELF, a5 |
| |
| CFI_REMEMBER_STATE |
| jal .Losr_entry |
| |
| // The called method removes the stack frame created in `.Losr_entry`. |
| // The SP is already correctly restored, we do not need to restore it from FP. |
| .cfi_def_cfa sp, SAVE_SIZE |
| |
| // Restore saved registers including the result address. |
| RESTORE_GPR a3, 2*8 // Restore `result`. |
| RESTORE_FPR fs0, 3*8 |
| RESTORE_FPR fs1, 4*8 |
| RESTORE_FPR fs2, 5*8 |
| RESTORE_FPR fs3, 6*8 |
| RESTORE_FPR fs4, 7*8 |
| RESTORE_FPR fs5, 8*8 |
| RESTORE_FPR fs6, 9*8 |
| RESTORE_FPR fs7, 10*8 |
| RESTORE_FPR fs8, 11*8 |
| RESTORE_FPR fs9, 12*8 |
| RESTORE_FPR fs10, 13*8 |
| RESTORE_FPR fs11, 14*8 |
| RESTORE_GPR s2, 15*8 |
| RESTORE_GPR s3, 16*8 |
| RESTORE_GPR s4, 17*8 |
| RESTORE_GPR s5, 18*8 |
| RESTORE_GPR s6, 19*8 |
| RESTORE_GPR s7, 20*8 |
| RESTORE_GPR s8, 21*8 |
| RESTORE_GPR s9, 22*8 |
| RESTORE_GPR s10, 23*8 |
| RESTORE_GPR s11, 24*8 |
| RESTORE_GPR xSELF, 25*8 // Restore xSELF/S1. |
| RESTORE_GPR fp, 26*8 // Restore FP/S0. |
| RESTORE_GPR ra, 27*8 // Restore return address. |
| DECREASE_FRAME SAVE_SIZE |
| |
| // The compiler put the result in A0. Doesn't matter if it is 64 or 32 bits. |
| sd a0, (a3) |
| ret |
| |
| .Losr_entry: |
| CFI_RESTORE_STATE_AND_DEF_CFA fp, SAVE_SIZE - (26*8) |
| |
| // Prepare the destination register for backward copy of arguments. |
| addi t1, sp, -8 |
| |
| // Update stack pointer for the callee frame. |
| sub sp, sp, a1 |
| |
| // Subtract the return address slot size from args size. |
| addi a1, a1, -8 |
| |
| // Update return address slot expected by the callee. |
| sd ra, (t1) |
| |
| // Prepare the source register for backward copy of arguments. |
| add t0, a0, a1 |
| |
| // Copy arguments into stack frame. Use simple backward-copy routine for now. |
| // There is always at least the `ArtMethod*` to to copy. |
| // A0 - source address |
| // A1 - args length |
| // SP - destination address. |
| // T0 - loop variable initialized to A0 + A1 for backward copy |
| // T1 - loop variable initialized to SP + A1 for backward copy |
| // T2 - temporary for holding the copied value |
| .Losr_loop: |
| addi t0, t0, -8 |
| ld t2, (t0) |
| addi t1, t1, -8 |
| sd t2, (t1) |
| bne t1, sp, .Losr_loop |
| |
| // Branch to the OSR entry point. |
| jr a2 |
| |
| END art_quick_osr_stub |
| |
| |
| /* |
| * All generated callsites for interface invokes and invocation slow paths will load arguments |
| * as usual - except instead of loading arg0/A0 with the target Method*, arg0/A0 will contain |
| * the method_idx. This wrapper will call the appropriate C++ helper while preserving arguments |
| * and allowing a moving GC to update references in callee-save registers. |
| * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/A1. |
| * |
| * The helper will attempt to locate the target and return a 128-bit result consisting of the |
| * target `ArtMethod*` in A0 and its `entry_point_from_quick_compiled_code_` in A1. |
| * |
| * If unsuccessful, the helper will return null/null. There will be a pending exception |
| * to deliver in the thread. |
| * |
| * On success this wrapper will restore arguments and *jump* to the target, leaving the RA |
| * pointing back to the original caller. |
| */ |
| .macro INVOKE_TRAMPOLINE_BODY cxx_name |
| .extern \cxx_name |
| SETUP_SAVE_REFS_AND_ARGS_FRAME |
| mv a2, xSELF // Pass Thread::Current(). |
| mv a3, sp // Pass pointer to the saved frame context. |
| call \cxx_name // (method_idx, this, Thread*, $sp) |
| mv t0, a1 // Save method's code pointer in T0. |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| beq a0, zero, 1f |
| jr t0 |
| 1: |
| DELIVER_PENDING_EXCEPTION |
| .endm |
| |
| .macro INVOKE_TRAMPOLINE c_name, cxx_name |
| ENTRY \c_name |
| INVOKE_TRAMPOLINE_BODY \cxx_name |
| END \c_name |
| .endm |
| |
| INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, \ |
| artInvokeInterfaceTrampolineWithAccessCheck |
| INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, \ |
| artInvokeStaticTrampolineWithAccessCheck |
| INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, \ |
| artInvokeDirectTrampolineWithAccessCheck |
| INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, \ |
| artInvokeSuperTrampolineWithAccessCheck |
| INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, \ |
| artInvokeVirtualTrampolineWithAccessCheck |
| |
| /* |
| * Polymorphic method invocation. |
| * On entry: |
| * A0 = unused |
| * A1 = receiver |
| */ |
| .extern artInvokePolymorphic |
| ENTRY art_quick_invoke_polymorphic |
| SETUP_SAVE_REFS_AND_ARGS_FRAME |
| mv a0, a1 // Pass the receiver. |
| mv a1, xSELF // Pass Thread::Current(). |
| mv a2, sp // Pass pointer to the saved frame context. |
| call artInvokePolymorphic // artInvokePolymorphic(receiver, Thread*, context) |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| fmv.d.x fa0, a0 // Copy the result also to the FP return register. |
| RETURN_OR_DELIVER_PENDING_EXCEPTION_REG t0 |
| END art_quick_invoke_polymorphic |
| |
| /* |
| * InvokeCustom invocation. |
| * On entry: |
| * A0 = call_site_idx |
| */ |
| .extern artInvokeCustom |
| ENTRY art_quick_invoke_custom |
| SETUP_SAVE_REFS_AND_ARGS_FRAME |
| mv a1, xSELF // Pass Thread::Current(). |
| mv a2, sp // Pass pointer to the saved frame context. |
| call artInvokeCustom // artInvokeCustom(call_site_idx, Thread*, context) |
| RESTORE_SAVE_REFS_AND_ARGS_FRAME |
| fmv.d.x fa0, a0 // Copy the result also to the FP return register. |
| RETURN_OR_DELIVER_PENDING_EXCEPTION_REG t0 |
| END art_quick_invoke_custom |
| |
| /* |
| * Called to resolve an imt conflict. |
| * On entry: |
| * A0 is the conflict ArtMethod. |
| * T0 is a hidden argument that holds the target interface method's dex method index. |
| */ |
| ENTRY art_quick_imt_conflict_trampoline |
| ld t1, ART_METHOD_JNI_OFFSET_64(a0) // Load ImtConflictTable |
| ld a0, 0(t1) // Load first entry in ImtConflictTable. |
| .Limt_table_iterate: |
| // Branch if found. |
| beq a0, t0, .Limt_table_found |
| |
| // If the entry is null, the interface method is not in the ImtConflictTable. |
| beqz a0, .Lconflict_trampoline |
| // Iterate over the entries of the ImtConflictTable. |
| addi t1, t1, (2 * __SIZEOF_POINTER__) |
| ld a0, 0(t1) |
| j .Limt_table_iterate |
| .Limt_table_found: |
| // We successfully hit an entry in the table. Load the target method and jump to it. |
| ld a0, __SIZEOF_POINTER__(t1) |
| ld t1, ART_METHOD_QUICK_CODE_OFFSET_64(a0) |
| jr t1 |
| .Lconflict_trampoline: |
| // Call the runtime stub to populate the ImtConflictTable and jump to the |
| // resolved method. |
| move a0, t0 // Load interface method |
| INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline |
| END art_quick_imt_conflict_trampoline |
| |
| |
| .macro UPDATE_INLINE_CACHE_ENTRY class, entry, temp, loop_label, done_label, next_label |
| \loop_label: |
| lwu \temp, (\entry) |
| beq \class, \temp, \done_label |
| bnez \temp, \next_label |
| lr.w \temp, (\entry) |
| bnez \temp, \loop_label |
| sc.w \temp, \class, (\entry) |
| beqz \temp, \done_label |
| j \loop_label |
| .endm |
| |
| // A0 contains the class, T5 contains the inline cache. T6 can be used, T5 can be clobbered. |
| ENTRY art_quick_update_inline_cache |
| #if (INLINE_CACHE_SIZE != 5) |
| #error "INLINE_CACHE_SIZE not as expected." |
| #endif |
| #if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) |
| // Don't update the cache if we are marking. |
| lwu t6, THREAD_IS_GC_MARKING_OFFSET(xSELF) |
| bnez t6, .Ldone |
| #endif |
| addi t5, t5, INLINE_CACHE_CLASSES_OFFSET |
| UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry1_loop, .Ldone, .Lentry2 |
| .Lentry2: |
| addi t5, t5, 4 |
| UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry2_loop, .Ldone, .Lentry3 |
| .Lentry3: |
| addi t5, t5, 4 |
| UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry3_loop, .Ldone, .Lentry4 |
| .Lentry4: |
| addi t5, t5, 4 |
| UPDATE_INLINE_CACHE_ENTRY a0, t5, t6, .Lentry4_loop, .Ldone, .Lentry5 |
| .Lentry5: |
| // Unconditionally store, the inline cache is megamorphic. |
| sw a0, 4(t5) |
| .Ldone: |
| ret |
| END art_quick_update_inline_cache |
| |
| |
| .macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL \ |
| name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET |
| .extern \entrypoint |
| ENTRY \name |
| SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset // Save everything for stack crawl. |
| mv a1, xSELF // Pass Thread::Current(). |
| call \entrypoint // (uint32_t/Class* index/klass, Thread* self) |
| beqz a0, 1f // If result is null, deliver the exception. |
| DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_A0 /* temp= */ a1, /* is_ref= */ 1 |
| 1: |
| DELIVER_PENDING_EXCEPTION_FRAME_READY |
| END \name |
| .endm |
| |
| |
| .macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL \ |
| \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET |
| .endm |
| |
| |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT \ |
| art_quick_initialize_static_storage, artInitializeStaticStorageFromCode |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL \ |
| art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode |
| ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode |
| |
| |
| // Helper macros for `art_quick_aput_obj`. |
| #ifdef USE_READ_BARRIER |
| #ifdef USE_BAKER_READ_BARRIER |
| .macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD dest, obj, offset, gray_slow_path_label |
| lw t6, MIRROR_OBJECT_LOCK_WORD_OFFSET(\obj) |
| slliw t6, t6, 31 - LOCK_WORD_READ_BARRIER_STATE_SHIFT // Shift the state bit to sign bit. |
| bltz t6, \gray_slow_path_label |
| // False dependency to avoid needing load/load fence. |
| xor t6, t6, t6 |
| add \obj, \obj, t6 |
| lwu \dest, \offset(\obj) // Heap reference = 32b; zero-extends to `dest`. |
| UNPOISON_HEAP_REF \dest |
| .endm |
| |
| .macro BAKER_RB_LOAD_AND_MARK dest, obj, offset, mark_function |
| lwu \dest, \offset(\obj) // Heap reference = 32b; zero-extends to `dest`. |
| UNPOISON_HEAP_REF \dest |
| // Save RA in a register preserved by `art_quick_read_barrier_mark_regNN` |
| // and unused by the `art_quick_aput_obj`. |
| mv t2, ra |
| call \mark_function |
| mv ra, t2 // Restore RA. |
| .endm |
| #else // USE_BAKER_READ_BARRIER |
| .extern artReadBarrierSlow |
| .macro READ_BARRIER_SLOW dest, obj, offset |
| // Store registers used in art_quick_aput_obj (a0-a4, RA), stack is 16B aligned. |
| INCREASE_FRAME 48 |
| SAVE_GPR a0, 0*8 |
| SAVE_GPR a1, 1*8 |
| SAVE_GPR a2, 2*8 |
| SAVE_GPR a3, 3*8 |
| SAVE_GPR a4, 4*8 |
| SAVE_GPR ra, 5*8 |
| |
| // mv a0, \ref // Pass ref in A0 (no-op for now since parameter ref is unused). |
| .ifnc \obj, a1 |
| mv a1, \obj // Pass `obj`. |
| .endif |
| li a2, \offset // Pass offset. |
| call artReadBarrierSlow // artReadBarrierSlow(ref, obj, offset) |
| // No need to unpoison return value in A0, `artReadBarrierSlow()` would do the unpoisoning. |
| .ifnc \dest, a0 |
| mv \dest, a0 // save return value in dest |
| .endif |
| |
| // Conditionally restore saved registers |
| RESTORE_GPR_NE a0, 0*8, \dest |
| RESTORE_GPR_NE a1, 1*8, \dest |
| RESTORE_GPR_NE a2, 2*8, \dest |
| RESTORE_GPR_NE a3, 3*8, \dest |
| RESTORE_GPR_NE a4, 4*8, \dest |
| RESTORE_GPR ra, 5*8 |
| DECREASE_FRAME 48 |
| .endm |
| #endif // USE_BAKER_READ_BARRIER |
| #endif // USE_READ_BARRIER |
| |
| ENTRY art_quick_aput_obj |
| beqz a2, .Laput_obj_null |
| #if defined(USE_READ_BARRIER) && !defined(USE_BAKER_READ_BARRIER) |
| READ_BARRIER_SLOW a3, a0, MIRROR_OBJECT_CLASS_OFFSET |
| READ_BARRIER_SLOW a3, a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET |
| READ_BARRIER_SLOW a4, a2, MIRROR_OBJECT_CLASS_OFFSET |
| #else // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER) |
| #ifdef USE_READ_BARRIER |
| // TODO(riscv64): Define marking register to avoid this load. |
| lw t6, THREAD_IS_GC_MARKING_OFFSET(xSELF) |
| bnez t6, .Laput_obj_gc_marking |
| #endif // USE_READ_BARRIER |
| lwu a3, MIRROR_OBJECT_CLASS_OFFSET(a0) // Heap reference = 32b; zero-extends to a3. |
| UNPOISON_HEAP_REF a3 |
| lwu a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(a3) // Heap reference = 32b; zero-extends to a3. |
| UNPOISON_HEAP_REF a3 |
| lwu a4, MIRROR_OBJECT_CLASS_OFFSET(a2) // Heap reference = 32b; zero-extends to a4. |
| UNPOISON_HEAP_REF a4 |
| #endif // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER) |
| // value's type == array's component type - trivial assignability |
| bne a3, a4, .Laput_obj_check_assignability |
| .Laput_obj_store: |
| sh2add a3, a1, a0 |
| POISON_HEAP_REF a2 |
| sw a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET(a3) // Heap reference = 32b. |
| ld a3, THREAD_CARD_TABLE_OFFSET(xSELF) |
| srli a0, a0, CARD_TABLE_CARD_SHIFT |
| add a0, a0, a3 |
| sb a3, (a0) |
| ret |
| |
| .Laput_obj_null: |
| sh2add a3, a1, a0 |
| sw a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET(a3) // Heap reference = 32b. |
| ret |
| |
| .Laput_obj_check_assignability: |
| // Store arguments and return register |
| INCREASE_FRAME 32 |
| SAVE_GPR a0, 0*8 |
| SAVE_GPR a1, 1*8 |
| SAVE_GPR a2, 2*8 |
| SAVE_GPR ra, 3*8 |
| |
| // Call runtime code |
| mv a0, a3 // Heap reference, 32b, "uncompress" = do nothing, already zero-extended. |
| mv a1, a4 // Heap reference, 32b, "uncompress" = do nothing, already zero-extended. |
| call artIsAssignableFromCode |
| |
| // Check for exception |
| CFI_REMEMBER_STATE |
| beqz a0, .Laput_obj_throw_array_store_exception |
| |
| // Restore |
| RESTORE_GPR a0, 0*8 |
| RESTORE_GPR a1, 1*8 |
| RESTORE_GPR a2, 2*8 |
| RESTORE_GPR ra, 3*8 |
| DECREASE_FRAME 32 |
| |
| sh2add a3, a1, a0 |
| POISON_HEAP_REF a2 |
| sw a2, MIRROR_OBJECT_ARRAY_DATA_OFFSET(a3) // Heap reference = 32b. |
| ld a3, THREAD_CARD_TABLE_OFFSET(xSELF) |
| srli a0, a0, CARD_TABLE_CARD_SHIFT |
| add a0, a0, a3 |
| sb a3, (a0) |
| ret |
| |
| .Laput_obj_throw_array_store_exception: |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, 32 |
| RESTORE_GPR a0, 0*8 |
| RESTORE_GPR a1, 1*8 |
| RESTORE_GPR a2, 2*8 |
| RESTORE_GPR ra, 3*8 |
| DECREASE_FRAME 32 |
| |
| #if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) |
| CFI_REMEMBER_STATE |
| #endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) |
| SETUP_SAVE_ALL_CALLEE_SAVES_FRAME |
| mv a1, a2 // Pass value. |
| mv a2, xSELF // Pass Thread::Current(). |
| call artThrowArrayStoreException // (Object*, Object*, Thread*). |
| unimp // Unreachable. |
| |
| #if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, 0 |
| .Laput_obj_gc_marking: |
| BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \ |
| a3, a0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class |
| .Laput_obj_mark_array_class_continue: |
| BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \ |
| a3, a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element |
| .Laput_obj_mark_array_element_continue: |
| BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \ |
| a4, a2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class |
| .Laput_obj_mark_object_class_continue: |
| // value's type == array's component type - trivial assignability |
| bne a3, a4, .Laput_obj_check_assignability |
| j .Laput_obj_store |
| |
| .Laput_obj_mark_array_class: |
| BAKER_RB_LOAD_AND_MARK a3, a0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg13 |
| j .Laput_obj_mark_array_class_continue |
| |
| .Laput_obj_mark_array_element: |
| BAKER_RB_LOAD_AND_MARK \ |
| a3, a3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg13 |
| j .Laput_obj_mark_array_element_continue |
| |
| .Laput_obj_mark_object_class: |
| BAKER_RB_LOAD_AND_MARK a4, a2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg14 |
| j .Laput_obj_mark_object_class_continue |
| #endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER) |
| END art_quick_aput_obj |
| |
| |
| // Create a function `name` calling the art::ReadBarrier::Mark routine, getting its argument and |
| // returning its result through \reg, saving and restoring all caller-save registers. |
| // |
| // The generated function follows a non-standard calling convention: |
| // - register `reg` is used to pass the singleton argument, |
| // - register `reg` is used to return the result, |
| // - all other registers are callee-save (the values they hold are preserved). |
| .macro READ_BARRIER_MARK_REG name, reg |
| ENTRY \name |
| beqz \reg, .Lrb_return_\name // early return if null |
| |
| // Save t5 and t6 onto stack to honor caller-save calling convention. |
| INCREASE_FRAME 16 |
| SAVE_GPR t5, (8*0) |
| SAVE_GPR t6, (8*1) |
| |
| lw t5, MIRROR_OBJECT_LOCK_WORD_OFFSET(\reg) // t5 := lock word |
| slliw t6, t5, 31-LOCK_WORD_MARK_BIT_SHIFT // mark bit into MSB |
| bltz t6, .Lrb_tmp_restore_\name |
| // Check if the top two bits are set. If so, it is a forwarding address. |
| slliw t6, t5, 1 |
| and t6, t6, t5 |
| CFI_REMEMBER_STATE |
| bgez t6, .Lrb_full_\name |
| // Extract and zero-extend the forwarding address. |
| slli \reg, t5, (LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT + 32) |
| srli \reg, \reg, 32 |
| .ifc \reg, t5 |
| sd t5, (8*0)(sp) |
| .endif |
| .ifc \reg, t6 |
| sd t6, (8*1)(sp) |
| .endif |
| .Lrb_tmp_restore_\name: |
| RESTORE_GPR t5, (8*0) |
| RESTORE_GPR t6, (8*1) |
| DECREASE_FRAME 16 |
| .Lrb_return_\name: |
| ret |
| .Lrb_full_\name: |
| CFI_RESTORE_STATE_AND_DEF_CFA sp, 16 |
| // Save remaining caller-save registers on stack. t5 and t6 already saved. |
| // fa0-fa7, ft0-ft11, a0-a7, t0-t4, ra: 8 * (8 + 12 + 8 + 5 + 1) = 8 * 34 = 272 bytes |
| INCREASE_FRAME 272 |
| SAVE_FPR fa0, (8*0) |
| SAVE_FPR fa1, (8*1) |
| SAVE_FPR fa2, (8*2) |
| SAVE_FPR fa3, (8*3) |
| SAVE_FPR fa4, (8*4) |
| SAVE_FPR fa5, (8*5) |
| SAVE_FPR fa6, (8*6) |
| SAVE_FPR fa7, (8*7) |
| SAVE_FPR ft0, (8*8) |
| SAVE_FPR ft1, (8*9) |
| SAVE_FPR ft2, (8*10) |
| SAVE_FPR ft3, (8*11) |
| SAVE_FPR ft4, (8*12) |
| SAVE_FPR ft5, (8*13) |
| SAVE_FPR ft6, (8*14) |
| SAVE_FPR ft7, (8*15) |
| SAVE_FPR ft8, (8*16) |
| SAVE_FPR ft9, (8*17) |
| SAVE_FPR ft10, (8*18) |
| SAVE_FPR ft11, (8*19) |
| |
| SAVE_GPR a0, (8*20) |
| SAVE_GPR a1, (8*21) |
| SAVE_GPR a2, (8*22) |
| SAVE_GPR a3, (8*23) |
| SAVE_GPR a4, (8*24) |
| SAVE_GPR a5, (8*25) |
| SAVE_GPR a6, (8*26) |
| SAVE_GPR a7, (8*27) |
| SAVE_GPR t0, (8*28) |
| SAVE_GPR t1, (8*29) |
| SAVE_GPR t2, (8*30) |
| SAVE_GPR t3, (8*31) |
| SAVE_GPR t4, (8*32) |
| SAVE_GPR ra, (8*33) |
| |
| .ifc \reg, t5 |
| ld a0, (8*34)(sp) |
| .else |
| .ifc \reg, t6 |
| ld a0, (8*35)(sp) |
| .else |
| .ifnc \reg, a0 |
| mv a0, \reg |
| .endif |
| .endif |
| .endif |
| call artReadBarrierMark |
| .ifnc \reg, a0 |
| mv \reg, a0 |
| .endif |
| |
| // Restore all caller-save registers from stack, including t5 and t6. |
| // fa0-fa7, ft0-ft11, ra, a0-a7, t0-t6: 8 * (8 + 12 + 1 + 8 + 7) = 8 * 36 = 288 bytes |
| RESTORE_FPR fa0, (8*0) |
| RESTORE_FPR fa1, (8*1) |
| RESTORE_FPR fa2, (8*2) |
| RESTORE_FPR fa3, (8*3) |
| RESTORE_FPR fa4, (8*4) |
| RESTORE_FPR fa5, (8*5) |
| RESTORE_FPR fa6, (8*6) |
| RESTORE_FPR fa7, (8*7) |
| RESTORE_FPR ft0, (8*8) |
| RESTORE_FPR ft1, (8*9) |
| RESTORE_FPR ft2, (8*10) |
| RESTORE_FPR ft3, (8*11) |
| RESTORE_FPR ft4, (8*12) |
| RESTORE_FPR ft5, (8*13) |
| RESTORE_FPR ft6, (8*14) |
| RESTORE_FPR ft7, (8*15) |
| RESTORE_FPR ft8, (8*16) |
| RESTORE_FPR ft9, (8*17) |
| RESTORE_FPR ft10, (8*18) |
| RESTORE_FPR ft11, (8*19) |
| RESTORE_GPR_NE \reg, a0, (8*20) |
| RESTORE_GPR_NE \reg, a1, (8*21) |
| RESTORE_GPR_NE \reg, a2, (8*22) |
| RESTORE_GPR_NE \reg, a3, (8*23) |
| RESTORE_GPR_NE \reg, a4, (8*24) |
| RESTORE_GPR_NE \reg, a5, (8*25) |
| RESTORE_GPR_NE \reg, a6, (8*26) |
| RESTORE_GPR_NE \reg, a7, (8*27) |
| RESTORE_GPR_NE \reg, t0, (8*28) |
| RESTORE_GPR_NE \reg, t1, (8*29) |
| RESTORE_GPR_NE \reg, t2, (8*30) |
| RESTORE_GPR_NE \reg, t3, (8*31) |
| RESTORE_GPR_NE \reg, t4, (8*32) |
| RESTORE_GPR_NE \reg, ra, (8*33) |
| RESTORE_GPR_NE \reg, t5, (8*34) |
| RESTORE_GPR_NE \reg, t6, (8*35) |
| DECREASE_FRAME 288 |
| ret |
| END \name |
| .endm |
| |
| |
| // No read barrier for X0 (Zero), X1 (RA), X2 (SP), X3 (GP) and X4 (TP). |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, t0 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, t1 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, t2 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, s0 |
| // No read barrier for X9 (S1/xSELF). |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, a0 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, a1 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, a2 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, a3 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, a4 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, a5 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, a6 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, a7 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, s2 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, s3 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, s4 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, s5 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, s6 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, s7 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, s8 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, s9 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, s10 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, s11 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, t3 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, t4 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg30, t5 |
| READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg31, t6 |
| |
| |
| .macro N_ARG_DOWNCALL n, name, entrypoint, return |
| .extern \entrypoint |
| ENTRY \name |
| SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. |
| mv a\n, xSELF // Pass Thread::Current(). |
| call \entrypoint // (<n args>, Thread*) |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| \return |
| END \name |
| .endm |
| |
| |
| .macro ONE_ARG_DOWNCALL name, entrypoint, return |
| N_ARG_DOWNCALL 1, \name, \entrypoint, \return |
| .endm |
| |
| |
| .macro TWO_ARG_DOWNCALL name, entrypoint, return |
| N_ARG_DOWNCALL 2, \name, \entrypoint, \return |
| .endm |
| |
| |
| .macro THREE_ARG_DOWNCALL name, entrypoint, return |
| N_ARG_DOWNCALL 3, \name, \entrypoint, \return |
| .endm |
| |
| |
| .macro FOUR_ARG_DOWNCALL name, entrypoint, return |
| N_ARG_DOWNCALL 4, \name, \entrypoint, \return |
| .endm |
| |
| |
| // Entry from managed code that calls artHandleFillArrayDataFromCode and |
| // delivers exception on failure. |
| TWO_ARG_DOWNCALL art_quick_handle_fill_data, \ |
| artHandleFillArrayDataFromCode, \ |
| RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER |
| |
| |
| // Generate the allocation entrypoints for each allocator. |
| GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS |
| // Comment out allocators that have riscv64 specific asm. |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB) |
| |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB) |
| // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB) |
| GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB) |
| |
| // If isInitialized=1 then the compiler assumes the object's class has already been initialized. |
| // If isInitialized=0 the compiler can only assume it's been at least resolved. |
| .macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized |
| ENTRY \c_name |
| // Fast path rosalloc allocation. |
| // a0: type, xSELF(s1): Thread::Current |
| // a1-a7: free. |
| ld a3, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(xSELF) // Check if the thread local |
| // allocation stack has room. |
| // ldp won't work due to large offset. |
| ld a4, THREAD_LOCAL_ALLOC_STACK_END_OFFSET(xSELF) |
| bgeu a3, a4, .Lslow_path\c_name |
| lwu a3, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(a0) // Load the object size (a3) |
| li a5, ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread |
| // local allocation. |
| // If the class is not yet visibly initialized, or it is finalizable, |
| // the object size will be very large to force the branch below to be taken. |
| // |
| // See Class::SetStatus() in class.cc for more details. |
| bgeu a3, a5, .Lslow_path\c_name |
| // Compute the rosalloc bracket index |
| // from the size. Since the size is |
| // already aligned we can combine the |
| // two shifts together. |
| #if ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT != POINTER_SIZE_SHIFT |
| #error "Unexpected ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT != POINTER_SIZE_SHIFT" |
| #endif |
| // No-op: srli a3, a3, (ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT) |
| add a4, xSELF, a3 |
| // Subtract pointer size since there |
| // are no runs for 0 byte allocations |
| // and the size is already aligned. |
| ld a4, (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(a4) |
| // Load the free list head (a3). This |
| // will be the return val. |
| ld a3, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(a4) |
| beqz a3, .Lslow_path\c_name |
| // "Point of no slow path". Won't go to the slow path from here on. OK to clobber a0 and a1. |
| ld a1, ROSALLOC_SLOT_NEXT_OFFSET(a3) // Load the next pointer of the head |
| // and update the list head with the |
| // next pointer. |
| sd a1, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(a4) |
| // Store the class pointer in the |
| // header. This also overwrites the |
| // next pointer. The offsets are |
| // asserted to match. |
| |
| #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET |
| #error "Class pointer needs to overwrite next pointer." |
| #endif |
| POISON_HEAP_REF a0 |
| sw a0, MIRROR_OBJECT_CLASS_OFFSET(a3) |
| // Push the new object onto the thread |
| // local allocation stack and |
| // increment the thread local |
| // allocation stack top. |
| ld a1, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(xSELF) |
| sw a3, (a1) |
| addi a1, a1, COMPRESSED_REFERENCE_SIZE // Increment A1 to point to next slot. |
| sd a1, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(xSELF) // Decrement the size of the free list. |
| |
| // After this "SD" the object is published to the thread local allocation stack, |
| // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view. |
| // It is not yet visible to the running (user) compiled code until after the return. |
| // |
| // To avoid the memory barrier prior to the "SD", a trick is employed, by differentiating |
| // the state of the allocation stack slot. It can be a pointer to one of: |
| // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet. |
| // (The stack initial state is "null" pointers). |
| // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot. |
| // 2) A fully valid object, with a valid class pointer pointing to a real class. |
| // Other states are not allowed. |
| // |
| // An object that is invalid only temporarily, and will eventually become valid. |
| // The internal runtime code simply checks if the object is not null or is partial and then |
| // ignores it. |
| // |
| // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing |
| // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot |
| // "next" pointer is not-cyclic.) |
| // |
| // See also b/28790624 for a listing of CLs dealing with this race. |
| lwu a1, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(a4) |
| addi a1, a1, -1 |
| sw a1, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(a4) |
| |
| mv a0, a3 // Set the return value and return. |
| // No barrier. The class is already observably initialized (otherwise the fast |
| // path size check above would fail) and new-instance allocations are protected |
| // from publishing by the compiler which inserts its own StoreStore barrier. |
| ret |
| .Lslow_path\c_name: |
| SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. |
| mv a1, xSELF // Pass Thread::Current(). |
| call \cxx_name |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER |
| END \c_name |
| .endm |
| |
| ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, \ |
| artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0 |
| ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, \ |
| artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1 |
| |
| // If isInitialized=1 then the compiler assumes the object's class has already been initialized. |
| // If isInitialized=0 the compiler can only assume it's been at least resolved. |
| .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized |
| ld a4, THREAD_LOCAL_POS_OFFSET(xSELF) |
| ld a5, THREAD_LOCAL_END_OFFSET(xSELF) |
| lwu a7, MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(a0) // Load the object size (a7). |
| add a6, a4, a7 // Add object size to tlab pos. |
| // Check if it fits, overflow works |
| // since the tlab pos and end are 32 |
| // bit values. |
| |
| // If the class is not yet visibly initialized, or it is finalizable, |
| // the object size will be very large to force the branch below to be taken. |
| // |
| // See Class::SetStatus() in class.cc for more details. |
| bgtu a6, a5, \slowPathLabel |
| sd a6, THREAD_LOCAL_POS_OFFSET(xSELF) // Store new thread_local_pos. |
| POISON_HEAP_REF a0 |
| sw a0, MIRROR_OBJECT_CLASS_OFFSET(a4) // Store the class pointer. |
| mv a0, a4 |
| // No barrier. The class is already observably initialized (otherwise the fast |
| // path size check above would fail) and new-instance allocations are protected |
| // from publishing by the compiler which inserts its own StoreStore barrier. |
| ret |
| .endm |
| |
| // The common code for art_quick_alloc_object_*region_tlab |
| // Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up. |
| // Caller must execute a constructor fence after this. |
| .macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized |
| ENTRY \name |
| // Fast path region tlab allocation. |
| // a0: type, xSELF(s1): Thread::Current |
| // a1-a7: free. |
| ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized |
| .Lslow_path\name: |
| SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. |
| mv a1, xSELF // Pass Thread::Current(). |
| call \entrypoint // (mirror::Class*, Thread*) |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER |
| END \name |
| .endm |
| |
| GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \ |
| art_quick_alloc_object_resolved_region_tlab, \ |
| artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0 |
| GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \ |
| art_quick_alloc_object_initialized_region_tlab, \ |
| artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1 |
| GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \ |
| art_quick_alloc_object_resolved_tlab, \ |
| artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0 |
| GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \ |
| art_quick_alloc_object_initialized_tlab, \ |
| artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1 |
| |
| .macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE \ |
| slowPathLabel, class, count, temp0, temp1, temp2 |
| andi \temp1, \temp1, OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask |
| // (addr + 7) & ~7. The mask must |
| // be 64 bits to keep high bits in |
| // case of overflow. |
| // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value. |
| // Negative ints become large 64 bit unsigned ints which will always be larger than max signed |
| // 32 bit int. Since the max shift for arrays is 3, it can not become a negative 64 bit int. |
| li \temp2, MIN_LARGE_OBJECT_THRESHOLD // Possibly a large object, go slow |
| bgeu \temp1, \temp2, \slowPathLabel // path. |
| |
| ld \temp0, THREAD_LOCAL_POS_OFFSET(xSELF) // Check tlab for space, note that |
| // we use (end - begin) to handle |
| // negative size arrays. It is |
| // assumed that a negative size will |
| // always be greater unsigned than |
| // region size. |
| ld \temp2, THREAD_LOCAL_END_OFFSET(xSELF) |
| sub \temp2, \temp2, \temp0 |
| |
| // The array class is always initialized here. Unlike new-instance, |
| // this does not act as a double test. |
| bgtu \temp1, \temp2, \slowPathLabel |
| // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1. |
| // Move old thread_local_pos to x0 |
| // for the return value. |
| mv a0, \temp0 |
| add \temp0, \temp0, \temp1 |
| sd \temp0, THREAD_LOCAL_POS_OFFSET(xSELF) // Store new thread_local_pos. |
| POISON_HEAP_REF \class |
| sw \class, MIRROR_OBJECT_CLASS_OFFSET(a0) // Store the class pointer. |
| sw \count, MIRROR_ARRAY_LENGTH_OFFSET(a0) // Store the array length. |
| // new-array is special. The class is loaded and immediately goes to the Initialized state |
| // before it is published. Therefore the only fence needed is for the publication of the object. |
| // See ClassLinker::CreateArrayClass() for more details. |
| |
| // For publication of the new array, we don't need a 'fence w, w' here. |
| // The compiler generates 'fence w, w' for all new-array insts. |
| ret |
| .endm |
| |
| // Caller must execute a constructor fence after this. |
| .macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup |
| ENTRY \name |
| // Fast path array allocation for region tlab allocation. |
| // a0: mirror::Class* type |
| // a1: int32_t component_count |
| // a2-a7: free. |
| mv a3, a0 |
| \size_setup a3, a1, a4, a5, a6 |
| ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, a3, a1, a4, a5, a6 |
| .Lslow_path\name: |
| // a0: mirror::Class* klass |
| // a1: int32_t component_count |
| // a2: Thread* self |
| SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC. |
| mv a2, xSELF // Pass Thread::Current(). |
| call \entrypoint |
| RESTORE_SAVE_REFS_ONLY_FRAME |
| RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER |
| END \name |
| .endm |
| |
| .macro COMPUTE_ARRAY_SIZE_UNKNOWN class, count, temp0, temp1, temp2 |
| // Array classes are never finalizable or uninitialized, no need to check. |
| lwu \temp0, MIRROR_CLASS_COMPONENT_TYPE_OFFSET(\class) // Load component type |
| UNPOISON_HEAP_REF \temp0 |
| lwu \temp0, MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(\temp0) |
| srli \temp0, \temp0, PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16 |
| // bits. |
| zext.w \temp1, \count // From \count we use a 32 bit value, |
| // it can not overflow. |
| sll \temp1, \temp1, \temp0 // Calculate data size |
| // Add array data offset and alignment. |
| addi \temp1, \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) |
| #if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4 |
| #error Long array data offset must be 4 greater than int array data offset. |
| #endif |
| |
| addi \temp0, \temp0, 1 // Add 4 to the length only if the |
| // component size shift is 3 |
| // (for 64 bit alignment). |
| andi \temp0, \temp0, 4 |
| add \temp1, \temp1, \temp0 |
| .endm |
| |
| .macro COMPUTE_ARRAY_SIZE_8 class, count, temp0, temp1, temp2 |
| // Add array data offset and alignment adjustment to the `\count`. |
| li \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) |
| add.uw \temp1, \count, \temp1 |
| .endm |
| |
| .macro COMPUTE_ARRAY_SIZE_16 class, count, temp0, temp1, temp2 |
| // Add array data offset and alignment adjustment to the shifted `\count`. |
| li \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) |
| sh1add.uw \temp1, \count, \temp1 |
| .endm |
| |
| .macro COMPUTE_ARRAY_SIZE_32 class, count, temp0, temp1, temp2 |
| // Add array data offset and alignment adjustment to the shifted `\count`. |
| li \temp1, (MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) |
| sh2add.uw \temp1, \count, \temp1 |
| .endm |
| |
| .macro COMPUTE_ARRAY_SIZE_64 class, count, temp0, temp1, temp2 |
| // Add array data offset and alignment adjustment to the shifted `\count`. |
| li \temp1, (MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK) |
| sh3add.uw \temp1, \count, \temp1 |
| .endm |
| |
| // TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove |
| // the entrypoint once all backends have been updated to use the size variants. |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, \ |
| artAllocArrayFromCodeResolvedRegionTLAB, \ |
| COMPUTE_ARRAY_SIZE_UNKNOWN |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, \ |
| artAllocArrayFromCodeResolvedRegionTLAB, \ |
| COMPUTE_ARRAY_SIZE_8 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, \ |
| artAllocArrayFromCodeResolvedRegionTLAB, \ |
| COMPUTE_ARRAY_SIZE_16 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, \ |
| artAllocArrayFromCodeResolvedRegionTLAB, \ |
| COMPUTE_ARRAY_SIZE_32 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, \ |
| artAllocArrayFromCodeResolvedRegionTLAB, \ |
| COMPUTE_ARRAY_SIZE_64 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, \ |
| artAllocArrayFromCodeResolvedTLAB, \ |
| COMPUTE_ARRAY_SIZE_UNKNOWN |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, \ |
| artAllocArrayFromCodeResolvedTLAB, \ |
| COMPUTE_ARRAY_SIZE_8 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, \ |
| artAllocArrayFromCodeResolvedTLAB, \ |
| COMPUTE_ARRAY_SIZE_16 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, \ |
| artAllocArrayFromCodeResolvedTLAB, \ |
| COMPUTE_ARRAY_SIZE_32 |
| GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, \ |
| artAllocArrayFromCodeResolvedTLAB, \ |
| COMPUTE_ARRAY_SIZE_64 |
| |
| |
| GENERATE_FIELD_ENTRYPOINTS |
| |
| |
| // String's indexOf. |
| // |
| // TODO: Not very optimized. We should use the vector extension. |
| // On entry: |
| // a0: string object (known non-null) |
| // a1: char to match (known <= 0xFFFF) |
| // a2: Starting offset in string data |
| ENTRY art_quick_indexof |
| #if (STRING_COMPRESSION_FEATURE) |
| lwu a4, MIRROR_STRING_COUNT_OFFSET(a0) |
| #else |
| lwu a3, MIRROR_STRING_COUNT_OFFSET(a0) |
| #endif |
| addi a0, a0, MIRROR_STRING_VALUE_OFFSET |
| #if (STRING_COMPRESSION_FEATURE) |
| /* Split the count into length (a3) and compression flag (a4) */ |
| srliw a3, a4, 1 |
| andi a4, a4, 1 |
| #endif |
| /* Clamp start to [0..count) */ |
| sraiw a5, a2, 31 |
| andn a2, a2, a5 |
| bge a2, a3, .Lstring_indexof_nomatch |
| |
| #if (STRING_COMPRESSION_FEATURE) |
| beqz a4, .Lstring_indexof_compressed |
| #endif |
| /* Build pointers to start and end of the data to compare */ |
| sh1add a2, a2, a0 |
| sh1add a3, a3, a0 |
| |
| /* |
| * At this point we have: |
| * a0: original start of string data |
| * a1: char to compare |
| * a2: start of the data to test |
| * a3: end of the data to test |
| */ |
| |
| .Lstring_indexof_loop: |
| lhu a4, 0(a2) |
| beq a4, a1, .Lstring_indexof_match |
| addi a2, a2, 2 |
| bne a2, a3, .Lstring_indexof_loop |
| .Lstring_indexof_nomatch: |
| li a0, -1 |
| ret |
| .Lstring_indexof_match: |
| sub a0, a2, a0 |
| srli a0, a0, 1 |
| ret |
| |
| #if (STRING_COMPRESSION_FEATURE) |
| // Comparing compressed string one character at a time with the input character. |
| .Lstring_indexof_compressed: |
| add a2, a2, a0 |
| add a3, a3, a0 |
| .Lstring_indexof_compressed_loop: |
| lbu a4, (a2) |
| beq a4, a1, .Lstring_indexof_compressed_match |
| addi a2, a2, 1 |
| bne a2, a3, .Lstring_indexof_compressed_loop |
| li a0, -1 |
| ret |
| .Lstring_indexof_compressed_match: |
| sub a0, a2, a0 |
| ret |
| #endif |
| END art_quick_indexof |