// blob: 3c2445ce2ce6e4d71367b0f209d6949c0426e738 [file] [log] [blame]
/*
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "asm_support_arm64.S"
#include "interpreter/cfi_asm_support.h"
#include "arch/quick_alloc_entrypoints.S"
#include "arch/quick_field_entrypoints.S"
// Push a single register while growing the frame.
//   reg              - register to store.
//   frame_adjustment - number of bytes by which SP is lowered.
.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
// Pre-indexed store: SP -= frame_adjustment, then reg is written at the new SP.
str \reg, [sp, #-(\frame_adjustment)]!
// Tell the unwinder the CFA moved and that reg now lives at [sp, 0].
.cfi_adjust_cfa_offset (\frame_adjustment)
.cfi_rel_offset \reg, 0
.endm
// Pop a single register while shrinking the frame (inverse of SAVE_REG_INCREASE_FRAME).
//   reg              - register to reload from [sp].
//   frame_adjustment - number of bytes added back to SP after the load.
.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
// Post-indexed load: reg is read from [sp], then SP += frame_adjustment.
ldr \reg, [sp], #(\frame_adjustment)
// reg holds its pre-save value again and the CFA offset shrinks accordingly.
.cfi_restore \reg
.cfi_adjust_cfa_offset -(\frame_adjustment)
.endm
// Push a register pair while growing the frame.
//   reg1, reg2       - registers stored at [sp, 0] and [sp, 8] respectively.
//   frame_adjustment - number of bytes by which SP is lowered.
.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
// Pre-indexed pair store: SP -= frame_adjustment, then both registers are written at the new SP.
stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
// Record the CFA growth and the two save slots for the unwinder.
.cfi_adjust_cfa_offset (\frame_adjustment)
.cfi_rel_offset \reg1, 0
.cfi_rel_offset \reg2, 8
.endm
// Pop a register pair while shrinking the frame (inverse of SAVE_TWO_REGS_INCREASE_FRAME).
//   reg1, reg2       - registers reloaded from [sp, 0] and [sp, 8] respectively.
//   frame_adjustment - number of bytes added back to SP after the load.
.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
// Post-indexed pair load: both registers are read from [sp], then SP += frame_adjustment.
ldp \reg1, \reg2, [sp], #(\frame_adjustment)
// Both registers hold their pre-save values again and the CFA offset shrinks accordingly.
.cfi_restore \reg1
.cfi_restore \reg2
.cfi_adjust_cfa_offset -(\frame_adjustment)
.endm
// Discards a 96-byte frame via DECREASE_FRAME without reloading any register in this macro.
// NOTE(review): 96 is presumably the size of the kSaveRefsOnly callee-save frame; confirm
// against the frame size constant defined outside this file section.
.macro POP_SAVE_REFS_ONLY_FRAME
DECREASE_FRAME 96
.endm
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
*
* TODO This is probably too conservative - saving FP & LR.
*/
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
// Clobbers xIP0: it holds the Runtime pointer, then the ArtMethod*, then a copy of SP.
// art::Runtime* xIP0 = art::Runtime::instance_;
// Our registers aren't intermixed - just spill in order.
LOAD_RUNTIME_INSTANCE xIP0
// ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
// Allocate the frame, then let the shared helper macro fill it relative to sp.
INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp
str xIP0, [sp] // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
// Place sp in Thread::Current()->top_quick_frame.
// SP cannot be used as the data register of a store, so it is copied to xIP0 first.
mov xIP0, sp
str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
// Variant of SETUP_SAVE_REFS_AND_ARGS_FRAME for callers that already hold the
// ArtMethod* in x0: the frame is built the same way, but x0 (instead of a
// runtime callee-save method) is written to the method slot at [sp].
// Clobbers xIP0.
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
// Allocate the frame and let the shared helper macro fill it relative to sp.
INCREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL sp
// The ArtMethod* occupies the lowest slot of the frame.
str x0, [sp]
// Publish the frame in Thread::Current()->top_quick_frame. SP cannot be the
// data register of a store, so it goes through xIP0.
mov xIP0, sp
str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveEverything)
* when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
* and saving registers x29 and LR is handled elsewhere.
*/
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \
runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
// runtime_method_offset - offset within the Runtime object of the callee-save ArtMethod*
//                         that gets stored at [sp]; defaults to the kSaveEverything method.
// Frame layout written here (offsets from sp): 0 = ArtMethod*, 16..271 = d0-d31,
// 272..495 = x0-x17 and x19-x28. Slots 496..511 (x29/LR) are left to the caller.
// Clobbers xIP0 after it has been saved to the frame.
// Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif
// Save FP registers.
// Plain stp without CFI directives, unlike the core registers below.
stp d0, d1, [sp, #16]
stp d2, d3, [sp, #32]
stp d4, d5, [sp, #48]
stp d6, d7, [sp, #64]
stp d8, d9, [sp, #80]
stp d10, d11, [sp, #96]
stp d12, d13, [sp, #112]
stp d14, d15, [sp, #128]
stp d16, d17, [sp, #144]
stp d18, d19, [sp, #160]
stp d20, d21, [sp, #176]
stp d22, d23, [sp, #192]
stp d24, d25, [sp, #208]
stp d26, d27, [sp, #224]
stp d28, d29, [sp, #240]
stp d30, d31, [sp, #256]
// Save core registers.
SAVE_TWO_REGS x0, x1, 272
SAVE_TWO_REGS x2, x3, 288
SAVE_TWO_REGS x4, x5, 304
SAVE_TWO_REGS x6, x7, 320
SAVE_TWO_REGS x8, x9, 336
SAVE_TWO_REGS x10, x11, 352
SAVE_TWO_REGS x12, x13, 368
SAVE_TWO_REGS x14, x15, 384
SAVE_TWO_REGS x16, x17, 400 // Do not save the platform register.
SAVE_TWO_REGS x19, x20, 416
SAVE_TWO_REGS x21, x22, 432
SAVE_TWO_REGS x23, x24, 448
SAVE_TWO_REGS x25, x26, 464
SAVE_TWO_REGS x27, x28, 480
// art::Runtime* xIP0 = art::Runtime::instance_;
LOAD_RUNTIME_INSTANCE xIP0
// ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
ldr xIP0, [xIP0, \runtime_method_offset]
// Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
str xIP0, [sp]
// Place sp in Thread::Current()->top_quick_frame.
// SP cannot be used as the data register of a store, so it is copied to xIP0 first.
mov xIP0, sp
str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveEverything)
*/
.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
// runtime_method_offset - forwarded unchanged to the register-spilling macro below.
// Reserve the whole frame up front. The preprocessor check inside
// SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR pins this size to 512 bytes.
INCREASE_FRAME FRAME_SIZE_SAVE_EVERYTHING
// x29 and LR take the two highest 8-byte slots of the 512-byte frame.
SAVE_TWO_REGS x29, xLR, 496
// Spill the remaining registers, store the ArtMethod* and publish the frame.
SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
.endm
// Tears down a 512-byte save-everything frame, reloading every register that
// SETUP_SAVE_EVERYTHING_FRAME spilled except x0, which keeps its current value.
.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
// Restore FP registers.
ldp d0, d1, [sp, #16]
ldp d2, d3, [sp, #32]
ldp d4, d5, [sp, #48]
ldp d6, d7, [sp, #64]
ldp d8, d9, [sp, #80]
ldp d10, d11, [sp, #96]
ldp d12, d13, [sp, #112]
ldp d14, d15, [sp, #128]
ldp d16, d17, [sp, #144]
ldp d18, d19, [sp, #160]
ldp d20, d21, [sp, #176]
ldp d22, d23, [sp, #192]
ldp d24, d25, [sp, #208]
ldp d26, d27, [sp, #224]
ldp d28, d29, [sp, #240]
ldp d30, d31, [sp, #256]
// Restore core registers, except x0.
// x1 is the upper half of the x0/x1 pair saved at 272, hence the single load from 280.
RESTORE_REG x1, 280
RESTORE_TWO_REGS x2, x3, 288
RESTORE_TWO_REGS x4, x5, 304
RESTORE_TWO_REGS x6, x7, 320
RESTORE_TWO_REGS x8, x9, 336
RESTORE_TWO_REGS x10, x11, 352
RESTORE_TWO_REGS x12, x13, 368
RESTORE_TWO_REGS x14, x15, 384
RESTORE_TWO_REGS x16, x17, 400 // Do not restore the platform register.
RESTORE_TWO_REGS x19, x20, 416
RESTORE_TWO_REGS x21, x22, 432
RESTORE_TWO_REGS x23, x24, 448
RESTORE_TWO_REGS x25, x26, 464
RESTORE_TWO_REGS x27, x28, 480
RESTORE_TWO_REGS x29, xLR, 496
// Release the frame; the ArtMethod* slot at [sp] is simply dropped.
DECREASE_FRAME 512
.endm
// Tears down a save-everything frame and reloads all spilled registers, including x0.
.macro RESTORE_SAVE_EVERYTHING_FRAME
// x0 was saved at offset 272 of the frame; the KEEP_X0 variant handles everything else.
RESTORE_REG x0, 272
RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
.endm
// Tail of an entry point: delivers the pending exception if the thread has one,
// otherwise returns to the caller (through the deoptimization check in DEOPT_OR_RETURN).
//   is_ref - forwarded to DEOPT_OR_RETURN; non-zero when the result is a reference.
// Clobbers x1.
.macro RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION is_ref = 0
ldr x1, [xSELF, # THREAD_EXCEPTION_OFFSET] // Get exception field.
cbnz x1, 1f
// No pending exception. DEOPT_OR_RETURN ends in ret on every path, so it never falls into 1:.
DEOPT_OR_RETURN x1, \is_ref // Check if deopt is required
1: // deliver exception on current thread
DELIVER_PENDING_EXCEPTION
.endm
// Same as RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION, for entry points whose
// result is a reference (is_ref is passed as 1).
.macro RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION
RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /* is_ref= */ 1
.endm
// Returns to the caller, first calling artDeoptimizeIfNeeded when the thread
// has its deopt-check-required field set.
//   temp   - scratch X register, overwritten with the deopt-check field.
//   is_ref - value passed in x2 to artDeoptimizeIfNeeded (whether x0 is a reference).
// Uses the local numeric label 2.
.macro DEOPT_OR_RETURN temp, is_ref = 0
ldr \temp, [xSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
cbnz \temp, 2f
// Fast path: no deoptimization check needed.
ret
2:
// Slow path: spill every register (including the result in x0) before calling the runtime.
SETUP_SAVE_EVERYTHING_FRAME
mov x2, \is_ref // pass if result is a reference
mov x1, x0 // pass the result
mov x0, xSELF // Thread::Current
bl artDeoptimizeIfNeeded
// Reloads all registers from the frame, x0 included.
RESTORE_SAVE_EVERYTHING_FRAME
REFRESH_MARKING_REGISTER
ret
.endm
// For code that already sits on a save-everything frame and has its result in x0:
// pops the frame and returns x0, first calling artDeoptimizeIfNeeded when the thread
// has its deopt-check-required field set.
//   temp   - scratch X register, overwritten with the deopt-check field.
//   is_ref - value passed in x2 to artDeoptimizeIfNeeded (whether x0 is a reference).
// Uses the local numeric label 2.
.macro DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_X0 temp, is_ref
ldr \temp, [xSELF, #THREAD_DEOPT_CHECK_REQUIRED_OFFSET]
// Remember the CFI state with the frame still in place for the slow path at 2:.
CFI_REMEMBER_STATE
cbnz \temp, 2f
// Fast path: pop the frame but keep the live result in x0.
RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
REFRESH_MARKING_REGISTER
ret
2:
CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
str x0, [sp, #SAVE_EVERYTHING_FRAME_X0_OFFSET] // update result in the frame
mov x2, \is_ref // pass if result is a reference
mov x1, x0 // pass the result
mov x0, xSELF // Thread::Current
bl artDeoptimizeIfNeeded
CFI_REMEMBER_STATE
// x0 is reloaded from the frame slot written above.
RESTORE_SAVE_EVERYTHING_FRAME
REFRESH_MARKING_REGISTER
ret
// Code following this macro is described as still having the frame in place.
CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_EVERYTHING
.endm
// Tail of an entry point whose runtime helper reports success as w0 == 0:
// on zero, return (through the deoptimization check); otherwise deliver the pending exception.
// Clobbers x1 (scratch for DEOPT_OR_RETURN).
.macro RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
cbnz w0, 1f // result non-zero branch over
// DEOPT_OR_RETURN ends in ret on every path, so it never falls into 1:.
DEOPT_OR_RETURN x1
1:
DELIVER_PENDING_EXCEPTION
.endm
// Defines entry point `c_name`, which calls the runtime function `cxx_name(Thread*)`
// after setting up a save-all-callee-saves frame. The call is not expected to return.
//   c_name   - name of the assembly entry point to define.
//   cxx_name - external runtime function to call.
.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
mov x0, xSELF // pass Thread::Current
bl \cxx_name // \cxx_name(Thread*)
// Trap if the runtime function ever returns.
brk 0
END \c_name
.endm
// Same as NO_ARG_RUNTIME_EXCEPTION, but builds a save-everything frame before the call.
//   c_name   - name of the assembly entry point to define.
//   cxx_name - external runtime function to call as cxx_name(Thread*).
.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_EVERYTHING_FRAME // save all registers as basis for long jump context
mov x0, xSELF // pass Thread::Current
bl \cxx_name // \cxx_name(Thread*)
// Trap if the runtime function ever returns.
brk 0
END \c_name
.endm
// Defines entry point `c_name`, which calls `cxx_name(arg, Thread*)` after setting up a
// save-all-callee-saves frame. The incoming x0 is not touched by the instructions written
// here and is passed through as the first argument.
//   c_name   - name of the assembly entry point to define.
//   cxx_name - external runtime function to call.
.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
mov x1, xSELF // pass Thread::Current.
bl \cxx_name // \cxx_name(arg, Thread*).
// Trap if the runtime function ever returns.
brk 0
END \c_name
.endm
// Defines entry point `c_name`, which calls `cxx_name(arg1, arg2, Thread*)` after building a
// save-everything frame. The incoming x0 and x1 are not touched by the instructions written
// here and are passed through as the first two arguments.
//   c_name   - name of the assembly entry point to define.
//   cxx_name - external runtime function to call.
.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
.extern \cxx_name
ENTRY \c_name
SETUP_SAVE_EVERYTHING_FRAME // save all registers as basis for long jump context
mov x2, xSELF // pass Thread::Current
bl \cxx_name // \cxx_name(arg1, arg2, Thread*)
// Trap if the runtime function ever returns.
brk 0
END \c_name
.endm
/*
* Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode
* that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
*/
// Expands to entry point art_quick_deliver_exception: the exception arrives in x0 and
// Thread::Current is passed in x1.
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
/*
* Called by managed code to create and deliver a NullPointerException.
*/
// Expands to entry point art_quick_throw_null_pointer_exception; only Thread::Current is passed.
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \
art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
/*
* Call installed by a signal handler to create and deliver a NullPointerException.
*/
// Declare the C++ helper that is actually called below. The previous directive named this
// entry point itself, which is defined right here and therefore is not external.
.extern artThrowNullPointerExceptionFromSignal
// Entry state set up by the fault handler: one pointer-sized slot holding the
// "return address" is already on the stack, and LR holds the fault address.
ENTRY art_quick_throw_null_pointer_exception_from_signal
// The fault handler pushes the gc map address, i.e. "return address", to stack
// and passes the fault address in LR. So we need to set up the CFI info accordingly.
.cfi_def_cfa_offset __SIZEOF_POINTER__
.cfi_rel_offset lr, 0
// Save all registers as basis for long jump context.
// Only grow by the remainder of the frame: its top slot was already pushed by the fault handler.
INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__) // LR already saved.
SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
mov x0, lr // pass the fault address stored in LR by the fault handler.
mov x1, xSELF // pass Thread::Current.
bl artThrowNullPointerExceptionFromSignal // (arg, Thread*).
// Trap if the runtime function ever returns.
brk 0
END art_quick_throw_null_pointer_exception_from_signal
/*
* Called by managed code to create and deliver an ArithmeticException.
*/
// Expands to entry point art_quick_throw_div_zero; only Thread::Current is passed.
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
/*
* Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
* index, arg2 holds limit.
*/
// x0 and x1 are passed through unchanged; Thread::Current is added in x2.
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
/*
* Called by managed code to create and deliver a StringIndexOutOfBoundsException
* as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
*/
// x0 and x1 are passed through unchanged; Thread::Current is added in x2.
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING \
art_quick_throw_string_bounds, artThrowStringBoundsFromCode
/*
* Called by managed code to create and deliver a StackOverflowError.
*/
// Uses the save-all-callee-saves variant rather than the save-everything one.
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
/*
* All generated callsites for interface invokes and invocation slow paths will load arguments
* as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
* the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
* NOTE: "this" is first visible argument of the target, and so can be found in arg1/x1.
*
* The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
* of the target Method* in x0 and method->code_ in x1.
*
* If unsuccessful, the helper will return null/????. There will be a pending exception in the
* thread and we branch to another stub to deliver it.
*
* On success this wrapper will restore arguments and *jump* to the target, leaving the lr
* pointing back to the original caller.
*
* Adapted from ARM32 code.
*
* Clobbers xIP0.
*/
// Body of an invoke trampoline.
//   cxx_name - external C helper that resolves the target; it returns the target
//              Method* in x0 and the code pointer in x1.
// Uses the local numeric label 1 and clobbers xIP0.
.macro INVOKE_TRAMPOLINE_BODY cxx_name
.extern \cxx_name
SETUP_SAVE_REFS_AND_ARGS_FRAME // save callee saves in case allocation triggers GC
// Helper signature is always
// (method_idx, *this_object, *self, sp)
// x0 (method_idx) and x1 (this) are passed through as received.
mov x2, xSELF // pass Thread::Current
mov x3, sp
bl \cxx_name // (method_idx, this, Thread*, SP)
mov xIP0, x1 // save Method*->code_
// Restoring the frame reloads the argument registers, so the code pointer is kept in xIP0.
RESTORE_SAVE_REFS_AND_ARGS_FRAME
REFRESH_MARKING_REGISTER
cbz x0, 1f // did we find the target? if not go to exception delivery
br xIP0 // tail call to target
1:
DELIVER_PENDING_EXCEPTION
.endm
// Defines entry point `c_name` whose body is INVOKE_TRAMPOLINE_BODY for helper `cxx_name`.
//   c_name   - name of the assembly entry point to define.
//   cxx_name - external C helper that resolves the invoke target.
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm
// One trampoline per invoke kind (interface, static, direct, super, virtual), each bound
// to its matching access-checking C helper.
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, \
artInvokeInterfaceTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, \
artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, \
artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, \
artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, \
artInvokeVirtualTrampolineWithAccessCheck
// Prologue shared by the invoke stubs. On entry: x1 = args, w2 = argsize in bytes,
// x3 = Thread* self, x4/x5 = values to preserve across the call.
// Saves x4, x5, x19-x21, FP and LR, points xFP at the save area, reserves stack for a
// null ArtMethod* plus the arguments, and copies the arguments to [sp, 8] onwards.
// On exit: x9 = address of the first copied argument, w2 = 0, xSELF = self.
// Clobbers x9, x10. Uses the local numeric labels 1 and 2.
.macro INVOKE_STUB_CREATE_FRAME
SAVE_SIZE=8*8 // x4, x5, <padding>, x19, x20, x21, FP, LR saved.
SAVE_TWO_REGS_INCREASE_FRAME x4, x5, SAVE_SIZE
SAVE_REG x19, 24
SAVE_TWO_REGS x20, x21, 32
SAVE_TWO_REGS xFP, xLR, 48
mov xFP, sp // Use xFP for frame pointer, as it's callee-saved.
.cfi_def_cfa_register xFP
// NOTE(review): x2 is read here as a 64-bit register before any write to w2, so this
// relies on the caller having zero-extended the 32-bit argsize; confirm that assumption.
add x10, x2, #(__SIZEOF_POINTER__ + 0xf) // Reserve space for ArtMethod*, arguments and
and x10, x10, # ~0xf // round up for 16-byte stack alignment.
sub sp, sp, x10 // Adjust SP for ArtMethod*, args and alignment padding.
mov xSELF, x3 // Move thread pointer into SELF register.
// Copy arguments into stack frame.
// Use simple copy routine for now.
// 4 bytes per slot.
// X1 - source address
// W2 - args length
// X9 - destination address.
// W10 - temporary
add x9, sp, #8 // Destination address is bottom of stack + null.
// Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
// does not have unique-id variables.
cbz w2, 2f
// The loop copies from the last 4-byte slot down to slot 0 and only stops when w2 is
// exactly zero, so argsize is assumed to be a multiple of 4.
1:
sub w2, w2, #4 // Need 65536 bytes of range.
ldr w10, [x1, x2]
str w10, [x9, x2]
cbnz w2, 1b
2:
// Store null into ArtMethod* at bottom of frame.
str xzr, [sp]
.endm
// Epilogue shared by the invoke stubs: calls the method whose ArtMethod* is in x0, unwinds
// the frame built by INVOKE_STUB_CREATE_FRAME, stores the result through x4 according to
// the return-type character at [x5], and returns.
// Uses the local numeric labels 1, 2 and 3.
.macro INVOKE_STUB_CALL_AND_RETURN
REFRESH_MARKING_REGISTER
REFRESH_SUSPEND_CHECK_REGISTER
// load method-> METHOD_QUICK_CODE_OFFSET
ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
// Branch to method.
blr x9
// Pop the ArtMethod* (null), arguments and alignment padding from the stack.
mov sp, xFP
.cfi_def_cfa_register sp
// Restore saved registers including value address and shorty address.
RESTORE_REG x19, 24
RESTORE_TWO_REGS x20, x21, 32
RESTORE_TWO_REGS xFP, xLR, 48
RESTORE_TWO_REGS_DECREASE_FRAME x4, x5, SAVE_SIZE
// Store result (w0/x0/s0/d0) appropriately, depending on resultType.
// The first shorty character is the return type.
ldrb w10, [x5]
// Check the return type and store the correct register into the jvalue in memory.
// Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.
// Don't set anything for a void type.
cmp w10, #'V'
beq 1f
// Is it a double?
cmp w10, #'D'
beq 2f
// Is it a float?
cmp w10, #'F'
beq 3f
// Just store x0. Doesn't matter if it is 64 or 32 bits.
str x0, [x4]
1: // Finish up.
ret
2: // Store double.
str d0, [x4]
ret
3: // Store float.
str s0, [x4]
ret
.endm
// Macro for loading an argument into a register.
// label - the base name of the label of the load routine,
// reg - the register to load,
// args - pointer to current argument, incremented by size,
// size - the size of the register - 4 or 8 bytes,
// nh4_reg - the register to fill with the address of the next handler for 4-byte values,
// nh4_l - the base name of the label of the next handler for 4-byte values,
// nh8_reg - the register to fill with the address of the next handler for 8-byte values,
// nh8_l - the base name of the label of the next handler for 8-byte values,
// cont - the base name of the label for continuing the shorty processing loop,
// suffix - suffix added to all labels to make labels unique for different users.
.macro INVOKE_STUB_LOAD_REG label, reg, args, size, nh4_reg, nh4_l, nh8_reg, nh8_l, cont, suffix
\label\suffix:
// Post-indexed load: fetch the argument, then advance the argument pointer by its size.
ldr \reg, [\args], #\size
// This register is now used up; point both handler registers at the next ones in the chain.
adr \nh4_reg, \nh4_l\suffix
adr \nh8_reg, \nh8_l\suffix
b \cont\suffix
.endm
// Macro for skipping an argument that does not fit into argument registers.
// label - the base name of the label of the skip routine,
// args - pointer to current argument, incremented by size,
// size - the size of the argument - 4 or 8 bytes,
// cont - the base name of the label for continuing the shorty processing loop,
// suffix - suffix added to all labels to make labels unique for different users.
.macro INVOKE_STUB_SKIP_ARG label, args, size, cont, suffix
\label\suffix:
// Nothing is loaded: just step the argument pointer over this argument.
add \args, \args, #\size
b \cont\suffix
.endm
// Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
// Parse the passed shorty to determine which register to load.
// x5 - shorty,
// x9 - points to arguments on the stack,
// suffix - suffix added to all labels to make labels unique for different users.
.macro INVOKE_STUB_LOAD_ALL_ARGS suffix
// Register roles while the loop runs:
//   x10 - cursor into the shorty,
//   x11 / x12 - address of the handler for the next 4-byte / 8-byte core argument,
//   x13 / x14 - address of the handler for the next float / double argument,
//   w17 - current shorty character.
// Each handler loads one register and re-points its pair of handler registers at the
// next handler, so the next free register is encoded in x11-x14 rather than in a counter.
add x10, x5, #1 // Load shorty address, plus one to skip the return type.
// Load this (if instance method) and addresses for routines that load WXSD registers.
.ifc \suffix, _instance
ldr w1, [x9], #4 // Load "this" parameter, and increment arg pointer.
adr x11, .Lload_w2\suffix
adr x12, .Lload_x2\suffix
.else
adr x11, .Lload_w1\suffix
adr x12, .Lload_x1\suffix
.endif
adr x13, .Lload_s0\suffix
adr x14, .Lload_d0\suffix
// Loop to fill registers.
.Lfill_regs\suffix:
ldrb w17, [x10], #1 // Load next character in signature, and increment.
cbz w17, .Lcall_method\suffix // Exit at end of signature. Shorty 0 terminated.
cmp w17, #'J' // Is this a long?
beq .Lload_long\suffix
cmp w17, #'F' // Is this a float?
beq .Lload_float\suffix
cmp w17, #'D' // Is this a double?
beq .Lload_double\suffix
// Everything else uses a 4-byte GPR.
br x11
.Lload_long\suffix:
br x12
.Lload_float\suffix:
br x13
.Lload_double\suffix:
br x14
// Handlers for loading other args (not float/double/long) into W registers.
// The w1/x1 handlers exist only when the suffix is not _instance, because the _instance
// path has already loaded "this" into w1 above.
.ifnc \suffix, _instance
INVOKE_STUB_LOAD_REG \
.Lload_w1, w1, x9, 4, x11, .Lload_w2, x12, .Lload_x2, .Lfill_regs, \suffix
.endif
INVOKE_STUB_LOAD_REG .Lload_w2, w2, x9, 4, x11, .Lload_w3, x12, .Lload_x3, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_w3, w3, x9, 4, x11, .Lload_w4, x12, .Lload_x4, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_w4, w4, x9, 4, x11, .Lload_w5, x12, .Lload_x5, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_w5, w5, x9, 4, x11, .Lload_w6, x12, .Lload_x6, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_w6, w6, x9, 4, x11, .Lload_w7, x12, .Lload_x7, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_w7, w7, x9, 4, x11, .Lskip4, x12, .Lskip8, .Lfill_regs, \suffix
// Handlers for loading longs into X registers.
.ifnc \suffix, _instance
INVOKE_STUB_LOAD_REG \
.Lload_x1, x1, x9, 8, x11, .Lload_w2, x12, .Lload_x2, .Lfill_regs, \suffix
.endif
INVOKE_STUB_LOAD_REG .Lload_x2, x2, x9, 8, x11, .Lload_w3, x12, .Lload_x3, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_x3, x3, x9, 8, x11, .Lload_w4, x12, .Lload_x4, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_x4, x4, x9, 8, x11, .Lload_w5, x12, .Lload_x5, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_x5, x5, x9, 8, x11, .Lload_w6, x12, .Lload_x6, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_x6, x6, x9, 8, x11, .Lload_w7, x12, .Lload_x7, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_x7, x7, x9, 8, x11, .Lskip4, x12, .Lskip8, .Lfill_regs, \suffix
// Handlers for loading singles into S registers.
INVOKE_STUB_LOAD_REG .Lload_s0, s0, x9, 4, x13, .Lload_s1, x14, .Lload_d1, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s1, s1, x9, 4, x13, .Lload_s2, x14, .Lload_d2, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s2, s2, x9, 4, x13, .Lload_s3, x14, .Lload_d3, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s3, s3, x9, 4, x13, .Lload_s4, x14, .Lload_d4, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s4, s4, x9, 4, x13, .Lload_s5, x14, .Lload_d5, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s5, s5, x9, 4, x13, .Lload_s6, x14, .Lload_d6, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s6, s6, x9, 4, x13, .Lload_s7, x14, .Lload_d7, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_s7, s7, x9, 4, x13, .Lskip4, x14, .Lskip8, .Lfill_regs, \suffix
// Handlers for loading doubles into D registers.
INVOKE_STUB_LOAD_REG .Lload_d0, d0, x9, 8, x13, .Lload_s1, x14, .Lload_d1, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d1, d1, x9, 8, x13, .Lload_s2, x14, .Lload_d2, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d2, d2, x9, 8, x13, .Lload_s3, x14, .Lload_d3, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d3, d3, x9, 8, x13, .Lload_s4, x14, .Lload_d4, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d4, d4, x9, 8, x13, .Lload_s5, x14, .Lload_d5, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d5, d5, x9, 8, x13, .Lload_s6, x14, .Lload_d6, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d6, d6, x9, 8, x13, .Lload_s7, x14, .Lload_d7, .Lfill_regs, \suffix
INVOKE_STUB_LOAD_REG .Lload_d7, d7, x9, 8, x13, .Lskip4, x14, .Lskip8, .Lfill_regs, \suffix
// Handlers for skipping arguments that do not fit into registers.
// Once a register class is exhausted its handler registers point here permanently,
// because the skip handlers never rewrite x11-x14.
INVOKE_STUB_SKIP_ARG .Lskip4, x9, 4, .Lfill_regs, \suffix
INVOKE_STUB_SKIP_ARG .Lskip8, x9, 8, .Lfill_regs, \suffix
.Lcall_method\suffix:
.endm
/*
* extern"C" void art_quick_invoke_stub(ArtMethod *method, x0
* uint32_t *args, x1
* uint32_t argsize, w2
* Thread *self, x3
* JValue *result, x4
* char *shorty); x5
* +----------------------+
* | |
* | C/C++ frame |
* | LR'' |
* | FP'' | <- SP'
* +----------------------+
* +----------------------+
* | LR' |
* | FP' |
* | x21 |
* | x20 |
* | x19 | Saved registers
* | <padding> |
* | X5 |
* | X4 | <- FP
* +----------------------+
* | uint32_t out[n-1] |
* | : : | Outs
* | uint32_t out[0] |
* | ArtMethod* | <- SP value=null
* +----------------------+
*
* Outgoing registers:
* x0 - Method*
* x1-x7 - integer parameters.
* d0-d7 - Floating point parameters.
* xSELF = self
* SP = & of ArtMethod*
* x1 = "this" pointer.
*
*/
ENTRY art_quick_invoke_stub
// Spill registers as per AAPCS64 calling convention.
INVOKE_STUB_CREATE_FRAME
// Load args into registers.
// The _instance suffix makes the loader put the first argument slot ("this") into w1.
INVOKE_STUB_LOAD_ALL_ARGS _instance
// Call the method and return.
INVOKE_STUB_CALL_AND_RETURN
END art_quick_invoke_stub
/* extern"C"
* void art_quick_invoke_static_stub(ArtMethod *method, x0
* uint32_t *args, x1
* uint32_t argsize, w2
* Thread *self, x3
* JValue *result, x4
* char *shorty); x5
*/
ENTRY art_quick_invoke_static_stub
// Spill registers as per AAPCS64 calling convention.
INVOKE_STUB_CREATE_FRAME
// Load args into registers.
// With the _static suffix there is no implicit "this"; argument loading starts at w1/x1.
INVOKE_STUB_LOAD_ALL_ARGS _static
// Call the method and return.
INVOKE_STUB_CALL_AND_RETURN
END art_quick_invoke_static_stub
/* extern"C" void art_quick_osr_stub(void** stack, x0
* size_t stack_size_in_bytes, x1
* const uint8_t* native_pc, x2
* JValue *result, x3
* char *shorty, x4
* Thread *self) x5
*/
ENTRY art_quick_osr_stub
// Save area layout (offsets from sp): 0 = x3/x4 (result, shorty), 16..95 = x19-x28,
// 96 = FP/LR, 112..175 = d8-d15.
SAVE_SIZE=22*8
SAVE_TWO_REGS_INCREASE_FRAME x3, x4, SAVE_SIZE
SAVE_TWO_REGS x19, x20, 16
SAVE_TWO_REGS x21, x22, 32
SAVE_TWO_REGS x23, x24, 48
SAVE_TWO_REGS x25, x26, 64
SAVE_TWO_REGS x27, x28, 80
SAVE_TWO_REGS xFP, xLR, 96
stp d8, d9, [sp, #112]
stp d10, d11, [sp, #128]
stp d12, d13, [sp, #144]
stp d14, d15, [sp, #160]
mov xSELF, x5 // Move thread pointer into SELF register.
REFRESH_MARKING_REGISTER
REFRESH_SUSPEND_CHECK_REGISTER
// 16 more bytes: an 8-byte null ArtMethod* slot plus padding to keep SP 16-byte aligned.
INCREASE_FRAME 16
str xzr, [sp] // Store null for ArtMethod* slot
// Branch to stub.
CFI_REMEMBER_STATE
// bl sets LR to the next instruction; .Losr_entry stores that LR into the frame it builds.
bl .Losr_entry
DECREASE_FRAME 16
// Restore saved registers including value address and shorty address.
ldp d8, d9, [sp, #112]
ldp d10, d11, [sp, #128]
ldp d12, d13, [sp, #144]
ldp d14, d15, [sp, #160]
RESTORE_TWO_REGS x19, x20, 16
RESTORE_TWO_REGS x21, x22, 32
RESTORE_TWO_REGS x23, x24, 48
RESTORE_TWO_REGS x25, x26, 64
RESTORE_TWO_REGS x27, x28, 80
RESTORE_TWO_REGS xFP, xLR, 96
RESTORE_TWO_REGS_DECREASE_FRAME x3, x4, SAVE_SIZE
// The compiler put the result in x0. Doesn't matter if it is 64 or 32 bits.
str x0, [x3]
ret
.Losr_entry:
CFI_RESTORE_STATE_AND_DEF_CFA sp, (SAVE_SIZE+16)
mov x9, sp // Save stack pointer.
// SP is about to move by a run-time amount, so the CFA is tracked through x9 from here on.
.cfi_def_cfa_register x9
// Update stack pointer for the callee
sub sp, sp, x1
// Update link register slot expected by the callee.
// The top 8 bytes of the new frame receive LR; the remaining w1 bytes are copied below.
sub w1, w1, #8
str lr, [sp, x1]
// Copy arguments into stack frame.
// Use simple copy routine for now.
// 4 bytes per slot.
// X0 - source address
// W1 - args length
// SP - destination address.
// W10 - temporary
.Losr_loop_entry:
cbz w1, .Losr_loop_exit
sub w1, w1, #4
ldr w10, [x0, x1]
str w10, [sp, x1]
b .Losr_loop_entry
.Losr_loop_exit:
// Branch to the OSR entry point.
// Plain br: LR is not updated here, so the return address is the one stored above.
br x2
END art_quick_osr_stub
/*
* On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_.
* Both must reside on the stack, between current SP and target SP.
* IP0 and IP1 shall be clobbered rather than retrieved from gprs_.
*/
ENTRY art_quick_do_long_jump
// Loads the full register context from the two arrays and branches to the saved PC.
// Array slot i of gprs_ is at byte offset 8*i; SP is slot 31 and PC is slot 33.
// Load FPRs
ldp d0, d1, [x1, #0]
ldp d2, d3, [x1, #16]
ldp d4, d5, [x1, #32]
ldp d6, d7, [x1, #48]
ldp d8, d9, [x1, #64]
ldp d10, d11, [x1, #80]
ldp d12, d13, [x1, #96]
ldp d14, d15, [x1, #112]
ldp d16, d17, [x1, #128]
ldp d18, d19, [x1, #144]
ldp d20, d21, [x1, #160]
ldp d22, d23, [x1, #176]
ldp d24, d25, [x1, #192]
ldp d26, d27, [x1, #208]
ldp d28, d29, [x1, #224]
ldp d30, d31, [x1, #240]
// Load GPRs. Delay loading x0, x1 because x0 is used as gprs_.
ldp x2, x3, [x0, #16]
ldp x4, x5, [x0, #32]
ldp x6, x7, [x0, #48]
ldp x8, x9, [x0, #64]
ldp x10, x11, [x0, #80]
ldp x12, x13, [x0, #96]
ldp x14, x15, [x0, #112]
// Do not load IP0 (x16) and IP1 (x17), these shall be clobbered below.
// Don't load the platform register (x18) either.
ldr x19, [x0, #152] // xSELF.
ldp x20, x21, [x0, #160] // For Baker RB, wMR (w20) is reloaded below.
ldp x22, x23, [x0, #176]
ldp x24, x25, [x0, #192]
ldp x26, x27, [x0, #208]
ldp x28, x29, [x0, #224]
ldp x30, xIP0, [x0, #240] // LR and SP, load SP to IP0.
// Load PC to IP1, it's at the end (after the space for the unused XZR).
ldr xIP1, [x0, #33*8]
// Load x0, x1.
// This must be the last read through x0, since it overwrites the base register.
ldp x0, x1, [x0, #0]
// Set SP. Do not access fprs_ and gprs_ from now, they are below SP.
mov sp, xIP0
REFRESH_MARKING_REGISTER
REFRESH_SUSPEND_CHECK_REGISTER
// Jump to the saved PC; this entry point does not return to its caller.
br xIP1
END art_quick_do_long_jump
/*
* Entry from managed code that tries to lock the object in a fast path and
* calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
* x0 holds the possibly null object to lock.
*/
ENTRY art_quick_lock_object
// Fast path on the object in x0; art_quick_lock_object_no_inline is passed as the slow path.
LOCK_OBJECT_FAST_PATH x0, art_quick_lock_object_no_inline, /*can_be_null*/ 1
END art_quick_lock_object
/*
* Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
* x0 holds the possibly null object to lock.
*/
.extern artLockObjectFromCode
ENTRY art_quick_lock_object_no_inline
// This is also the slow path for art_quick_lock_object.
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case we block
// x0 still holds the object and is passed through as the first argument.
mov x1, xSELF // pass Thread::Current
bl artLockObjectFromCode // (Object* obj, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
// w0 == 0: return to the caller; non-zero: deliver the pending exception.
RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline
/*
* Entry from managed code that tries to unlock the object in a fast path and calls
* `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
* x0 holds the possibly null object to unlock.
*/
ENTRY art_quick_unlock_object
// Fast path on the object in x0; art_quick_unlock_object_no_inline is passed as the slow path.
UNLOCK_OBJECT_FAST_PATH x0, art_quick_unlock_object_no_inline, /*can_be_null*/ 1
END art_quick_unlock_object
/*
* Entry from managed code that calls `artUnlockObjectFromCode()`
* and delivers exception on failure.
* x0 holds the possibly null object to unlock.
*/
.extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object_no_inline
// This is also the slow path for art_quick_unlock_object.
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case exception allocation triggers GC
// x0 still holds the object and is passed through as the first argument.
mov x1, xSELF // pass Thread::Current
bl artUnlockObjectFromCode // (Object* obj, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
// w0 == 0: return to the caller; non-zero: deliver the pending exception.
RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline
/*
* Entry from managed code that calls artInstanceOfFromCode and on failure calls
* artThrowClassCastExceptionForObject.
*/
.extern artInstanceOfFromCode
.extern artThrowClassCastExceptionForObject
// On entry x0 and x1 are the two arguments forwarded to artInstanceOfFromCode and, on
// failure, to artThrowClassCastExceptionForObject (Object*, Class*).
ENTRY art_quick_check_instance_of
// Type check using the bit string passes null as the target class. In that case just throw.
cbz x1, .Lthrow_class_cast_exception_for_bitstring_check
// Store arguments and link register
// Stack needs to be 16B aligned on calls.
// 32-byte frame: x0 at 0, x1 at 8, LR at 24 (offset 16 is unused).
SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
SAVE_REG xLR, 24
// Call runtime code
bl artInstanceOfFromCode
// Restore LR.
RESTORE_REG xLR, 24
// Check for exception
CFI_REMEMBER_STATE
// A zero result means the check failed.
cbz x0, .Lthrow_class_cast_exception
// Restore and return
// TODO: We do not need to restore X0 and X1 on success. We also do not need
// to record CFI for them as the information is not very useful.
RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
ret
.Lthrow_class_cast_exception:
CFI_RESTORE_STATE_AND_DEF_CFA sp, 32
// Restore
// The original x0/x1 are needed again as arguments for the throw helper.
RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
.Lthrow_class_cast_exception_for_bitstring_check:
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
mov x2, xSELF // pass Thread::Current
bl artThrowClassCastExceptionForObject // (Object*, Class*, Thread*)
brk 0 // We should not return here...
END art_quick_check_instance_of
// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
//   xReg     - register to reload.
//   offset   - byte offset of its save slot from sp.
//   xExclude - register that must keep its current value.
// The comparison is textual (.ifnc), so both names must be spelled identically to match.
// SP is not adjusted.
.macro POP_REG_NE xReg, offset, xExclude
.ifnc \xReg, \xExclude
ldr \xReg, [sp, #\offset] // restore xReg
.cfi_restore \xReg
.endif
.endm
// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
//   xReg1, xReg2 - registers saved at [sp, offset] and [sp, offset + 8].
//   xExclude     - register that must keep its current value.
// The comparison is textual (.ifc), so names must be spelled identically to match.
// SP is not adjusted.
.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
.ifc \xReg1, \xExclude
ldr \xReg2, [sp, #(\offset + 8)] // restore xReg2
.else
.ifc \xReg2, \xExclude
ldr \xReg1, [sp, #\offset] // restore xReg1
.else
ldp \xReg1, \xReg2, [sp, #\offset] // restore xReg1 and xReg2
.endif
.endif
// Unlike POP_REG_NE, the CFI restore is emitted for both registers even when one of
// them was excluded and therefore not reloaded.
.cfi_restore \xReg1
.cfi_restore \xReg2
.endm
// Helper macros for `art_quick_aput_obj`.
#ifdef USE_READ_BARRIER
#ifdef USE_BAKER_READ_BARRIER
// Loads the 32-bit heap reference at [xObj + offset] into wDest, branching to
// gray_slow_path_label instead when the read-barrier-state bit of the lock word is set.
//   wDest                - destination W register.
//   xObj                 - register holding the object address.
//   offset               - byte offset of the reference field.
//   gray_slow_path_label - branch target when the bit is set.
// Clobbers xIP0.
.macro BAKER_RB_CHECK_GRAY_BIT_AND_LOAD wDest, xObj, offset, gray_slow_path_label
ldr wIP0, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
tbnz wIP0, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, \gray_slow_path_label
// False dependency to avoid needing load/load fence.
// The 32-bit load above zeroed the upper half of xIP0, so the shifted value added is 0.
add \xObj, \xObj, xIP0, lsr #32
ldr \wDest, [\xObj, #\offset] // Heap reference = 32b; zero-extends to xN.
UNPOISON_HEAP_REF \wDest
.endm
// Loads the 32-bit heap reference at [xObj + offset] into wDest and then calls mark_function.
//   wDest         - destination W register.
//   xObj          - register holding the object address.
//   offset        - byte offset of the reference field.
//   mark_function - marking routine to call with bl.
// Clobbers x5, which holds LR across the call.
.macro BAKER_RB_LOAD_AND_MARK wDest, xObj, offset, mark_function
ldr \wDest, [\xObj, #\offset] // Heap reference = 32b; zero-extends to xN.
UNPOISON_HEAP_REF \wDest
// Save LR in a register preserved by `art_quick_read_barrier_mark_regNN`
// and unused by the `art_quick_aput_obj`.
mov x5, lr
bl \mark_function
mov lr, x5 // Restore LR.
.endm
#else // USE_BAKER_READ_BARRIER
.extern artReadBarrierSlow
// Reads a heap reference by calling artReadBarrierSlow(ref, xObj, offset) and leaves the
// result in wDest, preserving x0-x4 and LR apart from the destination register.
//   xDest, wDest - X and W names of the destination register (xDest selects which saved
//                  register is not reloaded, wDest receives the result).
//   xObj         - register holding the object address.
//   offset       - byte offset of the reference field.
.macro READ_BARRIER_SLOW xDest, wDest, xObj, offset
// Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
SAVE_TWO_REGS x2, x3, 16
SAVE_TWO_REGS x4, xLR, 32
// mov x0, \xRef // pass ref in x0 (no-op for now since parameter ref is unused)
.ifnc \xObj, x1
mov x1, \xObj // pass xObj
.endif
mov w2, #\offset // pass offset
bl artReadBarrierSlow // artReadBarrierSlow(ref, xObj, offset)
// No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
.ifnc \wDest, w0
mov \wDest, w0 // save return value in wDest
.endif
// Conditionally restore saved registers
// The register named by xDest is skipped so the freshly written result survives.
POP_REG_NE x0, 0, \xDest
POP_REG_NE x1, 8, \xDest
POP_REG_NE x2, 16, \xDest
POP_REG_NE x3, 24, \xDest
POP_REG_NE x4, 32, \xDest
RESTORE_REG xLR, 40
DECREASE_FRAME 48
.endm
#endif // USE_BAKER_READ_BARRIER
#endif // USE_READ_BARRIER
/*
 * Quick entrypoint that stores a reference into an object array.
 * Checks that the value is assignable to the component type of the array, stores the value
 * and marks the card of the array.
 * On entry:
 *   x0: array
 *   x1: index of the element to write
 *   x2: value to store, may be null
 * A null value is stored directly, without type check or card mark. This stub itself does
 * not null-check the array or bounds-check the index.
 * Scratch registers: x3, x4 and the flags; the Baker read barrier path also uses IP0 and x5.
 * x0 is overwritten with the card index on the paths that mark the card.
 */
ENTRY art_quick_aput_obj
cbz x2, .Laput_obj_null
// Load the component type of the array into w3 and the class of the value into w4.
#if defined(USE_READ_BARRIER) && !defined(USE_BAKER_READ_BARRIER)
READ_BARRIER_SLOW x3, w3, x0, MIRROR_OBJECT_CLASS_OFFSET
READ_BARRIER_SLOW x3, w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
READ_BARRIER_SLOW x4, w4, x2, MIRROR_OBJECT_CLASS_OFFSET
#else // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
#ifdef USE_READ_BARRIER
// With Baker read barriers, a non-zero marking register selects the loads with read barrier.
cbnz wMR, .Laput_obj_gc_marking
#endif // USE_READ_BARRIER
ldr w3, [x0, #MIRROR_OBJECT_CLASS_OFFSET] // Heap reference = 32b; zero-extends to x3.
UNPOISON_HEAP_REF w3
ldr w3, [x3, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Heap reference = 32b; zero-extends to x3.
UNPOISON_HEAP_REF w3
ldr w4, [x2, #MIRROR_OBJECT_CLASS_OFFSET] // Heap reference = 32b; zero-extends to x4.
UNPOISON_HEAP_REF w4
#endif // !defined(USE_READ_BARRIER) || defined(USE_BAKER_READ_BARRIER)
cmp w3, w4 // value's type == array's component type - trivial assignability
bne .Laput_obj_check_assignability
.Laput_obj_store:
// Store the (poisoned) value at data + index * 4.
add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
POISON_HEAP_REF w2
str w2, [x3, x1, lsl #2] // Heap reference = 32b.
// Mark the card of the array: the byte written at card_table + (array >> CARD_SHIFT)
// is the low byte of the card table pointer itself.
ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
lsr x0, x0, #CARD_TABLE_CARD_SHIFT
strb w3, [x3, x0]
ret
.Laput_obj_null:
// Null value: plain store, no poisoning and no card mark.
add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
str w2, [x3, x1, lsl #2] // Heap reference = 32b.
ret
.Laput_obj_check_assignability:
// Store arguments and link register
SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
SAVE_TWO_REGS x2, xLR, 16
// Call runtime code
mov x0, x3 // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
mov x1, x4 // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
bl artIsAssignableFromCode
// Check for exception
// A zero result means the value is not assignable; the CFI state is remembered so that the
// throwing path below can start from the unwind info of the 32-byte frame.
CFI_REMEMBER_STATE
cbz x0, .Laput_obj_throw_array_store_exception
// Restore
RESTORE_TWO_REGS x2, xLR, 16
RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
// Same store and card mark sequence as at .Laput_obj_store.
add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
POISON_HEAP_REF w2
str w2, [x3, x1, lsl #2] // Heap reference = 32b.
ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
lsr x0, x0, #CARD_TABLE_CARD_SHIFT
strb w3, [x3, x0]
ret
.Laput_obj_throw_array_store_exception:
CFI_RESTORE_STATE_AND_DEF_CFA sp, 32
// Reload the original array (x0), index (x1), value (x2) and LR before throwing.
RESTORE_TWO_REGS x2, xLR, 16
RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
// Remembered for the Baker read barrier code placed after the `brk` below.
CFI_REMEMBER_STATE
#endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
mov x1, x2 // Pass value.
mov x2, xSELF // Pass Thread::Current.
bl artThrowArrayStoreException // (Object*, Object*, Thread*).
brk 0 // Unreachable.
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
CFI_RESTORE_STATE_AND_DEF_CFA sp, 0
.Laput_obj_gc_marking:
// Same three loads as the non-marking path, each with a gray-bit check on the holder and
// an out-of-line marking call when the holder is gray.
BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
w3, x0, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_array_class
.Laput_obj_mark_array_class_continue:
BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, .Laput_obj_mark_array_element
.Laput_obj_mark_array_element_continue:
BAKER_RB_CHECK_GRAY_BIT_AND_LOAD \
w4, x2, MIRROR_OBJECT_CLASS_OFFSET, .Laput_obj_mark_object_class
.Laput_obj_mark_object_class_continue:
cmp w3, w4 // value's type == array's component type - trivial assignability
bne .Laput_obj_check_assignability
b .Laput_obj_store
.Laput_obj_mark_array_class:
BAKER_RB_LOAD_AND_MARK w3, x0, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg03
b .Laput_obj_mark_array_class_continue
.Laput_obj_mark_array_element:
BAKER_RB_LOAD_AND_MARK \
w3, x3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, art_quick_read_barrier_mark_reg03
b .Laput_obj_mark_array_element_continue
.Laput_obj_mark_object_class:
BAKER_RB_LOAD_AND_MARK w4, x2, MIRROR_OBJECT_CLASS_OFFSET, art_quick_read_barrier_mark_reg04
b .Laput_obj_mark_object_class_continue
#endif // defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
END art_quick_aput_obj
// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint `name`, which forwards its single argument (left untouched in x0) to the
// runtime function `entrypoint` with Thread::Current appended in x1.
//   name        Symbol of the generated entrypoint.
//   entrypoint  Runtime function to call; declared `.extern` here.
//   return      Macro or instruction expanded after the frame is restored; it is
//               responsible for returning to the caller.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
.extern \entrypoint
ENTRY \name
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x1, xSELF // pass Thread::Current
bl \entrypoint // (arg0, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
\return
END \name
.endm
// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint `name`, which forwards its two arguments (left untouched in x0, x1) to
// the runtime function `entrypoint` with Thread::Current appended in x2.
//   name        Symbol of the generated entrypoint.
//   entrypoint  Runtime function to call; declared `.extern` here.
//   return      Macro or instruction expanded after the frame is restored; it is
//               responsible for returning to the caller.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
.extern \entrypoint
ENTRY \name
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x2, xSELF // pass Thread::Current
bl \entrypoint // (arg0, arg1, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
\return
END \name
.endm
// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint `name`, which forwards its three arguments (left untouched in x0-x2) to
// the runtime function `entrypoint` with Thread::Current appended in x3.
//   name        Symbol of the generated entrypoint.
//   entrypoint  Runtime function to call; declared `.extern` here.
//   return      Macro or instruction expanded after the frame is restored; it is
//               responsible for returning to the caller.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
.extern \entrypoint
ENTRY \name
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x3, xSELF // pass Thread::Current
bl \entrypoint // (arg0, arg1, arg2, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
\return
END \name
.endm
// Macro to facilitate adding new allocation entrypoints.
// Defines entrypoint `name`, which forwards its four arguments (left untouched in x0-x3) to
// the runtime function `entrypoint` with Thread::Current appended in x4.
//   name        Symbol of the generated entrypoint.
//   entrypoint  Runtime function to call; declared `.extern` here.
//   return      Macro or instruction expanded after the frame is restored; it is
//               responsible for returning to the caller.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
.extern \entrypoint
ENTRY \name
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x4, xSELF // pass Thread::Current
bl \entrypoint // (arg0, arg1, arg2, arg3, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
\return
END \name
.endm
/*
 * Macro for resolution and initialization of indexed DEX file
 * constants such as classes and strings.
 *
 * Defines entrypoint `name`, which calls `entrypoint(x0, Thread::Current)` under a
 * save-everything frame.
 *   name                   Symbol of the generated entrypoint.
 *   entrypoint             Runtime function to call; declared `.extern` here.
 *   runtime_method_offset  Offset handed to SETUP_SAVE_EVERYTHING_FRAME; defaults to
 *                          RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET.
 * A zero result in w0 is treated as failure and the pending exception is delivered using
 * the frame that is already set up; otherwise the result is returned in x0.
 */
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
.extern \entrypoint
ENTRY \name
SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset // save everything for stack crawl
mov x1, xSELF // pass Thread::Current
bl \entrypoint // (int32_t index, Thread* self)
cbz w0, 1f // If result is null, deliver the OOME.
DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_X0 x1, /* is_ref= */ 1
1:
DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm
// Variant of ONE_ARG_SAVE_EVERYTHING_DOWNCALL that sets up the frame with
// RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET instead of the default offset.
//   name        Symbol of the generated entrypoint.
//   entrypoint  Runtime function to call.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
\name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm
// Return sequence for downcalls whose result in w0 is a reference: a non-zero result goes
// through DEOPT_OR_RETURN (x1 is handed to it as a temporary), a zero result delivers the
// pending exception.
.macro RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
cbz w0, 1f // result zero branch over
DEOPT_OR_RETURN x1, /*is_ref=*/1 // check for deopt or return
1:
DELIVER_PENDING_EXCEPTION
.endm
/*
 * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
 * failure.
 */
TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
artHandleFillArrayDataFromCode, \
RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
/*
 * Entry from managed code when static storage is uninitialized: this stub runs the class
 * initializer and delivers the exception on error. On success the static storage base is
 * returned.
 */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT \
art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
// Resolution entrypoints for types, method handles, method types and strings. Only
// art_quick_resolve_type uses the FOR_CLINIT frame variant.
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL \
art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
// Field entrypoints; the generator macro is not defined in this section.
GENERATE_FIELD_ENTRYPOINTS
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm64 specific asm.
// The generators left commented out below correspond to the TLAB and region TLAB object and
// array entrypoints that are written by hand further down in this file.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
// Defines the RosAlloc object allocation entrypoint `c_name`. The fast path pops a slot from
// the free list of the thread-local run selected by the object size; the slow path calls the
// runtime function `cxx_name` with (type, Thread*).
//   c_name         Symbol of the generated entrypoint.
//   cxx_name       Runtime function called on the slow path.
//   isInitialized  Not referenced in the macro body.
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
// Fast path rosalloc allocation.
// x0: type, xSELF(x19): Thread::Current
// x1-x7: free.
ldr x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET] // Check if the thread local
// allocation stack has room.
// ldp won't work due to large offset.
ldr x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
cmp x3, x4
bhs .Lslow_path\c_name
ldr w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x3)
cmp x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE // Check if the size is for a thread
// local allocation.
// If the class is not yet visibly initialized, or it is finalizable,
// the object size will be very large to force the branch below to be taken.
//
// See Class::SetStatus() in class.cc for more details.
bhs .Lslow_path\c_name
// Compute the rosalloc bracket index
// from the size. Since the size is
// already aligned we can combine the
// two shifts together.
add x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
// Subtract pointer size since there
// are no runs for 0 byte allocations
// and the size is already aligned.
// x4 becomes the run for this size bracket.
ldr x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
// Load the free list head (x3). This
// will be the return val.
ldr x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
cbz x3, .Lslow_path\c_name
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
ldr x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET] // Load the next pointer of the head
// and update the list head with the
// next pointer.
str x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
// Store the class pointer in the
// header. This also overwrites the
// next pointer. The offsets are
// asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
POISON_HEAP_REF w0
str w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
// Push the new object onto the thread
// local allocation stack and
// increment the thread local
// allocation stack top.
ldr x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
str w3, [x1], #COMPRESSED_REFERENCE_SIZE // (Increment x1 as a side effect.)
str x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
// After this "STR" the object is published to the thread local allocation stack,
// and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
// It is not yet visible to the running (user) compiled code until after the return.
//
// To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
// the state of the allocation stack slot. It can be a pointer to one of:
// 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
// (The stack initial state is "null" pointers).
// 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
// 2) A fully valid object, with a valid class pointer pointing to a real class.
// Other states are not allowed.
//
// An object that is invalid only temporarily, and will eventually become valid.
// The internal runtime code simply checks if the object is not null or is partial and then
// ignores it.
//
// (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
// to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
// "next" pointer is not-cyclic.)
//
// See also b/28790624 for a listing of CLs dealing with this race.
//
// Decrement the size of the free list (a 32-bit field of the run).
ldr w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
sub x1, x1, #1
// TODO: consider combining this store
// and the list head store above using
// strd.
str w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
mov x0, x3 // Set the return value and return.
// No barrier. The class is already observably initialized (otherwise the fast
// path size check above would fail) and new-instance allocations are protected
// from publishing by the compiler which inserts its own StoreStore barrier.
ret
.Lslow_path\c_name:
// x0 still holds the type here: the fast path only overwrites x0 after its last slow-path
// branch.
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x1, xSELF // pass Thread::Current
bl \cxx_name
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \c_name
.endm
// RosAlloc object allocation entrypoints for resolved and for initialized classes. Both
// share the same fast path; only the slow-path runtime function differs.
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, \
artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, \
artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
// TLAB bump-pointer fast path for object allocation.
// On entry x0 holds the class. If the object fits between the thread-local position and
// end, the position is advanced, the class is stored in the new object and the macro returns
// to the caller with the object in x0. Otherwise it branches to slowPathLabel with x0
// unchanged.
//   slowPathLabel  Label to branch to when the object does not fit.
//   isInitialized  Not referenced in the macro body.
// Clobbers x4-x7 and the flags.
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
ldr x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
ldr x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
ldr w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET] // Load the object size (x7).
add x6, x4, x7 // Add object size to tlab pos.
cmp x6, x5 // Check if it fits, overflow works
// since the tlab pos and end are 32
// bit values.
// If the class is not yet visibly initialized, or it is finalizable,
// the object size will be very large to force the branch below to be taken.
//
// See Class::SetStatus() in class.cc for more details.
bhi \slowPathLabel
str x6, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos.
POISON_HEAP_REF w0
str w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
mov x0, x4 // Return the old thread_local_pos as the new object.
// No barrier. The class is already observably initialized (otherwise the fast
// path size check above would fail) and new-instance allocations are protected
// from publishing by the compiler which inserts its own StoreStore barrier.
ret
.endm
// The common code for art_quick_alloc_object_*region_tlab
// Currently the implementation ignores isInitialized. TODO(b/172087402): clean this up.
// Caller must execute a constructor fence after this.
//   name           Symbol of the generated entrypoint.
//   entrypoint     Runtime function called on the slow path with (mirror::Class*, Thread*).
//   isInitialized  Forwarded to ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED.
// The fast path macro returns by itself on success, so execution reaches the slow path
// label only through its branch.
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
// Fast path region tlab allocation.
// x0: type, xSELF(x19): Thread::Current
// x1-x7: free.
ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
.Lslow_path\name:
SETUP_SAVE_REFS_ONLY_FRAME // Save callee saves in case of GC.
mov x1, xSELF // Pass Thread::Current.
bl \entrypoint // (mirror::Class*, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \name
.endm
// Object allocation entrypoints for the region TLAB and the TLAB allocators, each for
// resolved and for initialized classes.
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
art_quick_alloc_object_resolved_region_tlab, \
artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
art_quick_alloc_object_initialized_region_tlab, \
artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
art_quick_alloc_object_resolved_tlab, \
artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB \
art_quick_alloc_object_initialized_tlab, \
artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
// TLAB bump-pointer fast path for array allocation.
// On entry xTemp1 holds the array size in bytes with OBJECT_ALIGNMENT_MASK already added
// (as produced by the COMPUTE_ARRAY_SIZE_* macros); it is rounded down to the alignment
// here. If the array is below MIN_LARGE_OBJECT_THRESHOLD and fits in the thread-local
// buffer, the position is advanced, class and length are stored and the macro returns to
// the caller with the array in x0. Otherwise it branches to slowPathLabel.
//   slowPathLabel   Label to branch to when the fast path cannot allocate.
//   wClass          W register holding the array class (poisoned in place when enabled).
//   wCount          W register holding the component count.
//   xTemp0, xTemp2  Scratch X registers.
//   xTemp1          Size in bytes, see above.
// xClass, xCount, wTemp0, wTemp1 and wTemp2 are not referenced in the body.
// The result register is hard-coded as x0.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE \
slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
and \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
// (addr + 7) & ~7. The mask must
// be 64 bits to keep high bits in
// case of overflow.
// Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
// Negative ints become large 64 bit unsigned ints which will always be larger than max signed
// 32 bit int. Since the max shift for arrays is 3, it can not become a negative 64 bit int.
cmp \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD // Possibly a large object, go slow
bhs \slowPathLabel // path.
ldr \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Check tlab for space, note that
// we use (end - begin) to handle
// negative size arrays. It is
// assumed that a negative size will
// always be greater unsigned than
// region size.
ldr \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
sub \xTemp2, \xTemp2, \xTemp0
cmp \xTemp1, \xTemp2
// The array class is always initialized here. Unlike new-instance,
// this does not act as a double test.
bhi \slowPathLabel
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
// Move old thread_local_pos to x0
// for the return value.
mov x0, \xTemp0
add \xTemp0, \xTemp0, \xTemp1
str \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET] // Store new thread_local_pos.
POISON_HEAP_REF \wClass
str \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET] // Store the class pointer.
str \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET] // Store the array length.
// new-array is special. The class is loaded and immediately goes to the Initialized state
// before it is published. Therefore the only fence needed is for the publication of the object.
// See ClassLinker::CreateArrayClass() for more details.
// For publication of the new array, we don't need a 'dmb ishst' here.
// The compiler generates 'dmb ishst' for all new-array insts.
ret
.endm
// Defines the array allocation entrypoint `name` with an inline TLAB fast path and a slow
// path calling the runtime function `entrypoint` with (klass, component_count, Thread*).
//   name        Symbol of the generated entrypoint.
//   entrypoint  Runtime function called on the slow path.
//   size_setup  One of the COMPUTE_ARRAY_SIZE_* macros; it must leave the unaligned byte
//               size plus OBJECT_ALIGNMENT_MASK in x5.
// Caller must execute a constructor fence after this.
.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
// Fast path array allocation for region tlab allocation.
// x0: mirror::Class* type
// x1: int32_t component_count
// x2-x7: free.
// Work on a copy of the class in x3: x0 stays intact for the slow path and is only
// overwritten with the result once the fast path can no longer fail.
mov x3, x0
\size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE \
.Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
.Lslow_path\name:
// x0: mirror::Class* klass
// x1: int32_t component_count
// x2: Thread* self
SETUP_SAVE_REFS_ONLY_FRAME // save callee saves in case of GC
mov x2, xSELF // pass Thread::Current
bl \entrypoint
RESTORE_SAVE_REFS_ONLY_FRAME
REFRESH_MARKING_REGISTER
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END \name
.endm
// Size setup for arrays whose component size is not known statically.
// Reads the component size shift from the component type of xClass and leaves
//   (xCount << shift) + data offset + OBJECT_ALIGNMENT_MASK
// in xTemp1, where the data offset is MIRROR_INT_ARRAY_DATA_OFFSET, plus 4 when the shift
// is 3. Clobbers xTemp0. wClass, wCount, wTemp1, xTemp2 and wTemp2 are not referenced.
.macro COMPUTE_ARRAY_SIZE_UNKNOWN \
xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
// Array classes are never finalizable or uninitialized, no need to check.
ldr \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
UNPOISON_HEAP_REF \wTemp0
ldr \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
lsr \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
// bits.
// xCount is holding a 32 bit value,
// it can not overflow.
lsl \xTemp1, \xCount, \xTemp0 // Calculate data size
// Add array data offset and alignment.
add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
// With a shift in 0..3, (shift + 1) & 4 is 4 for shift 3 and 0 otherwise.
add \xTemp0, \xTemp0, #1 // Add 4 to the length only if the
// component size shift is 3
// (for 64 bit alignment).
and \xTemp0, \xTemp0, #4
add \xTemp1, \xTemp1, \xTemp0
.endm
// Size setup for arrays with 1-byte components: leaves
//   xCount + MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK
// in xTemp1. Only xCount and xTemp1 are referenced; the other parameters exist so that
// all COMPUTE_ARRAY_SIZE_* macros can be invoked with the same argument list.
.macro COMPUTE_ARRAY_SIZE_8 \
xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
// Add array data offset and alignment.
add \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
// Size setup for arrays with 2-byte components: leaves
//   (xCount << 1) + MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK
// in xTemp1. Only xCount and xTemp1 are referenced.
.macro COMPUTE_ARRAY_SIZE_16 \
xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
lsl \xTemp1, \xCount, #1
// Add array data offset and alignment.
add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
// Size setup for arrays with 4-byte components: leaves
//   (xCount << 2) + MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK
// in xTemp1. Only xCount and xTemp1 are referenced.
.macro COMPUTE_ARRAY_SIZE_32 \
xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
lsl \xTemp1, \xCount, #2
// Add array data offset and alignment.
add \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
// Size setup for arrays with 8-byte components: leaves
//   (xCount << 3) + MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK
// in xTemp1. Unlike the narrower variants this uses the wide array data offset.
// Only xCount and xTemp1 are referenced.
.macro COMPUTE_ARRAY_SIZE_64 \
xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
lsl \xTemp1, \xCount, #3
// Add array data offset and alignment.
add \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
// Array allocation entrypoints for the region TLAB and the TLAB allocators. Within one
// allocator all size variants share the same slow-path runtime function; they differ only
// in the size computation used by the fast path.
// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
// the entrypoint once all backends have been updated to use the size variants.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, \
artAllocArrayFromCodeResolvedRegionTLAB, \
COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, \
artAllocArrayFromCodeResolvedRegionTLAB, \
COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, \
artAllocArrayFromCodeResolvedRegionTLAB, \
COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, \
artAllocArrayFromCodeResolvedRegionTLAB, \
COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, \
artAllocArrayFromCodeResolvedRegionTLAB, \
COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, \
artAllocArrayFromCodeResolvedTLAB, \
COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, \
artAllocArrayFromCodeResolvedTLAB, \
COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, \
artAllocArrayFromCodeResolvedTLAB, \
COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, \
artAllocArrayFromCodeResolvedTLAB, \
COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, \
artAllocArrayFromCodeResolvedTLAB, \
COMPUTE_ARRAY_SIZE_64
/*
 * Called by managed code when the thread has been asked to suspend.
 * Calls artTestSuspendFromCode(Thread*) under a save-everything frame, then restores all
 * registers and refreshes the marking and suspend check registers before returning.
 */
.extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
// Save callee saves for stack crawl.
SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
mov x0, xSELF
bl artTestSuspendFromCode // (Thread*)
RESTORE_SAVE_EVERYTHING_FRAME
REFRESH_MARKING_REGISTER
REFRESH_SUSPEND_CHECK_REGISTER
ret
END art_quick_test_suspend
/*
 * Redirection point from implicit suspend check fault handler.
 * Same sequence as art_quick_test_suspend, but calls artImplicitSuspendFromCode(Thread*)
 * and leaves through `br lr` because this code is not entered with "BL".
 */
.extern artImplicitSuspendFromCode
ENTRY art_quick_implicit_suspend
// Save callee saves for stack crawl.
SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
mov x0, xSELF
bl artImplicitSuspendFromCode // (Thread*)
RESTORE_SAVE_EVERYTHING_FRAME
REFRESH_MARKING_REGISTER
REFRESH_SUSPEND_CHECK_REGISTER
br lr // Do not use RET as we do not enter the entrypoint with "BL".
END art_quick_implicit_suspend
/*
 * Called by managed code that is attempting to call a method on a proxy class. On entry
 * x0 holds the proxy method and x1 holds the receiver; The frame size of the invoked proxy
 * method agrees with a ref and args callee save frame.
 *
 * Calls artQuickProxyInvokeHandler(proxy method, receiver, Thread*, SP). If no exception is
 * pending afterwards, the result in x0 is also copied to d0 and the stub returns; otherwise
 * the pending exception is delivered.
 */
.extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
mov x2, xSELF // pass Thread::Current
mov x3, sp // pass SP
bl artQuickProxyInvokeHandler // (Method* proxy method, receiver, Thread*, SP)
ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET] // x2 = pending exception of this thread
CFI_REMEMBER_STATE
cbnz x2, .Lexception_in_proxy // success if no exception is pending
RESTORE_SAVE_REFS_AND_ARGS_FRAME // Restore frame
REFRESH_MARKING_REGISTER
fmov d0, x0 // Store result in d0 in case it was float or double
ret // return on success
.Lexception_in_proxy:
// Unwind info: back to the state with the refs-and-args frame still on the stack.
CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
RESTORE_SAVE_REFS_AND_ARGS_FRAME
DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler
/*
 * Called to resolve an imt conflict.
 * x0 is the conflict ArtMethod.
 * xIP1 is a hidden argument that holds the target interface method.
 *
 * The ImtConflictTable is walked as a sequence of pointer pairs: the first pointer of an
 * entry is compared with xIP1 and the second one is the method to call on a match. A null
 * first pointer ends the table.
 *
 * Note that this stub writes to xIP0, xIP1, and x0.
 */
ENTRY art_quick_imt_conflict_trampoline
ldr xIP0, [x0, #ART_METHOD_JNI_OFFSET_64] // Load ImtConflictTable
ldr x0, [xIP0] // Load first entry in ImtConflictTable.
.Limt_table_iterate:
cmp x0, xIP1
// Branch if found. Benchmarks have shown doing a branch here is better.
beq .Limt_table_found
// If the entry is null, the interface method is not in the ImtConflictTable.
cbz x0, .Lconflict_trampoline
// Iterate over the entries of the ImtConflictTable.
// Pre-indexed load: xIP0 advances by one entry (two pointers) and x0 gets its first pointer.
ldr x0, [xIP0, #(2 * __SIZEOF_POINTER__)]!
b .Limt_table_iterate
.Limt_table_found:
// We successfully hit an entry in the table. Load the target method
// and jump to it.
ldr x0, [xIP0, #__SIZEOF_POINTER__]
ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
br xIP0
.Lconflict_trampoline:
// Call the runtime stub to populate the ImtConflictTable and jump to the
// resolved method.
mov x0, xIP1 // Load interface method
INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
END art_quick_imt_conflict_trampoline
/*
 * Calls artQuickResolutionTrampoline(called, receiver, Thread*, SP) under a refs-and-args
 * frame. A non-zero result is the code pointer to continue at: the called method is
 * reloaded from the bottom of the frame into x0, the argument registers are restored and
 * the stub tail-jumps to the code. A zero result delivers the pending exception.
 * Clobbers xIP0.
 */
ENTRY art_quick_resolution_trampoline
SETUP_SAVE_REFS_AND_ARGS_FRAME
mov x2, xSELF
mov x3, sp
bl artQuickResolutionTrampoline // (called, receiver, Thread*, SP)
CFI_REMEMBER_STATE
cbz x0, 1f
mov xIP0, x0 // Remember returned code pointer in xIP0.
ldr x0, [sp, #0] // artQuickResolutionTrampoline puts called method in *SP.
RESTORE_SAVE_REFS_AND_ARGS_FRAME
REFRESH_MARKING_REGISTER
br xIP0
1:
// Unwind info: back to the state with the refs-and-args frame still on the stack.
CFI_RESTORE_STATE_AND_DEF_CFA sp, FRAME_SIZE_SAVE_REFS_AND_ARGS
RESTORE_SAVE_REFS_AND_ARGS_FRAME
DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline
/*
* Generic JNI frame layout:
*
* #-------------------#
* | |
* | caller method... |
* #-------------------# <--- SP on entry
* | Return X30/LR |
* | X29/FP | callee save
* | X28 | callee save
* | X27 | callee save
* | X26 | callee save
* | X25 | callee save
* | X24 | callee save
* | X23 | callee save
* | X22 | callee save
* | X21 | callee save
* | X20 | callee save
* | X7 | arg7
* | X6 | arg6
* | X5 | arg5
* | X4 | arg4
* | X3 | arg3
* | X2 | arg2
* | X1 | arg1
* | D7 | float arg 8
* | D6 | float arg 7
* | D5 | float arg 6
* | D4 | float arg 5
* | D3 | float arg 4
* | D2 | float arg 3
* | D1 | float arg 2
* | D0 | float arg 1
* | padding | // 8B
* | Method* | <- X0 (Managed frame similar to SaveRefsAndArgs.)
* #-------------------#
* | local ref cookie | // 4B
* | padding | // 0B or 4B to align stack args on 8B address
* #-------------------#
* | JNI Stack Args | // Empty if all args fit into registers x0-x7, d0-d7.
* #-------------------# <--- SP on native call (1)
* | Free scratch |
* #-------------------#
* | SP for JNI call | // Pointer to (1).
* #-------------------#
* | Hidden arg | // For @CriticalNative
* #-------------------#
* | |
* | Stack for Regs | The trampoline assembly will pop these values
* | | into registers for native call
* #-------------------#
*/
/*
 * Called to do a generic JNI down-call
 *
 * On entry x0 holds the method and the other argument registers hold the managed arguments.
 * Steps:
 *   1. Build the managed frame and reserve GENERIC_JNI_TRAMPOLINE_RESERVED_AREA bytes
 *      below it; x28 keeps the managed SP and serves as the CFA register meanwhile.
 *   2. artQuickGenericJniTrampoline fills the bottom of the reserved area with the native
 *      argument registers, the hidden argument and the SP for the call, and returns the
 *      native code pointer (0 on error).
 *   3. Load the registers, switch SP and call the native code.
 *   4. artQuickGenericJniEndTrampoline post-processes the result; a pending exception is
 *      delivered, otherwise the frame is torn down and the result is returned in x0 and d0.
 */
.extern artQuickGenericJniTrampoline
ENTRY art_quick_generic_jni_trampoline
SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
// Save SP, so we can have static CFI info.
mov x28, sp
.cfi_def_cfa_register x28
mov xIP0, #GENERIC_JNI_TRAMPOLINE_RESERVED_AREA
sub sp, sp, xIP0
// prepare for artQuickGenericJniTrampoline call
// (Thread*, managed_sp, reserved_area)
// x0 x1 x2 <= C calling convention
// xSELF x28 sp <= where they are
mov x0, xSELF // Thread*
mov x1, x28 // SP for the managed frame.
mov x2, sp // reserved area for arguments and other saved data (up to managed frame)
bl artQuickGenericJniTrampoline // (Thread*, managed_sp, reserved_area)
// The C call will have registered the complete save-frame on success.
// The result of the call is:
// x0: pointer to native code, 0 on error.
// The bottom of the reserved area contains values for arg registers,
// hidden arg register and SP for out args for the call.
// Check for error (class init check or locking for synchronized native method can throw).
cbz x0, .Lexception_in_native
// Save the code pointer
mov xIP0, x0
// Load parameters from frame into registers.
ldp x0, x1, [sp]
ldp x2, x3, [sp, #16]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #48]
ldp d0, d1, [sp, #64]
ldp d2, d3, [sp, #80]
ldp d4, d5, [sp, #96]
ldp d6, d7, [sp, #112]
// Load hidden arg (x15) for @CriticalNative and SP for out args.
ldp x15, xIP1, [sp, #128]
// Apply the new SP for out args, releasing unneeded reserved area.
mov sp, xIP1
blr xIP0 // native call.
// result sign extension is handled in C code
// prepare for artQuickGenericJniEndTrampoline call
// (Thread*, result, result_f)
// x0 x1 x2 <= C calling convention
mov x1, x0 // Result (from saved).
mov x0, xSELF // Thread register.
fmov x2, d0 // d0 will contain floating point result, but needs to go into x2
bl artQuickGenericJniEndTrampoline
// Pending exceptions possible.
ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
cbnz x2, .Lexception_in_native
// Tear down the alloca.
mov sp, x28
// Check whether method exit hooks have to be run before returning.
LOAD_RUNTIME_INSTANCE x1
ldrb w1, [x1, #RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE]
CFI_REMEMBER_STATE
cbnz w1, .Lcall_method_exit_hook
.Lcall_method_exit_hook_done:
// Tear down the callee-save frame.
.cfi_def_cfa_register sp
// Restore callee-saves and LR as in `RESTORE_SAVE_REFS_AND_ARGS_FRAME`
// but do not restore argument registers.
// Note: Likewise, we could avoid restoring X20 in the case of Baker
// read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
// later; but it's not worth handling this special case.
// The hard-coded offsets below are only valid for a 224-byte frame, hence the check.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif
RESTORE_REG x20, 136
RESTORE_TWO_REGS x21, x22, 144
RESTORE_TWO_REGS x23, x24, 160
RESTORE_TWO_REGS x25, x26, 176
RESTORE_TWO_REGS x27, x28, 192
RESTORE_TWO_REGS x29, xLR, 208
// Remove the frame.
DECREASE_FRAME FRAME_SIZE_SAVE_REFS_AND_ARGS
REFRESH_MARKING_REGISTER
// store into fpr, for when it's a fpr return...
fmov d0, x0
ret
.Lcall_method_exit_hook:
CFI_RESTORE_STATE_AND_DEF_CFA x28, FRAME_SIZE_SAVE_REFS_AND_ARGS
fmov d0, x0
// NOTE(review): x4 presumably carries the frame size argument expected by
// art_quick_method_exit_hook, which is defined outside this section - confirm.
mov x4, FRAME_SIZE_SAVE_REFS_AND_ARGS
bl art_quick_method_exit_hook
b .Lcall_method_exit_hook_done
.Lexception_in_native:
// Move to x1 then sp to please assembler.
ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
add sp, x1, #-1 // Remove the GenericJNI tag.
bl art_deliver_pending_exception
END art_quick_generic_jni_trampoline
// Stand-alone wrapper around DELIVER_PENDING_EXCEPTION; it is the target of the `bl` on the
// exception path of art_quick_generic_jni_trampoline.
ENTRY art_deliver_pending_exception
# This will create a new save-all frame, required by the runtime.
DELIVER_PENDING_EXCEPTION
END art_deliver_pending_exception
/*
 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
 * of a quick call:
 * x0 = method being called/to bridge to.
 * x1..x7, d0..d7 = arguments to that method.
 *
 * The 64-bit result of artQuickToInterpreterBridge is returned in x0 and also copied to d0.
 */
ENTRY art_quick_to_interpreter_bridge
SETUP_SAVE_REFS_AND_ARGS_FRAME // Set up frame and save arguments.
// x0 will contain mirror::ArtMethod* method.
mov x1, xSELF // Pass Thread::Current.
mov x2, sp // Pass SP, which points at the frame set up above.
// uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
// mirror::ArtMethod** sp)
bl artQuickToInterpreterBridge
RESTORE_SAVE_REFS_AND_ARGS_FRAME // TODO: no need to restore arguments in this case.
REFRESH_MARKING_REGISTER
fmov d0, x0 // Copy the result to d0 as well.
RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge
/*
* Called to attempt to execute an obsolete method.
*
* The stub body is produced entirely by the ONE_ARG_RUNTIME_EXCEPTION macro (defined
* outside this part of the file) from the stub name and the runtime function name
* artInvokeObsoleteMethod.
*/
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
/*
* Compiled code has requested that we deoptimize into the interpreter. The deoptimization
* will long jump to the upcall with a special exception of -1.
*
* Only x1 is set up here. The first argument of the runtime call (the DeoptimizationKind,
* per the prototype noted on the call below) is presumably still in x0 from the caller
* after the frame setup - TODO confirm against SETUP_SAVE_EVERYTHING_FRAME.
*/
.extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME
mov x1, xSELF // Pass thread.
bl artDeoptimizeFromCompiledCode // (DeoptimizationKind, Thread*)
// The call is expected to leave through the long jump described in the header, so
// reaching this point is treated as a fatal error: trap.
brk 0
END art_quick_deoptimize_from_compiled_code
/*
* String's indexOf.
*
* TODO: Not very optimized.
* On entry:
* x0: string object (known non-null)
* w1: char to match (known <= 0xFFFF)
* w2: Starting offset in string data
* On exit:
* x0: index of the first matching char at or after the clamped start offset,
* or -1 when no char matches.
* Registers written besides x0: w2, w3, x5, w6, w7, wIP0, wIP1 (and w4 when
* STRING_COMPRESSION_FEATURE is enabled), plus the condition flags.
* No stack is used and no calls are made.
*/
ENTRY art_quick_indexof
#if (STRING_COMPRESSION_FEATURE)
ldr w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
#else
ldr w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
#endif
// From here on x0 points at the character data rather than at the object.
add x0, x0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
/* w4 holds count (with flag) and w3 holds actual length */
lsr w3, w4, #1
#endif
/* Clamp start to [0..count] */
cmp w2, #0
csel w2, wzr, w2, lt // Negative start becomes 0.
cmp w2, w3
csel w2, w3, w2, gt // Start beyond the length becomes the length.
/* Save a copy to compute result */
mov x5, x0
#if (STRING_COMPRESSION_FEATURE)
// Bit 0 of the count word clear selects the byte-per-char search below.
tbz w4, #0, .Lstring_indexof_compressed
#endif
/* Build pointer to start of data to compare and pre-bias */
add x0, x0, x2, lsl #1
// Pre-bias by one 16-bit char because every load below pre-increments x0 by 2.
sub x0, x0, #2
/* Compute iteration count */
sub w2, w3, w2
/*
* At this point we have:
* x0: start of the data to test
* w1: char to compare
* w2: iteration count
* x5: original start of string data
*/
// Bias the count by -4 so that the sign of w2 tells whether a full group of four
// chars is still available.
subs w2, w2, #4
b.lt .Lindexof_remainder
.Lindexof_loop4:
// Load four consecutive chars; x0 ends up pointing at the fourth one.
ldrh w6, [x0, #2]!
ldrh w7, [x0, #2]!
ldrh wIP0, [x0, #2]!
ldrh wIP1, [x0, #2]!
// Test them in string order so that the earliest match wins.
cmp w6, w1
b.eq .Lmatch_0
cmp w7, w1
b.eq .Lmatch_1
cmp wIP0, w1
b.eq .Lmatch_2
cmp wIP1, w1
b.eq .Lmatch_3
subs w2, w2, #4
b.ge .Lindexof_loop4
.Lindexof_remainder:
// Undo the -4 bias: w2 is now the number of chars left over (0..3).
adds w2, w2, #4
b.eq .Lindexof_nomatch
.Lindexof_loop1:
ldrh w6, [x0, #2]!
cmp w6, w1
// x0 points at the char just loaded, which is the situation .Lmatch_3 handles.
b.eq .Lmatch_3
subs w2, w2, #1
b.ne .Lindexof_loop1
.Lindexof_nomatch:
mov x0, #-1
ret
// The .Lmatch_N handlers turn the pointer in x0 into a char index:
// step back to the matching char, subtract the data start (x5), halve the byte offset.
.Lmatch_0:
sub x0, x0, #6 // Match was three chars before the last one loaded.
sub x0, x0, x5
asr x0, x0, #1
ret
.Lmatch_1:
sub x0, x0, #4 // Match was two chars before the last one loaded.
sub x0, x0, x5
asr x0, x0, #1
ret
.Lmatch_2:
sub x0, x0, #2 // Match was one char before the last one loaded.
sub x0, x0, x5
asr x0, x0, #1
ret
.Lmatch_3:
sub x0, x0, x5 // x0 already points at the matching char.
asr x0, x0, #1
ret
#if (STRING_COMPRESSION_FEATURE)
/*
* Comparing compressed string character-per-character with
* input character
*/
.Lstring_indexof_compressed:
// Same setup as the 16-bit path but with one-byte elements: point x0 one byte before
// the start position (the load below pre-increments) and compute the iteration count.
add x0, x0, x2
sub x0, x0, #1
sub w2, w3, w2
.Lstring_indexof_compressed_loop:
subs w2, w2, #1
b.lt .Lindexof_nomatch // Count exhausted: share the -1 return above.
ldrb w6, [x0, #1]!
cmp w6, w1
b.eq .Lstring_indexof_compressed_matched
b .Lstring_indexof_compressed_loop
.Lstring_indexof_compressed_matched:
// One byte per char, so the byte offset from the data start is the index itself.
sub x0, x0, x5
ret
#endif
END art_quick_indexof
.extern artStringBuilderAppend
ENTRY art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME              // Spill callee saves in case of GC.
    // Call artStringBuilderAppend(uint32_t, const uint32_t*, Thread*).
    // The first argument is left in x0 exactly as the caller supplied it.
    mov     x2, xSELF                       // arg2: Thread::Current.
    add     x1, sp, #(FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__)  // arg1: args pointer.
    bl      artStringBuilderAppend
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER
END art_quick_string_builder_append
/*
* Create a function `name` calling the ReadBarrier::Mark routine,
* getting its argument and returning its result through W register
* `wreg` (corresponding to X register `xreg`), saving and restoring
* all caller-save registers.
*
* The generated function follows a non-standard runtime calling convention:
* - register `wreg` (which may be different from W0) is used to pass the (sole) argument,
* - register `wreg` (which may be different from W0) is used to return the result,
* - all other registers are callee-save (the values they hold are preserved).
*
* Note that wIP0 is used as a scratch register and is overwritten whenever the
* reference is non-null.
*
* Three exits exist:
* - null reference, or mark bit already set: return with `wreg` untouched;
* - lock word holds a forwarding address: return the decoded address in `wreg`;
* - otherwise: call artReadBarrierMark and return its result in `wreg`.
*/
.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
// Reference is null, no work to do at all.
cbz \wreg, .Lret_rb_\name
// Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
ldr wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
tbz wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
.Lret_rb_\name:
ret
.Lnot_marked_rb_\name:
// Check if the top two bits are one, if this is the case it is a forwarding address.
// (ANDing the word with itself shifted left by one leaves bit 31 set only when
// bits 31 and 30 are both set, and BMI branches on that bit.)
tst wIP0, wIP0, lsl #1
bmi .Lret_forwarding_address\name
.Lslow_rb_\name:
/*
* Allocate 44 stack slots * 8 = 352 bytes:
* - 19 slots for core registers X0-15, X17, X19, LR
* - 1 slot padding
* - 24 slots for floating-point registers D0-D7 and D16-D31
*/
// We must not clobber IP1 since code emitted for HLoadClass and HLoadString
// relies on IP1 being preserved.
// Save all potentially live caller-save core registers.
SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
SAVE_TWO_REGS x2, x3, 16
SAVE_TWO_REGS x4, x5, 32
SAVE_TWO_REGS x6, x7, 48
SAVE_TWO_REGS x8, x9, 64
SAVE_TWO_REGS x10, x11, 80
SAVE_TWO_REGS x12, x13, 96
SAVE_TWO_REGS x14, x15, 112
SAVE_TWO_REGS x17, x19, 128 // Skip x16, i.e. IP0, and x18, the platform register.
SAVE_REG xLR, 144 // Save also return address.
// Save all potentially live caller-save floating-point registers.
// (Plain stores: unlike the core registers, no CFI is recorded for these.)
stp d0, d1, [sp, #160]
stp d2, d3, [sp, #176]
stp d4, d5, [sp, #192]
stp d6, d7, [sp, #208]
stp d16, d17, [sp, #224]
stp d18, d19, [sp, #240]
stp d20, d21, [sp, #256]
stp d22, d23, [sp, #272]
stp d24, d25, [sp, #288]
stp d26, d27, [sp, #304]
stp d28, d29, [sp, #320]
stp d30, d31, [sp, #336]
// The two moves around the call are only assembled when `wreg` is not already w0.
.ifnc \wreg, w0
mov w0, \wreg // Pass arg1 - obj from `wreg`
.endif
bl artReadBarrierMark // artReadBarrierMark(obj)
.ifnc \wreg, w0
mov \wreg, w0 // Return result into `wreg`
.endif
// Restore core regs, except `xreg`, as `wreg` is used to return the
// result of this function (simply remove it from the stack instead).
POP_REGS_NE x0, x1, 0, \xreg
POP_REGS_NE x2, x3, 16, \xreg
POP_REGS_NE x4, x5, 32, \xreg
POP_REGS_NE x6, x7, 48, \xreg
POP_REGS_NE x8, x9, 64, \xreg
POP_REGS_NE x10, x11, 80, \xreg
POP_REGS_NE x12, x13, 96, \xreg
POP_REGS_NE x14, x15, 112, \xreg
POP_REGS_NE x17, x19, 128, \xreg
POP_REG_NE xLR, 144, \xreg // Restore also return address.
// Restore floating-point registers.
ldp d0, d1, [sp, #160]
ldp d2, d3, [sp, #176]
ldp d4, d5, [sp, #192]
ldp d6, d7, [sp, #208]
ldp d16, d17, [sp, #224]
ldp d18, d19, [sp, #240]
ldp d20, d21, [sp, #256]
ldp d22, d23, [sp, #272]
ldp d24, d25, [sp, #288]
ldp d26, d27, [sp, #304]
ldp d28, d29, [sp, #320]
ldp d30, d31, [sp, #336]
// Remove frame and return.
DECREASE_FRAME 352
ret
.Lret_forwarding_address\name:
// Shift left by the forwarding address shift. This clears out the state bits since they are
// in the top 2 bits of the lock word.
lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
ret
END \name
.endm
// Instantiate one marking entrypoint per register: x0-x15, x17 and x19-x29.
// The entrypoint named ...regNN takes and returns its reference in wNN.
// No entrypoint is generated for x16 (IP0) or x18; see the commented-out lines.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0, x0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1, x1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2, x2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3, x3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4, x4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5, x5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6, x6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7, x7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8, x8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9, x9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 ip0 is blocked
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18 x18 is blocked
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
// Expand `macro_to_use` with exactly one register operand: the 32-bit name `w`
// when `xreg` evaluates to zero, the 64-bit name `x` otherwise.
.macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
    .ifeq \xreg
        \macro_to_use \w
    .else
        \macro_to_use \x
    .endif
.endm
// Expand a per-register macro once for each of the 32 register numbers, in ascending
// order, producing a 32-entry table:
// - `macro_for_register` is invoked with the register name (x-form when `xreg` is
// non-zero, w-form otherwise) for registers 0-15 and 19-29;
// - `macro_for_reserved_register` is invoked with no argument for the five slots
// marked reserved below (16, 17, 18, 30 and 31).
// The order and the count of expansions must not change: users of this macro index
// into the generated code by register number.
.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x0, w0, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x1, w1, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x2, w2, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x3, w3, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x4, w4, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x5, w5, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x6, w6, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x7, w7, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x8, w8, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x9, w9, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x10, w10, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x11, w11, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x12, w12, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x13, w13, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x14, w14, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x15, w15, \xreg
\macro_for_reserved_register // IP0 is reserved
\macro_for_reserved_register // IP1 is reserved
\macro_for_reserved_register // x18 is reserved
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x19, w19, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x20, w20, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x21, w21, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x22, w22, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x23, w23, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x24, w24, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x25, w25, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x26, w26, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x27, w27, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x28, w28, \xreg
SELECT_X_OR_W_FOR_MACRO \macro_for_register, x29, w29, \xreg
\macro_for_reserved_register // lr is reserved
\macro_for_reserved_register // sp is reserved
.endm
// FOR_REGISTERS specialised to the 64-bit (x) register names.
.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, 1   // xreg = 1
.endm
// FOR_REGISTERS specialised to the 32-bit (w) register names.
.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, 0   // xreg = 0
.endm
// Emit two consecutive `brk 0` instructions (8 bytes of code).
.macro BRK0_BRK0
    .rept 2
        brk 0
    .endr
.endm
// Build-time guard: the array entries below branch to the same main entrypoint that
// field loads use, and that entrypoint reads the LDR at the field offset.
#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
// One array-load switch case: exactly two instructions.
// Loads the 32-bit word at xIP0 + (index_reg << 2) into wIP0, replacing the address
// held in xIP0, then continues in the main introspection entrypoint.
.macro INTROSPECTION_ARRAY_LOAD index_reg
ldr wIP0, [xIP0, \index_reg, lsl #2]
b art_quick_read_barrier_mark_introspection
.endm
// One return switch case: exactly two instructions.
// Copies the 32-bit value in wIP0 into `reg` and returns to the address in LR.
// Despite the "BL" in the macro name, the return is a plain `br lr`.
.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
mov \reg, wIP0
br lr // Do not use RET as we do not enter the entrypoint with "BL".
.endm
// Slow path shared by the introspection entrypoints.
// In: xIP0 = reference to mark; LR = return address into generated code.
// `ldr_offset` = offset from LR of the instruction word whose low five
// bits select the return switch case.
// Saves the registers listed below, calls artReadBarrierMark(xIP0), keeps the result
// in xIP0, restores the registers and jumps to the selected case of
// .Lmark_introspection_return_switch. xIP1 is overwritten.
.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
/*
* Allocate 42 stack slots * 8 = 336 bytes:
* - 18 slots for core registers X0-15, X19, LR
* - 24 slots for floating-point registers D0-D7 and D16-D31
*/
// Save all potentially live caller-save core registers.
SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 336
SAVE_TWO_REGS x2, x3, 16
SAVE_TWO_REGS x4, x5, 32
SAVE_TWO_REGS x6, x7, 48
SAVE_TWO_REGS x8, x9, 64
SAVE_TWO_REGS x10, x11, 80
SAVE_TWO_REGS x12, x13, 96
SAVE_TWO_REGS x14, x15, 112
// Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
SAVE_TWO_REGS x19, xLR, 128 // Save return address.
// Save all potentially live caller-save floating-point registers.
stp d0, d1, [sp, #144]
stp d2, d3, [sp, #160]
stp d4, d5, [sp, #176]
stp d6, d7, [sp, #192]
stp d16, d17, [sp, #208]
stp d18, d19, [sp, #224]
stp d20, d21, [sp, #240]
stp d22, d23, [sp, #256]
stp d24, d25, [sp, #272]
stp d26, d27, [sp, #288]
stp d28, d29, [sp, #304]
stp d30, d31, [sp, #320]
mov x0, xIP0
bl artReadBarrierMark // artReadBarrierMark(obj)
mov xIP0, x0
// Restore core regs, except x0 and x1 as the return register switch case
// address calculation is smoother with an extra register.
RESTORE_TWO_REGS x2, x3, 16
RESTORE_TWO_REGS x4, x5, 32
RESTORE_TWO_REGS x6, x7, 48
RESTORE_TWO_REGS x8, x9, 64
RESTORE_TWO_REGS x10, x11, 80
RESTORE_TWO_REGS x12, x13, 96
RESTORE_TWO_REGS x14, x15, 112
// Skip x16, x17, i.e. IP0, IP1, and x18, the platform register.
RESTORE_TWO_REGS x19, xLR, 128 // Restore return address.
// Restore caller-save floating-point registers.
ldp d0, d1, [sp, #144]
ldp d2, d3, [sp, #160]
ldp d4, d5, [sp, #176]
ldp d6, d7, [sp, #192]
ldp d16, d17, [sp, #208]
ldp d18, d19, [sp, #224]
ldp d20, d21, [sp, #240]
ldp d22, d23, [sp, #256]
ldp d24, d25, [sp, #272]
ldp d26, d27, [sp, #288]
ldp d28, d29, [sp, #304]
ldp d30, d31, [sp, #320]
// LR was restored above, so it addresses the generated code again. x0 is free to use
// as a temp because its saved value is only reloaded at the very end.
// This is a 64-bit load, but only bits 0-4 of the loaded value are consumed below.
ldr x0, [lr, #\ldr_offset] // Load the instruction.
adr xIP1, .Lmark_introspection_return_switch
// Insert bits 0-4 of the instruction word at bits 3-7 of the switch base address,
// i.e. select the 8-byte case numbered by those five bits.
bfi xIP1, x0, #3, #5 // Calculate switch case address.
RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 336
br xIP1
.endm
/*
* Use introspection to load a reference from the same address as the LDR
* instruction in generated code would load (unless loaded by the thunk,
* see below), call ReadBarrier::Mark() with that reference if needed
* and return it in the same register as the LDR instruction would load.
*
* The entrypoint is called through a thunk that differs across load kinds.
* For field and array loads the LDR instruction in generated code follows
* the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
* knows the holder and performs the gray bit check, returning to the LDR
* instruction if the object is not gray, so this entrypoint no longer
* needs to know anything about the holder. For GC root loads, the LDR
* instruction in generated code precedes the branch to the thunk (i.e.
* the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
*
* For field accesses and array loads with a constant index the thunk loads
* the reference into IP0 using introspection and calls the main entrypoint,
* art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
* the passed reference is poisoned.
*
* For array accesses with non-constant index, the thunk inserts the bits
* 16-21 of the LDR instruction to the entrypoint address, effectively
* calculating a switch case label based on the index register (bits 16-20)
* and adding an extra offset (bit 21 is set) to differentiate from the
* main entrypoint, then moves the base register to IP0 and jumps to the
* switch case. Therefore we need to align the main entrypoint to 512 bytes,
* accounting for a 256-byte offset followed by 32 array entrypoints
* starting at art_quick_read_barrier_mark_introspection_arrays, each
* containing an LDR (register) and a branch to the main entrypoint.
*
* For GC root accesses we cannot use the main entrypoint because of the
* different offset where the LDR instruction in generated code is located.
* (And even with heap poisoning enabled, GC roots are not poisoned.)
* To re-use the same entrypoint pointer in generated code, we make sure
* that the gc root entrypoint (a copy of the entrypoint with a different
* offset for introspection loads) is located at a known offset (768 bytes,
* or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
* entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
* the root register to IP0 and jumps to the customized entrypoint,
* art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
* performs all the fast-path checks, so we need just the slow path.
* The UnsafeCASObject intrinsic is also using the GC root entrypoint with
* MOV instead of LDR, the destination register is in the same bits.
*
* The code structure is
* art_quick_read_barrier_mark_introspection:
* Up to 256 bytes for the main entrypoint code.
* Padding to 256 bytes if needed.
* art_quick_read_barrier_mark_introspection_arrays:
* Exactly 256 bytes for array load switch cases (32x2 instructions).
* .Lmark_introspection_return_switch:
* Exactly 256 bytes for return switch cases (32x2 instructions).
* art_quick_read_barrier_mark_introspection_gc_roots:
* GC root entrypoint code.
*/
ENTRY_ALIGNED art_quick_read_barrier_mark_introspection, 512
// At this point, IP0 contains the reference, IP1 can be freely used.
// For heap poisoning, the reference is poisoned, so unpoison it first.
UNPOISON_HEAP_REF wIP0
// If reference is null, just return it in the right register.
cbz wIP0, .Lmark_introspection_return
// Use wIP1 as temp and check the mark bit of the reference.
ldr wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
tbz wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
.Lmark_introspection_return:
// Without an extra register for the return switch case address calculation,
// we exploit the high word of the xIP0 to temporarily store the ref_reg*8,
// so the return switch below must move wIP0 instead of xIP0 to the register.
ldr wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET] // Load the instruction.
// Bits 0-4 of the instruction word go to bits 35-39 of xIP0.
bfi xIP0, xIP1, #(32 + 3), #5 // Extract ref_reg*8 to high word in xIP0.
adr xIP1, .Lmark_introspection_return_switch
// Bits 32-39 of xIP0 replace the low 8 bits of the switch base address. This relies on
// bits 32-34 of xIP0 being zero here - presumably guaranteed because the reference
// was produced by a 32-bit register write; confirm against the thunks.
bfxil xIP1, xIP0, #32, #8 // Calculate return switch case address.
br xIP1
.Lmark_introspection_unmarked:
// Check if the top two bits are one, if this is the case it is a forwarding address.
tst wIP1, wIP1, lsl #1
bmi .Lmark_introspection_forwarding_address
// The slow path ends in a branch to the return switch, so execution does not fall
// through into the forwarding-address code below.
READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET
.Lmark_introspection_forwarding_address:
// Shift left by the forwarding address shift. This clears out the state bits since they are
// in the top 2 bits of the lock word.
lsl wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
b .Lmark_introspection_return
// We're very close to the allotted 256B for the entrypoint code before the
// array switch cases. Should we go a little bit over the limit, we can
// move some code after the array switch cases and return switch cases.
.balign 256
.hidden art_quick_read_barrier_mark_introspection_arrays
.global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
// 32 two-instruction cases, one per index register number (reserved slots trap).
FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
.Lmark_introspection_return_switch:
// 32 two-instruction cases, one per destination register number (reserved slots trap).
FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
.hidden art_quick_read_barrier_mark_introspection_gc_roots
.global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
// Same slow path as above, but the instruction word is read at the GC root offset.
READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
END art_quick_read_barrier_mark_introspection
.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME          // Spill callee saves in case allocation triggers GC.
    // Call artInvokePolymorphic(receiver, thread, save_area).
    mov     x2, sp                          // arg2: save area = current SP.
    mov     x0, x1                          // arg0: receiver (must be read before x1 is replaced).
    mov     x1, xSELF                       // arg1: Thread::Current().
    bl      artInvokePolymorphic
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                          // The result is in x0; mirror it into d0.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_invoke_polymorphic
.extern artInvokeCustom
ENTRY art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME          // Spill callee saves in case allocation triggers GC.
    // Call artInvokeCustom(call_site_idx, thread, save_area).
    // arg0: call_site_idx is left in x0 as provided by the caller.
    mov     x2, sp                          // arg2: save area = current SP.
    mov     x1, xSELF                       // arg1: Thread::Current().
    bl      artInvokeCustom
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                          // Mirror the result into the double result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_invoke_custom
// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
// Argument 0: x0: The context pointer for ExecuteSwitchImpl.
// Argument 1: x1: Pointer to the templated ExecuteSwitchImpl to call.
// Argument 2: x2: The value of DEX PC (memory address of the method's bytecode).
// x0 is not modified before the call, so the wrapped function receives the context
// pointer in x0. Nothing here touches x0 after the call returns either.
ENTRY ExecuteSwitchImplAsm
SAVE_TWO_REGS_INCREASE_FRAME x19, xLR, 16 // 16-byte frame holding x19 and LR.
mov x19, x2 // x19 = DEX PC
CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* x0 */, 19 /* x19 */, 0)
blr x1 // Call the wrapped method.
RESTORE_TWO_REGS_DECREASE_FRAME x19, xLR, 16
ret
END ExecuteSwitchImplAsm
// x0 contains the class, x8 contains the inline cache. x9-x15 can be used.
//
// Records the class in w0 in the first suitable 32-bit slot of the class array at
// x8 + INLINE_CACHE_CLASSES_OFFSET. Slots 0..3 are handled identically:
// - the slot already equals w0: nothing to do;
// - the slot holds another non-zero value: move on to the next slot;
// - the slot is zero: try to claim it with an exclusive load/store pair, and
// start over on the same slot if it was filled meanwhile or the store failed.
// Slot 4 is written unconditionally.
// Only w9, x10 and the condition flags are modified; the stack is not used.
ENTRY art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
// Don't update the cache if we are marking.
cbnz wMR, .Ldone
#endif
.Lentry1:
ldr w9, [x8, #INLINE_CACHE_CLASSES_OFFSET]
cmp w9, w0
beq .Ldone // Class already recorded in this slot.
cbnz w9, .Lentry2 // Slot taken by another class: try the next one.
add x10, x8, #INLINE_CACHE_CLASSES_OFFSET
ldxr w9, [x10] // Re-read the slot exclusively.
cbnz w9, .Lentry1 // Filled since the plain load: re-examine it.
stxr w9, w0, [x10] // w9 = 0 when the store succeeded.
cbz w9, .Ldone
b .Lentry1 // Store failed: retry this slot.
.Lentry2:
// Same sequence for the slot at +4.
ldr w9, [x8, #INLINE_CACHE_CLASSES_OFFSET+4]
cmp w9, w0
beq .Ldone
cbnz w9, .Lentry3
add x10, x8, #INLINE_CACHE_CLASSES_OFFSET+4
ldxr w9, [x10]
cbnz w9, .Lentry2
stxr w9, w0, [x10]
cbz w9, .Ldone
b .Lentry2
.Lentry3:
// Same sequence for the slot at +8.
ldr w9, [x8, #INLINE_CACHE_CLASSES_OFFSET+8]
cmp w9, w0
beq .Ldone
cbnz w9, .Lentry4
add x10, x8, #INLINE_CACHE_CLASSES_OFFSET+8
ldxr w9, [x10]
cbnz w9, .Lentry3
stxr w9, w0, [x10]
cbz w9, .Ldone
b .Lentry3
.Lentry4:
// Same sequence for the slot at +12.
ldr w9, [x8, #INLINE_CACHE_CLASSES_OFFSET+12]
cmp w9, w0
beq .Ldone
cbnz w9, .Lentry5
add x10, x8, #INLINE_CACHE_CLASSES_OFFSET+12
ldxr w9, [x10]
cbnz w9, .Lentry4
stxr w9, w0, [x10]
cbz w9, .Ldone
b .Lentry4
.Lentry5:
// Unconditionally store, the inline cache is megamorphic.
str w0, [x8, #INLINE_CACHE_CLASSES_OFFSET+16]
.Ldone:
ret
END art_quick_update_inline_cache
// On entry, the method is at the bottom of the stack, i.e. at [sp] before the
// save-everything frame is pushed.
ENTRY art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    // Call artCompileOptimized(ArtMethod*, Thread*).
    mov     x1, xSELF                               // arg1: Thread::Current.
    ldr     x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING]   // arg0: ArtMethod, just above our frame.
    bl      artCompileOptimized
    RESTORE_SAVE_EVERYTHING_FRAME
    // The marking register is deliberately not refreshed here:
    // artCompileOptimized doesn't allow thread suspension.
    ret
END art_quick_compile_optimized
.extern artMethodEntryHook
ENTRY art_quick_method_entry_hook
    SETUP_SAVE_EVERYTHING_FRAME
    // Call artMethodEntryHook(ArtMethod*, Thread*, SP).
    mov     x2, sp                                  // arg2: SP (base of the save-everything frame).
    mov     x1, xSELF                               // arg1: Thread::Current.
    ldr     x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING]   // arg0: ArtMethod*, just above our frame.
    bl      artMethodEntryHook
    RESTORE_SAVE_EVERYTHING_FRAME                   // Note: will restore xSELF.
    REFRESH_MARKING_REGISTER
    ret
END art_quick_method_entry_hook
.extern artMethodExitHook
ENTRY art_quick_method_exit_hook
    SETUP_SAVE_EVERYTHING_FRAME
    // Call artMethodExitHook(Thread*, ArtMethod**, gpr_res*, fpr_res*, frame_size).
    // arg4: frame_size arrives from JITed code in x4 and is passed on as is.
    mov     x0, xSELF                               // arg0: Thread::Current.
    add     x1, sp, #FRAME_SIZE_SAVE_EVERYTHING     // arg1: ArtMethod**.
    add     x2, sp, #272                            // arg2: integer result ptr in kSaveEverything frame.
    add     x3, sp, #16                             // arg3: floating-point result ptr in kSaveEverything frame.
    bl      artMethodExitHook
    // Normal return.
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_method_exit_hook