blob: 10e098d7db5306b293100bb8a5019fc95fc9e6bd [file] [log] [blame]
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "asm_support_x86_64.S"
#include "interpreter/cfi_asm_support.h"
#include "arch/quick_alloc_entrypoints.S"
#include "arch/quick_field_entrypoints.S"
// Compile-time guard: if this build is not configured with read barriers,
// emit int3 breakpoints so reaching this code traps immediately at runtime.
MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
int3
int3
#endif
END_MACRO
// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
/*
 * Macro that sets up the callee save frame to conform with
 * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
 * Pushes the registers via SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY,
 * then stores the runtime's SaveRefsAndArgs ArtMethod* at the bottom of the
 * frame and publishes RSP as the thread's top quick frame.
 * Clobbers R10. __APPLE__ is unsupported: int3 traps immediately.
 */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
int3
int3
#else
SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
// R10 := Runtime::Current()
LOAD_RUNTIME_INSTANCE r10
// R10 := ArtMethod* for ref and args callee save frame method.
movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
// Store ArtMethod* to bottom of stack.
movq %r10, 0(%rsp)
// Store rsp as the top quick frame.
movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
#endif // __APPLE__
END_MACRO
// Same as SETUP_SAVE_REFS_AND_ARGS_FRAME, but the ArtMethod* to store at the
// bottom of the frame is already in RDI instead of being loaded from the
// runtime. Does not clobber R10.
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
// Store ArtMethod* to bottom of stack.
movq %rdi, 0(%rsp)
// Store rsp as the top quick frame.
movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO
/*
 * Macro that sets up the callee save frame to conform with
 * Runtime::CreateCalleeSaveMethod(kSaveEverything)
 * when R14 and R15 are already saved.
 *
 * Saves all remaining GPRs (R13 down to RAX), all 16 XMM registers, and
 * pushes the ArtMethod* loaded from `runtime_method_offset` in the runtime
 * instance, then publishes RSP as the thread's top quick frame.
 * Clobbers R10 (used to hold Runtime::Current()).
 */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
int3
int3
#else
// Save core registers from highest to lowest to agree with core spills bitmap.
// R14 and R15, or at least placeholders for them, are already on the stack.
PUSH r13
PUSH r12
PUSH r11
PUSH r10
PUSH r9
PUSH r8
PUSH rdi
PUSH rsi
PUSH rbp
PUSH rbx
PUSH rdx
PUSH rcx
PUSH rax
// Create space for FPRs and stack alignment padding.
// 8 bytes padding + 16 XMM registers * 8 bytes each.
INCREASE_FRAME 8 + 16 * 8
// R10 := Runtime::Current()
LOAD_RUNTIME_INSTANCE r10
// Save FPRs. Offsets start at 8 to leave the bottom slot for the ArtMethod*
// pushed below.
movq %xmm0, 8(%rsp)
movq %xmm1, 16(%rsp)
movq %xmm2, 24(%rsp)
movq %xmm3, 32(%rsp)
movq %xmm4, 40(%rsp)
movq %xmm5, 48(%rsp)
movq %xmm6, 56(%rsp)
movq %xmm7, 64(%rsp)
movq %xmm8, 72(%rsp)
movq %xmm9, 80(%rsp)
movq %xmm10, 88(%rsp)
movq %xmm11, 96(%rsp)
movq %xmm12, 104(%rsp)
movq %xmm13, 112(%rsp)
movq %xmm14, 120(%rsp)
movq %xmm15, 128(%rsp)
// Push ArtMethod* for save everything frame method.
pushq \runtime_method_offset(%r10)
CFI_ADJUST_CFA_OFFSET(8)
// Store rsp as the top quick frame.
movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
// Ugly compile-time check, but we only have the preprocessor.
// 15 GPRs + 16 FPRs + padding/method + 8 for the implicit return address
// pushed on the stack when the caller made the call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif // __APPLE__
END_MACRO
/*
 * Macro that sets up the callee save frame to conform with
 * Runtime::CreateCalleeSaveMethod(kSaveEverything)
 * when R15 is already saved. Pushes R14 and delegates to the
 * R14_R15_SAVED variant, forwarding the runtime method offset.
 */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
PUSH r14
SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO
/*
 * Macro that sets up the callee save frame to conform with
 * Runtime::CreateCalleeSaveMethod(kSaveEverything).
 * Pushes R15 and delegates to the R15_SAVED variant.
 */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
PUSH r15
SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO
// Restore the 16 XMM registers of a save-everything frame.
// Offsets are 8 higher than in the setup macro because the ArtMethod* slot
// and padding are still on the stack below them.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
// Restore FPRs. Method and padding is still on the stack.
movq 16(%rsp), %xmm0
movq 24(%rsp), %xmm1
movq 32(%rsp), %xmm2
movq 40(%rsp), %xmm3
movq 48(%rsp), %xmm4
movq 56(%rsp), %xmm5
movq 64(%rsp), %xmm6
movq 72(%rsp), %xmm7
movq 80(%rsp), %xmm8
movq 88(%rsp), %xmm9
movq 96(%rsp), %xmm10
movq 104(%rsp), %xmm11
movq 112(%rsp), %xmm12
movq 120(%rsp), %xmm13
movq 128(%rsp), %xmm14
movq 136(%rsp), %xmm15
END_MACRO
// Pop the GPRs of a save-everything frame in reverse push order, leaving RAX
// alone (the caller decides whether RAX holds a live result or was popped /
// skipped separately).
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
// Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
POP rcx
POP rdx
POP rbx
POP rbp
POP rsi
POP rdi
POP r8
POP r9
POP r10
POP r11
POP r12
POP r13
POP r14
POP r15
END_MACRO
// Tear down a full save-everything frame, restoring every register including
// RAX. Counterpart of SETUP_SAVE_EVERYTHING_FRAME.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
RESTORE_SAVE_EVERYTHING_FRAME_FRPS
// Remove save everything callee save method, stack alignment padding and FPRs.
DECREASE_FRAME 16 + 16 * 8
POP rax
RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
// Tear down a save-everything frame while preserving the current value of
// RAX (e.g. a return value): the saved RAX slot is discarded with the extra
// +8 in the frame decrease instead of being popped.
MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
RESTORE_SAVE_EVERYTHING_FRAME_FRPS
// Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
DECREASE_FRAME 16 + 16 * 8 + 8
RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO
// Define a function `c_name` that throws via runtime helper `cxx_name(Thread*)`
// after saving all callee saves. The helper does not return (UNREACHABLE).
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(Thread*)
UNREACHABLE
END_FUNCTION VAR(c_name)
END_MACRO
// Like NO_ARG_RUNTIME_EXCEPTION, but saves a full save-everything frame so
// the exception can be thrown from anywhere (all registers recoverable).
MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_EVERYTHING_FRAME // save all registers as basis for long jump context
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(Thread*)
UNREACHABLE
END_FUNCTION VAR(c_name)
END_MACRO
// Define a function `c_name` that throws via `cxx_name(arg1, Thread*)`.
// The single argument is already in RDI (arg0 position); Thread goes in RSI.
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg1, Thread*)
UNREACHABLE
END_FUNCTION VAR(c_name)
END_MACRO
// Define a function `c_name` that throws via `cxx_name(arg1, arg2, Thread*)`
// with a save-everything frame. The two arguments are already in RDI/RSI;
// Thread goes in RDX.
MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_EVERYTHING_FRAME // save all registers as basis for long jump context
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg1, arg2, Thread*)
UNREACHABLE
END_FUNCTION VAR(c_name)
END_MACRO
/*
 * Called by managed code to create and deliver a NullPointerException.
 * Uses a save-everything frame; does not return.
 */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
/*
 * Call installed by a signal handler to create and deliver a NullPointerException.
 * On entry the fault handler has already pushed the fault address and a return
 * address, hence the custom CFA of 2 pointers.
 */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
// Fault address and return address were saved by the fault handler.
// Save all registers as basis for long jump context; R15 will replace fault address later.
SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
// Retrieve fault address into RDI, then overwrite that slot with R15 so the
// frame holds a complete register set; update CFI to match.
movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
// Outgoing argument set up; RDI already contains the fault address.
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artThrowNullPointerExceptionFromSignal) // (addr, self)
UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal
/*
 * Called by managed code to create and deliver an ArithmeticException
 * (division by zero).
 */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode
/*
 * Called by managed code to create and deliver a StackOverflowError.
 * Uses the save-all-callee-saves frame (not save-everything): the stack is
 * nearly exhausted, so the smaller frame matters here.
 */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
/*
 * Called by managed code, saves callee saves and then calls artThrowException
 * that will place a mock Method* at the bottom of the stack. Arg1 (RDI) holds
 * the exception object.
 */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
/*
 * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException.
 * Arg1 (RDI) holds the index, arg2 (RSI) holds the limit.
 */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
/*
 * Called by managed code to create and deliver a StringIndexOutOfBoundsException
 * as if thrown from a call to String.charAt(). Arg1 (RDI) holds the index,
 * arg2 (RSI) holds the limit.
 */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode
/*
 * All generated callsites for interface invokes and invocation slow paths will load arguments
 * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
 * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
 * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
 *
 * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
 * of the target Method* in rax and method->code_ in rdx.
 *
 * If unsuccessful, the helper will return null/????. There will be a pending exception in the
 * thread and we branch to another stub to deliver it.
 *
 * On success this wrapper will restore arguments and *jump* to the target, leaving the return
 * location on the stack.
 *
 * Adapted from x86 code.
 */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
SETUP_SAVE_REFS_AND_ARGS_FRAME // save callee saves in case allocation triggers GC
// Helper signature is always
// (method_idx, *this_object, *caller_method, *self, sp)
movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread
movq %rsp, %rcx // pass SP
call CALLVAR(cxx_name) // cxx_name(arg1, arg2, Thread*, SP)
// Swap the 128-bit result so the Method* (RAX) survives the frame restore in
// RDI and the code pointer (RDX) ends up in RAX for the tail jump.
movq %rax, %rdi
movq %rdx, %rax
RESTORE_SAVE_REFS_AND_ARGS_FRAME
testq %rdi, %rdi // null Method* => pending exception
jz 1f
// Tail call to intended method.
jmp *%rax
1:
DELIVER_PENDING_EXCEPTION
END_MACRO
// Define a named trampoline function wrapping INVOKE_TRAMPOLINE_BODY around
// the given C++ resolution helper.
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
END_FUNCTION VAR(c_name)
END_MACRO
// Instantiate one trampoline per invoke kind (interface/static/direct/super/virtual).
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
/*
 * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
 * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
 * the end of the shorty. Loads one 'D' (double) or 'F' (float) value into
 * `xmm_reg`, skipping over non-FP entries (with 'J'/long taking two 4-byte
 * arg_array slots). ASCII literals: 68 = 'D', 70 = 'F', 74 = 'J'.
 */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
movb (%r10), %al // al := *shorty
addq MACRO_LITERAL(1), %r10 // shorty++
cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto xmm_setup_finished
je VAR(finished)
cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto FOUND_DOUBLE
je 2f
cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto FOUND_FLOAT
je 3f
addq MACRO_LITERAL(4), %r11 // arg_array++
// Handle extra space in arg array taken by a long.
cmpb MACRO_LITERAL(74), %al // if (al != 'J') goto LOOP
jne 1b
addq MACRO_LITERAL(4), %r11 // arg_array++
jmp 1b // goto LOOP
2: // FOUND_DOUBLE
movsd (%r11), REG_VAR(xmm_reg)
addq MACRO_LITERAL(8), %r11 // arg_array+=2
jmp 4f
3: // FOUND_FLOAT
movss (%r11), REG_VAR(xmm_reg)
addq MACRO_LITERAL(4), %r11 // arg_array++
4:
END_MACRO
/*
 * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
 * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
 * the end of the shorty. Loads one integer value into `gpr_reg64` (for 'J'/long)
 * or `gpr_reg32` (for int-sized values), skipping 'F'/'D' FP entries which were
 * consumed by LOOP_OVER_SHORTY_LOADING_XMMS. ASCII: 68='D', 70='F', 74='J'.
 */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
movb (%r10), %al // al := *shorty
addq MACRO_LITERAL(1), %r10 // shorty++
cmpb MACRO_LITERAL(0), %al // if (al == '\0') goto gpr_setup_finished
je VAR(finished)
cmpb MACRO_LITERAL(74), %al // if (al == 'J') goto FOUND_LONG
je 2f
cmpb MACRO_LITERAL(70), %al // if (al == 'F') goto SKIP_FLOAT
je 3f
cmpb MACRO_LITERAL(68), %al // if (al == 'D') goto SKIP_DOUBLE
je 4f
movl (%r11), REG_VAR(gpr_reg32)
addq MACRO_LITERAL(4), %r11 // arg_array++
jmp 5f
2: // FOUND_LONG
movq (%r11), REG_VAR(gpr_reg64)
addq MACRO_LITERAL(8), %r11 // arg_array+=2
jmp 5f
3: // SKIP_FLOAT
addq MACRO_LITERAL(4), %r11 // arg_array++
jmp 1b
4: // SKIP_DOUBLE
addq MACRO_LITERAL(8), %r11 // arg_array+=2
jmp 1b
5:
END_MACRO
/*
 * Quick invocation stub: bridges from C++ into managed (quick) code for an
 * instance method. Marshals the flat arg_array into the quick calling
 * convention (XMM regs, GPR regs, then stack), calls the method's quick code,
 * and stores the result into the JValue.
 * On entry:
 * [sp] = return address
 * rdi = method pointer
 * rsi = argument array that must at least contain the this pointer.
 * rdx = size of argument array in bytes
 * rcx = (managed) thread pointer
 * r8 = JValue* result
 * r9 = char* shorty
 */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
int3
int3
#else
// Set up argument XMM registers.
leaq 1(%r9), %r10 // R10 := shorty + 1 ; ie skip return arg character.
leaq 4(%rsi), %r11 // R11 := arg_array + 4 ; ie skip this pointer.
LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
.balign 16
.Lxmm_setup_finished:
PUSH rbp // Save rbp.
PUSH r8 // Save r8/result*.
PUSH r9 // Save r9/shorty*.
PUSH rbx // Save native callee save rbx
PUSH r12 // Save native callee save r12
PUSH r13 // Save native callee save r13
PUSH r14 // Save native callee save r14
PUSH r15 // Save native callee save r15
movq %rsp, %rbp // Copy value of stack pointer into base pointer.
CFI_DEF_CFA_REGISTER(rbp)
movl %edx, %r10d // Stash arg array size; edx is recomputed below.
addl LITERAL(100), %edx // Reserve space for return addr, StackReference<method>, rbp,
// r8, r9, rbx, r12, r13, r14, and r15 in frame.
andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
subl LITERAL(72), %edx // Remove space for return address, rbp, r8, r9, rbx, r12,
// r13, r14, and r15
subq %rdx, %rsp // Reserve stack space for argument array.
#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
movq LITERAL(0), (%rsp) // Store null for method*
movl %r10d, %ecx // Place size of args in rcx.
movq %rdi, %rax // rax := method to be called
movq %rsi, %r11 // r11 := arg_array
leaq 8(%rsp), %rdi // rdi is pointing just above the ArtMethod* in the stack
// arguments.
// Copy arg array into stack.
rep movsb // while (rcx--) { *rdi++ = *rsi++ }
leaq 1(%r9), %r10 // r10 := shorty + 1 ; ie skip return arg character
movq %rax, %rdi // rdi := method to be called
movl (%r11), %esi // rsi := this pointer
addq LITERAL(4), %r11 // arg_array++
LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
movq %rbp, %rsp // Restore stack pointer.
POP r15 // Pop r15
POP r14 // Pop r14
POP r13 // Pop r13
POP r12 // Pop r12
POP rbx // Pop rbx
POP r9 // Pop r9 - shorty*
POP r8 // Pop r8 - result*.
POP rbp // Pop rbp
// Dispatch on the shorty's return-type character to store the result with
// the right width/register (68 = 'D', 70 = 'F').
cmpb LITERAL(68), (%r9) // Test if result type char == 'D'.
je .Lreturn_double_quick
cmpb LITERAL(70), (%r9) // Test if result type char == 'F'.
je .Lreturn_float_quick
movq %rax, (%r8) // Store the result assuming its a long, int or Object*
ret
.Lreturn_double_quick:
movsd %xmm0, (%r8) // Store the double floating point result.
ret
.Lreturn_float_quick:
movss %xmm0, (%r8) // Store the floating point result.
ret
#endif // __APPLE__
END_FUNCTION art_quick_invoke_stub
/*
 * Quick invocation stub for static methods: same as art_quick_invoke_stub but
 * there is no implicit `this` pointer, so RSI is filled from the shorty loop
 * like any other argument register.
 * On entry:
 * [sp] = return address
 * rdi = method pointer
 * rsi = argument array or null if no arguments.
 * rdx = size of argument array in bytes
 * rcx = (managed) thread pointer
 * r8 = JValue* result
 * r9 = char* shorty
 */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
int3
int3
#else
// Set up argument XMM registers.
leaq 1(%r9), %r10 // R10 := shorty + 1 ; ie skip return arg character
movq %rsi, %r11 // R11 := arg_array
LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
.balign 16
.Lxmm_setup_finished2:
PUSH rbp // Save rbp.
PUSH r8 // Save r8/result*.
PUSH r9 // Save r9/shorty*.
PUSH rbx // Save rbx
PUSH r12 // Save r12
PUSH r13 // Save r13
PUSH r14 // Save r14
PUSH r15 // Save r15
movq %rsp, %rbp // Copy value of stack pointer into base pointer.
CFI_DEF_CFA_REGISTER(rbp)
movl %edx, %r10d // Stash arg array size; edx is recomputed below.
addl LITERAL(100), %edx // Reserve space for return addr, StackReference<method>, rbp,
// r8, r9, rbx, r12, r13, r14, and r15 in frame.
andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
subl LITERAL(72), %edx // Remove space for return address, rbp, r8, r9, rbx, r12,
// r13, r14, and r15.
subq %rdx, %rsp // Reserve stack space for argument array.
#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
movq LITERAL(0), (%rsp) // Store null for method*
movl %r10d, %ecx // Place size of args in rcx.
movq %rdi, %rax // rax := method to be called
movq %rsi, %r11 // r11 := arg_array
leaq 8(%rsp), %rdi // rdi is pointing just above the ArtMethod* in the
// stack arguments.
// Copy arg array into stack.
rep movsb // while (rcx--) { *rdi++ = *rsi++ }
leaq 1(%r9), %r10 // r10 := shorty + 1 ; ie skip return arg character
movq %rax, %rdi // rdi := method to be called
LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
movq %rbp, %rsp // Restore stack pointer.
POP r15 // Pop r15
POP r14 // Pop r14
POP r13 // Pop r13
POP r12 // Pop r12
POP rbx // Pop rbx
POP r9 // Pop r9 - shorty*.
POP r8 // Pop r8 - result*.
POP rbp // Pop rbp
// Dispatch on the shorty's return-type character (68 = 'D', 70 = 'F').
cmpb LITERAL(68), (%r9) // Test if result type char == 'D'.
je .Lreturn_double_quick2
cmpb LITERAL(70), (%r9) // Test if result type char == 'F'.
je .Lreturn_float_quick2
movq %rax, (%r8) // Store the result assuming its a long, int or Object*
ret
.Lreturn_double_quick2:
movsd %xmm0, (%r8) // Store the double floating point result.
ret
.Lreturn_float_quick2:
movss %xmm0, (%r8) // Store the floating point result.
ret
#endif // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
/*
 * Long jump stub: restores a full register context (e.g. for exception
 * delivery) and "returns" into it.
 * On entry:
 * rdi = gprs (array of saved GPR values, laid out r15..rax, then rsp/rip
 * reachable via the pops below)
 * rsi = fprs (array of 16 saved XMM values)
 */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
int3
int3
#else
// Restore FPRs.
movq 0(%rsi), %xmm0
movq 8(%rsi), %xmm1
movq 16(%rsi), %xmm2
movq 24(%rsi), %xmm3
movq 32(%rsi), %xmm4
movq 40(%rsi), %xmm5
movq 48(%rsi), %xmm6
movq 56(%rsi), %xmm7
movq 64(%rsi), %xmm8
movq 72(%rsi), %xmm9
movq 80(%rsi), %xmm10
movq 88(%rsi), %xmm11
movq 96(%rsi), %xmm12
movq 104(%rsi), %xmm13
movq 112(%rsi), %xmm14
movq 120(%rsi), %xmm15
// Switch the stack pointer to the GPR array so the values can be popped.
movq %rdi, %rsp // RSP points to gprs.
// Load all registers except RSP and RIP with values in gprs.
popq %r15
popq %r14
popq %r13
popq %r12
popq %r11
popq %r10
popq %r9
popq %r8
popq %rdi
popq %rsi
popq %rbp
addq LITERAL(8), %rsp // Skip rsp
popq %rbx
popq %rdx
popq %rcx
popq %rax
popq %rsp // Load stack pointer.
ret // From higher in the stack pop rip.
#endif // __APPLE__
END_FUNCTION art_quick_do_long_jump
// Define a function `c_name` calling `cxx_name(arg0, Thread*)` under a
// refs-only callee-save frame; `return_macro` handles return/exception.
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
CALL_MACRO(return_macro) // return or deliver exception
END_FUNCTION VAR(c_name)
END_MACRO
// Define a function `c_name` calling `cxx_name(arg0, arg1, Thread*)` under a
// refs-only callee-save frame; `return_macro` handles return/exception.
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
CALL_MACRO(return_macro) // return or deliver exception
END_FUNCTION VAR(c_name)
END_MACRO
// Define a function `c_name` calling `cxx_name(arg0, arg1, arg2, Thread*)`
// under a refs-only callee-save frame; `return_macro` handles return/exception.
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rcx // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, arg1, arg2, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
CALL_MACRO(return_macro) // return or deliver exception
END_FUNCTION VAR(c_name)
END_MACRO
// Define a function `c_name` calling `cxx_name(arg0, arg1, arg2, arg3, Thread*)`
// under a refs-only callee-save frame; `return_macro` handles return/exception.
MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %r8 // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, arg1, arg2, arg3, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
CALL_MACRO(return_macro) // return or deliver exception
END_FUNCTION VAR(c_name)
END_MACRO
/*
 * Macro for resolution and initialization of indexed DEX file
 * constants such as classes and strings. The index arrives in EAX;
 * a save-everything frame is used so this can be called from anywhere.
 * A null result means a pending exception; otherwise the result (RAX) is
 * returned, possibly via a deoptimization check since it is a reference.
 */
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
DEFINE_FUNCTION VAR(c_name)
SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset // save everything for GC
// Outgoing argument set up
movl %eax, %edi // pass the index of the constant as arg0
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, Thread*)
testl %eax, %eax // If result is null, deliver pending exception.
jz 1f
DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_RAX /*is_ref=*/1
1:
DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION VAR(c_name)
END_MACRO
// Variant of ONE_ARG_SAVE_EVERYTHING_DOWNCALL using the class-initialization
// flavor of the save-everything runtime method.
MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO
// Return sequence for downcalls returning a reference: null result means a
// pending exception to deliver; otherwise return, after a deopt check.
MACRO0(RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER)
testq %rax, %rax // rax == 0 ?
jz 1f // if rax == 0 goto 1
DEOPT_OR_RETURN /*is_ref=*/1 // Check if deopt is required
1: // deliver exception on current thread
DELIVER_PENDING_EXCEPTION
END_MACRO
// Return sequence for downcalls: deliver any pending exception stored on the
// thread; otherwise return (after a deopt check). `is_ref` tells the deopt
// path whether RAX holds a reference.
MACRO1(RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION, is_ref = 0)
movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
testq %rcx, %rcx // rcx == 0 ?
jnz 1f // if rcx != 0 goto 1
DEOPT_OR_RETURN \is_ref // Check if deopt is required
1: // deliver exception on current thread
DELIVER_PENDING_EXCEPTION
END_MACRO
// Convenience wrapper: reference-returning flavor of the macro above.
MACRO0(RETURN_REF_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION)
RETURN_OR_DEOPT_OR_DELIVER_PENDING_EXCEPTION /*is_ref=*/1
END_MACRO
// Return unless the thread has a pending deoptimization check; in that case
// build a save-everything frame and call artDeoptimizeIfNeeded with the
// result (RAX) and whether it is a reference, then return normally.
MACRO1(DEOPT_OR_RETURN, is_ref = 0)
cmpl LITERAL(0), %gs:THREAD_DEOPT_CHECK_REQUIRED_OFFSET
jne 2f
ret
2:
SETUP_SAVE_EVERYTHING_FRAME
movq LITERAL(\is_ref), %rdx // pass if result is a reference
movq %rax, %rsi // pass the result
movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current
call SYMBOL(artDeoptimizeIfNeeded)
RESTORE_SAVE_EVERYTHING_FRAME
ret
END_MACRO
// Like DEOPT_OR_RETURN but assumes a save-everything frame is already on the
// stack: the fast path just tears it down keeping RAX; the slow path writes
// RAX back into the frame (so the GC/deopt sees it) before calling
// artDeoptimizeIfNeeded. CFI state is saved/restored around the branches.
MACRO1(DEOPT_OR_RESTORE_SAVE_EVERYTHING_FRAME_AND_RETURN_RAX, is_ref = 0)
cmpl LITERAL(0), %gs:THREAD_DEOPT_CHECK_REQUIRED_OFFSET
CFI_REMEMBER_STATE
jne 2f
RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX
ret
2:
CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
movq %rax, SAVE_EVERYTHING_FRAME_RAX_OFFSET(%rsp) // update result in the frame
movq LITERAL(\is_ref), %rdx // pass if result is a reference
movq %rax, %rsi // pass the result
movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current
call SYMBOL(artDeoptimizeIfNeeded)
CFI_REMEMBER_STATE
RESTORE_SAVE_EVERYTHING_FRAME
ret
CFI_RESTORE_STATE_AND_DEF_CFA rsp, FRAME_SIZE_SAVE_EVERYTHING
END_MACRO
// Return sequence for downcalls returning an int status: zero means success
// (return, after deopt check); non-zero means deliver the pending exception.
MACRO0(RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER)
testl %eax, %eax // eax == 0 ?
jnz 1f // if eax != 0 goto 1
DEOPT_OR_RETURN // Check if we need a deopt
1: // deliver exception on current thread
DELIVER_PENDING_EXCEPTION
END_MACRO
// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Commented-out generators below have hand-written x86_64-specific asm
// implementations later in this file.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
// Fast path: pop an object off the thread-local rosalloc run free list;
// falls back to the C++ allocator `cxx_name` on any check failure.
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
DEFINE_FUNCTION VAR(c_name)
// Fast path rosalloc allocation.
// RDI: mirror::Class*, RAX: return value
// RSI, RDX, RCX, R8, R9: free.
// Check if the thread local
// allocation stack has room.
movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread
movq THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx // rcx = alloc stack top.
cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
jae .Lslow_path\c_name
// Load the object size
movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
// Check if the size is for a thread
// local allocation. Also does the
// initialized and finalizable checks.
cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
ja .Lslow_path\c_name
// Compute the rosalloc bracket index
// from the size.
shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
// Load the rosalloc run (r9).
// Subtract __SIZEOF_POINTER__ to
// subtract one from the index in rax
// as there is no 0 byte run and the
// size is already aligned.
movq (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
// Load the free list head (rax). This
// will be the return val.
movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
testq %rax, %rax
jz .Lslow_path\c_name
// "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
// Push the new object onto the thread
// local allocation stack and
// increment the thread local
// allocation stack top.
movl %eax, (%rcx)
addq LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
movq %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
// Load the next pointer of the head
// and update the list head with the
// next pointer.
movq ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
movq %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
// Store the class pointer in the
// header. This also overwrites the
// next pointer. The offsets are
// asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
POISON_HEAP_REF edi
movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
// Decrement the size of the free list
decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
// No fence necessary for x86.
ret
.Lslow_path\c_name:
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER // return or deliver exception
END_FUNCTION VAR(c_name)
END_MACRO
// Instantiate the rosalloc fast paths for resolved and initialized classes.
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO
// The fast path code for art_quick_alloc_object_initialized_region_tlab:
// bump-pointer allocation out of the thread-local buffer; jumps to
// `slowPathLabel` if the object does not fit.
//
// RDI: the class, RSI: ArtMethod*, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
movq %gs:THREAD_SELF_OFFSET, %r8 // r8 = thread
movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
addq %rax, %rcx // Add size to pos, note that these
// are both 32 bit ints, overflow
// will cause the add to be past the
// end of the thread local region.
cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx // Check if it fits.
ja RAW_VAR(slowPathLabel)
movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8) // Update thread_local_pos.
// Store the class pointer in the
// header.
// No fence needed for x86.
POISON_HEAP_REF edi
movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
ret // Fast path succeeded.
END_MACRO
// The fast path code for art_quick_alloc_array_region_tlab: bump-pointer
// array allocation from the TLAB with a precomputed total size; jumps to
// `slowPathLabel` for large objects or when the TLAB is full.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
movq %gs:THREAD_SELF_OFFSET, %rcx // rcx = thread
// Mask out the unaligned part to make sure we are 8 byte aligned.
andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
cmpq LITERAL(MIN_LARGE_OBJECT_THRESHOLD), %r9 // Possibly a large object.
jae RAW_VAR(slowPathLabel) // Go to slow path if large object
movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
addq %rax, %r9 // r9 = new thread_local_pos.
cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9 // Check if it fits.
ja RAW_VAR(slowPathLabel)
movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx) // Update thread_local_pos.
// Store the class pointer in the
// header.
// No fence needed for x86.
POISON_HEAP_REF edi
movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax) // Store the array length.
ret // Fast path succeeded.
END_MACRO
// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
// Calls the given C++ allocator with (klass, Thread*); RDI already holds the class.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER // return or deliver exception
END_MACRO
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
// RDI: mirror::Class* klass
// RDX, RSI, RCX, R8, R9: free. RAX: return val.
ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
// RDI: mirror::Class* klass
// RDX, RSI, RCX, R8, R9: free. RAX: return val.
ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab
// Compute the total allocation size (in R9) for an array whose component size
// is not known at compile time: read the component type's primitive-type field,
// derive the element size shift from it, then add header and alignment slack.
// Inputs: RDI: mirror::Class* klass, RSI: int32_t component_count.
// Clobbers: RCX, R9.
MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx // Load component type.
UNPOISON_HEAP_REF ecx
movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx // Get component size shift.
movq %rsi, %r9
salq %cl, %r9 // Calculate array count shifted.
// Add array header + alignment rounding.
addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
// Add 4 extra bytes if we are doing a long array: rcx ends up 4 for the
// 8-byte-component shift (3), 0 otherwise.
addq MACRO_LITERAL(1), %rcx
andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
addq %rcx, %r9
END_MACRO
// Compute total allocation size in R9 for an array of 1-byte components.
MACRO0(COMPUTE_ARRAY_SIZE_8)
// RDI: mirror::Class* klass, RSI: int32_t component_count
// RDX, RCX, R8, R9: free. RAX: return val.
movq %rsi, %r9
// Add array header + alignment rounding.
addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
// Compute total allocation size in R9 for an array of 2-byte components.
MACRO0(COMPUTE_ARRAY_SIZE_16)
// RDI: mirror::Class* klass, RSI: int32_t component_count
// RDX, RCX, R8, R9: free. RAX: return val.
movq %rsi, %r9
salq MACRO_LITERAL(1), %r9 // r9 = component_count * 2.
// Add array header + alignment rounding.
addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
// Compute total allocation size in R9 for an array of 4-byte components.
MACRO0(COMPUTE_ARRAY_SIZE_32)
// RDI: mirror::Class* klass, RSI: int32_t component_count
// RDX, RCX, R8, R9: free. RAX: return val.
movq %rsi, %r9
salq MACRO_LITERAL(2), %r9 // r9 = component_count * 4.
// Add array header + alignment rounding.
addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
// Compute total allocation size in R9 for an array of 8-byte components
// (uses the wide-array data offset, which includes the extra 4-byte gap).
MACRO0(COMPUTE_ARRAY_SIZE_64)
// RDI: mirror::Class* klass, RSI: int32_t component_count
// RDX, RCX, R8, R9: free. RAX: return val.
movq %rsi, %r9
salq MACRO_LITERAL(3), %r9 // r9 = component_count * 8.
// Add array header + alignment rounding.
addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
// Generate a TLAB array-allocation entrypoint: size computation via
// `size_setup`, the TLAB fast path, and a slow path calling `cxx_name`.
MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
DEFINE_FUNCTION VAR(c_entrypoint)
// RDI: mirror::Class* klass, RSI: int32_t component_count
// RDX, RCX, R8, R9: free. RAX: return val.
CALL_MACRO(size_setup) // Leaves total size in R9.
ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current()
call CALLVAR(cxx_name) // cxx_name(arg0, arg1, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER // return or deliver exception
END_FUNCTION VAR(c_entrypoint)
END_MACRO
// Instantiate array-allocation entrypoints for region-TLAB and plain TLAB,
// one per component size (unknown, 8, 16, 32, 64 bits).
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
// Fast path region tlab allocation.
// RDI: mirror::Class* klass
// RDX, RSI, RCX, R8, R9: free. RAX: return val.
ASSERT_USE_READ_BARRIER // Region TLAB requires the read barrier config.
ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
// Fast path region tlab allocation.
// RDI: mirror::Class* klass
// RDX, RSI, RCX, R8, R9: free. RAX: return val.
ASSERT_USE_READ_BARRIER // Region TLAB requires the read barrier config.
// No read barrier since the caller is responsible for that.
ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab
// Resolution/initialization entrypoints: each saves everything, calls the C++
// helper, and returns the result (the _FOR_CLINIT variants may elide the frame
// checks per their macro definition elsewhere in the runtime).
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
TWO_ARG_DOWNCALL art_quick_handle_fill_data, \
artHandleFillArrayDataFromCode, \
RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
/*
 * Entry from managed code that tries to lock the object in a fast path and
 * calls `artLockObjectFromCode()` for the difficult cases, may block for GC.
 * RDI holds the possibly null object to lock.
 */
DEFINE_FUNCTION art_quick_lock_object
testq %rdi, %rdi // Null check object.
jz art_quick_lock_object_no_inline // Null: slow path reports the NPE.
LOCK_OBJECT_FAST_PATH rdi, ecx, art_quick_lock_object_no_inline
END_FUNCTION art_quick_lock_object
/*
 * Entry from managed code that calls `artLockObjectFromCode()`, may block for GC.
 * RDI holds the possibly null object to lock.
 */
DEFINE_FUNCTION art_quick_lock_object_no_inline
// This is also the slow path for art_quick_lock_object.
SETUP_SAVE_REFS_ONLY_FRAME
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artLockObjectFromCode) // artLockObjectFromCode(object, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END_FUNCTION art_quick_lock_object_no_inline
/*
 * Entry from managed code that tries to unlock the object in a fast path and calls
 * `artUnlockObjectFromCode()` for the difficult cases and delivers exception on failure.
 * RDI holds the possibly null object to unlock.
 */
DEFINE_FUNCTION art_quick_unlock_object
testq %rdi, %rdi // Null check object.
// Fix: a null object must take the *unlock* slow path (which raises the
// exception via artUnlockObjectFromCode); the previous target was the lock
// slow path, which would have attempted to lock instead of unlock.
jz art_quick_unlock_object_no_inline
UNLOCK_OBJECT_FAST_PATH rdi, ecx, /*saved_rax*/ none, art_quick_unlock_object_no_inline
END_FUNCTION art_quick_unlock_object
/*
 * Entry from managed code that calls `artUnlockObjectFromCode()`
 * and delivers exception on failure.
 * RDI holds the possibly null object to unlock.
 */
DEFINE_FUNCTION art_quick_unlock_object_no_inline
// This is also the slow path for art_quick_unlock_object.
SETUP_SAVE_REFS_ONLY_FRAME
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artUnlockObjectFromCode) // artUnlockObjectFromCode(object, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_OR_DEOPT_IF_INT_RESULT_IS_ZERO_OR_DELIVER
END_FUNCTION art_quick_unlock_object_no_inline
// Check-cast entrypoint: RDI = object, RSI = target class. Returns normally if
// the object is assignable to the class; otherwise throws ClassCastException.
DEFINE_FUNCTION art_quick_check_instance_of
// Type check using the bit string passes null as the target class. In that case just throw.
testl %esi, %esi
jz .Lthrow_class_cast_exception_for_bitstring_check
// We could check the super classes here but that is usually already checked in the caller.
PUSH rdi // Save args for exc
PUSH rsi
subq LITERAL(8), %rsp // Alignment padding.
CFI_ADJUST_CFA_OFFSET(8)
SETUP_FP_CALLEE_SAVE_FRAME
call SYMBOL(artInstanceOfFromCode) // (Object* obj, Class* ref_klass)
testq %rax, %rax
CFI_REMEMBER_STATE
jz .Lthrow_class_cast_exception // jump forward if not assignable
RESTORE_FP_CALLEE_SAVE_FRAME
addq LITERAL(24), %rsp // pop arguments (rdi, rsi, padding)
CFI_ADJUST_CFA_OFFSET(-24)
ret
.Lthrow_class_cast_exception:
CFI_RESTORE_STATE_AND_DEF_CFA rsp, 64 // Reset unwind info so following code unwinds.
RESTORE_FP_CALLEE_SAVE_FRAME
addq LITERAL(8), %rsp // pop padding
CFI_ADJUST_CFA_OFFSET(-8)
POP rsi // Pop arguments
POP rdi
.Lthrow_class_cast_exception_for_bitstring_check:
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
mov %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current()
call SYMBOL(artThrowClassCastExceptionForObject) // (Object* src, Class* dest, Thread*)
UNREACHABLE
END_FUNCTION art_quick_check_instance_of
// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
// Used when one register carries a result that must not be overwritten by the restore.
MACRO2(POP_REG_NE, reg, exclude_reg)
.ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
DECREASE_FRAME 8 // Skip the slot; keep exclude_reg's live value.
.else
POP RAW_VAR(reg)
.endif
END_MACRO
// Object-array store entrypoint with type check and card marking.
// RDI = array, RSI = index, RDX = value (possibly null).
DEFINE_FUNCTION art_quick_aput_obj
test %edx, %edx // store of null
jz .Laput_obj_null
movl MIRROR_OBJECT_CLASS_OFFSET(%rdi), %ecx // ecx = array's class.
UNPOISON_HEAP_REF ecx
#ifdef USE_READ_BARRIER
cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
CFI_REMEMBER_STATE
jnz .Laput_obj_gc_marking
#endif // USE_READ_BARRIER
movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
cmpl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %ecx // Both poisoned if heap poisoning is enabled.
jne .Laput_obj_check_assignability
.Laput_obj_store:
POISON_HEAP_REF edx
movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
// Mark the card for the stored reference.
movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
movb %dl, (%rdx, %rdi)
ret
.Laput_obj_null:
// Storing null needs no type check and no card mark.
movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
ret
.Laput_obj_check_assignability:
UNPOISON_HEAP_REF ecx // Unpoison array component type if poisoning is enabled.
PUSH_ARG rdi // Save arguments.
PUSH_ARG rsi
PUSH_ARG rdx
movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %esi // Pass arg2 = value's class.
UNPOISON_HEAP_REF esi
.Laput_obj_check_assignability_call:
movl %ecx, %edi // Pass arg1 = array's component type.
SETUP_FP_CALLEE_SAVE_FRAME
call SYMBOL(artIsAssignableFromCode) // (Class* a, Class* b)
RESTORE_FP_CALLEE_SAVE_FRAME // Restore FP registers.
POP_ARG rdx // Restore arguments.
POP_ARG rsi
POP_ARG rdi
testq %rax, %rax // Check for exception.
jz .Laput_obj_throw_array_store_exception
POISON_HEAP_REF edx
movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
movb %dl, (%rdx, %rdi)
ret
.Laput_obj_throw_array_store_exception:
SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // Save all registers as basis for long jump context.
// Outgoing argument set up.
movq %rdx, %rsi // Pass arg 2 = value.
movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
// Pass arg 1 = array.
call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
UNREACHABLE
#ifdef USE_READ_BARRIER
.Laput_obj_gc_marking:
CFI_RESTORE_STATE_AND_DEF_CFA rsp, 8
// We need to align stack for `art_quick_read_barrier_mark_regNN`.
INCREASE_FRAME 8 // Stack alignment.
call SYMBOL(art_quick_read_barrier_mark_reg01) // Mark ECX (array class).
movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %ecx
UNPOISON_HEAP_REF ecx
call SYMBOL(art_quick_read_barrier_mark_reg01) // Mark ECX (component type).
movl MIRROR_OBJECT_CLASS_OFFSET(%rdx), %eax
UNPOISON_HEAP_REF eax
call SYMBOL(art_quick_read_barrier_mark_reg00) // Mark EAX (value's class).
DECREASE_FRAME 8 // Remove stack alignment.
cmpl %eax, %ecx
je .Laput_obj_store
// Prepare arguments in line with `.Laput_obj_check_assignability_call` and jump there.
PUSH_ARG rdi // Save arguments.
PUSH_ARG rsi
PUSH_ARG rdx
movl %eax, %esi // Pass arg2 - type of the value to be stored.
// The arg1 shall be moved at `.Laput_obj_check_assignability_call`.
jmp .Laput_obj_check_assignability_call
#endif // USE_READ_BARRIER
END_FUNCTION art_quick_aput_obj
// TODO: This is quite silly on X86_64 now.
// Tail-calls into libc memcpy; arguments already in RDI/RSI/RDX per SysV ABI.
DEFINE_FUNCTION art_quick_memcpy
call PLT_SYMBOL(memcpy) // (void*, const void*, size_t)
ret
END_FUNCTION art_quick_memcpy
// Suspend-check entrypoint: saves the full register state so the GC can see
// every live reference, then calls into the runtime.
DEFINE_FUNCTION art_quick_test_suspend
SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET // save everything for GC
// Outgoing argument set up
movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
call SYMBOL(artTestSuspendFromCode) // (Thread*)
RESTORE_SAVE_EVERYTHING_FRAME // restore frame up to return address
ret
END_FUNCTION art_quick_test_suspend
// 64-bit arithmetic helpers are not needed on x86-64 (native 64-bit ops);
// left as unimplemented stubs.
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr
// Instantiate the field get/set entrypoints from quick_field_entrypoints.S.
GENERATE_FIELD_ENTRYPOINTS
// Entry for invoking a proxy method: RDI = proxy ArtMethod*, RSI = receiver.
// Builds a save-refs-and-args frame and forwards to the C++ handler.
DEFINE_FUNCTION art_quick_proxy_invoke_handler
SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current().
movq %rsp, %rcx // Pass SP.
call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
RESTORE_SAVE_REFS_AND_ARGS_FRAME
movq %rax, %xmm0 // Copy return value in case of float returns.
RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
/*
 * Called to resolve an imt conflict.
 * rdi is the conflict ArtMethod.
 * rax is a hidden argument that holds the target interface method.
 *
 * Note that this stub writes to rdi.
 */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
int3
int3
#else
movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi // Load ImtConflictTable
.Limt_table_iterate:
// Each table entry is a (interface method, implementation method) pointer pair.
cmpq %rax, 0(%rdi)
jne .Limt_table_next_entry
// We successfully hit an entry in the table. Load the target method
// and jump to it.
movq __SIZEOF_POINTER__(%rdi), %rdi
jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
.Limt_table_next_entry:
// If the entry is null, the interface method is not in the ImtConflictTable.
cmpq LITERAL(0), 0(%rdi)
jz .Lconflict_trampoline
// Iterate over the entries of the ImtConflictTable.
addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
jmp .Limt_table_iterate
.Lconflict_trampoline:
// Call the runtime stub to populate the ImtConflictTable and jump to the
// resolved method.
movq %rax, %rdi // Load interface method
INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
#endif // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
// Resolution trampoline: resolves the called method, then tail-calls the
// resolved code (or delivers the pending exception if resolution failed).
DEFINE_FUNCTION art_quick_resolution_trampoline
SETUP_SAVE_REFS_AND_ARGS_FRAME
movq %gs:THREAD_SELF_OFFSET, %rdx // Pass Thread::Current().
movq %rsp, %rcx // Pass SP.
call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
movq %rax, %r10 // Remember returned code pointer in R10.
movq (%rsp), %rdi // Load called method into RDI.
RESTORE_SAVE_REFS_AND_ARGS_FRAME
testq %r10, %r10 // If code pointer is null goto deliver pending exception.
jz 1f
jmp *%r10 // Tail call into method.
1:
DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
/* Generic JNI frame layout:
*
* #-------------------#
* | |
* | caller method... |
* #-------------------# <--- SP on entry
*
* |
* V
*
* #-------------------#
* | caller method... |
* #-------------------#
* | Return |
* | R15 | callee save
* | R14 | callee save
* | R13 | callee save
* | R12 | callee save
* | R9 | arg5
* | R8 | arg4
* | RSI/R6 | arg1
* | RBP/R5 | callee save
* | RBX/R3 | callee save
* | RDX/R2 | arg2
* | RCX/R1 | arg3
* | XMM7 | float arg 8
* | XMM6 | float arg 7
* | XMM5 | float arg 6
* | XMM4 | float arg 5
* | XMM3 | float arg 4
* | XMM2 | float arg 3
* | XMM1 | float arg 2
* | XMM0 | float arg 1
* | RDI/Method* | <- sp
* #-------------------#
* | Scratch Alloca | 5K scratch space
* #---------#---------#
* | | sp* |
* | Tramp. #---------#
* | args | thread |
* | Tramp. #---------#
* | | method |
* #-------------------# <--- SP on artQuickGenericJniTrampoline
*
* |
* v artQuickGenericJniTrampoline
*
* #-------------------#
* | caller method... |
* #-------------------#
* | Return PC |
* | Callee-Saves |
* | padding | // 8B
* | Method* | <--- (1)
* #-------------------#
* | local ref cookie | // 4B
* | padding | // 0B or 4B to align handle scope on 8B address
* | handle scope | // Size depends on number of references; multiple of 4B.
* #-------------------#
* | JNI Stack Args | // Empty if all args fit into registers.
* #-------------------# <--- SP on native call (1)
* | Free scratch |
* #-------------------#
* | SP for JNI call | // Pointer to (1).
* #-------------------#
* | Hidden arg | // For @CriticalNative
* #-------------------#
* | |
* | Stack for Regs | The trampoline assembly will pop these values
* | | into registers for native call
* #-------------------#
*/
/*
 * Called to do a generic JNI down-call.
 * See the frame layout diagram above: artQuickGenericJniTrampoline builds the
 * native frame in the reserved area, this stub then loads the argument
 * registers from it, calls the native code, and tears everything down via
 * artQuickGenericJniEndTrampoline.
 */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
movq %rsp, %rbp // save SP at (old) callee-save frame
CFI_DEF_CFA_REGISTER(rbp)
subq LITERAL(GENERIC_JNI_TRAMPOLINE_RESERVED_AREA), %rsp
// prepare for artQuickGenericJniTrampoline call
// (Thread*, managed_sp, reserved_area)
// rdi rsi rdx <= C calling convention
// gs:... rbp rsp <= where they are
movq %gs:THREAD_SELF_OFFSET, %rdi // Pass Thread::Current().
movq %rbp, %rsi // Pass managed frame SP.
movq %rsp, %rdx // Pass reserved area.
call SYMBOL(artQuickGenericJniTrampoline) // (Thread*, managed_sp, reserved_area)
// The C call will have registered the complete save-frame on success.
// The result of the call is:
// %rax: pointer to native code, 0 on error.
// The bottom of the reserved area contains values for arg registers,
// hidden arg register and SP for out args for the call.
// Check for error (class init check or locking for synchronized native method can throw).
test %rax, %rax
jz .Lexception_in_native
// Pop the six GPR arguments from the register-passing area prepared by the
// C trampoline.
popq %rdi
popq %rsi
popq %rdx
popq %rcx
popq %r8
popq %r9
// TODO: skip floating point if unused, some flag.
movq 0(%rsp), %xmm0
movq 8(%rsp), %xmm1
movq 16(%rsp), %xmm2
movq 24(%rsp), %xmm3
movq 32(%rsp), %xmm4
movq 40(%rsp), %xmm5
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
// Save call target in scratch register.
movq %rax, %r11
// Load hidden arg (rax) for @CriticalNative.
movq 64(%rsp), %rax
// Load SP for out args, releasing unneeded reserved area.
movq 72(%rsp), %rsp
// native call
call *%r11
// result sign extension is handled in C code
// prepare for artQuickGenericJniEndTrampoline call
// (Thread*, result, result_f)
// rdi rsi rdx <= C calling convention
// gs:... rax xmm0 <= where they are
movq %gs:THREAD_SELF_OFFSET, %rdi
movq %rax, %rsi
movq %xmm0, %rdx
call SYMBOL(artQuickGenericJniEndTrampoline)
// Pending exceptions possible.
// TODO: use cmpq, needs direct encoding because of gas bug
movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
test %rcx, %rcx
jnz .Lexception_in_native
// Tear down the alloca.
movq %rbp, %rsp
// store into fpr, for when it's a fpr return...
movq %rax, %xmm0
LOAD_RUNTIME_INSTANCE rcx
cmpb MACRO_LITERAL(0), RUN_EXIT_HOOKS_OFFSET_FROM_RUNTIME_INSTANCE(%rcx)
CFI_REMEMBER_STATE
jne .Lcall_method_exit_hook
.Lcall_method_exit_hook_done:
// Tear down the callee-save frame.
CFI_DEF_CFA_REGISTER(rsp)
// Load callee-save FPRs. Skip FP args.
movq 80(%rsp), %xmm12
movq 88(%rsp), %xmm13
movq 96(%rsp), %xmm14
movq 104(%rsp), %xmm15
// Pop method, padding, FP args and two GRP args (rcx, rdx).
DECREASE_FRAME 16 + 12*8 + 2*8
// Load callee-save GPRs and skip args, mixed together to agree with core spills bitmap.
POP rbx // Callee save.
POP rbp // Callee save.
DECREASE_FRAME 3*8 // Skip three args (RSI, R8, R9).
POP r12 // Callee save.
POP r13 // Callee save.
POP r14 // Callee save.
POP r15 // Callee save.
ret
.Lcall_method_exit_hook:
CFI_RESTORE_STATE_AND_DEF_CFA rbp, 208
movq LITERAL(FRAME_SIZE_SAVE_REFS_AND_ARGS), %r8
call art_quick_method_exit_hook
jmp .Lcall_method_exit_hook_done
.Lexception_in_native:
pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
addq LITERAL(-1), (%rsp) // Remove the GenericJNI tag.
movq (%rsp), %rsp
call art_deliver_pending_exception
END_FUNCTION art_quick_generic_jni_trampoline
// Common exception-delivery target for the stubs above.
DEFINE_FUNCTION art_deliver_pending_exception
// This will create a new save-all frame, required by the runtime.
DELIVER_PENDING_EXCEPTION
END_FUNCTION art_deliver_pending_exception
/*
 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
 * of a quick call:
 * RDI = method being called / to bridge to.
 * RSI, RDX, RCX, R8, R9 are arguments to that method.
 */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
SETUP_SAVE_REFS_AND_ARGS_FRAME // Set up frame and save arguments.
movq %gs:THREAD_SELF_OFFSET, %rsi // RSI := Thread::Current()
movq %rsp, %rdx // RDX := sp
call SYMBOL(artQuickToInterpreterBridge) // (method, Thread*, SP)
RESTORE_SAVE_REFS_AND_ARGS_FRAME // TODO: no need to restore arguments in this case.
movq %rax, %xmm0 // Place return value also into floating point return value.
RETURN_OR_DELIVER_PENDING_EXCEPTION // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge
/*
 * Called to catch an attempt to invoke an obsolete method.
 * RDI = method being called.
 */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
/*
 * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
 * will long jump to the interpreter bridge.
 * RDI holds the DeoptimizationKind on entry (per the C++ signature below).
 */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
SETUP_SAVE_EVERYTHING_FRAME
// Stack should be aligned now.
movq %gs:THREAD_SELF_OFFSET, %rsi // Pass Thread.
call SYMBOL(artDeoptimizeFromCompiledCode) // (DeoptimizationKind, Thread*)
UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code
/*
 * String's compareTo.
 *
 * On entry:
 * rdi: this string object (known non-null)
 * rsi: comp string object (known non-null)
 *
 * With string compression, the count field's low bit encodes the compression
 * flag (shifted out below), so three cases are handled: both compressed,
 * one compressed, neither compressed.
 */
DEFINE_FUNCTION art_quick_string_compareto
movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
/* Build pointers to the start of string data */
leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
#if (STRING_COMPRESSION_FEATURE)
/* Differ cases: shift out the compression flag bit; CF tells compressed-ness */
shrl LITERAL(1), %r8d
jnc .Lstring_compareto_this_is_compressed
shrl LITERAL(1), %r9d
jnc .Lstring_compareto_that_is_compressed
jmp .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
shrl LITERAL(1), %r9d
jnc .Lstring_compareto_both_compressed
/* Comparison this (8-bit) and that (16-bit) */
mov %r8d, %eax
subl %r9d, %eax // eax = length difference (fallback result).
mov %r8d, %ecx
cmovg %r9d, %ecx // ecx = min(this_len, that_len).
/* Going into loop to compare each character */
jecxz .Lstring_compareto_keep_length1 // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
movzbl (%edi), %r8d // move *(this_cur_char) byte to long
movzwl (%esi), %r9d // move *(that_cur_char) word to long
addl LITERAL(1), %edi // ++this_cur_char (8-bit)
addl LITERAL(2), %esi // ++that_cur_char (16-bit)
subl %r9d, %r8d
loope .Lstring_compareto_loop_comparison_this_compressed
cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length1:
ret
.Lstring_compareto_that_is_compressed:
movl %r8d, %eax
subl %r9d, %eax // eax = length difference (fallback result).
mov %r8d, %ecx
cmovg %r9d, %ecx // ecx = min(this_len, that_len).
/* Comparison this (16-bit) and that (8-bit) */
jecxz .Lstring_compareto_keep_length2 // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
movzwl (%edi), %r8d // move *(this_cur_char) word to long
movzbl (%esi), %r9d // move *(that_cur_chat) byte to long
addl LITERAL(2), %edi // ++this_cur_char (16-bit)
addl LITERAL(1), %esi // ++that_cur_char (8-bit)
subl %r9d, %r8d
loope .Lstring_compareto_loop_comparison_that_compressed
cmovne %r8d, %eax // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length2:
ret
.Lstring_compareto_both_compressed:
/* Calculate min length and count diff */
movl %r8d, %ecx
movl %r8d, %eax
subl %r9d, %eax
cmovg %r9d, %ecx
jecxz .Lstring_compareto_keep_length3
repe cmpsb // Compare min-length byte runs.
je .Lstring_compareto_keep_length3
movzbl -1(%edi), %eax // get last compared char from this string (8-bit)
movzbl -1(%esi), %ecx // get last compared char from comp string (8-bit)
jmp .Lstring_compareto_count_difference
#endif // STRING_COMPRESSION_FEATURE
.Lstring_compareto_both_not_compressed:
/* Calculate min length and count diff */
movl %r8d, %ecx
movl %r8d, %eax
subl %r9d, %eax
cmovg %r9d, %ecx
/*
 * At this point we have:
 * eax: value to return if first part of strings are equal
 * ecx: minimum among the lengths of the two strings
 * esi: pointer to comp string data
 * edi: pointer to this string data
 */
jecxz .Lstring_compareto_keep_length3
repe cmpsw // find nonmatching chars in [%esi] and [%edi], up to length %ecx
je .Lstring_compareto_keep_length3
movzwl -2(%edi), %eax // get last compared char from this string (16-bit)
movzwl -2(%esi), %ecx // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
subl %ecx, %eax // return the difference
.Lstring_compareto_keep_length3:
ret
END_FUNCTION art_quick_string_compareto
// 16-bit memcmp helper is not needed on x86-64; left unimplemented.
UNIMPLEMENTED art_quick_memcmp16
// instanceof entrypoint: RDI = object, RSI = class (SysV args forwarded
// unchanged to the C++ helper). Returns the result in RAX.
DEFINE_FUNCTION art_quick_instance_of
SETUP_FP_CALLEE_SAVE_FRAME
subq LITERAL(8), %rsp // Alignment padding.
CFI_ADJUST_CFA_OFFSET(8)
call SYMBOL(artInstanceOfFromCode) // (mirror::Object*, mirror::Class*)
addq LITERAL(8), %rsp
CFI_ADJUST_CFA_OFFSET(-8)
RESTORE_FP_CALLEE_SAVE_FRAME
ret
END_FUNCTION art_quick_instance_of
// StringBuilder.append fast path: passes the format word (already in RDI) and
// a pointer to the stacked argument words to the C++ implementation.
DEFINE_FUNCTION art_quick_string_builder_append
SETUP_SAVE_REFS_ONLY_FRAME // save ref containing registers for GC
// Outgoing argument set up
leaq FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__(%rsp), %rsi // pass args
movq %gs:THREAD_SELF_OFFSET, %rdx // pass Thread::Current()
call artStringBuilderAppend // (uint32_t, const uint32_t*, Thread*)
RESTORE_SAVE_REFS_ONLY_FRAME // restore frame up to return address
RETURN_OR_DEOPT_IF_RESULT_IS_NON_NULL_OR_DELIVER // return or deopt or deliver exception
END_FUNCTION art_quick_string_builder_append
// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass the (sole) argument,
// - register `reg` (which may be different from RAX) is used to return the result,
// - all other registers are callee-save (the values they hold are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
DEFINE_FUNCTION VAR(name)
// Null check so that we can load the lock word.
testq REG_VAR(reg), REG_VAR(reg)
jz .Lret_rb_\name
.Lnot_null_\name:
// Check the mark bit, if it is 1 return.
testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
jz .Lslow_rb_\name
ret
.Lslow_rb_\name:
PUSH rax
movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
// Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
// forwarding address one.
// Taken ~25% of the time.
CFI_REMEMBER_STATE
jnae .Lret_forwarding_address\name
// Save all potentially live caller-save core registers.
movq 0(%rsp), %rax // Reload original rax (clobbered by the lock-word check above).
PUSH rcx
PUSH rdx
PUSH rsi
PUSH rdi
PUSH r8
PUSH r9
PUSH r10
PUSH r11
// Create space for caller-save floating-point registers.
INCREASE_FRAME 12 * 8
// Save all potentially live caller-save floating-point registers.
movq %xmm0, 0(%rsp)
movq %xmm1, 8(%rsp)
movq %xmm2, 16(%rsp)
movq %xmm3, 24(%rsp)
movq %xmm4, 32(%rsp)
movq %xmm5, 40(%rsp)
movq %xmm6, 48(%rsp)
movq %xmm7, 56(%rsp)
movq %xmm8, 64(%rsp)
movq %xmm9, 72(%rsp)
movq %xmm10, 80(%rsp)
movq %xmm11, 88(%rsp)
SETUP_FP_CALLEE_SAVE_FRAME
.ifnc RAW_VAR(reg), rdi
movq REG_VAR(reg), %rdi // Pass arg1 - obj from `reg`.
.endif
call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
.ifnc RAW_VAR(reg), rax
movq %rax, REG_VAR(reg) // Return result into `reg`.
.endif
RESTORE_FP_CALLEE_SAVE_FRAME
// Restore floating-point registers.
movq 0(%rsp), %xmm0
movq 8(%rsp), %xmm1
movq 16(%rsp), %xmm2
movq 24(%rsp), %xmm3
movq 32(%rsp), %xmm4
movq 40(%rsp), %xmm5
movq 48(%rsp), %xmm6
movq 56(%rsp), %xmm7
movq 64(%rsp), %xmm8
movq 72(%rsp), %xmm9
movq 80(%rsp), %xmm10
movq 88(%rsp), %xmm11
// Remove floating-point registers.
DECREASE_FRAME 12 * 8
// Restore core regs, except `reg`, as it is used to return the
// result of this function (simply remove it from the stack instead).
POP_REG_NE r11, RAW_VAR(reg)
POP_REG_NE r10, RAW_VAR(reg)
POP_REG_NE r9, RAW_VAR(reg)
POP_REG_NE r8, RAW_VAR(reg)
POP_REG_NE rdi, RAW_VAR(reg)
POP_REG_NE rsi, RAW_VAR(reg)
POP_REG_NE rdx, RAW_VAR(reg)
POP_REG_NE rcx, RAW_VAR(reg)
POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
ret
.Lret_forwarding_address\name:
CFI_RESTORE_STATE_AND_DEF_CFA rsp, 16
// The overflow cleared the top bits; shift back to recover the forwarding address.
sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
movq %rax, REG_VAR(reg)
POP_REG_NE rax, RAW_VAR(reg)
ret
END_FUNCTION VAR(name)
END_MACRO
// Instantiate one mark stub per general-purpose register (register number in
// the name matches the x86-64 encoding).
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15
/*
 * Read barrier slow path: forwards to artReadBarrierSlow(ref, obj, offset)
 * with the arguments already in the native C ABI registers (RDI, RSI, RDX).
 * The result comes back in RAX per the native ABI.
 */
DEFINE_FUNCTION art_quick_read_barrier_slow
SETUP_FP_CALLEE_SAVE_FRAME
subq LITERAL(8), %rsp // Alignment padding (keep RSP 16-byte aligned at the call).
CFI_ADJUST_CFA_OFFSET(8)
call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
addq LITERAL(8), %rsp // Remove the alignment padding.
CFI_ADJUST_CFA_OFFSET(-8)
RESTORE_FP_CALLEE_SAVE_FRAME
ret
END_FUNCTION art_quick_read_barrier_slow
/*
 * Read barrier slow path for GC roots: forwards to
 * artReadBarrierForRootSlow(root) with the argument already in RDI.
 * The result comes back in RAX per the native ABI.
 */
DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
SETUP_FP_CALLEE_SAVE_FRAME
subq LITERAL(8), %rsp // Alignment padding (keep RSP 16-byte aligned at the call).
CFI_ADJUST_CFA_OFFSET(8)
call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
addq LITERAL(8), %rsp // Remove the alignment padding.
CFI_ADJUST_CFA_OFFSET(-8)
RESTORE_FP_CALLEE_SAVE_FRAME
ret
END_FUNCTION art_quick_read_barrier_for_root_slow
/*
 * On stack replacement stub.
 * On entry:
 * [sp] = return address
 * rdi = stack to copy
 * rsi = size of stack
 * rdx = pc to call
 * rcx = JValue* result
 * r8 = shorty
 * r9 = thread
 *
 * Note that the native C ABI already aligned the stack to 16-byte.
 */
DEFINE_FUNCTION art_quick_osr_stub
// Save the non-volatiles.
PUSH rbp // Save rbp.
PUSH rcx // Save rcx/result*.
PUSH r8 // Save r8/shorty*.
// Save callee saves.
PUSH rbx
PUSH r12
PUSH r13
PUSH r14
PUSH r15
pushq LITERAL(0) // Push null for ArtMethod*.
CFI_ADJUST_CFA_OFFSET(8)
movl %esi, %ecx // rcx := size of stack (32-bit move zero-extends into rcx)
movq %rdi, %rsi // rsi := stack to copy
movq %rsp, %rbp // Save stack pointer to RBP for CFI use in .Losr_entry.
CFI_REMEMBER_STATE
call .Losr_entry // Pushes the return address the OSR'd code returns through.
// Restore stack and callee-saves.
addq LITERAL(8), %rsp // Drop the null ArtMethod* slot.
CFI_ADJUST_CFA_OFFSET(-8)
POP r15
POP r14
POP r13
POP r12
POP rbx
POP r8
POP rcx
POP rbp
movq %rax, (%rcx) // Store the result into the JValue*.
ret
.Losr_entry:
// Restore the unwind state remembered before the call; offset 80 = the nine
// 8-byte pushes above plus the caller's return address, relative to the
// pre-call SP (which was saved in RBP).
CFI_RESTORE_STATE_AND_DEF_CFA rsp, 80
// Since the call has pushed the return address we need to switch the CFA register to RBP.
CFI_DEF_CFA_REGISTER(rbp)
subl LITERAL(8), %ecx // Given stack size contains pushed frame pointer, subtract it.
subq %rcx, %rsp // Reserve space for the frame being copied in.
movq %rsp, %rdi // rdi := beginning of stack
rep movsb // while (rcx--) { *rdi++ = *rsi++ }
jmp *%rdx // Tail-jump into the OSR-compiled code at `pc`.
END_FUNCTION art_quick_osr_stub
/*
 * Polymorphic (method handle) invocation stub: sets up a refs-and-args frame
 * and forwards to artInvokePolymorphic(receiver, self, SP).
 */
DEFINE_FUNCTION art_quick_invoke_polymorphic
// On entry: RDI := unused, RSI := receiver
SETUP_SAVE_REFS_AND_ARGS_FRAME // save callee saves
movq %rsi, %rdi // RDI := receiver
movq %gs:THREAD_SELF_OFFSET, %rsi // RSI := Thread (self)
movq %rsp, %rdx // RDX := pass SP
call SYMBOL(artInvokePolymorphic) // invoke with (receiver, self, SP)
// Result (if any) is in RAX; restore the frame before returning.
RESTORE_SAVE_REFS_AND_ARGS_FRAME
movq %rax, %xmm0 // Result is in RAX. Copy to FP result register.
RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic
/*
 * invoke-custom stub: sets up a refs-and-args frame and forwards to
 * artInvokeCustom(call_site_idx, Thread*, SP). RDI already holds the
 * call site index on entry and is passed through unchanged.
 */
DEFINE_FUNCTION art_quick_invoke_custom
SETUP_SAVE_REFS_AND_ARGS_FRAME // save callee saves
// RDI := call_site_index
movq %gs:THREAD_SELF_OFFSET, %rsi // RSI := Thread::Current()
movq %rsp, %rdx // RDX := SP
call SYMBOL(artInvokeCustom) // artInvokeCustom(call_site_idx, Thread*, SP)
RESTORE_SAVE_REFS_AND_ARGS_FRAME
movq %rax, %xmm0 // Result is in RAX. Copy to FP result register.
RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom
// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
// Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
// Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
// Argument 2: RDX: The value of DEX PC (memory address of the method's bytecode).
DEFINE_FUNCTION ExecuteSwitchImplAsm
PUSH rbx // Spill RBX (callee-saved); also re-aligns RSP to 16 for the call.
movq %rdx, %rbx // RBX = DEX PC (callee save register)
CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)
call *%rsi // Call the wrapped function
POP rbx // Restore RBX
ret
END_FUNCTION ExecuteSwitchImplAsm
// On entry: edi is the class, r11 is the inline cache. r10 and rax are available.
//
// Scans the five cache entries in order. For each of the first four entries:
// hit (entry == class) => done; empty (entry == 0) => try to claim the slot
// with a lock cmpxchg (EAX already holds the expected 0 from the load);
// occupied by a different class => move on to the next entry. On a cmpxchg
// failure EAX holds the value now in the slot, so the code loops back to
// re-examine the same entry. The fifth entry is written unconditionally.
DEFINE_FUNCTION art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
// Don't update the cache if we are marking.
cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
jnz .Ldone
.Lentry1:
movl INLINE_CACHE_CLASSES_OFFSET(%r11), %eax
cmpl %edi, %eax
je .Ldone
cmpl LITERAL(0), %eax
jne .Lentry2
lock cmpxchg %edi, INLINE_CACHE_CLASSES_OFFSET(%r11)
jz .Ldone
jmp .Lentry1
.Lentry2:
movl (INLINE_CACHE_CLASSES_OFFSET+4)(%r11), %eax
cmpl %edi, %eax
je .Ldone
cmpl LITERAL(0), %eax
jne .Lentry3
lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+4)(%r11)
jz .Ldone
jmp .Lentry2
.Lentry3:
movl (INLINE_CACHE_CLASSES_OFFSET+8)(%r11), %eax
cmpl %edi, %eax
je .Ldone
cmpl LITERAL(0), %eax
jne .Lentry4
lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+8)(%r11)
jz .Ldone
jmp .Lentry3
.Lentry4:
movl (INLINE_CACHE_CLASSES_OFFSET+12)(%r11), %eax
cmpl %edi, %eax
je .Ldone
cmpl LITERAL(0), %eax
jne .Lentry5
lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+12)(%r11)
jz .Ldone
jmp .Lentry4
.Lentry5:
// Unconditionally store, the cache is megamorphic.
movl %edi, (INLINE_CACHE_CLASSES_OFFSET+16)(%r11)
.Ldone:
ret
END_FUNCTION art_quick_update_inline_cache
// Requests optimized (JIT) compilation of the current method via
// artCompileOptimized, preserving all registers with a save-everything frame.
// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_compile_optimized
SETUP_SAVE_EVERYTHING_FRAME
movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
call SYMBOL(artCompileOptimized) // (ArtMethod*, Thread*)
RESTORE_SAVE_EVERYTHING_FRAME // restore frame up to return address
ret
END_FUNCTION art_quick_compile_optimized
// Calls the method-entry instrumentation hook artMethodEntryHook, preserving
// all registers with a save-everything frame.
// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_method_entry_hook
SETUP_SAVE_EVERYTHING_FRAME
movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
movq %rsp, %rdx // SP
call SYMBOL(artMethodEntryHook) // (ArtMethod*, Thread*, sp)
RESTORE_SAVE_EVERYTHING_FRAME
ret
END_FUNCTION art_quick_method_entry_hook
// Calls the method-exit instrumentation hook artMethodExitHook, preserving
// all registers (including the method's GPR/FPR return values, which the
// hook receives by pointer into the frame) with a save-everything frame.
// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_method_exit_hook
SETUP_SAVE_EVERYTHING_FRAME
// R8 passed from JITed code contains frame_size; it serves directly as the
// fifth native-ABI argument to artMethodExitHook.
leaq 16(%rsp), %rcx // floating-point result pointer in kSaveEverything
// frame
leaq 144(%rsp), %rdx // integer result pointer in kSaveEverything frame
leaq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rsi // ArtMethod**
movq %gs:THREAD_SELF_OFFSET, %rdi // Thread::Current
call SYMBOL(artMethodExitHook) // (Thread*, ArtMethod**, gpr_res*, fpr_res*,
// frame_size)
// Normal return.
RESTORE_SAVE_EVERYTHING_FRAME
ret
END_FUNCTION art_quick_method_exit_hook