blob: bfc49c0c20a29235d21f7455e777eeef87f207cd [file] [log] [blame]
%def header():
/*
* Copyright (C) 2023 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This is a #include, not a %include, because we want the C pre-processor
* to expand the macros into assembler assignment statements.
*/
#include "asm_support.h"
#include "arch/riscv64/asm_support_riscv64.S"
/**
* RISC-V 64 ABI general notes
*
* References
* - https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
* - runtime/arch/riscv64/registers_riscv64.h
*
* 32 general-purpose registers
* - fixed purpose: zero, ra, sp, gp, tp, s1
* gp/scs: shadow call stack - do not clobber!
* s1/tr: ART thread register - do not clobber!
* - temporaries: t0-t6
* - arguments: a0-a7
* - callee saved: ra, s0/fp, s2-s11
* s0 is flexible, available to use as a frame pointer if needed.
*
* 32 floating point registers
* - temporaries: ft0-ft11
* - arguments: fa0-fa7
* - callee saved: fs0-fs11
*/
// Android references
// Bytecodes: https://source.android.com/docs/core/runtime/dalvik-bytecode
// Instruction formats: https://source.android.com/docs/core/runtime/instruction-formats
// Shorty: https://source.android.com/docs/core/runtime/dex-format#shortydescriptor
// Fixed register usages in Nterp.
// These are C pre-processor aliases: each nickname is replaced by its ABI register name before
// the assembler runs. Every nickname maps to a register that the ABI notes above list as either
// fixed-purpose (s1) or callee-saved (s2-s6).
// nickname ABI reg purpose
#define xSELF s1 // x9, Thread* self pointer
#define xFP s2 // x18, interpreted frame pointer: to access locals and args
#define xPC s3 // x19, interpreted program counter: to fetch instructions
#define xINST s4 // x20, first 16-bit code unit of current instruction
#define xIBASE s5 // x21, interpreted instruction base pointer: for computed goto
#define xREFS s6 // x22, base of object references of dex registers
// DWARF registers reference
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-dwarf.adoc
// The numbers below are the x-register numbers of the registers named in each comment; keep
// them in sync with the nicknames above if a nickname is ever moved to another register.
#define CFI_TMP 10 // DWARF register number for a0/x10
#define CFI_DEX 19 // DWARF register number for xPC /s3/x19
#define CFI_REFS 22 // DWARF register number for xREFS/s6/x22
// Synchronization
// This code follows the RISC-V atomics ABI specification [1].
//
// Object publication.
// new-instance and new-array operations must first perform a `fence w,w` "constructor fence" to
// ensure their new object references are correctly published with a subsequent SET_VREG_OBJECT.
//
// Volatile load/store.
// A volatile load is implemented as: fence rw,rw ; load ; fence r,rw.
// A 32-bit or 64-bit volatile store is implemented as: amoswap.{w,d}.rl
// A volatile store for a narrower type is implemented as: fence rw,w ; store ; fence rw,rw
//
// [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-atomic.adoc
// Opens an assembly entry point for nterp: \name becomes a hidden, global function symbol
// whose label is aligned to 16 bytes. No CFI directives are emitted by this macro.
.macro OAT_ENTRY name
    .global \name
    .hidden \name
    .type \name, @function
    .p2align 4
\name:
.endm
// Sets the ELF size of \name to the distance from its label to the current location.
.macro SIZE name
    .size \name, . - \name
.endm
// Opens a function symbol the way ENTRY does, minus the CFI directives.
.macro NAME_START name
    .global \name
    .hidden \name               // Hidden visibility, so we do not incur plt calls.
    .type \name, @function
    .p2align 4                  // 16-byte alignment for the function entry.
\name:
.endm
// Closes a symbol opened with NAME_START by recording its size (label to current location).
.macro NAME_END name
    .size \name, . - \name
.endm
// Macro for defining entrypoints into runtime. We don't need to save registers (we're not holding
// references there), but there is no kDontSave runtime method. So just use the kSaveRefsOnly
// runtime method.
//
// Inputs:
// - \name: symbol of the trampoline being defined
// - \helper: function called between frame setup and frame teardown
// Behavior after \helper returns: if the thread has a pending exception, control transfers to
// nterp_deliver_pending_exception; otherwise the trampoline returns to its caller.
// Clobbers: t0 (after the frame has been restored), plus whatever \helper clobbers.
.macro NTERP_TRAMPOLINE name, helper
ENTRY \name
SETUP_SAVE_REFS_ONLY_FRAME
call \helper
RESTORE_SAVE_REFS_ONLY_FRAME
ld t0, THREAD_EXCEPTION_OFFSET(xSELF) // t0 := thread's exception field
bnez t0, nterp_deliver_pending_exception // non-null: deliver it instead of returning
ret
END \name
.endm
// Unpack code items from dex format.
// Input: \code_item
// Output:
// - \regs: register count
// - \outs: out count
// - \ins: in count. If set to register "zero" (x0), load is skipped.
// - \code_item: holds instruction array on exit
// Notes:
// - All counts are loaded as unsigned 16-bit values (LHU).
// - \regs is handed to BRANCH_IF_BIT_CLEAR (presumably as its scratch register) before it
//   receives the register count.
// - Compact dex (LSB of \code_item set) is not supported: that path executes `unimp`.
.macro FETCH_CODE_ITEM_INFO code_item, regs, outs, ins
// Check LSB of \code_item. If 1, it's a compact dex file.
BRANCH_IF_BIT_CLEAR \regs, \code_item, 0, 1f // Regular dex.
unimp // Compact dex: unimplemented.
1:
// Unpack values from regular dex format.
lhu \regs, CODE_ITEM_REGISTERS_SIZE_OFFSET(\code_item)
lhu \outs, CODE_ITEM_OUTS_SIZE_OFFSET(\code_item)
.ifnc \ins, zero
lhu \ins, CODE_ITEM_INS_SIZE_OFFSET(\code_item)
.endif
// Advance last: the loads above use \code_item as their base address.
addi \code_item, \code_item, CODE_ITEM_INSNS_OFFSET
.endm
// Stores the current dex PC pointer (xPC) into the 8-byte slot at xREFS - 16.
// NOTE(review): presumably this is the frame's dex-PC slot that the runtime reads when it
// needs the interpreter's position (e.g. on a throw) - confirm against the nterp frame layout.
.macro EXPORT_PC
sd xPC, -16(xREFS)
.endm
// Branches to \label when the thread's is-GC-marking byte is non-zero; falls through otherwise.
// Clobbers: \reg (receives the sign-extended flag byte)
.macro TEST_IF_MARKING reg, label
lb \reg, THREAD_IS_GC_MARKING_OFFSET(xSELF)
bnez \reg, \label
.endm
// Tests the thread flags against the suspend-or-checkpoint request mask.
// - No request bit set: branches to \continue.
// - Otherwise: exports the dex PC, calls art_quick_test_suspend, and then falls through to
//   whatever follows the macro (it does NOT jump to \continue on this path).
// Clobbers: t0, and ra on the call path, plus whatever art_quick_test_suspend clobbers.
.macro DO_SUSPEND_CHECK continue
lwu t0, THREAD_FLAGS_OFFSET(xSELF)
andi t0, t0, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
beqz t0, \continue
EXPORT_PC
call art_quick_test_suspend
.endm
// Load a value from the instruction stream at a fixed distance from xPC.
//
// \count is the distance in 16-bit code units, so the byte displacement is \count*2.
// \width selects the load size in bits:
//    8 - one byte of the code unit at the offset; \byte={0,1} picks which one.
//   16 - one code unit (the default).
//   32 - two code units.
//   64 - four code units; \signed has no effect here.
// The RISC-V ISA supports unaligned accesses for the wider loads.
//
// With \signed=0 (the default) the value is zero-extended; a non-zero \signed sign-extends it.
// Any other \width assembles to `unimp`.
//
// xPC is left unchanged.
.macro FETCH reg, count, signed=0, width=16, byte=0
    .if \width == 64
        ld \reg, (\count*2)(xPC)
    .elseif \width == 32
        .ifeq \signed
            lwu \reg, (\count*2)(xPC)
        .else
            lw \reg, (\count*2)(xPC)
        .endif
    .elseif \width == 16
        .ifeq \signed
            lhu \reg, (\count*2)(xPC)
        .else
            lh \reg, (\count*2)(xPC)
        .endif
    .elseif \width == 8
        .ifeq \signed
            lbu \reg, (\count*2 + \byte)(xPC)
        .else
            lb \reg, (\count*2 + \byte)(xPC)
        .endif
    .else
        unimp // impossible
    .endif
.endm
// Load the 16-bit code unit that xPC points at into xINST (zero-extended).
// xPC itself is not modified.
.macro FETCH_INST
    lhu xINST, 0(xPC)           // upper 48 bits of xINST become zero
.endm
// Fetch the next instruction, from xPC into xINST. Advance xPC by \count units, each 2 bytes.
//
// \count must be an assemble-time constant: it is used both as a load displacement and as an
// ADDI immediate.
// Immediates have a 12-bit offset range from xPC. Thus, \count can range from -1024 to 1023.
//
// Note: Must be placed AFTER anything that can throw an exception, or the exception catch may miss.
// Thus, this macro must be placed after EXPORT_PC.
.macro FETCH_ADVANCE_INST count
lhu xINST, (\count*2)(xPC) // zero in upper 48 bits
addi xPC, xPC, (\count*2) // xPC now points at the code unit just loaded into xINST
.endm
// Extract the opcode, i.e. the low byte of xINST, into \reg.
// Clobbers: \reg
.macro GET_INST_OPCODE reg
    andi \reg, xINST, 0xFF
.endm
// Jumps to the handler of the opcode held in \reg.
// The target is xIBASE + (\reg << handler_size_bits): handlers sit at a fixed power-of-two
// stride starting at xIBASE. The shift amount is substituted by the template generator.
// Does not return and does not write ra.
// Clobbers: \reg
.macro GOTO_OPCODE reg
slliw \reg, \reg, ${handler_size_bits}
add \reg, xIBASE, \reg
jr \reg
.endm
// Looks up xPC in the thread's interpreter cache.
// - Hit (stored key equals xPC): \reg := the 8-byte value stored after the key; falls through.
// - Miss: branches to \miss_label; \reg is not written.
// Clobbers: \z0 (entry address), \z1 (key read from the entry)
.macro FETCH_FROM_THREAD_CACHE reg, miss_label, z0, z1
// See art::InterpreterCache::IndexOf() for computing index of key within cache array.
// Entry address:
// xSELF + OFFSET + ((xPC>>2 & 0xFF) << 4)
// = xSELF + OFFSET + ((xPC & 0xFF<<2) << 2)
// = xSELF + ((OFFSET>>2 + (xPC & 0xFF<<2)) << 2)
// => ANDI, ADD, SH2ADD
// The last step folds OFFSET into the shifted term, which is why OFFSET must be a multiple
// of 4 (checked below).
#if (THREAD_INTERPRETER_CACHE_SIZE_LOG2 != 8)
#error Expected interpreter cache array size = 256 elements
#endif
#if (THREAD_INTERPRETER_CACHE_SIZE_SHIFT != 2)
#error Expected interpreter cache entry size = 16 bytes
#endif
#if ((THREAD_INTERPRETER_CACHE_OFFSET & 0x3) != 0)
#error Expected interpreter cache offset to be 4-byte aligned
#endif
andi \z0, xPC, 0xFF << 2
addi \z0, \z0, THREAD_INTERPRETER_CACHE_OFFSET >> 2
sh2add \z0, \z0, xSELF // z0 := entry's address
ld \z1, (\z0) // z1 := dex PC
bne xPC, \z1, \miss_label
ld \reg, 8(\z0) // value: depends on context; see call site
.endm
// Hotness filter for methods flagged as memory-shared.
// - Flag bit clear in the method's access flags: branch to \if_hot.
// - Flag bit set: consult the per-thread shared-method hotness counter instead.
//   - Counter is zero: branch to \if_hot.
//   - Otherwise: decrement and store the counter, then jump to \if_not_hot.
// Never falls through.
// Inputs:
// - a0: method whose access flags are read
// - xSELF
// Clobbers: t0
.macro CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot, if_not_hot
lwu t0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
BRANCH_IF_BIT_CLEAR t0, t0, ART_METHOD_IS_MEMORY_SHARED_FLAG_BIT, \if_hot
lwu t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF) // t0 := hotness
beqz t0, \if_hot
addi t0, t0, -1 // counter counts down to zero: decrementing it increases hotness
sw t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)
j \if_not_hot
.endm
// Update xPC by \units code units. On back edges, perform hotness and suspend.
//
// \units is a register holding a signed offset in 16-bit code units; it is read, not written.
// Control never falls out of this macro: every path ends in a handler dispatch or a tail call.
// - Forward branch (\units > 0): dispatch the instruction at the new xPC.
// - Back edge or self branch (\units <= 0): reload the ArtMethod* from the bottom of the frame
//   and look at its hotness counter.
//   - Counter non-zero: decrement it, run the suspend check, then dispatch.
//   - Counter zero: tail-call NterpHandleHotnessOverflow with a0 = ArtMethod*.
// Clobbers: t0; a0 on the back-edge path.
.macro BRANCH units
sh1add xPC, \units, xPC
blez \units, 2f // If branch is <= 0, increase hotness and do a suspend check.
1:
FETCH_INST
GET_INST_OPCODE t0
GOTO_OPCODE t0
2:
ld a0, (sp)
lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0) // t0 := hotness
#if (NTERP_HOTNESS_VALUE != 0)
#error Expected 0 for hotness value
#endif
// If the counter is at zero (hot), handle it in the runtime.
beqz t0, 3f
addi t0, t0, -1 // counter counts down to zero: decrementing it increases hotness
sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
DO_SUSPEND_CHECK continue=1b
j 1b // reached only after the suspend call returns
3:
tail NterpHandleHotnessOverflow // arg a0 (ArtMethod*)
.endm
// Increase method hotness before starting the method.
// Hardcoded:
// - a0: ArtMethod* (reloaded from the bottom of the frame; the incoming a0 is not used)
// Control never falls out of this macro: it always ends by dispatching the instruction at xPC.
// Paths:
// - Counter non-zero: decrement it, run the suspend check, dispatch.
// - Counter zero: apply the shared-memory filter; a "not hot" verdict rejoins the suspend check,
//   a "hot" verdict calls nterp_hot_method(a0, nullptr, nullptr) and then dispatches.
// Clobbers: t0, a0; additionally a1, a2 and anything nterp_hot_method clobbers on the hot path.
.macro START_EXECUTING_INSTRUCTIONS
ld a0, (sp)
lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0) // t0 := hotness
#if (NTERP_HOTNESS_VALUE != 0)
#error Expected 0 for hotness value
#endif
// If the counter is at zero (hot), handle it in the runtime.
beqz t0, 3f
addi t0, t0, -1 // counter counts down to zero: decrementing it increases hotness
sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
1:
DO_SUSPEND_CHECK continue=2f
2:
FETCH_INST
GET_INST_OPCODE t0
GOTO_OPCODE t0
3:
CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=4f, if_not_hot=1b
4:
mv a1, zero // dex_pc_ptr=nullptr
mv a2, zero // vregs=nullptr
call nterp_hot_method
j 2b
.endm
// 64 bit read
// Loads the 8 bytes that start at vreg slot \vreg. Slots are 4 bytes apart (SH2ADD scales the
// index by 4), so the value spans slots \vreg and \vreg+1.
// Clobbers: \reg
// Safe if \reg == \vreg.
.macro GET_VREG_WIDE reg, vreg
sh2add \reg, \vreg, xFP // vreg addr in register array
ld \reg, (\reg) // reg := fp[vreg](lo) | fp[vreg+1](hi)
.endm
// 64 bit write
// Stores \reg into vreg slots \vreg and \vreg+1 of the register array, and nulls the two
// matching slots of the reference array.
// Clobbers: \z0
.macro SET_VREG_WIDE reg, vreg, z0
sh2add \z0, \vreg, xFP // vreg addr in register array
sd \reg, (\z0) // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
sh2add \z0, \vreg, xREFS // vreg addr in reference array
sd zero, (\z0) // refs[vreg] := null ; refs[vreg+1] := null
.endm
// Object read
// Loads the 32-bit reference in slot \vreg of the reference array, zero-extended (LWU).
// Clobbers: \reg
// Safe if \reg == \vreg.
.macro GET_VREG_OBJECT reg, vreg
sh2add \reg, \vreg, xREFS // vreg addr in reference array
lwu \reg, (\reg) // reg := refs[vreg]
.endm
// Object write
// Stores the low 32 bits of \reg into slot \vreg of BOTH the register array and the
// reference array.
// Clobbers: \z0
.macro SET_VREG_OBJECT reg, vreg, z0
sh2add \z0, \vreg, xFP // vreg addr in register array
sw \reg, (\z0) // fp[vreg] := reg
sh2add \z0, \vreg, xREFS // vreg addr in reference array
sw \reg, (\z0) // refs[vreg] := reg
.endm
// Floating-point 64 bit read
// \reg is a floating-point register; \vreg is an integer register holding the vreg index.
// \vreg is overwritten with the slot address, so the index is lost after this macro.
// Clobbers: \reg, \vreg
.macro GET_VREG_DOUBLE reg, vreg
sh2add \vreg, \vreg, xFP // vreg addr in register array
fld \reg, (\vreg) // reg := fp[vreg](lo) | fp[vreg+1](hi)
.endm
// Floating-point 64 bit write
// Stores floating-point register \reg into vreg slots \vreg and \vreg+1 of the register array,
// and nulls the two matching slots of the reference array. \reg is only read.
// Clobbers: \z0
.macro SET_VREG_DOUBLE reg, vreg, z0
sh2add \z0, \vreg, xFP // vreg addr in register array
fsd \reg, (\z0) // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
sh2add \z0, \vreg, xREFS // vreg addr in reference array
sd zero, (\z0) // refs[vreg] := null ; refs[vreg+1] := null
.endm
// Put "%def" definitions after ".macro" definitions for proper expansion. %def is greedy.
// Typed read, defaults to 32-bit read
// - is_wide or width == 64: 64-bit load via GET_VREG_WIDE.
// - is_unsigned: 32-bit load, zero-extended (LWU).
// - otherwise: 32-bit load, sign-extended (LW).
// Always reads from the register array (xFP), never from the reference array.
// Note: An object ref requires LWU, or LW;ZEXT.W.
// Clobbers: \reg
// Safe if \reg == \vreg.
%def get_vreg(reg, vreg, width=32, is_wide=False, is_unsigned=False):
% if is_wide or width == 64:
GET_VREG_WIDE $reg, $vreg
% elif is_unsigned:
sh2add $reg, $vreg, xFP // vreg addr in register array
lwu $reg, ($reg) // reg := fp[vreg], zext
% else:
sh2add $reg, $vreg, xFP // vreg addr in register array
lw $reg, ($reg) // reg := fp[vreg]
%#:
// Typed write, defaults to 32-bit write.
// - is_wide or width == 64: 64-bit store via SET_VREG_WIDE.
// - otherwise: 32-bit store into the register array, and the matching reference slot is nulled.
// Note: Incorrect for an object ref; it requires 2nd SW into xREFS.
// Clobbers: z0
%def set_vreg(reg, vreg, z0, width=32, is_wide=False):
% if is_wide or width == 64:
SET_VREG_WIDE $reg, $vreg, $z0
% else:
sh2add $z0, $vreg, xFP // vreg addr in register array
sw $reg, ($z0) // fp[vreg] := reg
sh2add $z0, $vreg, xREFS // vreg addr in reference array
sw zero, ($z0) // refs[vreg] := null
%#:
// Floating-point read, defaults to 32-bit read.
// reg is a floating-point register; vreg is an integer register holding the vreg index, and it
// is overwritten with the slot address on both paths.
// Clobbers: reg, vreg
%def get_vreg_float(reg, vreg, is_double=False):
% if is_double:
GET_VREG_DOUBLE $reg, $vreg
% else:
sh2add $vreg, $vreg, xFP // vreg addr in register array
flw $reg, ($vreg) // reg := fp[vreg]
%#:
// Floating-point write, defaults to 32-bit write.
// Stores floating-point register reg into the register array and nulls the matching slot(s)
// of the reference array. reg is only read.
// Clobbers: z0
%def set_vreg_float(reg, vreg, z0, is_double=False):
% if is_double:
SET_VREG_DOUBLE $reg, $vreg, $z0
% else:
sh2add $z0, $vreg, xFP // vreg addr in register array
fsw $reg, ($z0) // fp[vreg] := reg
sh2add $z0, $vreg, xREFS // vreg addr in reference array
sw zero, ($z0) // refs[vreg] := null
%#:
%def entry():
/*
* ArtMethod entry point.
*
* On entry:
* a0 ArtMethod* callee
* a1-a7 method parameters
*
* ExecuteNterpWithClinitImpl inspects the initialization status of the method's declaring
* class and then transfers control, without touching a0-a7, to one of:
* - ExecuteNterpImpl: class visibly initialized; class initialized (after a fence); or class
*   being initialized by the current thread.
* - art_quick_resolution_trampoline: every other case.
* Clobbers: t0, t1, t2
*/
OAT_ENTRY ExecuteNterpWithClinitImpl
#if MIRROR_CLASS_STATUS_SHIFT < 12
#error mirror class status bits cannot use LUI load technique
#endif
.cfi_startproc
// For simplicity, we don't do a read barrier here, but instead rely
// on art_quick_resolution_trampoline to always have a suspend point before
// calling back here.
lwu t0, ART_METHOD_DECLARING_CLASS_OFFSET(a0)
lw t1, MIRROR_CLASS_STATUS_OFFSET(t0) // t1 := status word, sext
lui t2, MIRROR_CLASS_STATUS_VISIBLY_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
// The unsigned comparison works in tandem with the 64-bit sign-extension of
// the status bits at the top of the 32-bit word. The order of the status
// constants (sign extended from LUI) is unchanged with unsigned comparison.
bgeu t1, t2, ExecuteNterpImpl
lui t2, MIRROR_CLASS_STATUS_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
bltu t1, t2, .Linitializing_check
// Status is at least INITIALIZED but below VISIBLY_INITIALIZED: fence, then enter nterp.
fence w, w
j ExecuteNterpImpl
.Linitializing_check:
lui t2, MIRROR_CLASS_STATUS_INITIALIZING << (MIRROR_CLASS_STATUS_SHIFT - 12)
bltu t1, t2, .Lresolution_trampoline
// Status is at least INITIALIZING: enter nterp only if this thread is the one recorded in the
// class's clinit thread id field.
lwu t1, MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET(t0)
lwu t0, THREAD_TID_OFFSET(xSELF)
beq t0, t1, ExecuteNterpImpl
.Lresolution_trampoline:
tail art_quick_resolution_trampoline
.cfi_endproc
// Hidden global label placed right after the code of ExecuteNterpWithClinitImpl.
.type EndExecuteNterpWithClinitImpl, @function
.hidden EndExecuteNterpWithClinitImpl
.global EndExecuteNterpWithClinitImpl
EndExecuteNterpWithClinitImpl:
// ExecuteNterpImpl: builds the nterp frame, copies the incoming arguments into the callee's
// register array (xFP) and reference array (xREFS), then starts dispatching bytecodes.
// Argument copy strategy, tried in this order:
// 1. No ins: nothing to copy.
// 2. Method has the nterp entry-point fast-path flag: all args are references
//    (setup_ref_args_and_go).
// 3. Instance method whose only in is the receiver: store a1 into both arrays.
// 4. Otherwise: obtain the shorty from the runtime and scan it twice, first for the arguments
//    that live in GPRs / caller outs, then for the float and double arguments that live in
//    FPRs / caller outs.
OAT_ENTRY ExecuteNterpImpl
.cfi_startproc
% setup_nterp_frame(cfi_refs="CFI_REFS", refs="xREFS", fp="xFP", pc="xPC", regs="s7", ins="s8", spills_sp="s9", z0="t0", z1="t1", z2="t2", z3="t3", uniq="entry")
// xREFS := callee refs array
// xFP := callee fp array
// xPC := callee dex array
// s7 := refs/fp vreg count
// s8 := ins count
// s9 := post-spills pre-frame sp
// sp := post-frame sp
CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0)
// Fast path: zero args.
beqz s8, .Lentry_go
sub s7, s7, s8 // s7 := a1 index in fp/refs
lwu s10, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
// s10 := method flags
// Fast path: all reference args.
sh2add t0, s7, xFP // t0 := &xFP[a1]
sh2add t1, s7, xREFS // t1 := &xREFS[a1]
BRANCH_IF_BIT_CLEAR t2, s10, ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT, .Lentry_a1
% setup_ref_args_and_go(fp="t0", refs="t1", refs_end="xFP", spills_sp="s9", z0="t2", z1="t3", done=".Lentry_go")
// Fast path: instance with zero args.
.Lentry_a1:
bexti s10, s10, ART_METHOD_IS_STATIC_FLAG_BIT
// s10 := 1 if static, 0 if instance
bnez s10, .Lentry_shorty
// Instance method: a1 is stored to both arrays as a reference.
sw a1, (t0)
sw a1, (t1)
li t2, 1
beq s8, t2, .Lentry_go // ins count == 1: a1 was the only argument
// Slow path: runtime call to obtain shorty, full setup from managed ABI.
.Lentry_shorty:
SPILL_ALL_ARGUMENTS
// TODO: Better way to get shorty
call NterpGetShorty // arg a0
mv s11, a0 // s11 := shorty
RESTORE_ALL_ARGUMENTS
// temporaries are trashed, recompute some values
sh2add t0, s7, xFP // t0 := &xFP[a1]
sh2add t1, s7, xREFS // t1 := &xREFS[a1]
addi t2, s11, 1 // t2 := shorty arg (skip return type)
xori s10, s10, 1 // s10 := 0 if static, 1 if instance
slliw t3, s10, 2 // t3 := (static) 0, (instance) 4: fp/refs/outs byte offset
// constant setup for gpr/fpr shorty comparisons
// s4 and s5 are borrowed here; xIBASE (s5) is loaded at .Lentry_go and xINST (s4) by the
// first instruction fetch, both after the argument copy is finished.
li s0, 'D' // s0 := double char (unused fp)
li s4, 'F' // s4 := float char (unused xINST)
li s5, 'J' // s5 := long char (unused xIBASE)
li s8, 'L' // s8 := ref char (unused ins count)
bnez s10, .Lentry_args // instance a1 already stored into callee's xFP and xREFS
% store_gpr_to_vreg(gpr="a1", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
.Lentry_args:
// linear scan through shorty: extract non-float args
% store_gpr_to_vreg(gpr="a2", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
% store_gpr_to_vreg(gpr="a3", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
% store_gpr_to_vreg(gpr="a4", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
% store_gpr_to_vreg(gpr="a5", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
% store_gpr_to_vreg(gpr="a6", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
% store_gpr_to_vreg(gpr="a7", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
// We drained arg registers, so continue from caller stack's out array. Unlike the reference-only
// fast-path, the continuation offset in the out array can vary, depending on the presence of
// 64-bit values in the arg registers. \offset tracks this value as a byte offset.
addi t5, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
// t5 := (caller) outs array base address
add t4, t3, t0 // t4 := (callee) &FP[next]
add t1, t3, t1 // t1 := (callee) &REFS[next]
add t3, t3, t5 // t3 := (caller) &OUTS[next]
% store_outs_to_vregs(outs="t3", shorty="t2", fp="t4", refs="t1", z0="t5", z1="t6", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
// t0 = &xFP[a1], unclobbered
.Lentry_fargs:
// Second pass over the shorty, restarted from its first argument character.
addi t1, s11, 1 // t1 := shorty arg (skip return type)
slliw t2, s10, 2 // t2 := starting byte offset for fp/outs, static and instance
// linear scan through shorty: extract float args
% store_fpr_to_vreg(fpr="fa0", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa1", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa2", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa3", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa4", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa5", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa6", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
% store_fpr_to_vreg(fpr="fa7", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
addi t3, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
// t3 := (caller) outs array base address
add t0, t2, t0 // t0 := (callee) &FP[next]
add t2, t2, t3 // t2 := (caller) &OUTS[next]
% store_float_outs_to_vregs(outs="t2", shorty="t1", fp="t0", z0="t3", D="s0", F="s4", J="s5", next=".Lentry_go")
.Lentry_go:
// All arguments are in place: load the handler table base and start interpreting.
la xIBASE, artNterpAsmInstructionStart
START_EXECUTING_INSTRUCTIONS
// NOTE: no fallthrough
// cfi info continues, and covers the whole nterp implementation.
SIZE ExecuteNterpImpl
%def footer():
/*
* ===========================================================================
* Common subroutines and data
* ===========================================================================
*/
.text
.align 2
// Enclose all code below in a symbol (which gets printed in backtraces).
NAME_START nterp_helper
// Shared throw stubs. Each one exports the dex PC and then calls a throwing entrypoint.
// NOTE(review): nothing follows each call, so a returning callee would run into the next
// stub - the callees are presumably expected never to return; confirm.
common_errArrayIndex:
EXPORT_PC
// CALL preserves RA for stack walking.
call art_quick_throw_array_bounds // args a0 (index), a1 (length)
common_errDivideByZero:
EXPORT_PC
// CALL preserves RA for stack walking.
call art_quick_throw_div_zero
common_errNullObject:
EXPORT_PC
// CALL preserves RA for stack walking.
call art_quick_throw_null_pointer_exception
// Invoke helpers, one label per invoke kind, each in a plain and a "Range" flavor.
// The body placed at each label is produced by the template call on the following line;
// those templates are defined outside this file section.
NterpInvokeVirtual:
% nterp_invoke_virtual()
NterpInvokeSuper:
% nterp_invoke_super()
NterpInvokeDirect:
% nterp_invoke_direct()
NterpInvokeStringInit:
% nterp_invoke_string_init()
NterpInvokeStatic:
% nterp_invoke_static()
NterpInvokeInterface:
% nterp_invoke_interface()
NterpInvokePolymorphic:
% nterp_invoke_polymorphic()
NterpInvokeCustom:
% nterp_invoke_custom()
NterpInvokeVirtualRange:
% nterp_invoke_virtual_range()
NterpInvokeSuperRange:
% nterp_invoke_super_range()
NterpInvokeDirectRange:
% nterp_invoke_direct_range()
NterpInvokeStringInitRange:
% nterp_invoke_string_init_range()
NterpInvokeStaticRange:
% nterp_invoke_static_range()
NterpInvokeInterfaceRange:
% nterp_invoke_interface_range()
NterpInvokePolymorphicRange:
% nterp_invoke_polymorphic_range()
NterpInvokeCustomRange:
% nterp_invoke_custom_range()
// Arg a0: ArtMethod*
// Reached by tail call from the BRANCH macro when the method's hotness counter is already
// zero on a back edge. Outcomes:
// - Shared-memory filter says "not hot": run the suspend check and keep interpreting at xPC.
// - Otherwise: call nterp_hot_method(a0, xPC, xFP).
//   - Result is zero: keep interpreting at xPC.
//   - Result is non-zero: it is an OsrData*; replace the nterp frame with the OSR frame it
//     describes and jump to the compiled code.
NterpHandleHotnessOverflow:
CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=.Lhotspill_hot, if_not_hot=.Lhotspill_suspend
.Lhotspill_hot:
mv a1, xPC
mv a2, xFP
call nterp_hot_method // args a0, a1, a2
bnez a0, .Lhotspill_osr
.Lhotspill_advance:
FETCH_INST
GET_INST_OPCODE t0
GOTO_OPCODE t0
.Lhotspill_osr:
// a0 = OsrData*
// Drop most of the current nterp frame, but keep the callee-saves.
// The nterp callee-saves (count and layout) match the OSR frame's callee-saves.
ld sp, -8(xREFS) // caller's interpreted frame pointer
.cfi_def_cfa sp, NTERP_SIZE_SAVE_CALLEE_SAVES
lwu t0, OSR_DATA_FRAME_SIZE(a0)
addi t0, t0, -NTERP_SIZE_SAVE_CALLEE_SAVES // t0 := osr frame - callee saves, in bytes
mv s7, sp // Remember CFA in a callee-save register.
.cfi_def_cfa_register s7
sub sp, sp, t0 // OSR size guaranteed to be stack aligned (16 bytes).
addi t1, a0, OSR_DATA_MEMORY // t1 := read start
add t1, t1, t0 // t1 := read end (exclusive)
mv t2, s7 // t2 := write end (exclusive)
// t0 >= 8 (OSR places ArtMethod* at bottom of frame), so loop will terminate.
// Copy t0 bytes from the OsrData memory to [sp, s7), 8 bytes at a time, from the highest
// address down to sp.
.Lhotspill_osr_copy_loop:
addi t1, t1, -8
ld t3, (t1)
addi t2, t2, -8
sd t3, (t2)
bne t2, sp, .Lhotspill_osr_copy_loop
// The native PC must be read before the OsrData is freed; s8 is callee-saved, so it survives
// the call.
ld s8, OSR_DATA_NATIVE_PC(a0) // s8 := native PC; jump after free
call free // arg a0; release OsrData*
jr s8 // Jump to the compiled code.
.Lhotspill_suspend:
DO_SUSPEND_CHECK continue=.Lhotspill_advance
j .Lhotspill_advance // reached only after the suspend call returns
// This is the logical end of ExecuteNterpImpl, where the frame info applies.
.cfi_endproc
// Nterp-to-nterp call helpers. They are emitted after .cfi_endproc, so the CFI region opened
// in ExecuteNterpImpl does not cover them. The body placed at each label is produced by the
// template call on the following line; those templates are defined outside this file section.
NterpToNterpInstance:
% nterp_to_nterp_instance()
NterpToNterpStringInit:
% nterp_to_nterp_string_init()
NterpToNterpStatic:
% nterp_to_nterp_static()
NterpToNterpInstanceRange:
% nterp_to_nterp_instance_range()
NterpToNterpStringInitRange:
% nterp_to_nterp_string_init_range()
NterpToNterpStaticRange:
% nterp_to_nterp_static_range()
// Closes the nterp_helper symbol opened at the top of the footer.
NAME_END nterp_helper
// EndExecuteNterpImpl includes the methods after .cfi_endproc, as we want the runtime to see them
// as part of the Nterp PCs. This label marks the end of PCs contained by the OatQuickMethodHeader
// created for the interpreter entry point.
.type EndExecuteNterpImpl, @function
.hidden EndExecuteNterpImpl
.global EndExecuteNterpImpl
EndExecuteNterpImpl:
// Entrypoints into runtime.
// Each line instantiates NTERP_TRAMPOLINE: the first name is the assembly symbol defined, the
// second is the helper it calls. These are placed after EndExecuteNterpImpl, i.e. outside the
// PC range described by the comment on that label.
NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
NTERP_TRAMPOLINE nterp_get_static_field, NterpGetStaticField
NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
// Branch target used by every NTERP_TRAMPOLINE instance when the thread's exception field is
// non-null after the helper returns.
ENTRY nterp_deliver_pending_exception
DELIVER_PENDING_EXCEPTION
END nterp_deliver_pending_exception
// gen_mterp.py will inline the following definitions
// within [ExecuteNterpImpl, EndExecuteNterpImpl).
%def instruction_start():
// artNterpAsmInstructionStart is the value loaded into xIBASE at .Lentry_go. GOTO_OPCODE
// jumps to xIBASE + (opcode << handler_size_bits), so the symbol has to be the address of the
// handler for opcode 0; it is defined as an alias of .L_op_nop.
.type artNterpAsmInstructionStart, @function
.hidden artNterpAsmInstructionStart
.global artNterpAsmInstructionStart
artNterpAsmInstructionStart = .L_op_nop
.text
%def instruction_end():
.type artNterpAsmInstructionEnd, @function
.hidden artNterpAsmInstructionEnd
.global artNterpAsmInstructionEnd
artNterpAsmInstructionEnd:
// artNterpAsmInstructionEnd is used as landing pad for exception handling.
// xPC (S3) for the exception handler was set just prior to the long jump coming here.
// Resume interpretation by dispatching the instruction at xPC. Clobbers t0.
FETCH_INST
GET_INST_OPCODE t0
GOTO_OPCODE t0
// Generator hooks defined below (presumably invoked by gen_mterp.py around each opcode
// handler and slow path - confirm against the generator):
// - opcode_pre: emits nothing.
// - opcode_name_prefix: returns the symbol prefix "nterp_".
// - opcode_start / opcode_end: wrap a handler in NAME_START / NAME_END nterp_<opcode>.
// - opcode_slow_path_start / opcode_slow_path_end: wrap a slow path in NAME_START / NAME_END
//   using the given name.
%def opcode_pre():
% pass
%def opcode_name_prefix():
% return "nterp_"
%def opcode_start():
NAME_START nterp_${opcode}
%def opcode_end():
NAME_END nterp_${opcode}
%def opcode_slow_path_start(name):
NAME_START ${name}
%def opcode_slow_path_end(name):
NAME_END ${name}