riscv64: ExecuteNterpImpl basic implementation
Minimal asm to start HelloWorld until we hit the first opcode
(nterp_op_const_4) with SIGILL.
This CL skips argument processing, compact dex, etc.
Note that Nterp now claims x18 (s2) for regular use,
since the shadow stack register is now assigned to x3 (gp);
see https://android-review.git.corp.google.com/c/platform/art/+/2535780
Test: Run ART test 001-HelloWorld on a Linux RISC-V VM:
lunch aosp_riscv64-userdebug
export ART_TEST_SSH_USER=ubuntu
export ART_TEST_SSH_HOST=localhost
export ART_TEST_SSH_PORT=10001
export ART_TEST_ON_VM=true
. art/tools/buildbot-utils.sh
art/tools/buildbot-build.sh --target
# Create, boot and configure the VM.
art/tools/buildbot-vm.sh create
art/tools/buildbot-vm.sh boot
art/tools/buildbot-vm.sh setup-ssh # password: 'ubuntu'
art/tools/buildbot-cleanup-device.sh
art/tools/buildbot-setup-device.sh
art/tools/buildbot-sync.sh
# Test switch interpreter
art/test.py --target -r --no-prebuild --ndebug --no-image \
--64 --interpreter 001-HelloWorld
# PASS
# Test Nterp (first revert 2547153 in local build)
art/test.py --target -r --no-prebuild --ndebug --no-image \
--64 001-HelloWorld
# SIGILL in nterp_op_const_4
Change-Id: I0d5c6bc83af92f299d22477494332fff6c76197c
diff --git a/runtime/arch/riscv64/asm_support_riscv64.S b/runtime/arch/riscv64/asm_support_riscv64.S
index 71c9928..2c02581 100644
--- a/runtime/arch/riscv64/asm_support_riscv64.S
+++ b/runtime/arch/riscv64/asm_support_riscv64.S
@@ -60,6 +60,23 @@
.endm
+.macro CFI_DEF_CFA_BREG_PLUS_UCONST reg, offset, size
+ .if (((\offset) < -0x40) || ((\offset) >= 0x40)) // Accept only [-0x40, 0x40), the one-byte SLEB128 range.
+ .error "Unsupported offset"
+ .endif
+ // Validate \size, then dispatch to the helper variant selected by its magnitude.
+ .if ((\size) < 0)
+ .error "Unsupported size, negative"
+ .elseif ((\size) < 0x80) // [0, 0x80): one-byte ULEB128 range, handled by the _1_1 helper.
+ CFI_DEF_CFA_BREG_PLUS_UCONST_1_1(\reg, \offset, \size)
+ .elseif ((\size) < 0x4000) // [0x80, 0x4000): two-byte ULEB128 range, handled by the _1_2 helper.
+ CFI_DEF_CFA_BREG_PLUS_UCONST_1_2(\reg, \offset, \size)
+ .else
+ .error "Unsupported size, too large"
+ .endif
+.endm
+
+
.macro INCREASE_FRAME frame_adjustment
addi sp, sp, -(\frame_adjustment)
.cfi_adjust_cfa_offset (\frame_adjustment)
@@ -277,9 +294,9 @@
.endm
-.macro SETUP_SAVE_EVERYTHING_FRAME
+.macro SETUP_SAVE_EVERYTHING_FRAME offset
#if (FRAME_SIZE_SAVE_EVERYTHING != 8*(1 + 32 + 27))
-#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
+#error "FRAME_SIZE_SAVE_EVERYTHING(RISCV64) size not as expected."
#endif
INCREASE_FRAME FRAME_SIZE_SAVE_EVERYTHING
@@ -297,7 +314,7 @@
SAVE_FPR fs0, 8*9 // f8
SAVE_FPR fs1, 8*10 // f9
#define SAVE_EVERYTHING_FRAME_OFFSET_FA0 (8*11)
- SAVE_FPR fa0, 8*11 // f10, offset must equal SAVE_EVERYTHING_FRAME_OFFSET_FA0
+ SAVE_FPR fa0, 8*11 // f10, its offset must equal SAVE_EVERYTHING_FRAME_OFFSET_FA0
SAVE_FPR fa1, 8*12 // f11
SAVE_FPR fa2, 8*13 // f12
SAVE_FPR fa3, 8*14 // f13
@@ -326,7 +343,7 @@
SAVE_GPR t2, 8*35 // x7
SAVE_GPR s0, 8*36 // x8
#define SAVE_EVERYTHING_FRAME_OFFSET_A0 (8*37)
- SAVE_GPR a0, 8*37 // x10, offset must equal SAVE_EVERYTHING_FRAME_OFFSET_A0
+ SAVE_GPR a0, 8*37 // x10, its offset must equal SAVE_EVERYTHING_FRAME_OFFSET_A0
SAVE_GPR a1, 8*38 // x11
SAVE_GPR a2, 8*39 // x12
SAVE_GPR a3, 8*40 // x13
@@ -351,7 +368,7 @@
SAVE_GPR ra, 8*59 // x1, return address
- SETUP_CALLEE_SAVE_FRAME_COMMON t0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
+ SETUP_CALLEE_SAVE_FRAME_COMMON t0, \offset
.endm
@@ -432,6 +449,46 @@
.endm
+// Builds a frame compatible with Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
+.macro SETUP_SAVE_REFS_ONLY_FRAME
+ // 11 callee-save regs, plus ArtMethod*. NOTE(review): ra is not saved in this frame; confirm.
+ INCREASE_FRAME FRAME_SIZE_SAVE_REFS_ONLY
+
+ // stack slot (8*0)(sp) is for ArtMethod*; s1 (xSELF) is deliberately not part of the list below.
+ SAVE_GPR s0, (8*1) // x8
+ SAVE_GPR s2, (8*2) // x18
+ SAVE_GPR s3, (8*3) // x19
+ SAVE_GPR s4, (8*4) // x20
+ SAVE_GPR s5, (8*5) // x21
+ SAVE_GPR s6, (8*6) // x22
+ SAVE_GPR s7, (8*7) // x23
+ SAVE_GPR s8, (8*8) // x24
+ SAVE_GPR s9, (8*9) // x25
+ SAVE_GPR s10, (8*10) // x26
+ SAVE_GPR s11, (8*11) // x27
+ // Presumably fills the ArtMethod* slot with the kSaveRefsOnly runtime method; t0 is scratch.
+ SETUP_CALLEE_SAVE_FRAME_COMMON t0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET
+.endm
+
+
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
+ // Reload the GPRs from the slots used by SETUP_SAVE_REFS_ONLY_FRAME; (8*0)(sp) is ArtMethod*.
+ RESTORE_GPR s0, (8*1) // x8
+ RESTORE_GPR s2, (8*2) // x18
+ RESTORE_GPR s3, (8*3) // x19
+ RESTORE_GPR s4, (8*4) // x20
+ RESTORE_GPR s5, (8*5) // x21
+ RESTORE_GPR s6, (8*6) // x22
+ RESTORE_GPR s7, (8*7) // x23
+ RESTORE_GPR s8, (8*8) // x24
+ RESTORE_GPR s9, (8*9) // x25
+ RESTORE_GPR s10, (8*10) // x26
+ RESTORE_GPR s11, (8*11) // x27
+ // Pop the whole frame, including the ArtMethod* slot.
+ DECREASE_FRAME FRAME_SIZE_SAVE_REFS_ONLY
+.endm
+
+
// Macro that calls through to artDeliverPendingExceptionFromCode, where the pending exception is
// Thread::Current()->exception_ when the runtime method frame is ready.
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
diff --git a/runtime/arch/riscv64/asm_support_riscv64.h b/runtime/arch/riscv64/asm_support_riscv64.h
index d4c90f6..5510c08 100644
--- a/runtime/arch/riscv64/asm_support_riscv64.h
+++ b/runtime/arch/riscv64/asm_support_riscv64.h
@@ -20,6 +20,8 @@
#include "asm_support.h"
#include "entrypoints/entrypoint_asm_constants.h"
+// S0, S2 - S11, ArtMethod*, total 8*(11 + 1) = 96 (no RA slot and no padding in this frame)
+#define FRAME_SIZE_SAVE_REFS_ONLY 96
// FS0 - FS11, S0, S2 - S11, RA, ArtMethod* and padding, total 8*(12 + 11 + 1 + 1 + 1) = 208
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 208
// FA0 - FA7, A1 - A7, S0, S2 - S11, RA and ArtMethod*, total 8*(8 + 7 + 11 + 1 + 1) = 224
diff --git a/runtime/arch/riscv64/quick_entrypoints_riscv64.S b/runtime/arch/riscv64/quick_entrypoints_riscv64.S
index ef9a043..19fa3c3 100644
--- a/runtime/arch/riscv64/quick_entrypoints_riscv64.S
+++ b/runtime/arch/riscv64/quick_entrypoints_riscv64.S
@@ -411,7 +411,8 @@
.extern artMethodExitHook
ENTRY art_quick_method_exit_hook
- SETUP_SAVE_EVERYTHING_FRAME
+ SETUP_SAVE_EVERYTHING_FRAME \
+ RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
addi a3, sp, SAVE_EVERYTHING_FRAME_OFFSET_FA0 // FP result ptr in kSaveEverything frame
addi a2, sp, SAVE_EVERYTHING_FRAME_OFFSET_A0 // integer result ptr in kSaveEverything frame
@@ -582,6 +583,16 @@
END art_quick_resolution_trampoline
+ENTRY art_quick_test_suspend
+ SETUP_SAVE_EVERYTHING_FRAME \
+ RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET
+ mv a0, xSELF // Thread* self is the only argument.
+ call artTestSuspendFromCode
+ RESTORE_SAVE_EVERYTHING_FRAME
+ ret
+END art_quick_test_suspend
+
+
UNDEFINED art_quick_imt_conflict_trampoline
UNDEFINED art_quick_deoptimize_from_compiled_code
UNDEFINED art_quick_string_builder_append
@@ -776,7 +787,6 @@
UNDEFINED art_quick_invoke_virtual_trampoline_with_access_check
UNDEFINED art_quick_invoke_polymorphic
UNDEFINED art_quick_invoke_custom
-UNDEFINED art_quick_test_suspend
UNDEFINED art_quick_deliver_exception
UNDEFINED art_quick_throw_array_bounds
UNDEFINED art_quick_throw_div_zero
diff --git a/runtime/interpreter/mterp/riscv64/main.S b/runtime/interpreter/mterp/riscv64/main.S
index fa317de..9881d1d 100644
--- a/runtime/interpreter/mterp/riscv64/main.S
+++ b/runtime/interpreter/mterp/riscv64/main.S
@@ -46,14 +46,22 @@
* - callee saved: fs0-fs11
*/
+// Android references
+// Bytecodes: https://source.android.com/docs/core/runtime/dalvik-bytecode
+// Instruction formats: https://source.android.com/docs/core/runtime/instruction-formats
+
// Fixed register usages in Nterp.
// nickname ABI reg purpose
#define xSELF s1 // x9, Thread* self pointer
-#define xFP s3 // x19, interpreted frame pointer: to access locals and args
-#define xPC s4 // x20, interpreted program counter: to fetch instructions
-#define xINST s5 // x21, first 16-bit code unit of current instruction
-#define xIBASE s6 // x22, interpreted instruction base pointer: for computed goto
-#define xREFS s7 // x23, base of object references of dex registers
+#define xFP s2 // x18, interpreted frame pointer: to access locals and args
+#define xPC s3 // x19, interpreted program counter: to fetch instructions
+#define xINST s4 // x20, first 16-bit code unit of current instruction
+#define xIBASE s5 // x21, interpreted instruction base pointer: for computed goto
+#define xREFS s6 // x22, base of object references of dex registers
+
+#define CFI_TMP 10 // DWARF register number for a0/x10, passed as tmpReg when defining the dex pc CFI
+#define CFI_DEX 19 // DWARF register number for xPC /s3/x19
+#define CFI_REFS 22 // DWARF register number for xREFS/s6/x22, passed to SETUP_STACK_FRAME
// An assembly entry that has a OatQuickMethodHeader prefix.
.macro OAT_ENTRY name, end
@@ -77,7 +85,7 @@
.type \name, @function
.hidden \name // Hide this as a global symbol, so we do not incur plt calls.
.global \name
- /* XXX Cache alignment for function entry */
+ /* Cache alignment for function entry */
.balign 16
\name:
.endm
@@ -86,26 +94,221 @@
SIZE \name
.endm
+// Macro for defining entrypoints into runtime. We don't need to save registers (we're not holding
+// references there), but there is no kDontSave runtime method. So just use the kSaveRefsOnly
+// runtime method. NOTE(review): that frame has no ra slot, yet the call below overwrites ra.
+.macro NTERP_TRAMPOLINE name, helper
+ENTRY \name
+ SETUP_SAVE_REFS_ONLY_FRAME
+ call \helper // Far-call form: the helper may be out of the +-1 MiB reach of jal.
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ ld t0, THREAD_EXCEPTION_OFFSET(xSELF) // Pending exception is a pointer: test all 64 bits.
+ bnez t0, nterp_deliver_pending_exception
+ ret
+END \name
+.endm
+
+// Unpack the register, out and in counts from a dex code item.
+// Input: \code_item (bit 0 set marks a compact dex code item)
+// Output:
+// - \registers: register count
+// - \outs: out count
+// - \ins: in count
+// - \code_item: holds instruction array on exit
+// Clobbers: t0. Output registers must not alias \code_item, which every load below still reads.
+.macro FETCH_CODE_ITEM_INFO code_item, registers, outs, ins
+ // Check LSB of \code_item. If 1, it's a compact dex file.
+ andi t0, \code_item, 0x1
+ beqz t0, 1f // Regular dex.
+ unimp // Compact dex: unimplemented, so trap on the illegal instruction.
+1:
+ // Unpack values from regular dex format; each count is a zero-extended 16-bit field.
+ lhu \registers, CODE_ITEM_REGISTERS_SIZE_OFFSET(\code_item)
+ lhu \outs, CODE_ITEM_OUTS_SIZE_OFFSET(\code_item)
+ lhu \ins, CODE_ITEM_INS_SIZE_OFFSET(\code_item)
+ addi \code_item, \code_item, CODE_ITEM_INSNS_OFFSET // Advance to the instruction array.
+.endm
+
+// Set up the stack to start executing the method.
+// See runtime/nterp_helpers.cc for a diagram of the setup.
+// Input:
+// - a0: ArtMethod*
+// - sp
+// - \code_item: CodeItem*
+// - \cfi_refs: DWARF register number for \refs
+// Output:
+// - \code_item: pointer to instruction array `insns_*` on exit
+// - \refs: pointer to obj reference array
+// - \fp: pointer to dex register array
+// - \regs: count of dex registers
+// - \ins: count of in-registers
+// - \old_sp: old stack pointer
+// - sp modified
+// - the old stack pointer is also stored, as a full 64-bit value, at -8(\refs)
+// Clobbers: t0, t1
+.macro SETUP_STACK_FRAME code_item, cfi_refs, refs, fp, regs, ins, old_sp
+ FETCH_CODE_ITEM_INFO \code_item, \regs, /*out count*/ t1, \ins
+
+ // Compute required frame size: ((2 * \regs) + t1) * 4 + 24
+ // - The register array and reference array are each \regs in length.
+ // - The out array is t1 in length.
+ // - Each register is 4 bytes.
+ // - Additional 24 bytes for 3 fields: saved frame pointer, dex pc, and ArtMethod*.
+ sll t0, \regs, 1
+ add t0, t0, t1
+ sll t0, t0, 2
+ add t0, t0, 24
+
+ // Compute new stack pointer in t0.
+ sub t0, sp, t0
+ // 16-byte alignment.
+ andi t0, t0, ~0xF
+
+ // Set \refs to base of reference array. Align to pointer size for the frame pointer and dex pc
+ // pointer, below the reference array.
+ sll t1, t1, 2 // 4 bytes per entry.
+ add t1, t0, t1
+ add t1, t1, 28 // 24 bytes from 3 fields mentioned earlier, plus 4 for alignment slack.
+ andi \refs, t1, -__SIZEOF_POINTER__
+
+ // Set \fp to base of register array, above the reference array. This region is already aligned.
+ sll t1, \regs, 2
+ add \fp, \refs, t1
+
+ // Set up the stack pointer.
+ mv \old_sp, sp
+ .cfi_def_cfa_register \old_sp
+ mv sp, t0
+ sd \old_sp, -8(\refs) // Store all 64 bits of the caller sp; a 32-bit store would truncate it.
+ CFI_DEF_CFA_BREG_PLUS_UCONST \cfi_refs, -8, FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+
+ // Put nulls in reference array.
+ beqz \regs, 2f
+ mv t1, \refs // t1 as iterator
+1:
+ // Write in 8-byte increments, so vreg(0) gets zero'ed too, if \regs is odd.
+ sd zero, (t1)
+ addi t1, t1, 8
+ bltu t1, \fp, 1b
+2:
+ // Save the ArtMethod*.
+ sd a0, (sp)
+.endm
+
+.macro EXPORT_PC
+ sd xPC, -16(xREFS) // Store the dex pc pointer in the slot 16 bytes below the reference array.
+.endm
+
+.macro DO_SUSPEND_CHECK continue
+ lw t0, THREAD_FLAGS_OFFSET(xSELF) // Load the thread flags word.
+ andi t0, t0, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
+ beqz t0, \continue // No suspend or checkpoint request: skip the runtime call.
+ EXPORT_PC // Publish the dex pc before entering the runtime.
+ call art_quick_test_suspend // Far-call form: the target is defined in another object file.
+.endm
+
+// Fetch the 16-bit code unit at xPC into xINST.
+// Does not advance xPC.
+.macro FETCH_INST
+ lhu xINST, (xPC) // lhu zero-extends: zero in upper 48 bits
+.endm
+
+// Extract the low byte of xINST (the opcode) into \reg. Writes: \reg
+.macro GET_INST_OPCODE reg
+ and \reg, xINST, 0xFF
+.endm
+
+// Jump to xIBASE + (\reg << handler_size_bits), with the opcode in \reg. Clobbers: \reg
+.macro GOTO_OPCODE reg
+ slliw \reg, \reg, ${handler_size_bits} // Scale the opcode to a byte offset from xIBASE.
+ add \reg, xIBASE, \reg
+ jr \reg
+.endm
+
+// Branch to \if_hot unless the method is memory-shared and the per-thread shared hotness counter
+// is non-zero; in that case decrement that counter and jump to \if_not_hot. Never falls through.
+// Inputs: a0 (ArtMethod*), xSELF
+// Clobbers: t0
+.macro CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot, if_not_hot
+ lw t0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
+ // Send flag bit to MSB, branch if bit is unset.
+ sll t0, t0, 63 - ART_METHOD_IS_MEMORY_SHARED_FLAG_BIT
+ bgez t0, \if_hot
+ // Memory-shared method: consult the per-thread counter instead.
+ lw t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)
+ beqz t0, \if_hot
+ // Counter still positive: consume one unit and treat the method as not hot.
+ addi t0, t0, -1 // Reduce hotness
+ sw t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)
+ j \if_not_hot
+.endm
+
+// Count the method hotness counter down towards zero, suspend-check, then dispatch the first opcode.
+// Clobbers: a0, t0
+.macro START_EXECUTING_INSTRUCTIONS
+ ld a0, (sp) // Reload the ArtMethod* from the bottom of the frame.
+ lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
+#if (NTERP_HOTNESS_VALUE != 0)
+#error Expected 0 for hotness value
+#endif
+ // If the counter is at zero, handle it in the runtime.
+ beqz t0, 3f
+ addi t0, t0, -1
+ sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
+1:
+ DO_SUSPEND_CHECK continue=2f
+2:
+ FETCH_INST
+ GET_INST_OPCODE t0
+ GOTO_OPCODE t0 // Does not return: control continues in the opcode handler.
+3:
+ CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=4f, if_not_hot=1b
+4:
+ mv a1, zero // dex_pc_ptr=nullptr
+ mv a2, zero // vregs=nullptr
+ jal nterp_hot_method // a0 still holds the ArtMethod* loaded above.
+ j 2b
+.endm
+
%def entry():
/*
* ArtMethod entry point.
*
* On entry:
- * XXX ArtMethod* callee
- * rest method parameters
+ * a0 ArtMethod* callee
+ * a1-a7 method parameters
*/
OAT_ENTRY ExecuteNterpWithClinitImpl, EndExecuteNterpWithClinitImpl
- // For simplicity, we don't do a read barrier here, but instead rely
- // on art_quick_resolution_trampoline to always have a suspend point before
- // calling back here.
unimp
EndExecuteNterpWithClinitImpl:
OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl
.cfi_startproc
+
+ // Check guard page for stack overflow: probe sp - STACK_OVERFLOW_RESERVED_BYTES with a load.
+ li t0, -STACK_OVERFLOW_RESERVED_BYTES
+ add t0, t0, sp
+ ld zero, (t0) // Result is discarded; only the memory access matters.
+
+ INCREASE_FRAME FRAME_SIZE_SAVE_ALL_CALLEE_SAVES
+ SAVE_ALL_CALLEE_SAVES
+
+ ld xPC, ART_METHOD_DATA_OFFSET_64(a0) // xPC holds the CodeItem* until SETUP_STACK_FRAME advances it.
+ SETUP_STACK_FRAME xPC, CFI_REFS, xREFS, xFP, /*reg count*/ s7, /*in count*/s8, /*old sp*/s9
+
+ beqz s8, .Lsetup_execution // no args; a non-zero in count hits the unimp below
unimp
+.Lsetup_execution:
+ CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0)
+
+ la xIBASE, artNterpAsmInstructionStart
+ START_EXECUTING_INSTRUCTIONS
+ // NOTE: no fallthrough, START_EXECUTING_INSTRUCTIONS ends in jumps on every path.
+ // cfi info continues, and covers the whole nterp implementation.
+ SIZE ExecuteNterpImpl
+
%def fetch_from_thread_cache(dest_reg, miss_label):
%def footer():
@@ -122,18 +325,24 @@
// Enclose all code below in a symbol (which gets printed in backtraces).
NAME_START nterp_helper
// This is the logical end of ExecuteNterpImpl, where the frame info applies.
-// EndExecuteNterpImpl includes the methods below as we want the runtime to
-// see them as part of the Nterp PCs.
.cfi_endproc
NAME_END nterp_helper
-// This is the end of PCs contained by the OatQuickMethodHeader created for the interpreter
-// entry point.
+// EndExecuteNterpImpl includes the methods after .cfi_endproc, as we want the runtime to see them
+// as part of the Nterp PCs. This label marks the end of PCs contained by the OatQuickMethodHeader
+// created for the interpreter entry point.
.type EndExecuteNterpImpl, @function
.hidden EndExecuteNterpImpl
.global EndExecuteNterpImpl
EndExecuteNterpImpl:
+// Entrypoints into runtime: nterp_hot_method is a trampoline around the NterpHotMethod helper.
+NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
+// Branch target used by NTERP_TRAMPOLINE when a pending exception is found after the helper call.
+ENTRY nterp_deliver_pending_exception
+ DELIVER_PENDING_EXCEPTION
+END nterp_deliver_pending_exception
+
// gen_mterp.py will inline the following definitions
// within [ExecuteNterpImpl, EndExecuteNterpImpl).
%def instruction_start():