Add an arm32 port of nterp.
The port currently uses ARM (A32) code; once this CL lands, we will try
Thumb and compare performance.
Test: test.py, run-libcore-tests, device boots
Bug: 112676029
Change-Id: I6535e2982a3ceed83eba6664fc8ba8609974bc08
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 393c2cb..9792442 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -282,8 +282,9 @@
arm: {
srcs: [
"interpreter/mterp/mterp.cc",
- "interpreter/mterp/nterp_stub.cc",
+ "interpreter/mterp/nterp.cc",
":libart_mterp.arm",
+ ":libart_mterp.armng",
"arch/arm/context_arm.cc",
"arch/arm/entrypoints_init_arm.cc",
"arch/arm/instruction_set_features_assembly_tests.S",
@@ -843,3 +844,16 @@
],
cmd: "$(location interpreter/mterp/gen_mterp.py) $(out) $(in)",
}
+
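+// Generate the nterp arm32 interpreter from the armng/*.S templates, mirroring
+// the libart_mterp.arm rule above.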
+genrule {
+ name: "libart_mterp.armng",
+ out: ["mterp_armng.S"],
+ srcs: [
+ "interpreter/mterp/armng/*.S",
+ ],
+ tool_files: [
+ "interpreter/mterp/gen_mterp.py",
+ "interpreter/mterp/common/gen_setup.py",
+ ],
+ cmd: "$(location interpreter/mterp/gen_mterp.py) $(out) $(in)",
+}
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index 7ffdf18..dd48d1d 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -36,6 +36,7 @@
.syntax unified
.arch armv7-a
+.arch_extension idiv
.thumb
.macro CFI_EXPRESSION_BREG n, b, offset
@@ -48,6 +49,22 @@
.endif
.endm
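+// The CFI_DEF_CFA_BREG_PLUS_UCONST_1_<n> helpers are assumed to emit a CFA
+// expression of the form "breg <reg> + offset" with a one-byte SLEB128 offset
+// (hence the [-0x40, 0x40) check below) and an n-byte ULEB128 size, which is
+// why the size bounds are 0x80 and 0x4000.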
+.macro CFI_DEF_CFA_BREG_PLUS_UCONST reg, offset, size
+ .if ((\size) < 0)
+ .error "Size should be positive"
+ .endif
+ .if (((\offset) < -0x40) || ((\offset) >= 0x40))
+ .error "Unsupported offset"
+ .endif
+ .if ((\size) < 0x80)
+ CFI_DEF_CFA_BREG_PLUS_UCONST_1_1(\reg, \offset, \size)
+ .elseif ((\size) < 0x4000)
+ CFI_DEF_CFA_BREG_PLUS_UCONST_1_2(\reg, \offset, \size)
+ .else
+ .error "Unsupported size"
+ .endif
+.endm
+
// Macro to generate the value of Runtime::Current into rDest. As it uses labels
// then the labels need to be unique. We bind these to the function name in the ENTRY macros.
.macro RUNTIME_CURRENT name, num, rDest
@@ -290,4 +307,73 @@
DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm
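+// Note: the _REG variant below uses cbnz, which is Thumb-only and needs a low
+// register (r0-r7); the generic variant uses cmp/bne so it can also test ip.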
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
+ ldr \reg, [rSELF, #THREAD_EXCEPTION_OFFSET] @ Get exception field.
+ cbnz \reg, 1f
+ bx lr
+1:
+ DELIVER_PENDING_EXCEPTION
+.endm
+
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
+ RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
+.endm
+
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
+ ldr ip, [rSELF, #THREAD_EXCEPTION_OFFSET] @ Get exception field.
+ cmp ip, #0
+ bne 1f
+ bx lr
+1:
+ DELIVER_PENDING_EXCEPTION
+.endm
+
+ /*
+ * Macro that sets up the callee save frame to conform with
+ * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
+ */
+.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
+ // Note: We could avoid saving R8 in the case of Baker read
+ // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
+ // later; but it's not worth handling this special case.
+ push {r5-r8, r10-r11, lr} @ 7 words of callee saves
+ .cfi_adjust_cfa_offset 28
+ .cfi_rel_offset r5, 0
+ .cfi_rel_offset r6, 4
+ .cfi_rel_offset r7, 8
+ .cfi_rel_offset r8, 12
+ .cfi_rel_offset r10, 16
+ .cfi_rel_offset r11, 20
+ .cfi_rel_offset lr, 24
+ sub sp, #4 @ bottom word will hold Method*
+ .cfi_adjust_cfa_offset 4
+ RUNTIME_CURRENT2 \rTemp @ Load Runtime::Current into rTemp.
+ @ Load kSaveRefsOnly Method* into rTemp.
+ ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
+ str \rTemp, [sp, #0] @ Place Method* at bottom of stack.
+ str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
+
+ // Ugly compile-time check, but we only have the preprocessor.
+#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
+#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
+#endif
+.endm
+
+.macro RESTORE_SAVE_REFS_ONLY_FRAME
+ add sp, #4 @ bottom word holds Method*
+ .cfi_adjust_cfa_offset -4
+ // Note: Likewise, we could avoid restoring R8 in the case of Baker
+ // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
+ // later; but it's not worth handling this special case.
+ pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+ .cfi_restore r8
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore lr
+ .cfi_adjust_cfa_offset -28
+.endm
+
#endif // ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
diff --git a/runtime/arch/arm/context_arm.h b/runtime/arch/arm/context_arm.h
index 845cdaa..006939c 100644
--- a/runtime/arch/arm/context_arm.h
+++ b/runtime/arch/arm/context_arm.h
@@ -46,6 +46,10 @@
SetGPR(PC, new_pc);
}
+ void SetNterpDexPC(uintptr_t dex_pc_ptr) override {
+ SetGPR(R11, dex_pc_ptr);
+ }
+
void SetArg0(uintptr_t new_arg0_value) override {
SetGPR(R0, new_arg0_value);
}
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 4e7d64c..7bd402f 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -107,14 +107,17 @@
struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
- uint32_t instr_size = GetInstructionSize(ptr);
- uintptr_t gc_map_location = (sc->arm_pc + instr_size) | 1;
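+ // Bit 5 of the CPSR is the Thumb (T) execution state bit. With nterp currently
+ // assembled as ARM (A32) code, the faulting code may be in either state, so
+ // derive the instruction size and the return-address Thumb bit from it.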
+ bool in_thumb_mode = sc->arm_cpsr & (1 << 5);
+ uint32_t instr_size = in_thumb_mode ? GetInstructionSize(ptr) : 4;
+ uintptr_t gc_map_location = (sc->arm_pc + instr_size) | (in_thumb_mode ? 1 : 0);
// Push the gc map location to the stack and pass the fault address in LR.
sc->arm_sp -= sizeof(uintptr_t);
*reinterpret_cast<uintptr_t*>(sc->arm_sp) = gc_map_location;
sc->arm_lr = reinterpret_cast<uintptr_t>(info->si_addr);
sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception_from_signal);
+ // Make sure the thumb bit is set as the handler is in thumb mode.
+ sc->arm_cpsr = sc->arm_cpsr | (1 << 5);
// Pass the faulting address as the first argument of
// art_quick_throw_null_pointer_exception_from_signal.
VLOG(signals) << "Generating null pointer exception";
@@ -231,6 +234,9 @@
// the function to which this handler returns (art_quick_throw_stack_overflow).
sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+ // Make sure the thumb bit is set as the handler is in thumb mode.
+ sc->arm_cpsr = sc->arm_cpsr | (1 << 5);
+
// The kernel will now return to the address in sc->arm_pc.
return true;
}
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index fae2b97..964f2ae 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -24,54 +24,6 @@
/* Deliver an exception pending on a thread */
.extern artDeliverPendingException
- /*
- * Macro that sets up the callee save frame to conform with
- * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
- */
-.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
- // Note: We could avoid saving R8 in the case of Baker read
- // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
- // later; but it's not worth handling this special case.
- push {r5-r8, r10-r11, lr} @ 7 words of callee saves
- .cfi_adjust_cfa_offset 28
- .cfi_rel_offset r5, 0
- .cfi_rel_offset r6, 4
- .cfi_rel_offset r7, 8
- .cfi_rel_offset r8, 12
- .cfi_rel_offset r10, 16
- .cfi_rel_offset r11, 20
- .cfi_rel_offset lr, 24
- sub sp, #4 @ bottom word will hold Method*
- .cfi_adjust_cfa_offset 4
- RUNTIME_CURRENT2 \rTemp @ Load Runtime::Current into rTemp.
- @ Load kSaveRefsOnly Method* into rTemp.
- ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
- str \rTemp, [sp, #0] @ Place Method* at bottom of stack.
- str sp, [rSELF, #THREAD_TOP_QUICK_FRAME_OFFSET] @ Place sp in Thread::Current()->top_quick_frame.
-
- // Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
-#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
-#endif
-.endm
-
-.macro RESTORE_SAVE_REFS_ONLY_FRAME
- add sp, #4 @ bottom word holds Method*
- .cfi_adjust_cfa_offset -4
- // Note: Likewise, we could avoid restoring R8 in the case of Baker
- // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
- // later; but it's not worth handling this special case.
- pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
- .cfi_restore r5
- .cfi_restore r6
- .cfi_restore r7
- .cfi_restore r8
- .cfi_restore r10
- .cfi_restore r11
- .cfi_restore lr
- .cfi_adjust_cfa_offset -28
-.endm
-
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
RUNTIME_CURRENT3 \rTemp @ Load Runtime::Current into rTemp.
@@ -230,18 +182,6 @@
END \c_name
.endm
-.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
- ldr \reg, [rSELF, #THREAD_EXCEPTION_OFFSET] @ Get exception field.
- cbnz \reg, 1f
- bx lr
-1:
- DELIVER_PENDING_EXCEPTION
-.endm
-
-.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
- RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
-.endm
-
.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
RETURN_IF_RESULT_IS_ZERO
DELIVER_PENDING_EXCEPTION
@@ -1316,10 +1256,6 @@
.endm
.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
- bkpt // We should never enter here.
- // Code below is for reference.
- // Possibly a large object, go slow.
- // Also does negative array size check.
movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
cmp r1, r2
bhi \slow_path
@@ -1387,8 +1323,6 @@
add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm
-// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
-// the entrypoint once all backends have been updated to use the size variants.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
diff --git a/runtime/interpreter/mterp/armng/arithmetic.S b/runtime/interpreter/mterp/armng/arithmetic.S
new file mode 100644
index 0000000..1cec598
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/arithmetic.S
@@ -0,0 +1,986 @@
+%def binop(preinstr="", result="r0", chkzero="0", instr=""):
+ /*
+ * Generic 32-bit binary operation. Provide an "instr" line that
+ * specifies an instruction that performs "result = r0 op r1".
+ * This could be an ARM instruction or a function call. (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * If "chkzero" is set to 1, we perform a divide-by-zero check on
+ * vCC (r1). Useful for integer division and modulus. Note that we
+ * *don't* check for (INT_MIN / -1) here, because the ARM math lib
+ * handles it correctly.
+ *
+ * For: add-int, sub-int, mul-int, div-int, rem-int, and-int, or-int,
+ * xor-int, shl-int, shr-int, ushr-int, add-float, sub-float,
+ * mul-float, div-float, rem-float
+ */
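+ /*
+ * For example, op_add_int below instantiates this template with
+ * instr="add r0, r0, r1"; div-int gets its own specialized handler so it
+ * can choose between sdiv and the __aeabi_idiv helper.
+ */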
+ /* binop vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ mov r3, r0, lsr #8 @ r3<- CC
+ and r2, r0, #255 @ r2<- BB
+ GET_VREG r1, r3 @ r1<- vCC
+ GET_VREG r0, r2 @ r0<- vBB
+ .if $chkzero
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ .endif
+
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ $preinstr @ optional op; may set condition codes
+ $instr @ $result<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG $result, r4 @ vAA<- $result
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 11-14 instructions */
+
+%def binop2addr(preinstr="", result="r0", chkzero="0", instr=""):
+ /*
+ * Generic 32-bit "/2addr" binary operation. Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an ARM instruction or a function call. (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * If "chkzero" is set to 1, we perform a divide-by-zero check on
+ * vCC (r1). Useful for integer division and modulus.
+ *
+ * For: add-int/2addr, sub-int/2addr, mul-int/2addr, div-int/2addr,
+ * rem-int/2addr, and-int/2addr, or-int/2addr, xor-int/2addr,
+ * shl-int/2addr, shr-int/2addr, ushr-int/2addr, add-float/2addr,
+ * sub-float/2addr, mul-float/2addr, div-float/2addr, rem-float/2addr
+ */
+ /* binop/2addr vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r1, r3 @ r1<- vB
+ GET_VREG r0, r4 @ r0<- vA
+ .if $chkzero
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ .endif
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+
+ $preinstr @ optional op; may set condition codes
+ $instr @ $result<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG $result, r4 @ vAA<- $result
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-13 instructions */
+
+%def binopLit16(result="r0", chkzero="0", instr=""):
+ /*
+ * Generic 32-bit "lit16" binary operation. Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an ARM instruction or a function call. (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * If "chkzero" is set to 1, we perform a divide-by-zero check on
+ * vCC (r1). Useful for integer division and modulus.
+ *
+ * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
+ * rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
+ */
+ /* binop/lit16 vA, vB, #+CCCC */
+ FETCH_S r1, 1 @ r1<- ssssCCCC (sign-extended)
+ mov r2, rINST, lsr #12 @ r2<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r0, r2 @ r0<- vB
+ .if $chkzero
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ .endif
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+
+ $instr @ $result<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG $result, r4 @ vAA<- $result
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-13 instructions */
+
+%def binopLit8(extract="asr r1, r3, #8", result="r0", chkzero="0", instr=""):
+ /*
+ * Generic 32-bit "lit8" binary operation. Provide an "instr" line
+ * that specifies an instruction that performs "result = r0 op r1".
+ * This could be an ARM instruction or a function call. (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * You can override "extract" if the extraction of the literal value
+ * from r3 to r1 is not the default "asr r1, r3, #8". The extraction
+ * can be omitted completely if the shift is embedded in "instr".
+ *
+ * If "chkzero" is set to 1, we perform a divide-by-zero check on
+ * vCC (r1). Useful for integer division and modulus.
+ *
+ * For: add-int/lit8, rsub-int/lit8, mul-int/lit8, div-int/lit8,
+ * rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
+ * shl-int/lit8, shr-int/lit8, ushr-int/lit8
+ */
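+ /*
+ * For example, op_add_int_lit8 below passes extract="" and folds the
+ * shift into the instruction itself ("add r0, r0, r3, asr #8"), while the
+ * shift opcodes pass extract="ubfx r1, r3, #8, #5" to take only the low
+ * five bits of CC.
+ */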
+ /* binop/lit8 vAA, vBB, #+CC */
+ FETCH_S r3, 1 @ r3<- ssssCCBB (sign-extended for CC)
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r3, #255 @ r2<- BB
+ GET_VREG r0, r2 @ r0<- vBB
+ $extract @ optional; typically r1<- ssssssCC (sign extended)
+ .if $chkzero
+ @cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ .endif
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+
+ $instr @ $result<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG $result, r4 @ vAA<- $result
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-12 instructions */
+
+%def binopWide(preinstr="", result0="r0", result1="r1", chkzero="0", instr=""):
+ /*
+ * Generic 64-bit binary operation. Provide an "instr" line that
+ * specifies an instruction that performs "result = r0-r1 op r2-r3".
+ * This could be an ARM instruction or a function call. (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * If "chkzero" is set to 1, we perform a divide-by-zero check on
+ * vCC (r1). Useful for integer division and modulus.
+ *
+ * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
+ * xor-long, add-double, sub-double, mul-double, div-double,
+ * rem-double
+ *
+ * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
+ */
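+ /*
+ * For example, op_add_long below passes preinstr="adds r0, r0, r2" and
+ * instr="adc r1, r1, r3", so the carry out of the low words is propagated
+ * into the addition of the high words.
+ */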
+ /* binop vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov rINST, rINST, lsr #8 @ rINST<- AA
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r4, rINST @ r4<- &fp[AA]
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &fp[BB]
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r2 @ r0/r1<- vBB/vBB+1
+ GET_VREG_WIDE_BY_ADDR r2, r3, r3 @ r2/r3<- vCC/vCC+1
+ .if $chkzero
+ orrs ip, r2, r3 @ second arg (r2-r3) is zero?
+ beq common_errDivideByZero
+ .endif
+ CLEAR_SHADOW_PAIR rINST, lr, ip @ Zero out the shadow regs
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ $preinstr @ optional op; may set condition codes
+ $instr @ result<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR $result0,$result1,r4 @ vAA/vAA+1<- $result0/$result1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 14-17 instructions */
+
+%def binopWide2addr(preinstr="", result0="r0", result1="r1", chkzero="0", instr=""):
+ /*
+ * Generic 64-bit "/2addr" binary operation. Provide an "instr" line
+ * that specifies an instruction that performs "result = r0-r1 op r2-r3".
+ * This could be an ARM instruction or a function call. (If the result
+ * comes back in a register other than r0, you can override "result".)
+ *
+ * If "chkzero" is set to 1, we perform a divide-by-zero check on
+ * vCC (r1). Useful for integer division and modulus.
+ *
+ * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
+ * and-long/2addr, or-long/2addr, xor-long/2addr, add-double/2addr,
+ * sub-double/2addr, mul-double/2addr, div-double/2addr,
+ * rem-double/2addr
+ */
+ /* binop/2addr vA, vB */
+ mov r1, rINST, lsr #12 @ r1<- B
+ ubfx rINST, rINST, #8, #4 @ rINST<- A
+ VREG_INDEX_TO_ADDR r1, r1 @ r1<- &fp[B]
+ VREG_INDEX_TO_ADDR r4, rINST @ r4<- &fp[A]
+ GET_VREG_WIDE_BY_ADDR r2, r3, r1 @ r2/r3<- vBB/vBB+1
+ GET_VREG_WIDE_BY_ADDR r0, r1, r4 @ r0/r1<- vAA/vAA+1
+ .if $chkzero
+ orrs ip, r2, r3 @ second arg (r2-r3) is zero?
+ beq common_errDivideByZero
+ .endif
+ CLEAR_SHADOW_PAIR rINST, ip, lr @ Zero shadow regs
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $preinstr @ optional op; may set condition codes
+ $instr @ result<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR $result0,$result1,r4 @ vAA/vAA+1<- $result0/$result1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 12-15 instructions */
+
+%def unop(preinstr="", instr=""):
+ /*
+ * Generic 32-bit unary operation. Provide an "instr" line that
+ * specifies an instruction that performs "result = op r0".
+ * This could be an ARM instruction or a function call.
+ *
+ * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
+ * int-to-byte, int-to-char, int-to-short
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r0, r3 @ r0<- vB
+ $preinstr @ optional op; may set condition codes
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $instr @ r0<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 8-9 instructions */
+
+%def unopNarrower(preinstr="", instr=""):
+ /*
+ * Generic 64bit-to-32bit unary operation. Provide an "instr" line
+ * that specifies an instruction that performs "result = op r0/r1", where
+ * "result" is a 32-bit quantity in r0.
+ *
+ * For: long-to-float
+ *
+ * (This would work for long-to-int, but that instruction is actually
+ * an exact match for op_move.)
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[B]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- vB/vB+1
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $preinstr @ optional op; may set condition codes
+ $instr @ r0<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r4 @ vA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 9-10 instructions */
+
+%def unopWide(preinstr="", instr=""):
+ /*
+ * Generic 64-bit unary operation. Provide an "instr" line that
+ * specifies an instruction that performs "result = op r0/r1".
+ * This could be an ARM instruction or a function call.
+ *
+ * For: neg-long, not-long, neg-double, long-to-double, double-to-long
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx rINST, rINST, #8, #4 @ rINST<- A
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[B]
+ VREG_INDEX_TO_ADDR r4, rINST @ r4<- &fp[A]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- vB/vB+1
+ CLEAR_SHADOW_PAIR rINST, ip, lr @ Zero shadow regs
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $preinstr @ optional op; may set condition codes
+ $instr @ r0/r1<- op, r2-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-11 instructions */
+
+%def unopWider(preinstr="", instr=""):
+ /*
+ * Generic 32bit-to-64bit unary operation. Provide an "instr" line
+ * that specifies an instruction that performs "result = op r0", where
+ * "result" is a 64-bit quantity in r0/r1.
+ *
+ * For: int-to-long, int-to-double, float-to-long, float-to-double
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx rINST, rINST, #8, #4 @ rINST<- A
+ GET_VREG r0, r3 @ r0<- vB
+ VREG_INDEX_TO_ADDR r4, rINST @ r4<- &fp[A]
+ $preinstr @ optional op; may set condition codes
+ CLEAR_SHADOW_PAIR rINST, ip, lr @ Zero shadow regs
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $instr @ r0<- op, r0-r3 changed
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vA/vA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 9-10 instructions */
+
+%def op_add_int():
+% binop(instr="add r0, r0, r1")
+
+%def op_add_int_2addr():
+% binop2addr(instr="add r0, r0, r1")
+
+%def op_add_int_lit16():
+% binopLit16(instr="add r0, r0, r1")
+
+%def op_add_int_lit8():
+% binopLit8(extract="", instr="add r0, r0, r3, asr #8")
+
+%def op_add_long():
+% binopWide(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")
+
+%def op_add_long_2addr():
+% binopWide2addr(preinstr="adds r0, r0, r2", instr="adc r1, r1, r3")
+
+%def op_and_int():
+% binop(instr="and r0, r0, r1")
+
+%def op_and_int_2addr():
+% binop2addr(instr="and r0, r0, r1")
+
+%def op_and_int_lit16():
+% binopLit16(instr="and r0, r0, r1")
+
+%def op_and_int_lit8():
+% binopLit8(extract="", instr="and r0, r0, r3, asr #8")
+
+%def op_and_long():
+% binopWide(preinstr="and r0, r0, r2", instr="and r1, r1, r3")
+
+%def op_and_long_2addr():
+% binopWide2addr(preinstr="and r0, r0, r2", instr="and r1, r1, r3")
+
+%def op_cmp_long():
+ /*
+ * Compare two 64-bit values. Puts 0, 1, or -1 into the destination
+ * register based on the results of the comparison.
+ */
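+ /*
+ * The comparison below uses cmp on the low words followed by sbcs on the
+ * high words: the borrow from the low words feeds into the high-word
+ * subtraction, so the resulting flags are valid for the signed less-than
+ * test but not for equality, hence the extra cmpeq on the low words.
+ */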
+ /* cmp-long vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &fp[BB]
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r2 @ r0/r1<- vBB/vBB+1
+ GET_VREG_WIDE_BY_ADDR r2, r3, r3 @ r2/r3<- vCC/vCC+1
+ cmp r0, r2
+ sbcs ip, r1, r3 @ Sets correct CCs for checking LT (but not EQ/NE)
+ mov r3, #-1
+ it ge
+ movge r3, #1
+ it eq
+ cmpeq r0, r2
+ it eq
+ moveq r3, #0
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ SET_VREG r3, r4 @ vAA<- r3
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_div_int():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * div-int
+ *
+ */
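+ /*
+ * Note: asm_support_arm.S now has ".arch_extension idiv", which lets the
+ * assembler accept the sdiv instruction used below when
+ * __ARM_ARCH_EXT_IDIV__ is defined.
+ */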
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ mov r3, r0, lsr #8 @ r3<- CC
+ and r2, r0, #255 @ r2<- BB
+ GET_VREG r1, r3 @ r1<- vCC
+ GET_VREG r0, r2 @ r0<- vBB
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r0, r0, r1 @ r0<- op
+#else
+ bl __aeabi_idiv @ r0<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 11-14 instructions */
+
+%def op_div_int_2addr():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * div-int/2addr
+ *
+ */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r1, r3 @ r1<- vB
+ GET_VREG r0, r4 @ r0<- vA
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r0, r0, r1 @ r0<- op
+#else
+ bl __aeabi_idiv @ r0<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-13 instructions */
+
+
+%def op_div_int_lit16():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * div-int/lit16
+ *
+ */
+ FETCH_S r1, 1 @ r1<- ssssCCCC (sign-extended)
+ mov r2, rINST, lsr #12 @ r2<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r0, r2 @ r0<- vB
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r0, r0, r1 @ r0<- op
+#else
+ bl __aeabi_idiv @ r0<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-13 instructions */
+
+%def op_div_int_lit8():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r0 = r0 div r1". The selection between sdiv or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * div-int/lit8
+ *
+ */
+ FETCH_S r3, 1 @ r3<- ssssCCBB (sign-extended for CC)
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r3, #255 @ r2<- BB
+ GET_VREG r0, r2 @ r0<- vBB
+ movs r1, r3, asr #8 @ r1<- ssssssCC (sign extended)
+ @cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r0, r0, r1 @ r0<- op
+#else
+ bl __aeabi_idiv @ r0<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-12 instructions */
+
+%def op_div_long():
+% binopWide(instr="bl __aeabi_ldivmod", chkzero="1")
+
+%def op_div_long_2addr():
+% binopWide2addr(instr="bl __aeabi_ldivmod", chkzero="1")
+
+%def op_int_to_byte():
+% unop(instr="sxtb r0, r0")
+
+%def op_int_to_char():
+% unop(instr="uxth r0, r0")
+
+%def op_int_to_long():
+% unopWider(instr="mov r1, r0, asr #31")
+
+%def op_int_to_short():
+% unop(instr="sxth r0, r0")
+
+%def op_long_to_int():
+/* we ignore the high word, making this equivalent to a 32-bit reg move */
+% op_move()
+
+/*
+ * We use "mul r0, r1, r0" instead of "mul r0, r0, r1"; the latter was illegal on older ARM
+ * architecture versions. Also, for T32, this operand order allows using a 16-bit instruction
+ * (encoding T1) while the other order would require a 32-bit instruction (encoding T2).
+ */
+
+%def op_mul_int():
+% binop(instr="mul r0, r1, r0")
+
+%def op_mul_int_2addr():
+% binop2addr(instr="mul r0, r1, r0")
+
+%def op_mul_int_lit16():
+% binopLit16(instr="mul r0, r1, r0")
+
+%def op_mul_int_lit8():
+% binopLit8(instr="mul r0, r1, r0")
+
+%def op_mul_long():
+ /*
+ * Signed 64-bit integer multiply.
+ *
+ * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
+ * WX
+ * x YZ
+ * --------
+ * ZW ZX
+ * YW YX
+ *
+ * The low word of the result holds ZX, the high word holds
+ * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because
+ * it doesn't fit in the low 64 bits.
+ *
+ * Unlike most ARM math operations, multiply instructions have
+ * restrictions on using the same register more than once (Rd and Rn
+ * cannot be the same).
+ */
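+ /*
+ * In other words, with B = 2^32, vBB = W*B + X and vCC = Y*B + Z:
+ *   vBB * vCC = W*Y*B^2 + (W*Z + Y*X)*B + X*Z
+ * Only the low 64 bits survive:
+ *   low  = low32(X*Z)
+ *   high = high32(X*Z) + low32(W*Z + Y*X)
+ * which is what the umull/mla/add sequence below computes.
+ */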
+ /* mul-long vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &fp[BB]
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[CC]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r2 @ r0/r1<- vBB/vBB+1
+ GET_VREG_WIDE_BY_ADDR r2, r3, r3 @ r2/r3<- vCC/vCC+1
+ mul ip, r0, r3 @ ip<- YxX
+ umull r0, lr, r2, r0 @ r0/lr <- ZxX RdLo == Rn - this is OK.
+ mla r3, r1, r2, ip @ r3<- YxX + (ZxW)
+ mov r4, rINST, lsr #8 @ r4<- AA
+ add r1, r3, lr @ r1<- lr + low(ZxW + (YxX))
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_mul_long_2addr():
+ /*
+ * Signed 64-bit integer multiply, "/2addr" version.
+ *
+ * See op_mul_long for an explanation.
+ *
+ * We get a little tight on registers, so to avoid looking up &fp[A]
+ * again we stuff it into rINST.
+ */
+ /* mul-long/2addr vA, vB */
+ mov r1, rINST, lsr #12 @ r1<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ VREG_INDEX_TO_ADDR r1, r1 @ r1<- &fp[B]
+ VREG_INDEX_TO_ADDR rINST, r4 @ rINST<- &fp[A]
+ GET_VREG_WIDE_BY_ADDR r2, r3, r1 @ r2/r3<- vBB/vBB+1
+ GET_VREG_WIDE_BY_ADDR r0, r1, rINST @ r0/r1<- vAA/vAA+1
+ mul ip, r0, r3 @ ip<- YxX
+ umull r0, lr, r2, r0 @ r0/lr <- ZxX RdLo == Rn - this is OK.
+ mla r3, r1, r2, ip @ r3<- YxX + (ZxW)
+ mov r4, rINST @ Save vAA before FETCH_ADVANCE_INST
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ add r1, r3, lr @ r1<- lr + low(ZxW + (YxX))
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_neg_int():
+% unop(instr="rsb r0, r0, #0")
+
+%def op_neg_long():
+% unopWide(preinstr="rsbs r0, r0, #0", instr="rsc r1, r1, #0")
+
+%def op_not_int():
+% unop(instr="mvn r0, r0")
+
+%def op_not_long():
+% unopWide(preinstr="mvn r0, r0", instr="mvn r1, r1")
+
+%def op_or_int():
+% binop(instr="orr r0, r0, r1")
+
+%def op_or_int_2addr():
+% binop2addr(instr="orr r0, r0, r1")
+
+%def op_or_int_lit16():
+% binopLit16(instr="orr r0, r0, r1")
+
+%def op_or_int_lit8():
+% binopLit8(extract="", instr="orr r0, r0, r3, asr #8")
+
+%def op_or_long():
+% binopWide(preinstr="orr r0, r0, r2", instr="orr r1, r1, r3")
+
+%def op_or_long_2addr():
+% binopWide2addr(preinstr="orr r0, r0, r2", instr="orr r1, r1, r3")
+
+%def op_rem_int():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * NOTE: idivmod returns quotient in r0 and remainder in r1
+ *
+ * rem-int
+ *
+ */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ mov r3, r0, lsr #8 @ r3<- CC
+ and r2, r0, #255 @ r2<- BB
+ GET_VREG r1, r3 @ r1<- vCC
+ GET_VREG r0, r2 @ r0<- vBB
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r2, r0, r1
+ mls r1, r1, r2, r0 @ r1<- op, r0-r2 changed
+#else
+ bl __aeabi_idivmod @ r1<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r1, r4 @ vAA<- r1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 11-14 instructions */
+
+%def op_rem_int_2addr():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * NOTE: idivmod returns quotient in r0 and remainder in r1
+ *
+ * rem-int/2addr
+ *
+ */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r1, r3 @ r1<- vB
+ GET_VREG r0, r4 @ r0<- vA
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r2, r0, r1
+ mls r1, r1, r2, r0 @ r1<- op
+#else
+ bl __aeabi_idivmod @ r1<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r1, r4 @ vAA<- r1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-13 instructions */
+
+
+%def op_rem_int_lit16():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * NOTE: idivmod returns quotient in r0 and remainder in r1
+ *
+ * rem-int/lit16
+ *
+ */
+ FETCH_S r1, 1 @ r1<- ssssCCCC (sign-extended)
+ mov r2, rINST, lsr #12 @ r2<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r0, r2 @ r0<- vB
+ cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r2, r0, r1
+ mls r1, r1, r2, r0 @ r1<- op
+#else
+ bl __aeabi_idivmod @ r1<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r1, r4 @ vAA<- r1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-13 instructions */
+
+%def op_rem_int_lit8():
+ /*
+ * Specialized 32-bit binary operation
+ *
+ * Performs "r1 = r0 rem r1". The selection between sdiv block or the gcc helper
+ * depends on the compile time value of __ARM_ARCH_EXT_IDIV__ (defined for
+ * ARMv7 CPUs that have hardware division support).
+ *
+ * NOTE: idivmod returns quotient in r0 and remainder in r1
+ *
+ * rem-int/lit8
+ *
+ */
+ FETCH_S r3, 1 @ r3<- ssssCCBB (sign-extended for CC)
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r3, #255 @ r2<- BB
+ GET_VREG r0, r2 @ r0<- vBB
+ movs r1, r3, asr #8 @ r1<- ssssssCC (sign extended)
+ @cmp r1, #0 @ is second operand zero?
+ beq common_errDivideByZero
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+
+#ifdef __ARM_ARCH_EXT_IDIV__
+ sdiv r2, r0, r1
+ mls r1, r1, r2, r0 @ r1<- op
+#else
+ bl __aeabi_idivmod @ r1<- op, r0-r3 changed
+#endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r1, r4 @ vAA<- r1
+ GOTO_OPCODE ip @ jump to next instruction
+ /* 10-12 instructions */
+
+%def op_rem_long():
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+% binopWide(instr="bl __aeabi_ldivmod", result0="r2", result1="r3", chkzero="1")
+
+%def op_rem_long_2addr():
+/* ldivmod returns quotient in r0/r1 and remainder in r2/r3 */
+% binopWide2addr(instr="bl __aeabi_ldivmod", result0="r2", result1="r3", chkzero="1")
+
+%def op_rsub_int():
+/* this op is "rsub-int", but can be thought of as "rsub-int/lit16" */
+% binopLit16(instr="rsb r0, r0, r1")
+
+%def op_rsub_int_lit8():
+% binopLit8(extract="", instr="rsb r0, r0, r3, asr #8")
+
+%def op_shl_int():
+% binop(preinstr="and r1, r1, #31", instr="mov r0, r0, lsl r1")
+
+%def op_shl_int_2addr():
+% binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, lsl r1")
+
+%def op_shl_int_lit8():
+% binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, lsl r1")
+
+%def op_shl_long():
+ /*
+ * Long integer shift. This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit. Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.
+ */
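+ /*
+ * The 64-bit shift is assembled from two 32-bit shifts plus an orr; for
+ * distances >= 32 the conditional movpl below overwrites the high word
+ * with vBB << (distance-32), and the register-specified shift of the low
+ * word naturally yields zero in that case.
+ */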
+ /* shl-long vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r3, r0, #255 @ r3<- BB
+ mov r0, r0, lsr #8 @ r0<- CC
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[BB]
+ GET_VREG r2, r0 @ r2<- vCC
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- vBB/vBB+1
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ and r2, r2, #63 @ r2<- r2 & 0x3f
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ mov r1, r1, asl r2 @ r1<- r1 << r2
+ rsb r3, r2, #32 @ r3<- 32 - r2
+ orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 >> (32-r2))
+ subs ip, r2, #32 @ ip<- r2 - 32
+ it pl
+ movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mov r0, r0, asl r2 @ r0<- r0 << r2
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_shl_long_2addr():
+ /*
+ * Long integer shift, 2addr version. vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+ /* shl-long/2addr vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r2, r3 @ r2<- vB
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[A]
+ and r2, r2, #63 @ r2<- r2 & 0x3f
+ GET_VREG_WIDE_BY_ADDR r0, r1, r4 @ r0/r1<- vAA/vAA+1
+ mov r1, r1, asl r2 @ r1<- r1 << r2
+ rsb r3, r2, #32 @ r3<- 32 - r2
+ orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 >> (32-r2))
+ subs ip, r2, #32 @ ip<- r2 - 32
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ it pl
+ movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32)
+ mov r0, r0, asl r2 @ r0<- r0 << r2
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_shr_int():
+% binop(preinstr="and r1, r1, #31", instr="mov r0, r0, asr r1")
+
+%def op_shr_int_2addr():
+% binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, asr r1")
+
+%def op_shr_int_lit8():
+% binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, asr r1")
+
+%def op_shr_long():
+ /*
+ * Long integer shift. This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit. Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.
+ */
+ /* shr-long vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r3, r0, #255 @ r3<- BB
+ mov r0, r0, lsr #8 @ r0<- CC
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[BB]
+ GET_VREG r2, r0 @ r2<- vCC
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- vBB/vBB+1
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ and r2, r2, #63 @ r2<- r2 & 0x3f
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ mov r0, r0, lsr r2 @ r0<- r0 >> r2
+ rsb r3, r2, #32 @ r3<- 32 - r2
+ orr r0, r0, r1, lsl r3 @ r0<- r0 | (r1 << (32-r2))
+ subs ip, r2, #32 @ ip<- r2 - 32
+ it pl
+ movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mov r1, r1, asr r2 @ r1<- r1 >> r2
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_shr_long_2addr():
+ /*
+ * Long integer shift, 2addr version. vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+ /* shr-long/2addr vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r2, r3 @ r2<- vB
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[A]
+ and r2, r2, #63 @ r2<- r2 & 0x3f
+ GET_VREG_WIDE_BY_ADDR r0, r1, r4 @ r0/r1<- vAA/vAA+1
+ mov r0, r0, lsr r2 @ r0<- r0 >> r2
+ rsb r3, r2, #32 @ r3<- 32 - r2
+ orr r0, r0, r1, lsl r3 @ r0<- r0 | (r1 << (32-r2))
+ subs ip, r2, #32 @ ip<- r2 - 32
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ it pl
+ movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32)
+ mov r1, r1, asr r2 @ r1<- r1 >> r2
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_sub_int():
+% binop(instr="sub r0, r0, r1")
+
+%def op_sub_int_2addr():
+% binop2addr(instr="sub r0, r0, r1")
+
+%def op_sub_long():
+% binopWide(preinstr="subs r0, r0, r2", instr="sbc r1, r1, r3")
+
+%def op_sub_long_2addr():
+% binopWide2addr(preinstr="subs r0, r0, r2", instr="sbc r1, r1, r3")
+
+%def op_ushr_int():
+% binop(preinstr="and r1, r1, #31", instr="mov r0, r0, lsr r1")
+
+%def op_ushr_int_2addr():
+% binop2addr(preinstr="and r1, r1, #31", instr="mov r0, r0, lsr r1")
+
+%def op_ushr_int_lit8():
+% binopLit8(extract="ubfx r1, r3, #8, #5", instr="mov r0, r0, lsr r1")
+
+%def op_ushr_long():
+ /*
+ * Long integer shift. This is different from the generic 32/64-bit
+ * binary operations because vAA/vBB are 64-bit but vCC (the shift
+ * distance) is 32-bit. Also, Dalvik requires us to mask off the low
+ * 6 bits of the shift distance.
+ */
+ /* ushr-long vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r3, r0, #255 @ r3<- BB
+ mov r0, r0, lsr #8 @ r0<- CC
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[BB]
+ GET_VREG r2, r0 @ r2<- vCC
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- vBB/vBB+1
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ and r2, r2, #63 @ r2<- r2 & 0x3f
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ mov r0, r0, lsr r2 @ r0<- r0 >>> r2
+ rsb r3, r2, #32 @ r3<- 32 - r2
+ orr r0, r0, r1, lsl r3 @ r0<- r0 | (r1 << (32-r2))
+ subs ip, r2, #32 @ ip<- r2 - 32
+ it pl
+ movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mov r1, r1, lsr r2 @ r1<- r1 >>> r2
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_ushr_long_2addr():
+ /*
+ * Long integer shift, 2addr version. vA is 64-bit value/result, vB is
+ * 32-bit shift distance.
+ */
+ /* ushr-long/2addr vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ GET_VREG r2, r3 @ r2<- vB
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[A]
+ and r2, r2, #63 @ r2<- r2 & 0x3f
+ GET_VREG_WIDE_BY_ADDR r0, r1, r4 @ r0/r1<- vAA/vAA+1
+ mov r0, r0, lsr r2 @ r0<- r0 >>> r2
+ rsb r3, r2, #32 @ r3<- 32 - r2
+ orr r0, r0, r1, lsl r3 @ r0<- r0 | (r1 << (32-r2))
+ subs ip, r2, #32 @ ip<- r2 - 32
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ it pl
+ movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32)
+ mov r1, r1, lsr r2 @ r1<- r1 >>> r2
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA/vAA+1<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_xor_int():
+% binop(instr="eor r0, r0, r1")
+
+%def op_xor_int_2addr():
+% binop2addr(instr="eor r0, r0, r1")
+
+%def op_xor_int_lit16():
+% binopLit16(instr="eor r0, r0, r1")
+
+%def op_xor_int_lit8():
+% binopLit8(extract="", instr="eor r0, r0, r3, asr #8")
+
+%def op_xor_long():
+% binopWide(preinstr="eor r0, r0, r2", instr="eor r1, r1, r3")
+
+%def op_xor_long_2addr():
+% binopWide2addr(preinstr="eor r0, r0, r2", instr="eor r1, r1, r3")
diff --git a/runtime/interpreter/mterp/armng/array.S b/runtime/interpreter/mterp/armng/array.S
new file mode 100644
index 0000000..93f11c6
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/array.S
@@ -0,0 +1,171 @@
+%def op_aget(load="ldr", shift="2", data_offset="MIRROR_INT_ARRAY_DATA_OFFSET", wide="0", is_object="0"):
+/*
+ * Array get. vAA <- vBB[vCC].
+ *
+ * for: aget, aget-boolean, aget-byte, aget-char, aget-short, aget-wide, aget-object
+ *
+ */
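+/*
+ * For aget-object, rMR (the read barrier marking register, see
+ * REFRESH_MARKING_REGISTER) is tested and, when marking is active, the loaded
+ * reference is run through art_quick_read_barrier_mark_reg02 before being
+ * stored in the destination vreg.
+ */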
+ FETCH_B r2, 1, 0 @ r2<- BB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ FETCH_B r3, 1, 1 @ r3<- CC
+ GET_VREG r0, r2 @ r0<- vBB (array object)
+ GET_VREG r1, r3 @ r1<- vCC (requested index)
+ cmp r0, #0 @ null array object?
+ beq common_errNullObject @ yes, bail
+ ldr r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET] @ r3<- arrayObj->length
+ add r0, r0, r1, lsl #$shift @ r0<- arrayObj + index*width
+ cmp r1, r3 @ compare unsigned index, length
+ bcs common_errArrayIndex @ index >= length, bail
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ .if $wide
+ CLEAR_SHADOW_PAIR r4, lr, ip @ Zero out the shadow regs
+ ldrd r2, [r0, #$data_offset] @ r2/r3<- vBB[vCC]
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r2, r3, r4 @ vAA/vAA+1<- r2/r3
+ GOTO_OPCODE ip @ jump to next instruction
+ .elseif $is_object
+ $load r2, [r0, #$data_offset] @ r2<- vBB[vCC]
+ cmp rMR, #0
+ bne 2f
+1:
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_OBJECT r2, r4 @ vAA<- r2
+ GOTO_OPCODE ip @ jump to next instruction
+2:
+ bl art_quick_read_barrier_mark_reg02
+ b 1b
+ .else
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ $load r2, [r0, #$data_offset] @ r2<- vBB[vCC]
+ SET_VREG r2, r4 @ vAA<- r2
+ GOTO_OPCODE ip @ jump to next instruction
+ .endif
+
+%def op_aget_boolean():
+% op_aget(load="ldrb", shift="0", data_offset="MIRROR_BOOLEAN_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aget_byte():
+% op_aget(load="ldrsb", shift="0", data_offset="MIRROR_BYTE_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aget_char():
+% op_aget(load="ldrh", shift="1", data_offset="MIRROR_CHAR_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aget_object():
+% op_aget(load="ldr", shift="2", data_offset="MIRROR_OBJECT_ARRAY_DATA_OFFSET", wide="0", is_object="1")
+
+%def op_aget_short():
+% op_aget(load="ldrsh", shift="1", data_offset="MIRROR_SHORT_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aget_wide():
+% op_aget(load="ldrd", shift="3", data_offset="MIRROR_WIDE_ARRAY_DATA_OFFSET", wide="1", is_object="0")
+
+%def op_aput(store="str", shift="2", data_offset="MIRROR_INT_ARRAY_DATA_OFFSET", wide="0", is_object="0"):
+/*
+ * Array put. vBB[vCC] <- vAA.
+ *
+ * for: aput, aput-boolean, aput-byte, aput-char, aput-short, aput-wide, aput-object
+ *
+ */
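+/*
+ * For aput-object, the store is delegated to art_quick_aput_obj, which also
+ * performs the type check and write barrier and may throw, hence the
+ * EXPORT_PC below.
+ */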
+ FETCH_B r2, 1, 0 @ r2<- BB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ FETCH_B r3, 1, 1 @ r3<- CC
+ GET_VREG r0, r2 @ r0<- vBB (array object)
+ GET_VREG r1, r3 @ r1<- vCC (requested index)
+ cmp r0, #0 @ null array object?
+ beq common_errNullObject @ yes, bail
+ ldr r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET] @ r3<- arrayObj->length
+ cmp r1, r3 @ compare unsigned index, length
+ bcs common_errArrayIndex @ index >= length, bail
+ .if $is_object
+ EXPORT_PC // Export PC before overwriting it.
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ GET_VREG r2, r4 @ r2<- vAA
+ bl art_quick_aput_obj
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ .elseif $wide
+ add r0, r0, r1, lsl #$shift @ r0<- arrayObj + index*width
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ GET_VREG_WIDE_BY_ADDR r2, r3, r4 @ r2/r3<- vAA/vAA+1
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ strd r2, [r0, #$data_offset] @ vBB[vCC]<- r2/r3
+ .else
+ add r0, r0, r1, lsl #$shift @ r0<- arrayObj + index*width
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ GET_VREG r2, r4 @ r2<- vAA
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ $store r2, [r0, #$data_offset] @ vBB[vCC]<- r2
+ .endif
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_aput_boolean():
+% op_aput(store="strb", shift="0", data_offset="MIRROR_BOOLEAN_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aput_byte():
+% op_aput(store="strb", shift="0", data_offset="MIRROR_BYTE_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aput_char():
+% op_aput(store="strh", shift="1", data_offset="MIRROR_CHAR_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aput_short():
+% op_aput(store="strh", shift="1", data_offset="MIRROR_SHORT_ARRAY_DATA_OFFSET", wide="0", is_object="0")
+
+%def op_aput_wide():
+% op_aput(store="str", shift="3", data_offset="MIRROR_WIDE_ARRAY_DATA_OFFSET", wide="1", is_object="0")
+
+%def op_aput_object():
+% op_aput(store="str", shift="2", data_offset="MIRROR_OBJECT_ARRAY_DATA_OFFSET", wide="0", is_object="1")
+
+%def op_array_length():
+ /*
+ * Return the length of an array.
+ */
+ mov r1, rINST, lsr #12 @ r1<- B
+ ubfx r2, rINST, #8, #4 @ r2<- A
+ GET_VREG r0, r1 @ r0<- vB (object ref)
+ cmp r0, #0 @ is object null?
+ beq common_errNullObject @ yup, fail
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ ldr r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET] @ r3<- array length
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r3, r2 @ vB<- length
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_fill_array_data():
+ /* fill-array-data vAA, +BBBBBBBB */
+ EXPORT_PC
+ FETCH r0, 1 @ r0<- bbbb (lo)
+ FETCH r1, 2 @ r1<- BBBB (hi)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb
+ GET_VREG r1, r3 @ r1<- vAA (array object)
+ add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2 (array data off.)
+ bl art_quick_handle_fill_data @ (payload, object)
+ FETCH_ADVANCE_INST 3 @ advance rPC, load rINST
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_filled_new_array(helper="nterp_filled_new_array"):
+/*
+ * Create a new array with elements filled from registers.
+ *
+ * for: filled-new-array, filled-new-array/range
+ */
+ /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+ /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
+ EXPORT_PC
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rFP
+ mov r3, rPC
+ bl $helper
+ FETCH_ADVANCE_INST 3 // advance rPC, load rINST
+ GET_INST_OPCODE ip // extract opcode from rINST
+ GOTO_OPCODE ip // jump to next instruction
+
+%def op_filled_new_array_range():
+% op_filled_new_array(helper="nterp_filled_new_array_range")
+
+%def op_new_array():
+ b NterpNewArray
diff --git a/runtime/interpreter/mterp/armng/control_flow.S b/runtime/interpreter/mterp/armng/control_flow.S
new file mode 100644
index 0000000..3d564e7
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/control_flow.S
@@ -0,0 +1,192 @@
+%def bincmp(condition=""):
+ /*
+ * Generic two-operand compare-and-branch operation. Provide a "condition"
+ * fragment that specifies the comparison to perform.
+ *
+ * For: if-eq, if-ne, if-lt, if-ge, if-gt, if-le
+ */
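+ /*
+ * For example, op_if_eq below passes condition="eq", turning the
+ * conditional branch below into a beq to the taken path.
+ */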
+ /* if-cmp vA, vB, +CCCC */
+ mov r1, rINST, lsr #12 @ r1<- B
+ ubfx r0, rINST, #8, #4 @ r0<- A
+ GET_VREG r3, r1 @ r3<- vB
+ GET_VREG r0, r0 @ r0<- vA
+ FETCH_S rINST, 1 @ rINST<- branch offset, in code units
+ cmp r0, r3 @ compare (vA, vB)
+ b${condition} 1f
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip // extract opcode from rINST
+ GOTO_OPCODE ip // jump to next instruction
+1:
+ FETCH_S rINST, 1 // rINST<- branch offset, in code units
+ BRANCH
+
+%def zcmp(condition=""):
+ /*
+ * Generic one-operand compare-and-branch operation. Provide a "condition"
+ * fragment that specifies the comparison to perform.
+ *
+ * for: if-eqz, if-nez, if-ltz, if-gez, if-gtz, if-lez
+ */
+ /* if-cmp vAA, +BBBB */
+ mov r0, rINST, lsr #8 @ r0<- AA
+ GET_VREG r0, r0 @ r0<- vAA
+ FETCH_S rINST, 1 @ rINST<- branch offset, in code units
+ cmp r0, #0 // compare (vA, 0)
+ b${condition} 1f
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip // extract opcode from rINST
+ GOTO_OPCODE ip // jump to next instruction
+1:
+ FETCH_S rINST, 1 // rINST<- branch offset, in code units
+ BRANCH
+
+%def op_goto():
+/*
+ * Unconditional branch, 8-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+ /* goto +AA */
+ sbfx rINST, rINST, #8, #8 // rINST<- ssssssAA (sign-extended)
+ BRANCH
+
+%def op_goto_16():
+/*
+ * Unconditional branch, 16-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ */
+ /* goto/16 +AAAA */
+ FETCH_S rINST, 1 // rINST<- ssssAAAA (sign-extended)
+ BRANCH
+
+%def op_goto_32():
+/*
+ * Unconditional branch, 32-bit offset.
+ *
+ * The branch distance is a signed code-unit offset, which we need to
+ * double to get a byte offset.
+ *
+ * Because the flags need to reflect the sign of the offset, we use
+ * orrs when assembling the 32-bit offset.
+ */
+ /* goto/32 +AAAAAAAA */
+ FETCH r0, 1 // r0<- aaaa (lo)
+ FETCH r1, 2 // r1<- AAAA (hi)
+ orrs rINST, r0, r1, lsl #16 // rINST<- AAAAaaaa
+ BRANCH
+
+%def op_if_eq():
+% bincmp(condition="eq")
+
+%def op_if_eqz():
+% zcmp(condition="eq")
+
+%def op_if_ge():
+% bincmp(condition="ge")
+
+%def op_if_gez():
+% zcmp(condition="ge")
+
+%def op_if_gt():
+% bincmp(condition="gt")
+
+%def op_if_gtz():
+% zcmp(condition="gt")
+
+%def op_if_le():
+% bincmp(condition="le")
+
+%def op_if_lez():
+% zcmp(condition="le")
+
+%def op_if_lt():
+% bincmp(condition="lt")
+
+%def op_if_ltz():
+% zcmp(condition="lt")
+
+%def op_if_ne():
+% bincmp(condition="ne")
+
+%def op_if_nez():
+% zcmp(condition="ne")
+
+%def op_packed_switch(func="NterpDoPackedSwitch"):
+/*
+ * Handle a packed-switch or sparse-switch instruction. In both cases
+ * we decode it and hand it off to a helper function.
+ *
+ * We don't really expect backward branches in a switch statement, but
+ * they're perfectly legal, so we check for them here.
+ *
+ * for: packed-switch, sparse-switch
+ */
+ /* op vAA, +BBBB */
+ FETCH r0, 1 @ r0<- bbbb (lo)
+ FETCH r1, 2 @ r1<- BBBB (hi)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb
+ GET_VREG r1, r3 @ r1<- vAA
+ add r0, rPC, r0, lsl #1 @ r0<- PC + BBBBbbbb*2
+ bl $func @ r0<- code-unit branch offset
+ mov rINST, r0
+ BRANCH
+
+%def op_sparse_switch():
+% op_packed_switch(func="NterpDoSparseSwitch")
+
+/*
+ * Return from a method. The parameters below select between return,
+ * return-object, return-wide, return-void and return-void/no-barrier.
+ */
+%def op_return(is_object="0", is_void="0", is_wide="0", is_no_barrier="0"):
+ .if $is_void
+ .if !$is_no_barrier
+ // Thread fence for constructor
+ dmb ishst
+ .endif
+ .else
+ mov r2, rINST, lsr #8 @ r2<- AA
+ .if $is_wide
+ VREG_INDEX_TO_ADDR r2, r2
+ GET_VREG_WIDE_BY_ADDR r0, r1, r2 // r0,r1 <- vAA
+ // In case we're going back to compiled code, put the
+ // result also in d0.
+ vmov d0, r0, r1
+ .else
+ GET_VREG r0, r2 // r0<- vAA
+ .if !$is_object
+ // In case we're going back to compiled code, put the
+ // result also in s0.
+ vmov s0, r0
+ .endif
+ .endif
+ .endif
+ .cfi_remember_state
+ ldr ip, [rREFS, #-4]
+ mov sp, ip
+ .cfi_def_cfa sp, CALLEE_SAVES_SIZE
+ RESTORE_ALL_CALLEE_SAVES lr_to_pc=1
+ .cfi_restore_state
+
+%def op_return_object():
+% op_return(is_object="1", is_void="0", is_wide="0", is_no_barrier="0")
+
+%def op_return_void():
+% op_return(is_object="0", is_void="1", is_wide="0", is_no_barrier="0")
+
+%def op_return_void_no_barrier():
+% op_return(is_object="0", is_void="1", is_wide="0", is_no_barrier="1")
+
+%def op_return_wide():
+% op_return(is_object="0", is_void="0", is_wide="1", is_no_barrier="0")
+
+%def op_throw():
+ EXPORT_PC
+ mov r2, rINST, lsr #8 @ r2<- AA
+ GET_VREG r0, r2 @ r0<- vAA (exception object)
+ mov r1, rSELF
+ bl art_quick_deliver_exception
+ bkpt 0
diff --git a/runtime/interpreter/mterp/armng/floating_point.S b/runtime/interpreter/mterp/armng/floating_point.S
new file mode 100644
index 0000000..5052f13
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/floating_point.S
@@ -0,0 +1,424 @@
+%def fbinop(instr=""):
+ /*
+ * Generic 32-bit floating-point operation. Provide an "instr" line that
+ * specifies an instruction that performs "s2 = s0 op s1". Because we
+ * use the "softfp" ABI, this must be an instruction, not a function call.
+ *
+ * For: add-float, sub-float, mul-float, div-float
+ */
+ /* floatop vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ mov r3, r0, lsr #8 @ r3<- CC
+ and r2, r0, #255 @ r2<- BB
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vCC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &vBB
+ GET_VREG_FLOAT_BY_ADDR s1, r3 @ s1<- vCC
+ GET_VREG_FLOAT_BY_ADDR s0, r2 @ s0<- vBB
+
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ $instr @ s2<- op
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_FLOAT s2, r4, lr @ vAA<- s2
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def fbinop2addr(instr=""):
+ /*
+ * Generic 32-bit floating point "/2addr" binary operation. Provide
+ * an "instr" line that specifies an instruction that performs
+ * "s2 = s0 op s1".
+ *
+ * For: add-float/2addr, sub-float/2addr, mul-float/2addr, div-float/2addr
+ */
+ /* binop/2addr vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vB
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &vA
+ GET_VREG_FLOAT_BY_ADDR s1, r3 @ s1<- vB
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ GET_VREG_FLOAT_BY_ADDR s0, r4 @ s0<- vA
+ $instr @ s2<- op
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_FLOAT_BY_ADDR s2, r4 @ vA<- s2 (no need to clear shadow regs, it's 2addr)
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def fbinopWide(instr=""):
+ /*
+ * Generic 64-bit double-precision floating point binary operation.
+ * Provide an "instr" line that specifies an instruction that performs
+ * "d2 = d0 op d1".
+ *
+ * for: add-double, sub-double, mul-double, div-double
+ */
+ /* doubleop vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ mov r3, r0, lsr #8 @ r3<- CC
+ and r2, r0, #255 @ r2<- BB
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vCC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &vBB
+ GET_VREG_DOUBLE_BY_ADDR d1, r3 @ d1<- vCC
+ GET_VREG_DOUBLE_BY_ADDR d0, r2 @ d0<- vBB
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ $instr @ d2<- op
+ CLEAR_SHADOW_PAIR r4, ip, lr @ Zero shadow regs
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &vAA
+ SET_VREG_DOUBLE_BY_ADDR d2, r4 @ vAA<- d2
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def fbinopWide2addr(instr=""):
+ /*
+ * Generic 64-bit floating point "/2addr" binary operation. Provide
+ * an "instr" line that specifies an instruction that performs
+ * "d2 = d0 op d1".
+ *
+ * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
+ * div-double/2addr
+ */
+ /* binop/2addr vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vB
+ CLEAR_SHADOW_PAIR r4, ip, r0 @ Zero out shadow regs
+ GET_VREG_DOUBLE_BY_ADDR d1, r3 @ d1<- vB
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &vA
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ GET_VREG_DOUBLE_BY_ADDR d0, r4 @ d0<- vA
+ $instr @ d2<- op
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_DOUBLE_BY_ADDR d2, r4 @ vAA<- d2
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def funop(instr=""):
+ /*
+ * Generic 32-bit unary floating-point operation. Provide an "instr"
+ * line that specifies an instruction that performs "s1 = op s0".
+ *
+ * for: int-to-float, float-to-int
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vB
+ GET_VREG_FLOAT_BY_ADDR s0, r3 @ s0<- vB
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $instr @ s1<- op
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_FLOAT s1, r4, lr @ vA<- s1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def funopNarrower(instr=""):
+ /*
+ * Generic 64bit-to-32bit unary floating point operation. Provide an
+ * "instr" line that specifies an instruction that performs "s0 = op d0".
+ *
+ * For: double-to-int, double-to-float
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vB
+ GET_VREG_DOUBLE_BY_ADDR d0, r3 @ d0<- vB
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $instr @ s0<- op
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_FLOAT s0, r4, lr @ vA<- s0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def funopWider(instr=""):
+ /*
+ * Generic 32bit-to-64bit floating point unary operation. Provide an
+ * "instr" line that specifies an instruction that performs "d0 = op s0".
+ *
+ * For: int-to-double, float-to-double
+ */
+ /* unop vA, vB */
+ mov r3, rINST, lsr #12 @ r3<- B
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vB
+ GET_VREG_FLOAT_BY_ADDR s0, r3 @ s0<- vB
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ $instr @ d0<- op
+ CLEAR_SHADOW_PAIR r4, ip, lr @ Zero shadow regs
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &vA
+ SET_VREG_DOUBLE_BY_ADDR d0, r4 @ vA<- d0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_add_double():
+% fbinopWide(instr="faddd d2, d0, d1")
+
+%def op_add_double_2addr():
+% fbinopWide2addr(instr="faddd d2, d0, d1")
+
+%def op_add_float():
+% fbinop(instr="fadds s2, s0, s1")
+
+%def op_add_float_2addr():
+% fbinop2addr(instr="fadds s2, s0, s1")
+
+%def op_cmpg_double():
+ /*
+ * Compare two floating-point values. Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ * if (x == y) {
+ * return 0;
+ * } else if (x < y) {
+ * return -1;
+ * } else if (x > y) {
+ * return 1;
+ *     } else {            // unordered: one of x, y is NaN
+ *         return 1;
+ *     }
+ * }
+ */
+ /* op vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &vBB
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vCC
+ GET_VREG_DOUBLE_BY_ADDR d0, r2 @ d0<- vBB
+ GET_VREG_DOUBLE_BY_ADDR d1, r3 @ d1<- vCC
+ vcmpe.f64 d0, d1 @ compare (vBB, vCC)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mvn r0, #0 @ r0<- -1 (default)
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ fmstat
+ it hi
+ movhi r0, #1 @ (greater than, or unordered) r0<- 1
+ moveq r0, #0 @ (equal) r0<- 0
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_cmpg_float():
+ /*
+ * Compare two floating-point values. Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ * if (x == y) {
+ * return 0;
+ * } else if (x < y) {
+ * return -1;
+ * } else if (x > y) {
+ * return 1;
+ *     } else {            // unordered: one of x, y is NaN
+ *         return 1;
+ *     }
+ * }
+ */
+ /* op vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &vBB
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vCC
+ GET_VREG_FLOAT_BY_ADDR s0, r2 @ s0<- vBB
+ GET_VREG_FLOAT_BY_ADDR s1, r3 @ s1<- vCC
+ vcmpe.f32 s0, s1 @ compare (vBB, vCC)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mvn r0, #0 @ r0<- -1 (default)
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ fmstat
+ it hi
+ movhi r0, #1 @ (greater than, or unordered) r0<- 1
+ moveq r0, #0 @ (equal) r0<- 0
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_cmpl_double():
+ /*
+ * Compare two floating-point values. Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ * if (x == y) {
+ * return 0;
+ * } else if (x > y) {
+ * return 1;
+ * } else if (x < y) {
+ * return -1;
+ *     } else {            // unordered: one of x, y is NaN
+ *         return -1;
+ *     }
+ * }
+ */
+ /* op vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &vBB
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vCC
+ GET_VREG_DOUBLE_BY_ADDR d0, r2 @ d0<- vBB
+ GET_VREG_DOUBLE_BY_ADDR d1, r3 @ d1<- vCC
+ vcmpe.f64 d0, d1 @ compare (vBB, vCC)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mvn r0, #0 @ r0<- -1 (default)
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ fmstat @ export status flags
+ it gt
+    movgt r0, #1                        @ (greater than) r0<- 1
+ it eq
+    moveq r0, #0                        @ (equal) r0<- 0
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_cmpl_float():
+ /*
+ * Compare two floating-point values. Puts 0, 1, or -1 into the
+ * destination register based on the results of the comparison.
+ *
+ * int compare(x, y) {
+ * if (x == y) {
+ * return 0;
+ * } else if (x > y) {
+ * return 1;
+ * } else if (x < y) {
+ * return -1;
+ *     } else {            // unordered: one of x, y is NaN
+ *         return -1;
+ *     }
+ * }
+ */
+ /* op vAA, vBB, vCC */
+ FETCH r0, 1 @ r0<- CCBB
+ mov r4, rINST, lsr #8 @ r4<- AA
+ and r2, r0, #255 @ r2<- BB
+ mov r3, r0, lsr #8 @ r3<- CC
+ VREG_INDEX_TO_ADDR r2, r2 @ r2<- &vBB
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &vCC
+ GET_VREG_FLOAT_BY_ADDR s0, r2 @ s0<- vBB
+ GET_VREG_FLOAT_BY_ADDR s1, r3 @ s1<- vCC
+ vcmpe.f32 s0, s1 @ compare (vBB, vCC)
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ mvn r0, #0 @ r0<- -1 (default)
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ fmstat @ export status flags
+ it gt
+    movgt r0, #1                        @ (greater than) r0<- 1
+ it eq
+    moveq r0, #0                        @ (equal) r0<- 0
+ SET_VREG r0, r4 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_div_double():
+% fbinopWide(instr="fdivd d2, d0, d1")
+
+%def op_div_double_2addr():
+% fbinopWide2addr(instr="fdivd d2, d0, d1")
+
+%def op_div_float():
+% fbinop(instr="fdivs s2, s0, s1")
+
+%def op_div_float_2addr():
+% fbinop2addr(instr="fdivs s2, s0, s1")
+
+%def op_double_to_float():
+% funopNarrower(instr="vcvt.f32.f64 s0, d0")
+
+%def op_double_to_int():
+% funopNarrower(instr="vcvt.s32.f64 s0, d0")
+
+%def op_double_to_long():
+% unopWide(instr="bl nterp_d2l_doconv")
+
+%def op_float_to_double():
+% funopWider(instr="vcvt.f64.f32 d0, s0")
+
+%def op_float_to_int():
+% funop(instr="vcvt.s32.f32 s1, s0")
+
+%def op_float_to_long():
+% unopWider(instr="bl nterp_f2l_doconv")
+
+%def op_int_to_double():
+% funopWider(instr="vcvt.f64.s32 d0, s0")
+
+%def op_int_to_float():
+% funop(instr="vcvt.f32.s32 s1, s0")
+
+%def op_long_to_double():
+ /*
+ * Specialised 64-bit floating point operation.
+ *
+ * Note: The result will be returned in d2.
+ *
+ * For: long-to-double
+ */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx r4, rINST, #8, #4 @ r4<- A
+ CLEAR_SHADOW_PAIR r4, ip, lr @ Zero shadow regs
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[B]
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[A]
+    GET_VREG_DOUBLE_BY_ADDR d0, r3      @ d0<- vB (the 64-bit long)
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+
+    vcvt.f64.s32 d1, s1                 @ d1<- (double)(high word of vB)
+    vcvt.f64.u32 d2, s0                 @ d2<- (double)(unsigned low word of vB)
+    vldr d3, constval$opcode
+    vmla.f64 d2, d1, d3                 @ d2<- high*2^32 + low
+
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_DOUBLE_BY_ADDR d2, r4 @ vAA<- d2
+ GOTO_OPCODE ip @ jump to next instruction
+
+ /* literal pool helper */
+constval${opcode}:
+ .8byte 0x41f0000000000000
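+    /*
+     * Note: 0x41f0000000000000 is 2^32 encoded as an IEEE-754 double
+     * (sign 0, biased exponent 0x41f = 1055 -> 2^(1055-1023), zero mantissa),
+     * so the vmla above computes (double)high * 2^32 + (double)(unsigned)low.
+     */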
+
+%def op_long_to_float():
+% unopNarrower(instr="bl __aeabi_l2f")
+
+%def op_mul_double():
+% fbinopWide(instr="fmuld d2, d0, d1")
+
+%def op_mul_double_2addr():
+% fbinopWide2addr(instr="fmuld d2, d0, d1")
+
+%def op_mul_float():
+% fbinop(instr="fmuls s2, s0, s1")
+
+%def op_mul_float_2addr():
+% fbinop2addr(instr="fmuls s2, s0, s1")
+
+%def op_neg_double():
+% unopWide(instr="add r1, r1, #0x80000000")
+
+%def op_neg_float():
+% unop(instr="add r0, r0, #0x80000000")
+
+%def op_rem_double():
+/* EABI doesn't define a double remainder function, but libm does */
+% binopWide(instr="bl fmod")
+
+%def op_rem_double_2addr():
+/* EABI doesn't define a double remainder function, but libm does */
+% binopWide2addr(instr="bl fmod")
+
+%def op_rem_float():
+/* EABI doesn't define a float remainder function, but libm does */
+% binop(instr="bl fmodf")
+
+%def op_rem_float_2addr():
+/* EABI doesn't define a float remainder function, but libm does */
+% binop2addr(instr="bl fmodf")
+
+%def op_sub_double():
+% fbinopWide(instr="fsubd d2, d0, d1")
+
+%def op_sub_double_2addr():
+% fbinopWide2addr(instr="fsubd d2, d0, d1")
+
+%def op_sub_float():
+% fbinop(instr="fsubs s2, s0, s1")
+
+%def op_sub_float_2addr():
+% fbinop2addr(instr="fsubs s2, s0, s1")
diff --git a/runtime/interpreter/mterp/armng/invoke.S b/runtime/interpreter/mterp/armng/invoke.S
new file mode 100644
index 0000000..47678dc
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/invoke.S
@@ -0,0 +1,183 @@
+%def op_invoke_custom():
+ EXPORT_PC
+ FETCH r0, 1 // call_site index, first argument of runtime call.
+ b NterpCommonInvokeCustom
+
+%def op_invoke_custom_range():
+ EXPORT_PC
+ FETCH r0, 1 // call_site index, first argument of runtime call.
+ b NterpCommonInvokeCustomRange
+
+%def invoke_direct_or_super(helper="", range="", is_super=""):
+ EXPORT_PC
+ // Fast-path which gets the method from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+1:
+ // Load the first argument (the 'this' pointer).
+ FETCH r1, 2
+ .if !$range
+ and r1, r1, #0xf
+ .endif
+ GET_VREG r1, r1
+ cmp r1, #0
+ beq common_errNullObject // bail if null
+ b $helper
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_method
+ .if $is_super
+ b 1b
+ .else
+ tst r0, #1
+ beq 1b
+    and r0, r0, #-2 // Remove the extra bit that marks it as a String.<init> method.
+ .if $range
+ b NterpHandleStringInitRange
+ .else
+ b NterpHandleStringInit
+ .endif
+ .endif
+
+%def op_invoke_direct():
+% invoke_direct_or_super(helper="NterpCommonInvokeInstance", range="0", is_super="0")
+
+%def op_invoke_direct_range():
+% invoke_direct_or_super(helper="NterpCommonInvokeInstanceRange", range="1", is_super="0")
+
+%def op_invoke_super():
+% invoke_direct_or_super(helper="NterpCommonInvokeInstance", range="0", is_super="1")
+
+%def op_invoke_super_range():
+% invoke_direct_or_super(helper="NterpCommonInvokeInstanceRange", range="1", is_super="1")
+
+%def op_invoke_polymorphic():
+ EXPORT_PC
+ // No need to fetch the target method.
+ // Load the first argument (the 'this' pointer).
+ FETCH r1, 2
+ and r1, r1, #0xf
+ GET_VREG r1, r1
+ cmp r1, #0
+ beq common_errNullObject // bail if null
+ b NterpCommonInvokePolymorphic
+
+%def op_invoke_polymorphic_range():
+ EXPORT_PC
+ // No need to fetch the target method.
+ // Load the first argument (the 'this' pointer).
+ FETCH r1, 2
+ GET_VREG r1, r1
+ cmp r1, #0
+ beq common_errNullObject // bail if null
+ b NterpCommonInvokePolymorphicRange
+
+%def invoke_interface(range=""):
+ EXPORT_PC
+ // Fast-path which gets the method from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+1:
+ // First argument is the 'this' pointer.
+ FETCH r1, 2
+ .if !$range
+ and r1, r1, #0xf
+ .endif
+ GET_VREG r1, r1
+ // Note: if r1 is null, this will be handled by our SIGSEGV handler.
+ ldr r2, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
+ ldr r2, [r2, #MIRROR_CLASS_IMT_PTR_OFFSET_32]
+ ldr r0, [r2, r0, uxtw #2]
+ .if $range
+ b NterpCommonInvokeInterfaceRange
+ .else
+ b NterpCommonInvokeInterface
+ .endif
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_method
+    // For j.l.Object interface calls, the high bit is set and the method index is in the low 16 bits.
+ cmp r0, #0
+ bge 1b
+ ubfx r0, r0, #0, #16
+ .if $range
+ b NterpHandleInvokeInterfaceOnObjectMethodRange
+ .else
+ b NterpHandleInvokeInterfaceOnObjectMethod
+ .endif
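+
+// Illustrative summary of how the slow paths above interpret the value returned
+// by nterp_get_method (only the cases visible in this file):
+//   method = nterp_get_method(self, caller, dex_pc_ptr);
+//   invoke-direct:    if (method & 1) clear the bit and go to NterpHandleStringInit(Range);
+//   invoke-interface: if (method < 0) it is a j.l.Object method; take its index from the
+//                     low 16 bits and go to NterpHandleInvokeInterfaceOnObjectMethod(Range).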
+
+%def op_invoke_interface():
+% invoke_interface(range="0")
+
+%def op_invoke_interface_range():
+% invoke_interface(range="1")
+
+%def invoke_static(helper=""):
+ EXPORT_PC
+ // Fast-path which gets the method from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 1f
+ b $helper
+1:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_method
+ b $helper
+
+%def op_invoke_static():
+% invoke_static(helper="NterpCommonInvokeStatic")
+
+%def op_invoke_static_range():
+% invoke_static(helper="NterpCommonInvokeStaticRange")
+
+%def invoke_virtual(helper="", range=""):
+ EXPORT_PC
+ // Fast-path which gets the vtable offset from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r2, 2f
+1:
+ FETCH r1, 2
+ .if !$range
+ and r1, r1, #0xf
+ .endif
+ GET_VREG r1, r1
+ // Note: if r1 is null, this will be handled by our SIGSEGV handler.
+ ldr r0, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
+ add r0, r0, #MIRROR_CLASS_VTABLE_OFFSET_32
+ ldr r0, [r0, r2, uxtw #2]
+ b $helper
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_method
+ mov r2, r0
+ b 1b
+
+%def op_invoke_virtual():
+% invoke_virtual(helper="NterpCommonInvokeInstance", range="0")
+
+%def op_invoke_virtual_range():
+% invoke_virtual(helper="NterpCommonInvokeInstanceRange", range="1")
+
+%def invoke_virtual_quick(helper="", range=""):
+ EXPORT_PC
+ FETCH r2, 1 // offset
+ // First argument is the 'this' pointer.
+ FETCH r1, 2 // arguments
+ .if !$range
+ and r1, r1, #0xf
+ .endif
+ GET_VREG r1, r1
+ // Note: if r1 is null, this will be handled by our SIGSEGV handler.
+ ldr r0, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
+ add r0, r0, #MIRROR_CLASS_VTABLE_OFFSET_32
+ ldr r0, [r0, r2, uxtw #2]
+ b $helper
+
+%def op_invoke_virtual_quick():
+% invoke_virtual_quick(helper="NterpCommonInvokeInstance", range="0")
+
+%def op_invoke_virtual_range_quick():
+% invoke_virtual_quick(helper="NterpCommonInvokeInstanceRange", range="1")
diff --git a/runtime/interpreter/mterp/armng/main.S b/runtime/interpreter/mterp/armng/main.S
new file mode 100644
index 0000000..0b14006
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/main.S
@@ -0,0 +1,2333 @@
+%def header():
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This is a #include, not a %include, because we want the C pre-processor
+ * to expand the macros into assembler assignment statements.
+ */
+#include "asm_support.h"
+#include "arch/arm/asm_support_arm.S"
+
+/**
+ * ARM EABI general notes:
+ *
+ * r0-r3 hold first 4 args to a method; they are not preserved across method calls
+ * r4-r8 are available for general use
+ * r9 is given special treatment in some situations, but not for us
+ * r10 (sl) seems to be generally available
+ * r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
+ * r12 (ip) is scratch -- not preserved across method calls
+ * r13 (sp) should be managed carefully in case a signal arrives
+ * r14 (lr) must be preserved
+ * r15 (pc) can be tinkered with directly
+ *
+ * r0 holds returns of <= 4 bytes
+ * r0-r1 hold returns of 8 bytes, low word in r0
+ *
+ * Callee must save/restore r4+ (except r12) if it modifies them. If VFP
+ * is present, registers s16-s31 (a/k/a d8-d15, a/k/a q4-q7) must be preserved,
+ * s0-s15 (d0-d7, q0-q3) do not need to be.
+ *
+ * Stack is "full descending". Only the arguments that don't fit in the first 4
+ * registers are placed on the stack. "sp" points at the first stacked argument
+ * (i.e. the 5th arg).
+ *
+ * Native ABI uses soft-float, single-precision results are in r0,
+ * double-precision results in r0-r1.
+ *
+ * In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
+ * 64-bit quantities (long long, double) must be 64-bit aligned.
+ *
+ * Nterp notes:
+ *
+ * The following registers have fixed assignments:
+ *
+ * reg nick purpose
+ * r5 rFP interpreted frame pointer, used for accessing locals and args
+ * r6 rREFS base of object references of dex registers
+ * r7 rINST first 16-bit code unit of current instruction
+ * r8 rMR marking register
+ * r9 rSELF self (Thread) pointer
+ * r10 rIBASE interpreted instruction base pointer, used for computed goto
+ * r11 rPC interpreted program counter, used for fetching instructions
+ *
+ * r4, ip, and lr can be used as temporaries
+ *
+ * Note that r4 is a callee-save register in ARM EABI, but not in managed code.
+ *
+ */
+
+/* single-purpose registers, given names for clarity */
+#define CFI_DEX 11 // DWARF register number of the register holding dex-pc (rPC).
+#define CFI_TMP 0 // DWARF register number of the first argument register (r0).
+#define CFI_REFS 6
+#define rFP r5
+#define rREFS r6
+#define rINST r7
+#define rSELF r9
+#define rIBASE r10
+#define rPC r11
+
+// To avoid putting ifdefs around the use of rMR, make sure it's defined.
+// IsNterpSupported returns false for configurations that don't have rMR (typically CMS).
+#ifndef rMR
+#define rMR r8
+#endif
+
+// Temporary registers while setting up a frame.
+#define rNEW_FP r8
+#define rNEW_REFS r10
+#define CFI_NEW_REFS 10
+
+#define CALLEE_SAVES_SIZE (9 * 4 + 16 * 4)
+
+// +4 for the ArtMethod of the caller.
+#define OFFSET_TO_FIRST_ARGUMENT_IN_STACK (CALLEE_SAVES_SIZE + 4)
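+// With the 9 core registers and 16 single-precision registers spilled by
+// SPILL_ALL_CALLEE_SAVES, this works out to 9 * 4 + 16 * 4 = 100 bytes, and
+// OFFSET_TO_FIRST_ARGUMENT_IN_STACK to 104 bytes.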
+
+/*
+ * Fetch the next instruction from rPC into rINST. Does not advance rPC.
+ */
+.macro FETCH_INST
+ ldrh rINST, [rPC]
+.endm
+
+/*
+ * Fetch the next instruction from the specified offset. Advances rPC
+ * to point to the next instruction. "count" is in 16-bit code units.
+ *
+ * Because of the limited size of immediate constants on ARM, this is only
+ * suitable for small forward movements (i.e. don't try to implement "goto"
+ * with this).
+ *
+ * This must come AFTER anything that can throw an exception, or the
+ * exception catch may miss. (This also implies that it must come after
+ * EXPORT_PC.)
+ */
+.macro FETCH_ADVANCE_INST count
+ ldrh rINST, [rPC, #((\count)*2)]!
+.endm
+
+/*
+ * Similar to FETCH_ADVANCE_INST, but does not update rPC. Used to load
+ * rINST ahead of a possible exception point. Be sure to manually advance
+ * rPC later.
+ */
+.macro PREFETCH_INST count
+ ldrh rINST, [rPC, #((\count)*2)]
+.endm
+
+/* Advance rPC by some number of code units. */
+.macro ADVANCE count
+ add rPC, #((\count)*2)
+.endm
+
+/*
+ * Fetch the next instruction from an offset specified by "reg", advancing
+ * rPC to point to the next instruction. "reg" must specify the distance
+ * in bytes, *not* 16-bit code units, and may be a signed value.
+ */
+.macro FETCH_ADVANCE_INST_RB reg
+ ldrh rINST, [rPC, \reg]!
+.endm
+
+/*
+ * Fetch a half-word code unit from an offset past the current PC. The
+ * "count" value is in 16-bit code units. Does not advance xPC.
+ *
+ * The "_S" variant works the same but treats the value as signed.
+ */
+.macro FETCH reg, count
+ ldrh \reg, [rPC, #((\count)*2)]
+.endm
+
+.macro FETCH_S reg, count
+ ldrsh \reg, [rPC, #((\count)*2)]
+.endm
+
+/*
+ * Fetch one byte from an offset past the current PC. Pass in the same
+ * "count" as you would for FETCH, and an additional 0/1 indicating which
+ * byte of the halfword you want (lo/hi).
+ */
+.macro FETCH_B reg, count, byte
+ ldrb \reg, [rPC, #((\count)*2+(\byte))]
+.endm
+
+/*
+ * Put the instruction's opcode field into the specified register.
+ */
+.macro GET_INST_OPCODE reg
+ and \reg, rINST, #255
+.endm
+
+/*
+ * Begin executing the opcode in "reg". Clobbers "reg".
+ */
+
+.macro GOTO_OPCODE reg
+ add pc, rIBASE, \reg, lsl #${handler_size_bits}
+.endm
+
+/*
+ * Get/set value from a Dalvik register.
+ */
+.macro GET_VREG reg, vreg
+ ldr \reg, [rFP, \vreg, lsl #2]
+.endm
+.macro GET_VREG_OBJECT reg, vreg
+ ldr \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG reg, vreg
+ str \reg, [rFP, \vreg, lsl #2]
+ mov \reg, #0
+ str \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_OBJECT reg, vreg
+ str \reg, [rFP, \vreg, lsl #2]
+ str \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro SET_VREG_FLOAT reg, vreg, tmpreg
+ add \tmpreg, rFP, \vreg, lsl #2
+ vstr \reg, [\tmpreg]
+ mov \tmpreg, #0
+ str \tmpreg, [rREFS, \vreg, lsl #2]
+.endm
+.macro GET_VREG_WIDE_BY_ADDR reg0, reg1, addr
+ ldmia \addr, {\reg0, \reg1}
+.endm
+.macro SET_VREG_WIDE_BY_ADDR reg0, reg1, addr
+ stmia \addr, {\reg0, \reg1}
+.endm
+.macro GET_VREG_FLOAT sreg, vreg
+ ldr \vreg, [rFP, \vreg, lsl #2]
+ vmov \sreg, \vreg
+.endm
+.macro GET_VREG_FLOAT_BY_ADDR reg, addr
+ vldr \reg, [\addr]
+.endm
+.macro SET_VREG_FLOAT_BY_ADDR reg, addr
+ vstr \reg, [\addr]
+.endm
+.macro GET_VREG_DOUBLE_BY_ADDR reg, addr
+ vldr \reg, [\addr]
+.endm
+.macro SET_VREG_DOUBLE_BY_ADDR reg, addr
+ vstr \reg, [\addr]
+.endm
+.macro SET_VREG_SHADOW reg, vreg
+ str \reg, [rREFS, \vreg, lsl #2]
+.endm
+.macro CLEAR_SHADOW_PAIR vreg, tmp1, tmp2
+ mov \tmp1, #0
+ add \tmp2, \vreg, #1
+ SET_VREG_SHADOW \tmp1, \vreg
+ SET_VREG_SHADOW \tmp1, \tmp2
+.endm
+.macro VREG_INDEX_TO_ADDR reg, vreg
+ add \reg, rFP, \vreg, lsl #2
+.endm
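+
+// Illustrative C-like sketch (not actual code) of the vreg macros above, where
+// fp[] is the dex register value array (rFP) and refs[] the parallel reference
+// array (rREFS):
+//   GET_VREG(reg, v):        reg = fp[v];
+//   SET_VREG(reg, v):        fp[v] = reg; refs[v] = 0;     // non-object: clear the ref slot
+//   SET_VREG_OBJECT(reg, v): fp[v] = reg; refs[v] = reg;   // object: keep it visible to GC
+// The *_BY_ADDR variants take a pre-computed address (VREG_INDEX_TO_ADDR) and do
+// not touch refs[], so wide/float stores clear the shadow slots separately
+// (CLEAR_SHADOW_PAIR / SET_VREG_FLOAT).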
+
+// An assembly entry that has an OatQuickMethodHeader prefix.
+.macro OAT_ENTRY name, end
+ .arm
+ .type \name, #function
+ .hidden \name
+ .global \name
+ .balign 16
+    // Padding of 8 bytes to get 16-byte alignment of the code entry.
+ .long 0
+ .long 0
+ // OatQuickMethodHeader.
+ .long 0
+ .long (\end - \name)
+\name:
+.endm
+
+.macro SIZE name
+ .size \name, .-\name
+.endm
+
+.macro NAME_START name
+ .arm
+ .type \name, #function
+ .hidden \name // Hide this as a global symbol, so we do not incur plt calls.
+ .global \name
+ /* Cache alignment for function entry */
+ .balign 16
+\name:
+.endm
+
+.macro NAME_END name
+ SIZE \name
+.endm
+
+// Macro for defining entrypoints into the runtime. We don't need to save registers
+// (we're not holding references there), but there is no kDontSave runtime method,
+// so just use the kSaveRefsOnly runtime method.
+.macro NTERP_TRAMPOLINE name, helper
+ENTRY \name
+ SETUP_SAVE_REFS_ONLY_FRAME ip
+ bl \helper
+ RESTORE_SAVE_REFS_ONLY_FRAME
+ REFRESH_MARKING_REGISTER
+ RETURN_OR_DELIVER_PENDING_EXCEPTION
+END \name
+.endm
+
+.macro CLEAR_STATIC_VOLATILE_MARKER reg
+ and \reg, \reg, #-2
+.endm
+
+.macro CLEAR_INSTANCE_VOLATILE_MARKER reg
+ rsb \reg, \reg, #0
+.endm
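+
+// The markers cleared above follow the convention used by the field slow paths
+// later in this file (see OP_SGET/OP_SPUT and OP_IGET/OP_IPUT):
+//   static:   nterp_get_static_field returns the ArtField* with bit 0 set when
+//             the field is volatile (tst #1, then clear bit 0);
+//   instance: nterp_get_instance_field_offset returns a negative offset when the
+//             field is volatile (bge test, then negate).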
+
+.macro EXPORT_PC
+ str rPC, [rREFS, #-8]
+.endm
+
+.macro BRANCH
+ // Update method counter and do a suspend check if the branch is negative.
+ cmp rINST, #0
+ blt 2f
+1:
+ add r2, rINST, rINST // r2<- byte offset
+    FETCH_ADVANCE_INST_RB r2 // update rPC, load rINST
+ GET_INST_OPCODE ip // extract opcode from rINST
+ GOTO_OPCODE ip // jump to next instruction
+2:
+ ldr r0, [sp]
+ ldrh r2, [r0, #ART_METHOD_HOTNESS_COUNT_OFFSET]
+ add r2, r2, #1
+ ubfx r2, r2, #0, #NTERP_HOTNESS_BITS
+ strh r2, [r0, #ART_METHOD_HOTNESS_COUNT_OFFSET]
+ // If the counter overflows, handle this in the runtime.
+ cmp r2, #0
+ beq NterpHandleHotnessOverflow
+ // Otherwise, do a suspend check.
+ ldr r0, [rSELF, #THREAD_FLAGS_OFFSET]
+ ands r0, r0, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
+ beq 1b
+ EXPORT_PC
+ bl art_quick_test_suspend
+ b 1b
+.endm
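+
+// Illustrative C-like sketch of BRANCH (field names are illustrative, not the
+// actual ART declarations):
+//   if (rINST < 0) {                                   // backward branch
+//     hotness = (method->hotness + 1) mod 2^NTERP_HOTNESS_BITS;
+//     method->hotness = hotness;
+//     if (hotness == 0) goto NterpHandleHotnessOverflow;
+//     if (self->flags & SUSPEND_OR_CHECKPOINT) art_quick_test_suspend();
+//   }
+//   rPC += rINST * 2; rINST = *rPC; dispatch(opcode(rINST));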
+
+// Expects:
+// - ip and lr to be available.
+// Outputs:
+// - \registers contains the dex registers size
+// - \outs contains the outs size
+// - if load_ins is 1, \ins contains the ins
+// - \code_item is replaced with a pointer to the instructions
+.macro FETCH_CODE_ITEM_INFO code_item, registers, outs, ins, load_ins
+ tst \code_item, #1
+ beq 5f
+    bic \code_item, \code_item, #1 // Remove the extra bit that marks it as a compact dex file
+ ldrh lr, [\code_item, #COMPACT_CODE_ITEM_FIELDS_OFFSET]
+ ubfx \registers, lr, #COMPACT_CODE_ITEM_REGISTERS_SIZE_SHIFT, #4
+ ubfx \outs, lr, #COMPACT_CODE_ITEM_OUTS_SIZE_SHIFT, #4
+ .if \load_ins
+ ubfx \ins, lr, #COMPACT_CODE_ITEM_INS_SIZE_SHIFT, #4
+ .else
+ ubfx ip, lr, #COMPACT_CODE_ITEM_INS_SIZE_SHIFT, #4
+ add \registers, \registers, ip
+ .endif
+
+ ldrh lr, [\code_item, #COMPACT_CODE_ITEM_FLAGS_OFFSET]
+ tst lr, #COMPACT_CODE_ITEM_REGISTERS_INS_OUTS_FLAGS
+ beq 4f
+ mov ip, \code_item
+ tst lr, #COMPACT_CODE_ITEM_INSNS_FLAG
+ beq 1f
+ sub ip, ip, #4
+1:
+ tst lr, #COMPACT_CODE_ITEM_REGISTERS_FLAG
+ beq 2f
+ ldrh lr, [ip, #-2]!
+ add \registers, \registers, lr
+ ldrh lr, [\code_item, #COMPACT_CODE_ITEM_FLAGS_OFFSET]
+2:
+ tst lr, #COMPACT_CODE_ITEM_INS_FLAG
+ beq 3f
+ ldrh lr, [ip, #-2]!
+ .if \load_ins
+ add \ins, \ins, lr
+ .else
+ add \registers, \registers, lr
+ .endif
+ ldrh lr, [\code_item, #COMPACT_CODE_ITEM_FLAGS_OFFSET]
+3:
+ tst lr, #COMPACT_CODE_ITEM_OUTS_FLAG
+ beq 4f
+ ldrh lr, [ip, #-2]!
+ add \outs, \outs, lr
+4:
+ .if \load_ins
+ add \registers, \registers, \ins
+ .endif
+ add \code_item, \code_item, #COMPACT_CODE_ITEM_INSNS_OFFSET
+ b 6f
+5:
+ // Fetch dex register size.
+ ldrh \registers, [\code_item, #CODE_ITEM_REGISTERS_SIZE_OFFSET]
+ // Fetch outs size.
+ ldrh \outs, [\code_item, #CODE_ITEM_OUTS_SIZE_OFFSET]
+ .if \load_ins
+ ldrh \ins, [\code_item, #CODE_ITEM_INS_SIZE_OFFSET]
+ .endif
+ add \code_item, \code_item, #CODE_ITEM_INSNS_OFFSET
+6:
+.endm
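+
+// Illustrative pseudocode of the decoding above (offset names as used by the
+// asm_support constants):
+//   if (code_item & 1) {                                  // CompactDex code item
+//     code_item &= ~1;
+//     read the packed 4-bit registers/ins/outs fields from the fields halfword,
+//     then add the optional preheader extensions selected by the flags halfword;
+//     insns = code_item + COMPACT_CODE_ITEM_INSNS_OFFSET;
+//   } else {                                              // StandardDex code item
+//     read the 16-bit registers/ins/outs halfwords;
+//     insns = code_item + CODE_ITEM_INSNS_OFFSET;
+//   }
+// In both cases the registers count ends up holding the full frame size
+// (locals + ins) and code_item is left pointing at the first code unit.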
+
+// Setup the stack to start executing the method. Expects:
+// - r0 to contain the ArtMethod
+// - \code_item to already contain the code item
+// - rINST, ip, lr to be available
+//
+// Outputs
+// - rINST contains the dex registers size
+// - ip contains the old stack pointer.
+// - \code_item is replaced with a pointer to the instructions
+// - if load_ins is 1, r4 contains the ins
+//
+.macro SETUP_STACK_FRAME code_item, refs, fp, cfi_refs, load_ins
+ FETCH_CODE_ITEM_INFO \code_item, rINST, \refs, r4, \load_ins
+
+ // Compute required frame size: ((2 * rINST) + \refs) * 4 + 12
+ // 12 is for saving the previous frame, pc, and method being executed.
+ add ip, \refs, rINST, lsl #1
+
+ // Compute new stack pointer in lr
+ sub lr, sp, #12
+ sub lr, lr, ip, lsl #2
+ // Alignment
+ and lr, lr, #-16
+
+ // Set reference and dex registers.
+ add \refs, lr, \refs, lsl #2
+ add \refs, \refs, #12
+ add \fp, \refs, rINST, lsl #2
+
+ // Now setup the stack pointer.
+ mov ip, sp
+ .cfi_def_cfa_register ip
+ mov sp, lr
+ str ip, [\refs, #-4]
+ CFI_DEF_CFA_BREG_PLUS_UCONST \cfi_refs, -4, CALLEE_SAVES_SIZE
+
+ // Save the ArtMethod, and use r0 as a temporary.
+ str r0, [sp]
+
+ // Put nulls in reference frame.
+ cmp rINST, #0
+ beq 2f
+ mov lr, \refs
+ mov r0, #0
+1:
+ str r0, [lr], #4
+ str r0, [lr], #4 // May clear vreg[0].
+ cmp lr, \fp
+ blo 1b
+2:
+ ldr r0, [sp] // Reload the ArtMethod, expected by the callers.
+.endm
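+
+// Approximate layout of the frame set up above, from low to high addresses
+// (illustrative; one slot = 32-bit word):
+//   sp        : ArtMethod* of the interpreted method
+//   sp + 4    : out arguments for callees (outs words)
+//   refs - 8  : cached dex PC, written by EXPORT_PC
+//   refs - 4  : caller's stack pointer (used to restore sp on return)
+//   refs      : reference array, one word per dex register (GC-visible)
+//   fp        : dex register values, one word per dex register
+//   ...       : alignment padding up to the caller's frame (callee saves)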
+
+// Increase the method hotness counter and do a suspend check before starting to execute the method.
+.macro START_EXECUTING_INSTRUCTIONS
+ ldr r0, [sp]
+ ldrh r2, [r0, #ART_METHOD_HOTNESS_COUNT_OFFSET]
+ add r2, r2, #1
+ ubfx r2, r2, #0, #NTERP_HOTNESS_BITS
+ strh r2, [r0, #ART_METHOD_HOTNESS_COUNT_OFFSET]
+ // If the counter overflows, handle this in the runtime.
+ cmp r2, #0
+ beq 2f
+ ldr r0, [rSELF, #THREAD_FLAGS_OFFSET]
+ tst r0, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
+ bne 3f
+1:
+ FETCH_INST
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+2:
+ mov r1, #0
+ mov r2, rFP
+ bl nterp_hot_method
+ b 1b
+3:
+ EXPORT_PC
+ bl art_quick_test_suspend
+ b 1b
+.endm
+
+.macro SPILL_ALL_CALLEE_SAVES
+ SPILL_ALL_CALLEE_SAVE_GPRS @ 9 words (36 bytes) of callee saves.
+ vpush {s16-s31} @ 16 words (64 bytes) of floats.
+ .cfi_adjust_cfa_offset 64
+.endm
+
+.macro RESTORE_ALL_CALLEE_SAVES lr_to_pc=0
+ vpop {s16-s31}
+ .cfi_adjust_cfa_offset -64
+ pop {r4-r7}
+ .cfi_adjust_cfa_offset -16
+ .cfi_restore r4
+ .cfi_restore r5
+ .cfi_restore r6
+ .cfi_restore r7
+    // Don't restore r8; the marking register gets updated when coming back from the runtime.
+ add sp, sp, #4
+ .cfi_adjust_cfa_offset -4
+ .if \lr_to_pc
+ pop {r9-r11, pc} @ 9 words of callee saves and args.
+ .else
+ pop {r9-r11, lr} @ 9 words of callee saves and args.
+ .cfi_adjust_cfa_offset -16
+ .cfi_restore r9
+ .cfi_restore r10
+ .cfi_restore r11
+ .cfi_restore lr
+ .endif
+.endm
+
+.macro SPILL_ALL_ARGUMENTS
+ // We spill r4 for stack alignment.
+ push {r0-r4}
+ .cfi_adjust_cfa_offset 20
+ .cfi_rel_offset r0, 0
+ .cfi_rel_offset r1, 4
+ .cfi_rel_offset r2, 8
+ .cfi_rel_offset r3, 12
+ .cfi_rel_offset r4, 16
+ vpush {s0-s15}
+ .cfi_adjust_cfa_offset 64
+.endm
+
+.macro RESTORE_ALL_ARGUMENTS
+ vpop {s0-s15}
+ .cfi_adjust_cfa_offset -64
+ pop {r0-r4}
+ .cfi_restore r0
+ .cfi_restore r1
+ .cfi_restore r2
+ .cfi_restore r3
+ .cfi_restore r4
+ .cfi_adjust_cfa_offset -20
+.endm
+
+// Helper to set up the stack after doing a nterp to nterp call. This will set up:
+// - rNEW_FP: the new pointer to dex registers
+// - rNEW_REFS: the new pointer to references
+// - rPC: the new PC pointer to execute
+// - r2: value in instruction to decode the number of arguments.
+// - r3: first dex register for range invokes, up to 4 arguments for non-range invokes.
+// - r4: top of dex register array
+//
+// The method expects:
+// - r0 to contain the ArtMethod
+// - r4 to contain the code item
+.macro SETUP_STACK_FOR_INVOKE
+ // We do the same stack overflow check as the compiler. See CanMethodUseNterp
+    // for how we limit the maximum nterp frame size.
+ sub ip, sp, #STACK_OVERFLOW_RESERVED_BYTES
+ ldr ip, [ip]
+
+ // Spill all callee saves to have a consistent stack frame whether we
+ // are called by compiled code or nterp.
+ SPILL_ALL_CALLEE_SAVES
+
+ // Setup the frame.
+ SETUP_STACK_FRAME r4, rNEW_REFS, rNEW_FP, CFI_NEW_REFS, load_ins=0
+
+ // Fetch instruction information before replacing rPC.
+ FETCH_B r2, 0, 1
+ FETCH r3, 2
+
+ // Set the dex pc pointer.
+ mov rPC, r4
+
+ // Make r4 point to the top of the dex register array.
+ add r4, rNEW_FP, rINST, lsl #2
+
+ CFI_DEFINE_DEX_PC_WITH_OFFSET(CFI_TMP, CFI_DEX, 0)
+.endm
+
+// Set up arguments for a non-range nterp to nterp call, and start executing
+// the method. We expect:
+// - rNEW_FP: the new pointer to dex registers
+// - rPC: the new PC pointer to execute
+// - r2: number of arguments (bits 4-7), 5th argument if any (bits 0-3)
+// - r3: up to four dex register arguments
+// - r4: top of dex register array
+// - r1: receiver if non-static.
+//
+// Uses r0 and rINST as temporaries.
+.macro SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0
+ // /* op vA, vB, {vC...vG} */
+ .if \is_static
+ asrs r0, r2, #4
+ beq 6f
+ .else
+ asr r0, r2, #4
+ .endif
+ mov rINST, #-4
+ cmp r0, #2
+ blt 1f
+ beq 2f
+ cmp r0, #4
+ blt 3f
+ beq 4f
+
+ // We use a decrementing rINST to store references relative
+ // to rNEW_FP and dex registers relative to r4
+ //
+ // TODO: We could set up rINST as the number of registers (this can be an additional output from
+ // SETUP_STACK_FOR_INVOKE) and then just decrement it by one before copying each arg.
+ // Maybe even introduce macros NEW_VREG_ADDRESS/NEW_VREG_REF_ADDRESS.
+5:
+ and r2, r2, #15
+ GET_VREG_OBJECT r0, r2
+ str r0, [rNEW_FP, rINST]
+ GET_VREG r0, r2
+ str r0, [r4, rINST]
+ sub rINST, rINST, #4
+4:
+ asr r2, r3, #12
+ GET_VREG_OBJECT r0, r2
+ str r0, [rNEW_FP, rINST]
+ GET_VREG r0, r2
+ str r0, [r4, rINST]
+ sub rINST, rINST, #4
+3:
+ ubfx r2, r3, #8, #4
+ GET_VREG_OBJECT r0, r2
+ str r0, [rNEW_FP, rINST]
+ GET_VREG r0, r2
+ str r0, [r4, rINST]
+ sub rINST, rINST, #4
+2:
+ ubfx r2, r3, #4, #4
+ GET_VREG_OBJECT r0, r2
+ str r0, [rNEW_FP, rINST]
+ GET_VREG r0, r2
+ str r0, [r4, rINST]
+ .if !\is_string_init
+ sub rINST, rINST, #4
+ .endif
+1:
+ .if \is_string_init
+ // Ignore the first argument
+ .elseif \is_static
+ and r2, r3, #0xf
+ GET_VREG_OBJECT r0, r2
+ str r0, [rNEW_FP, rINST]
+ GET_VREG r0, r2
+ str r0, [r4, rINST]
+ .else
+ str r1, [rNEW_FP, rINST]
+ str r1, [r4, rINST]
+ .endif
+
+6:
+ // Start executing the method.
+ mov rFP, rNEW_FP
+ mov rREFS, rNEW_REFS
+ CFI_DEF_CFA_BREG_PLUS_UCONST CFI_REFS, -4, CALLEE_SAVES_SIZE
+ // r8 was used for setting up the frame, restore it now.
+ REFRESH_MARKING_REGISTER
+ // Branch to the main handler, which will reload rIBASE,
+ // that was used for setting up the frame.
+ b .Lexecute_instructions
+.endm
+
+// Set up arguments for a range nterp to nterp call, and start executing
+// the method.
+// - rNEW_FP: the new pointer to dex registers
+// - rNEW_REFS: the new pointer to references
+// - rPC: the new PC pointer to execute
+// - r2: number of arguments
+// - r3: first dex register
+// - r4: top of dex register array
+// - r1: receiver if non-static.
+//
+// Expects r0 to be available.
+.macro SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0
+ mov r0, #-4
+ .if \is_string_init
+ // Ignore the first argument
+ sub r2, r2, #1
+ add r3, r3, #1
+ .elseif !\is_static
+ sub r2, r2, #1
+ add r3, r3, #1
+ .endif
+
+ cmp r2, #0
+ beq 2f
+ add rREFS, rREFS, r3, lsl #2 // pointer to first argument in reference array
+ add rREFS, rREFS, r2, lsl #2 // pointer to last argument in reference array
+ add rFP, rFP, r3, lsl #2 // pointer to first argument in register array
+ add rFP, rFP, r2, lsl #2 // pointer to last argument in register array
+1:
+ ldr r3, [rREFS, #-4]!
+ str r3, [rNEW_FP, r0]
+ subs r2, r2, 1
+ ldr r3, [rFP, #-4]!
+ str r3, [r4, r0]
+ sub r0, r0, 4
+ bne 1b
+2:
+ .if \is_string_init
+ // Ignore first argument
+ .elseif !\is_static
+ str r1, [rNEW_FP, r0]
+ str r1, [r4, r0]
+ .endif
+ mov rFP, rNEW_FP
+ mov rREFS, rNEW_REFS
+ CFI_DEF_CFA_BREG_PLUS_UCONST CFI_REFS, -4, CALLEE_SAVES_SIZE
+ // r8 was used for setting up the frame, restore it now.
+ REFRESH_MARKING_REGISTER
+ // Branch to the main handler, which will reload rIBASE,
+ // that was used for setting up the frame.
+ b .Lexecute_instructions
+.endm
+
+.macro GET_SHORTY dest, is_interface, is_polymorphic, is_custom
+ push {r0-r3}
+ .if \is_polymorphic
+ ldr r0, [sp, #16]
+ mov r1, rPC
+ bl NterpGetShortyFromInvokePolymorphic
+ .elseif \is_custom
+ ldr r0, [sp, #16]
+ mov r1, rPC
+ bl NterpGetShortyFromInvokeCustom
+ .elseif \is_interface
+ ldr r0, [sp, #16]
+ FETCH r1, 1
+ bl NterpGetShortyFromMethodId
+ .else
+ bl NterpGetShorty
+ .endif
+ mov \dest, r0
+ pop {r0-r3}
+.endm
+
+// Input: r0 contains the ArtMethod
+// Output: r4 contains the code item
+.macro GET_CODE_ITEM
+ ldr r4, [r0, #ART_METHOD_DATA_OFFSET_32]
+.endm
+
+.macro DO_ENTRY_POINT_CHECK call_compiled_code, name
+ // On entry, the method is r0, the instance is r1
+ ldr r2, .Lfetch_nterp_\name
+.Lfetch_location_\name:
+ // Note that this won't work for thumb.
+ sub r2, pc, r2
+ ldr r3, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
+ cmp r2, r3
+ bne \call_compiled_code
+.endm
+
+// Expects ip and lr to be available.
+.macro UPDATE_REGISTERS_FOR_STRING_INIT old_value, new_value
+ mov ip, #0
+1:
+ GET_VREG_OBJECT lr, ip
+ cmp lr, \old_value
+ bne 2f
+ SET_VREG_OBJECT \new_value, ip
+2:
+ add ip, ip, #1
+ add lr, rREFS, ip, lsl #2
+ cmp lr, rFP
+ bne 1b
+.endm
+
+// Puts the next floating point argument into the expected register,
+// fetching values based on a non-range invoke.
+// Uses ip and lr.
+.macro LOOP_OVER_SHORTY_LOADING_FPS dreg, sreg, inst, shorty, arg_index, finished, if_double
+1: // LOOP
+ ldrb ip, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp ip, #0
+ beq \finished // if (ip == '\0') goto finished
+ cmp ip, #68 // if (ip == 'D') goto FOUND_DOUBLE
+ beq 2f
+ cmp ip, #70 // if (ip == 'F') goto FOUND_FLOAT
+ beq 3f
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ // Handle extra argument in arg array taken by a long.
+ cmp ip, #74 // if (ip != 'J') goto LOOP
+ bne 1b
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ b 1b // goto LOOP
+2: // FOUND_DOUBLE
+ and ip, \inst, #0xf
+ GET_VREG ip, ip
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ cmp \arg_index, #4
+ beq 5f
+ and lr, \inst, #0xf
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ b 6f
+5:
+ FETCH_B lr, 0, 1
+ and lr, lr, #0xf
+6:
+ GET_VREG lr, lr
+ vmov \dreg, ip, lr
+ b \if_double
+3: // FOUND_FLOAT
+ cmp \arg_index, #4
+ beq 7f
+ and ip, \inst, #0xf
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ b 8f
+7:
+ FETCH_B ip, 0, 1
+ and ip, ip, #0xf
+8:
+ GET_VREG_FLOAT \sreg, ip
+.endm
+
+// Puts the next int/long/object argument in the expected register,
+// fetching values based on a non-range invoke.
+// Uses ip.
+.macro LOOP_OVER_SHORTY_LOADING_GPRS gpr_reg, inst, shorty, arg_index, finished, if_long, is_r3
+1: // LOOP
+ ldrb ip, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp ip, #0
+ beq \finished // if (ip == '\0') goto finished
+ cmp ip, #74 // if (ip == 'J') goto FOUND_LONG
+ beq 2f
+ cmp ip, #70 // if (ip == 'F') goto SKIP_FLOAT
+ beq 3f
+ cmp ip, #68 // if (ip == 'D') goto SKIP_DOUBLE
+ beq 4f
+ cmp \arg_index, #4
+ beq 7f
+ and ip, \inst, #0xf
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ b 8f
+7:
+ FETCH_B ip, 0, 1
+ and ip, ip, #0xf
+8:
+ GET_VREG \gpr_reg, ip
+ b 5f
+2: // FOUND_LONG
+ .if \is_r3
+ // Put back shorty and exit
+ sub \shorty, \shorty, #1
+ b 5f
+ .endif
+ and ip, \inst, #0xf
+ GET_VREG ip, ip
+    // The only register pair possible for a non-range long is r2-r3
+ mov r2, ip
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ cmp \arg_index, #4
+ beq 9f
+ and ip, \inst, #0xf
+ lsr \inst, \inst, #4
+ b 10f
+9:
+ FETCH_B ip, 0, 1
+ and ip, ip, #0xf
+10:
+ GET_VREG ip, ip
+    // The only register pair possible for a non-range long is r2-r3
+ mov r3, ip
+ add \arg_index, \arg_index, #1
+ b \if_long
+3: // SKIP_FLOAT
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ b 1b
+4: // SKIP_DOUBLE
+ lsr \inst, \inst, #8
+ add \arg_index, \arg_index, #2
+ b 1b
+5:
+.endm
+
+// Puts the next int/long/object argument in the expected stack slot,
+// fetching values based on a non-range invoke.
+// Uses ip as temporary.
+.macro LOOP_OVER_SHORTY_LOADING_INTs shorty, inst, arg_index, finished, is_string_init
+1: // LOOP
+ ldrb ip, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp ip, #0
+ beq \finished // if (ip == '\0') goto finished
+ cmp ip, #74 // if (ip == 'J') goto FOUND_LONG
+ beq 2f
+ cmp ip, #70 // if (ip == 'F') goto SKIP_FLOAT
+ beq 3f
+ cmp ip, #68 // if (ip == 'D') goto SKIP_DOUBLE
+ beq 4f
+ .if \is_string_init
+ cmp \arg_index, #4
+ .else
+ cmp \arg_index, #(4+1) // +1 for ArtMethod
+ .endif
+ beq 7f
+ and ip, \inst, #0xf
+ lsr \inst, \inst, #4
+ b 8f
+7:
+ FETCH_B ip, 0, 1
+ and ip, ip, #0xf
+8:
+ GET_VREG ip, ip
+ str ip, [sp, \arg_index, lsl #2]
+ add \arg_index, \arg_index, #1
+ b 1b
+2: // FOUND_LONG
+ and ip, \inst, #0xf
+ GET_VREG ip, ip
+ str ip, [sp, \arg_index, lsl #2]
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ .if \is_string_init
+ cmp \arg_index, #4
+ .else
+ cmp \arg_index, #(4+1) // +1 for ArtMethod
+ .endif
+ beq 9f
+ and ip, \inst, #0xf
+ lsr \inst, \inst, #4
+ b 10f
+9:
+ FETCH_B ip, 0, 1
+ and ip, ip, #0xf
+10:
+ GET_VREG ip, ip
+ str ip, [sp, \arg_index, lsl #2]
+ add \arg_index, \arg_index, #1
+ b 1b
+3: // SKIP_FLOAT
+ lsr \inst, \inst, #4
+ add \arg_index, \arg_index, #1
+ b 1b
+4: // SKIP_DOUBLE
+ lsr \inst, \inst, #8
+ add \arg_index, \arg_index, #2
+ b 1b
+.endm
+
+.macro COMMON_INVOKE_NON_RANGE is_static=0, is_interface=0, suffix="", is_string_init=0, is_polymorphic=0, is_custom=0
+ .if \is_polymorphic
+ // We always go to compiled code for polymorphic calls.
+ .elseif \is_custom
+ // We always go to compiled code for custom calls.
+ .else
+ DO_ENTRY_POINT_CHECK .Lcall_compiled_code_\suffix, \suffix
+ GET_CODE_ITEM
+ .if \is_string_init
+ bl nterp_to_nterp_string_init_non_range
+ .elseif \is_static
+ bl nterp_to_nterp_static_non_range
+ .else
+ bl nterp_to_nterp_instance_non_range
+ .endif
+ b .Ldone_return_\suffix
+.Lfetch_nterp_\suffix:
+ .word (.Lfetch_location_\suffix+8) - ExecuteNterpImpl
+ .endif
+
+.Lcall_compiled_code_\suffix:
+ GET_SHORTY rINST, \is_interface, \is_polymorphic, \is_custom
+ // From this point:
+ // - rINST contains shorty (in callee-save to switch over return value after call).
+ // - r0 contains method
+ // - r1 contains 'this' pointer for instance method.
+ // We need three registers.
+ add r3, rINST, #1 // shorty + 1 ; ie skip return arg character
+ FETCH r2, 2 // arguments
+ .if \is_string_init
+ lsr r2, r2, #4
+ mov r4, #1 // ignore first argument
+ .elseif \is_static
+ mov r4, #0 // arg_index
+ .else
+ lsr r2, r2, #4
+ mov r4, #1 // ignore first argument
+ .endif
+ LOOP_OVER_SHORTY_LOADING_FPS d0, s0, r2, r3, r4, .Lxmm_setup_finished_\suffix, .Ld1_s2_\suffix
+.Ld1_s1_\suffix:
+ LOOP_OVER_SHORTY_LOADING_FPS d1, s1, r2, r3, r4, .Lxmm_setup_finished_\suffix, .Ld2_s1_\suffix
+.Ld1_s2_\suffix:
+ LOOP_OVER_SHORTY_LOADING_FPS d1, s2, r2, r3, r4, .Lxmm_setup_finished_\suffix, .Ls4_\suffix
+.Ld2_s3_\suffix:
+ LOOP_OVER_SHORTY_LOADING_FPS d2, s3, r2, r3, r4, .Lxmm_setup_finished_\suffix, .Lxmm_setup_finished_\suffix
+ b .Ls4_\suffix
+.Ld2_s1_\suffix:
+ LOOP_OVER_SHORTY_LOADING_FPS d2, s1, r2, r3, r4, .Lxmm_setup_finished_\suffix, .Lxmm_setup_finished_\suffix
+.Ls4_\suffix:
+ // If we arrive here, we can only have a float.
+ LOOP_OVER_SHORTY_LOADING_FPS d2, s4, r2, r3, r4, .Lxmm_setup_finished_\suffix, .Lxmm_setup_finished_\suffix
+.Lxmm_setup_finished_\suffix:
+ add r4, rINST, #1 // shorty + 1 ; ie skip return arg character
+ FETCH r8, 2 // arguments
+ .if \is_string_init
+ lsr r8, r8, #4
+ mov lr, #1 // ignore first argument
+ LOOP_OVER_SHORTY_LOADING_GPRS r1, r8, r4, lr, .Lgpr_setup_finished_\suffix, .Lif_long_\suffix, is_r3=0
+ .elseif \is_static
+ mov lr, #0 // arg_index
+ LOOP_OVER_SHORTY_LOADING_GPRS r1, r8, r4, lr, .Lgpr_setup_finished_\suffix, .Lif_long_\suffix, is_r3=0
+ .else
+ lsr r8, r8, #4
+ mov lr, #1 // ignore first argument
+ .endif
+ LOOP_OVER_SHORTY_LOADING_GPRS r2, r8, r4, lr, .Lgpr_setup_finished_\suffix, .Lif_long_\suffix, is_r3=0
+ LOOP_OVER_SHORTY_LOADING_GPRS r3, r8, r4, lr, .Lgpr_setup_finished_\suffix, .Lif_long_\suffix, is_r3=1
+.Lif_long_\suffix:
+ // Store in the outs array (stored above the ArtMethod in the stack). We only do this for non-string-init
+ // calls as the index is already adjusted above.
+ .if !\is_string_init
+ add lr, lr, #1
+ .endif
+ LOOP_OVER_SHORTY_LOADING_INTs r4, r8, lr, .Lgpr_setup_finished_\suffix, \is_string_init
+.Lgpr_setup_finished_\suffix:
+ REFRESH_MARKING_REGISTER // r8 was used when setting parameters, restore it.
+ .if \is_polymorphic
+ bl art_quick_invoke_polymorphic
+ .elseif \is_custom
+ bl art_quick_invoke_custom
+ .else
+ .if \is_interface
+ // Setup hidden argument. As we don't have access to the interface method,
+ // just pass the method from the IMT. If the method is the conflict trampoline,
+ // this will make the stub go to runtime, otherwise the hidden argument is unused.
+ mov ip, r0
+ .endif
+ ldr lr, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
+ blx lr
+ .endif
+ ldrb ip, [rINST]
+ cmp ip, #68 // Test if result type char == 'D'.
+ beq .Lreturn_double_\suffix
+ cmp ip, #70
+ bne .Ldone_return_\suffix
+.Lreturn_float_\suffix:
+ vmov r0, s0
+ b .Ldone_return_\suffix
+.Lreturn_double_\suffix:
+ vmov r0, r1, d0
+.Ldone_return_\suffix:
+ /* resume execution of caller */
+ .if \is_string_init
+ FETCH ip, 2 // arguments
+ and ip, ip, #0xf
+ GET_VREG r1, ip
+ UPDATE_REGISTERS_FOR_STRING_INIT r1, r0
+ .endif
+
+ .if \is_polymorphic
+ FETCH_ADVANCE_INST 4
+ .else
+ FETCH_ADVANCE_INST 3
+ .endif
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+.endm
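+
+// Worked example (illustrative) of the argument setup above: for an instance
+// method with shorty "VDI" (void return, double, int), the receiver stays in r1,
+// the FP loops put the double in d0, and the GPR loops put the int in r2. Longs
+// always take the r2/r3 pair, and any remaining core arguments are copied to the
+// outs area just above the ArtMethod* slot at the bottom of the stack.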
+
+// Puts the next int/long/object argument in the expected register,
+// fetching values based on a range invoke.
+// Uses ip as temporary.
+.macro LOOP_RANGE_OVER_SHORTY_LOADING_GPRS reg32, shorty, arg_index, stack_index, finished, if_long, is_r3
+1: // LOOP
+ ldrb ip, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp ip, #0
+ beq \finished // if (ip == '\0') goto finished
+ cmp ip, #74 // if (ip == 'J') goto FOUND_LONG
+ beq 2f
+ cmp ip, #70 // if (ip == 'F') goto SKIP_FLOAT
+ beq 3f
+ cmp ip, #68 // if (ip == 'D') goto SKIP_DOUBLE
+ beq 4f
+ GET_VREG \reg32, \arg_index
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ b 5f
+2: // FOUND_LONG
+ .if \is_r3
+ // Put back shorty and jump to \if_long
+ sub \shorty, \shorty, #1
+ .else
+ GET_VREG r2, \arg_index
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ GET_VREG r3, \arg_index
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ .endif
+ b \if_long
+3: // SKIP_FLOAT
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ b 1b
+4: // SKIP_DOUBLE
+ add \arg_index, \arg_index, #2
+ add \stack_index, \stack_index, #2
+ b 1b
+5:
+.endm
+
+// Puts the next int/long/object argument in the expected stack slot,
+// fetching values based on a range invoke.
+// Uses ip as temporary.
+.macro LOOP_RANGE_OVER_INTs shorty, arg_index, stack_index, finished
+1: // LOOP
+ ldrb ip, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp ip, #0
+ beq \finished // if (ip == '\0') goto finished
+ cmp ip, #74 // if (ip == 'J') goto FOUND_LONG
+ beq 2f
+ cmp ip, #70 // if (ip == 'F') goto SKIP_FLOAT
+ beq 3f
+ cmp ip, #68 // if (ip == 'D') goto SKIP_DOUBLE
+ beq 4f
+ GET_VREG ip, \arg_index
+ str ip, [sp, \stack_index, lsl #2]
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ b 1b
+2: // FOUND_LONG
+ GET_VREG ip, \arg_index
+ str ip, [sp, \stack_index, lsl #2]
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ GET_VREG ip, \arg_index
+ str ip, [sp, \stack_index, lsl #2]
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ b 1b
+3: // SKIP_FLOAT
+ add \arg_index, \arg_index, #1
+ add \stack_index, \stack_index, #1
+ b 1b
+4: // SKIP_DOUBLE
+ add \arg_index, \arg_index, #2
+ add \stack_index, \stack_index, #2
+ b 1b
+.endm
+
+.macro COMMON_INVOKE_RANGE is_static=0, is_interface=0, suffix="", is_string_init=0, is_polymorphic=0, is_custom=0
+ .if \is_polymorphic
+ // We always go to compiled code for polymorphic calls.
+ .elseif \is_custom
+ // We always go to compiled code for custom calls.
+ .else
+ DO_ENTRY_POINT_CHECK .Lcall_compiled_code_range_\suffix, range_\suffix
+ GET_CODE_ITEM
+ .if \is_string_init
+ bl nterp_to_nterp_string_init_range
+ .elseif \is_static
+ bl nterp_to_nterp_static_range
+ .else
+ bl nterp_to_nterp_instance_range
+ .endif
+ b .Ldone_return_range_\suffix
+.Lfetch_nterp_range_\suffix:
+ .word (.Lfetch_location_range_\suffix+8) - ExecuteNterpImpl
+ .endif
+
+.Lcall_compiled_code_range_\suffix:
+ GET_SHORTY rINST, \is_interface, \is_polymorphic, \is_custom
+ // From this point:
+ // - rINST contains shorty (in callee-save to switch over return value after call).
+ // - r0 contains method
+ // - r1 contains 'this' pointer for instance method.
+ //
+ // Save r0 and r1 before calling NterpSetupArm32Fprs.
+ push {r0, r1}
+ add r0, rINST, #1 // shorty + 1 ; ie skip return arg character
+ FETCH r1, 2 // arguments
+ .if \is_string_init
+ add r1, r1, #1 // arg start index
+ mov r2, #1 // index in stack
+ .elseif \is_static
+ mov r2, #0 // index in stack
+ .else
+ add r1, r1, #1 // arg start index
+ mov r2, #1 // index in stack
+ .endif
+ vpush {s0-s15}
+ mov r3, sp
+ // Pass the stack address for arguments, +16 for fprs, +2 for saved registers,
+ // +1 for ArtMethod.
+ add lr, sp, #((16 + 2 + 1) * 4)
+ push {rFP, lr}
+ bl NterpSetupArm32Fprs
+ add sp, sp, #8
+ vpop {s0-s15}
+ pop {r0, r1}
+.Lxmm_setup_finished_range_\suffix:
+ add r8, rINST, #1 // shorty + 1 ; ie skip return arg character
+ FETCH lr, 2 // arguments
+ .if \is_string_init
+ add lr, lr, #1 // arg start index
+ mov r4, #1 // index in stack
+ LOOP_RANGE_OVER_SHORTY_LOADING_GPRS r1, r8, lr, r4, .Lgpr_setup_finished_range_\suffix, .Lif_long_range_\suffix, is_r3=0
+ .elseif \is_static
+ mov r4, #0 // index in stack
+ LOOP_RANGE_OVER_SHORTY_LOADING_GPRS r1, r8, lr, r4, .Lgpr_setup_finished_range_\suffix, .Lif_long_range_\suffix, is_r3=0
+ .else
+ add lr, lr, #1 // arg start index
+ mov r4, #1 // index in stack
+ .endif
+ LOOP_RANGE_OVER_SHORTY_LOADING_GPRS r2, r8, lr, r4, .Lgpr_setup_finished_range_\suffix, .Lif_long_range_\suffix, is_r3=0
+ LOOP_RANGE_OVER_SHORTY_LOADING_GPRS r3, r8, lr, r4, .Lgpr_setup_finished_range_\suffix, .Lif_long_range_\suffix, is_r3=1
+.Lif_long_range_\suffix:
+ // Add 1 word for the ArtMethod stored before the outs.
+ add r4, r4, #1
+ LOOP_RANGE_OVER_INTs r8, lr, r4, .Lgpr_setup_finished_range_\suffix
+.Lgpr_setup_finished_range_\suffix:
+ REFRESH_MARKING_REGISTER // r8 was used when setting parameters, restore it.
+ .if \is_polymorphic
+ bl art_quick_invoke_polymorphic
+ .elseif \is_custom
+ bl art_quick_invoke_custom
+ .else
+ .if \is_interface
+ // Setup hidden argument. As we don't have access to the interface method,
+ // just pass the method from the IMT. If the method is the conflict trampoline,
+ // this will make the stub go to runtime, otherwise the hidden argument is unused.
+ mov ip, r0
+ .endif
+ ldr lr, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
+ blx lr
+ .endif
+ ldrb ip, [rINST]
+ cmp ip, #68 // Test if result type char == 'D'.
+ beq .Lreturn_double_range_\suffix
+ cmp ip, #70
+ bne .Ldone_return_range_\suffix
+.Lreturn_float_range_\suffix:
+ vmov r0, s0
+ b .Ldone_return_range_\suffix
+.Lreturn_double_range_\suffix:
+ vmov r0, r1, d0
+.Ldone_return_range_\suffix:
+ /* resume execution of caller */
+ .if \is_string_init
+ FETCH ip, 2 // arguments
+ GET_VREG r1, ip
+ UPDATE_REGISTERS_FOR_STRING_INIT r1, r0
+ .endif
+
+ .if \is_polymorphic
+ FETCH_ADVANCE_INST 4
+ .else
+ FETCH_ADVANCE_INST 3
+ .endif
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+.endm
+
+// Fetch some information from the thread cache.
+// Uses ip and lr as temporaries.
+.macro FETCH_FROM_THREAD_CACHE dest_reg, slow_path
+ add ip, rSELF, #THREAD_INTERPRETER_CACHE_OFFSET // cache address
+ ubfx lr, rPC, #2, #THREAD_INTERPRETER_CACHE_SIZE_LOG2 // entry index
+ add ip, ip, lr, lsl #3 // entry address within the cache
+ ldr \dest_reg, [ip, #4] // value (offset)
+ ldr ip, [ip] // entry key (pc)
+ cmp ip, rPC
+ bne \slow_path
+.endm
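+
+// Illustrative sketch of the lookup above: the per-thread interpreter cache is an
+// array of {dex_pc, value} pairs indexed by bits [2, 2 + log2(size)) of the dex PC:
+//   entry = &self->interpreter_cache[(rPC >> 2) & (size - 1)];
+//   if (entry->dex_pc == rPC) dest = entry->value; else goto slow_path;
+// The cached value is an ArtField*, a field offset, an ArtMethod* or a vtable
+// index, depending on which opcode's fast path did the lookup.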
+
+// Helper for static field get.
+.macro OP_SGET load="ldr", wide="0"
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 4f
+1:
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 3f
+2:
+    lsr r2, rINST, #8 // r2 <- A
+ .if \wide
+ add r0, r0, r1
+ ldrd r0, r1, [r0]
+ CLEAR_SHADOW_PAIR r2, ip, lr
+ VREG_INDEX_TO_ADDR r2, r2
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 // fp[A] <- value
+ .else
+ \load r0, [r0, r1]
+ SET_VREG r0, r2 // fp[A] <- value
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ bl art_quick_read_barrier_mark_reg00
+ b 2b
+4:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_static_field
+ tst r0, #1
+ beq 1b
+ CLEAR_STATIC_VOLATILE_MARKER r0
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 7f
+5:
+    lsr r2, rINST, #8 // r2 <- A
+ .if \wide
+ dmb ish
+ add ip, r0, r1
+6:
+ ldrexd r0, r1, [ip]
+ strexd r3, r0, r1, [ip]
+ cmp r3, #0
+ bne 6b
+ dmb ish
+ CLEAR_SHADOW_PAIR r2, ip, lr
+ VREG_INDEX_TO_ADDR r2, r2
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 // fp[A] <- value
+ .else
+ dmb ish
+ \load r3, [r0, r1]
+ dmb ish
+ SET_VREG r3, r2 // fp[A] <- value
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+7:
+ bl art_quick_read_barrier_mark_reg00
+ b 5b
+.endm
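+
+// Note on the volatile paths above: a 64-bit volatile access on ARMv7 uses an
+// ldrexd/strexd loop so the 64-bit value is read or written single-copy
+// atomically, and the dmb barriers provide the acquire/release ordering required
+// for volatile accesses.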
+
+// Helper for static field put.
+.macro OP_SPUT store="str", wide="0"
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 4f
+1:
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 3f
+2:
+    lsr r2, rINST, #8 // r2 <- A
+ .if \wide
+ VREG_INDEX_TO_ADDR r2, r2
+    GET_VREG_WIDE_BY_ADDR r2, r3, r2 // r2, r3 <- fp[A]
+ add r0, r0, r1
+ strd r2, r3, [r0]
+ .else
+    GET_VREG r2, r2 // r2 <- v[A]
+ \store r2, [r0, r1]
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ bl art_quick_read_barrier_mark_reg00
+ b 2b
+4:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_static_field
+ tst r0, #1
+ beq 1b
+ CLEAR_STATIC_VOLATILE_MARKER r0
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 6f
+5:
+ lsr r2, rINST, #8 // r2 <- A
+ .if \wide
+ VREG_INDEX_TO_ADDR r2, r2
+ GET_VREG_WIDE_BY_ADDR r2, r3, r2
+ add ip, r0, r1
+ dmb ish
+7:
+ ldrexd r0, r1, [ip]
+ strexd r0, r2, r3, [ip]
+ cmp r0, #0
+ bne 7b
+ dmb ish
+ .else
+ GET_VREG r2, r2 // r2 <- v[A]
+ dmb ish
+ \store r2, [r0, r1]
+ dmb ish
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+6:
+ bl art_quick_read_barrier_mark_reg00
+ b 5b
+.endm
+
+
+// Helper for instance field put.
+.macro OP_IPUT store="str", wide="0":
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+1:
+ ubfx r1, rINST, #8, #4 // r1<- A
+ lsr r4, rINST, #12 // r2<- B
+ GET_VREG r4, r4 // vB (object we're operating on)
+ cmp r4, #0
+ beq common_errNullObject
+ .if \wide
+ VREG_INDEX_TO_ADDR r1, r1
+    GET_VREG_WIDE_BY_ADDR r2, r3, r1 // r2, r3 <- fp[A]
+ add r4, r4, r0
+ strd r2, r3, [r4]
+ .else
+ GET_VREG r1, r1 // r1 <- v[A]
+ \store r1, [r4, r0]
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_instance_field_offset
+ cmp r0, #0
+ bge 1b
+ CLEAR_INSTANCE_VOLATILE_MARKER r0
+ ubfx r1, rINST, #8, #4 // r1<- A
+ lsr r4, rINST, #12 // r2<- B
+ GET_VREG r4, r4 // vB (object we're operating on)
+ cmp r4, #0
+ beq common_errNullObject
+ .if \wide
+ VREG_INDEX_TO_ADDR r1, r1
+ GET_VREG_WIDE_BY_ADDR r2, r3, r1
+ add ip, r4, r0
+ dmb ish
+3:
+ ldrexd r0, r1, [ip]
+ strexd r0, r2, r3, [ip]
+ cmp r0, #0
+ bne 3b
+ dmb ish
+ .else
+ GET_VREG r1, r1 // r1 <- v[A]
+ dmb ish
+ \store r1, [r4, r0]
+ dmb ish
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+.endm
+
+// Helper for instance field get.
+.macro OP_IGET load="ldr", wide="0"
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+1:
+    lsr r2, rINST, #12 // r2<- B
+    GET_VREG r3, r2 // r3<- object we're operating on
+    ubfx r2, rINST, #8, #4 // r2<- A
+ cmp r3, #0
+ beq common_errNullObject // object was null
+ .if \wide
+ add r3, r3, r0
+ ldrd r0, r1, [r3]
+ CLEAR_SHADOW_PAIR r2, ip, lr
+ VREG_INDEX_TO_ADDR r2, r2
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 // fp[A] <- value
+ .else
+ \load r0, [r3, r0]
+ SET_VREG r0, r2 // fp[A] <- value
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_instance_field_offset
+ cmp r0, #0
+ bge 1b
+ CLEAR_INSTANCE_VOLATILE_MARKER r0
+ lsr r2, rINST, #12 // r2<- B
+ GET_VREG r3, r2 // r3<- object we're operating on
+ ubfx r2, rINST, #8, #4 // r2<- A
+ cmp r3, #0
+ beq common_errNullObject // object was null
+ .if \wide
+ dmb ish
+ add ip, r3, r0
+3:
+ ldrexd r0, r1, [ip]
+ strexd r3, r0, r1, [ip]
+ cmp r3, #0
+ bne 3b
+ dmb ish
+ CLEAR_SHADOW_PAIR r2, ip, lr
+ VREG_INDEX_TO_ADDR r2, r2
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 // fp[A] <- value
+ dmb ish
+ .else
+ dmb ish
+ \load r0, [r3, r0]
+ dmb ish
+ SET_VREG r0, r2 // fp[A] <- value
+ .endif
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+.endm
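
All of these field helpers start with FETCH_FROM_THREAD_CACHE, which consults a small per-thread cache keyed on the dex PC before falling back to the nterp_get_* runtime calls (which in turn call UpdateCache). A hypothetical sketch of that lookup, assuming a direct-mapped array of (key, value) pairs; the actual capacity and hash are runtime implementation details:

#include <cstddef>
#include <cstdint>
#include <utility>

// Hypothetical direct-mapped, per-thread cache: the dex PC pointer hashes to
// a slot; a hit yields the cached field/method/class value, a miss falls
// through to the nterp_get_* trampolines seen in the slow paths above.
struct InterpreterCacheSketch {
  static constexpr size_t kSize = 256;  // assumed power-of-two capacity
  std::pair<const void*, size_t> entries[kSize] = {};

  bool Get(const void* dex_pc_ptr, size_t* value) const {
    const auto& entry = entries[Hash(dex_pc_ptr)];
    if (entry.first == dex_pc_ptr) {
      *value = entry.second;
      return true;
    }
    return false;  // slow path: call into the runtime, which updates the cache
  }

  void Set(const void* dex_pc_ptr, size_t value) {
    entries[Hash(dex_pc_ptr)] = {dex_pc_ptr, value};
  }

 private:
  static size_t Hash(const void* dex_pc_ptr) {
    return (reinterpret_cast<uintptr_t>(dex_pc_ptr) >> 2) & (kSize - 1);
  }
};
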
+
+// Puts the next int/long/object parameter passed in a physical register
+// into the expected dex register array entry, and, for objects, into the
+// expected reference array entry.
+// Uses ip as temporary.
+.macro LOOP_OVER_SHORTY_STORING_GPRS gpr_32, shorty, arg_offset, regs, refs, finished, if_long, is_r3
+1: // LOOP
+ ldrb ip, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp ip, #0
+ beq \finished // if (ip == '\0') goto finished
+ cmp ip, #74 // if (ip == 'J') goto FOUND_LONG
+ beq 2f
+ cmp ip, #70 // if (ip == 'F') goto SKIP_FLOAT
+ beq 3f
+ cmp ip, #68 // if (ip == 'D') goto SKIP_DOUBLE
+ beq 4f
+ str \gpr_32, [\regs, \arg_offset]
+ cmp ip, #76 // if (ip != 'L') goto NOT_REFERENCE
+ bne 6f
+ str \gpr_32, [\refs, \arg_offset]
+6: // NOT_REFERENCE
+ add \arg_offset, \arg_offset, #4
+ b 5f
+2: // FOUND_LONG
+ .if \is_r3
+ // Put back shorty and jump to \if_long
+ sub \shorty, \shorty, #1
+ .else
+ // A long can only be in r2, r3
+ str r2, [\regs, \arg_offset]
+ add \arg_offset, \arg_offset, #4
+ str r3, [\regs, \arg_offset]
+ add \arg_offset, \arg_offset, #4
+ .endif
+ b \if_long
+3: // SKIP_FLOAT
+ add \arg_offset, \arg_offset, #4
+ b 1b
+4: // SKIP_DOUBLE
+ add \arg_offset, \arg_offset, #8
+ b 1b
+5:
+.endm
+
+// Puts the next int/long/object parameter passed on the stack
+// into the expected dex register array entry, and, for objects, into the
+// expected reference array entry.
+.macro LOOP_OVER_INTs shorty, arg_offset, regs, refs, stack_ptr, tmp1, tmp2, finished
+1: // LOOP
+ ldrb \tmp1, [\shorty], #1 // Load next character in shorty, and increment.
+ cmp \tmp1, #0
+ beq \finished // if (\tmp1 == '\0') goto finished
+ cmp \tmp1, #74 // if (\tmp1 == 'J') goto FOUND_LONG
+ beq 2f
+ cmp \tmp1, #70 // if (\tmp1 == 'F') goto SKIP_FLOAT
+ beq 3f
+ cmp \tmp1, #68 // if (\tmp1 == 'D') goto SKIP_DOUBLE
+ beq 4f
+ add \tmp2, \stack_ptr, \arg_offset
+ ldr \tmp2, [\tmp2, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK]
+ str \tmp2, [\regs, \arg_offset]
+ cmp \tmp1, #76 // if (\tmp1 != 'L') skip the reference store
+ bne 3f
+ str \tmp2, [\refs, \arg_offset]
+ add \arg_offset, \arg_offset, #4
+ b 1b
+2: // FOUND_LONG
+ add \tmp1, \stack_ptr, \arg_offset
+ ldr \tmp1, [\tmp1, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK]
+ str \tmp1, [\regs, \arg_offset]
+ add \arg_offset, \arg_offset, #4
+ add \tmp1, \stack_ptr, \arg_offset
+ ldr \tmp1, [\tmp1, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK]
+ str \tmp1, [\regs, \arg_offset]
+ add \arg_offset, \arg_offset, #4
+ b 1b
+3: // SKIP_FLOAT
+ add \arg_offset, \arg_offset, #4
+ b 1b
+4: // SKIP_DOUBLE
+ add \arg_offset, \arg_offset, #8
+ b 1b
+.endm
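
Taken together, the two macros above implement one walk over the shorty: core registers first, then the caller's stack area, with floats and doubles only advancing the slot offset because their values are copied separately by NterpStoreArm32Fprs. A hedged C++ rendering of that walk (parameter names and the register count are illustrative, not the managed-ABI definition):

#include <cstddef>
#include <cstdint>

// Sketch of the shorty walk: gprs[] stands for the remaining core argument
// registers, stack_args for the caller's outgoing area, indexed with the same
// 32-bit slot offset as the vreg array, as in the macros above.
void PlaceCoreArguments(const char* shorty,  // shorty without the return type
                        const uint32_t* gprs, size_t num_gprs,
                        const uint32_t* stack_args,
                        uint32_t* vregs, uint32_t* refs) {
  size_t gpr = 0;
  size_t offset = 0;      // 32-bit slot index, shared by vregs/refs/stack_args
  bool on_stack = false;  // once we fall back to the stack, we stay there
  for (const char* p = shorty; *p != '\0'; ++p) {
    switch (*p) {
      case 'F': offset += 1; break;  // copied via the VFP registers instead
      case 'D': offset += 2; break;
      case 'J':
        if (!on_stack && gpr + 2 > num_gprs) on_stack = true;  // needs a full pair
        for (int half = 0; half < 2; ++half, ++offset) {
          vregs[offset] = on_stack ? stack_args[offset] : gprs[gpr++];
        }
        break;
      default:  // 'I', 'Z', 'B', 'S', 'C' or reference 'L'
        if (!on_stack && gpr == num_gprs) on_stack = true;
        vregs[offset] = on_stack ? stack_args[offset] : gprs[gpr++];
        if (*p == 'L') refs[offset] = vregs[offset];  // mirror references
        offset += 1;
        break;
    }
  }
}
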
+
+%def entry():
+/*
+ * ArtMethod entry point.
+ *
+ * On entry:
+ * r0 ArtMethod* callee
+ * rest method parameters
+ */
+
+OAT_ENTRY ExecuteNterpImpl, EndExecuteNterpImpl
+ .cfi_startproc
+ sub ip, sp, #STACK_OVERFLOW_RESERVED_BYTES
+ ldr ip, [ip]
+ /* Spill callee save regs */
+ SPILL_ALL_CALLEE_SAVES
+
+ // TODO: Get shorty in a better way and remove below
+ SPILL_ALL_ARGUMENTS
+
+ bl NterpGetShorty
+ // Save shorty in callee-save rIBASE.
+ mov rIBASE, r0
+
+ RESTORE_ALL_ARGUMENTS
+
+ ldr rPC, [r0, #ART_METHOD_DATA_OFFSET_32]
+
+ // Setup the stack for executing the method.
+ SETUP_STACK_FRAME rPC, rREFS, rFP, CFI_REFS, load_ins=1
+
+ // Setup the parameters
+ cmp r4, #0
+ beq .Lxmm_setup_finished
+
+ sub r4, rINST, r4
+ lsl r4, r4, #2 // r4 is now the offset for inputs into the registers array.
+
+ mov lr, ip // lr contains the old stack pointer
+
+ ldr ip, [r0, #ART_METHOD_ACCESS_FLAGS_OFFSET]
+ // r0 is now available.
+ // Setup shorty, pointer to inputs in FP and pointer to inputs in REFS
+ add r0, rIBASE, #1 // shorty + 1 ; i.e. skip the return type character
+ add r7, rFP, r4
+ add r8, rREFS, r4
+ tst ip, #ART_METHOD_IS_STATIC_FLAG
+ bne .Lhandle_static_method
+ str r1, [r7], #4
+ str r1, [r8], #4
+ add lr, lr, #4
+ mov r4, #0
+ b .Lcontinue_setup_gprs
+.Lhandle_static_method:
+ mov r4, #0
+ LOOP_OVER_SHORTY_STORING_GPRS r1, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0
+.Lcontinue_setup_gprs:
+ LOOP_OVER_SHORTY_STORING_GPRS r2, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=0
+ LOOP_OVER_SHORTY_STORING_GPRS r3, r0, r4, r7, r8, .Lgpr_setup_finished, .Lif_long, is_r3=1
+.Lif_long:
+ LOOP_OVER_INTs r0, r4, r7, r8, lr, ip, r1, .Lgpr_setup_finished
+.Lgpr_setup_finished:
+ add r0, rIBASE, #1 // shorty + 1 ; i.e. skip the return type character
+ mov r1, r7
+ add r2, lr, #OFFSET_TO_FIRST_ARGUMENT_IN_STACK
+ vpush {s0-s15}
+ mov r3, sp
+ bl NterpStoreArm32Fprs
+ add sp, sp, #(16 * 4)
+.Lxmm_setup_finished:
+ CFI_DEFINE_DEX_PC_WITH_OFFSET(CFI_TMP, CFI_DEX, 0)
+ // r8 was used for setting up the frame, restore it now.
+ REFRESH_MARKING_REGISTER
+.Lexecute_instructions:
+ // Set rIBASE
+ adr rIBASE, artNterpAsmInstructionStart
+ /* start executing the instruction at rPC */
+ START_EXECUTING_INSTRUCTIONS
+ /* NOTE: no fallthrough */
+ // cfi info continues, and covers the whole nterp implementation.
+ SIZE ExecuteNterpImpl
+
+%def opcode_pre():
+
+%def helpers():
+
+%def footer():
+/*
+ * ===========================================================================
+ * Common subroutines and data
+ * ===========================================================================
+ */
+
+ .text
+ .align 2
+
+// Note: mterp also uses the common_* names below for helpers, but that's OK
+// as the assembler compiles each interpreter separately.
+common_errDivideByZero:
+ EXPORT_PC
+ bl art_quick_throw_div_zero
+
+// Expect index in r1, length in r3
+common_errArrayIndex:
+ EXPORT_PC
+ mov r0, r1
+ mov r1, r3
+ bl art_quick_throw_array_bounds
+
+common_errNullObject:
+ EXPORT_PC
+ bl art_quick_throw_null_pointer_exception
+
+NterpCommonInvokeStatic:
+ COMMON_INVOKE_NON_RANGE is_static=1, suffix="invokeStatic"
+
+NterpCommonInvokeStaticRange:
+ COMMON_INVOKE_RANGE is_static=1, suffix="invokeStatic"
+
+NterpCommonInvokeInstance:
+ COMMON_INVOKE_NON_RANGE suffix="invokeInstance"
+
+NterpCommonInvokeInstanceRange:
+ COMMON_INVOKE_RANGE suffix="invokeInstance"
+
+NterpCommonInvokeInterface:
+ COMMON_INVOKE_NON_RANGE is_interface=1, suffix="invokeInterface"
+
+NterpCommonInvokeInterfaceRange:
+ COMMON_INVOKE_RANGE is_interface=1, suffix="invokeInterface"
+
+NterpCommonInvokePolymorphic:
+ COMMON_INVOKE_NON_RANGE is_polymorphic=1, suffix="invokePolymorphic"
+
+NterpCommonInvokePolymorphicRange:
+ COMMON_INVOKE_RANGE is_polymorphic=1, suffix="invokePolymorphic"
+
+NterpCommonInvokeCustom:
+ COMMON_INVOKE_NON_RANGE is_static=1, is_custom=1, suffix="invokeCustom"
+
+NterpCommonInvokeCustomRange:
+ COMMON_INVOKE_RANGE is_static=1, is_custom=1, suffix="invokeCustom"
+
+NterpHandleStringInit:
+ COMMON_INVOKE_NON_RANGE is_string_init=1, suffix="stringInit"
+
+NterpHandleStringInitRange:
+ COMMON_INVOKE_RANGE is_string_init=1, suffix="stringInit"
+
+NterpNewInstance:
+ EXPORT_PC
+ // Fast-path which gets the class from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+ cmp rMR, #0
+ bne 3f
+4:
+ ldr lr, [rSELF, #THREAD_ALLOC_OBJECT_ENTRYPOINT_OFFSET]
+ blx lr
+1:
+ lsr r1, rINST, #8 // r1 <- A
+ SET_VREG_OBJECT r0, r1 // fp[A] <- value
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_class_or_allocate_object
+ b 1b
+3:
+ bl art_quick_read_barrier_mark_reg00
+ b 4b
+
+NterpNewArray:
+ /* new-array vA, vB, class@CCCC */
+ EXPORT_PC
+ // Fast-path which gets the class from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+ cmp rMR, #0
+ bne 3f
+1:
+ lsr r1, rINST, #12 // r1<- B
+ GET_VREG r1, r1 // r1<- vB (array length)
+ ldr lr, [rSELF, #THREAD_ALLOC_ARRAY_ENTRYPOINT_OFFSET]
+ blx lr
+ ubfx r1, rINST, #8, #4 // r1<- A
+ SET_VREG_OBJECT r0, r1
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+2:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_class_or_allocate_object
+ b 1b
+3:
+ bl art_quick_read_barrier_mark_reg00
+ b 1b
+
+NterpPutObjectInstanceField:
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 3f
+1:
+ ubfx r1, rINST, #8, #4 // r1<- A
+ lsr r2, rINST, #12 // r2<- B
+ GET_VREG r2, r2 // vB (object we're operating on)
+ cmp r2, #0
+ beq common_errNullObject // is object null?
+ GET_VREG r1, r1 // r1 <- v[A]
+ str r1, [r2, r0]
+4:
+ cmp r1, #0
+ beq 2f
+ ldr r1, [rSELF, #THREAD_CARD_TABLE_OFFSET]
+ lsr r3, r2, #CARD_TABLE_CARD_SHIFT
+ strb r1, [r1, r3]
+2:
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_instance_field_offset
+ cmp r0, #0
+ bge 1b
+ CLEAR_INSTANCE_VOLATILE_MARKER r0
+ ubfx r1, rINST, #8, #4 // r1<- A
+ lsr r2, rINST, #12 // r2<- B
+ GET_VREG r2, r2 // vB (object we're operating on)
+ cmp r2, #0
+ beq common_errNullObject // is object null?
+ GET_VREG r1, r1 // r1 <- v[A]
+ dmb ish
+ str r1, [r2, r0]
+ dmb ish
+ b 4b
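
After a successful reference store, the handler above (and its static sibling below) dirties the card covering the holder: when the stored value is non-null, the low byte of the card-table base is written at card_table[holder >> CARD_TABLE_CARD_SHIFT], which is exactly the strb r1, [r1, r3] pattern. A small sketch of that step, with the shift passed in as a placeholder:

#include <cstdint>

// Sketch of the generational write barrier used above. Writing the low byte
// of the card-table base marks the card as dirty, mirroring
//   ldr r1, [rSELF, #THREAD_CARD_TABLE_OFFSET]
//   lsr r3, holder, #CARD_TABLE_CARD_SHIFT
//   strb r1, [r1, r3]
inline void MarkCard(uint8_t* card_table, uintptr_t holder, uintptr_t stored_ref,
                     unsigned card_shift /* CARD_TABLE_CARD_SHIFT */) {
  if (stored_ref == 0) {
    return;  // null stores never dirty a card
  }
  card_table[holder >> card_shift] =
      static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table));
}
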
+
+NterpGetObjectInstanceField:
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 4f
+1:
+ ubfx r1, rINST, #8, #4 // r1<- A
+ lsr r2, rINST, #12 // r2<- B
+ GET_VREG r2, r2 // vB (object we're operating on)
+ cmp r2, #0
+ beq common_errNullObject
+ ldr r0, [r2, r0]
+7:
+ cmp rMR, #0
+ bne 3f
+2:
+ SET_VREG_OBJECT r0, r1 // fp[A] <- value
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ bl art_quick_read_barrier_mark_reg00
+ b 2b
+4:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_instance_field_offset
+ cmp r0, #0
+ bge 1b
+ CLEAR_INSTANCE_VOLATILE_MARKER r0
+ ubfx r1, rINST, #8, #4 // r1<- A
+ lsr r2, rINST, #12 // r2<- B
+ GET_VREG r2, r2 // vB (object we're operating on)
+ cmp r2, #0
+ beq common_errNullObject
+ dmb ish
+ ldr r0, [r2, r0]
+ dmb ish
+ b 7b
+
+NterpPutObjectStaticField:
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 5f
+1:
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 4f
+2:
+ lsr r2, rINST, #8 // r2 <- A
+ GET_VREG r2, r2
+ str r2, [r0, r1]
+8:
+ cmp r2, #0
+ beq 3f
+ ldr r1, [rSELF, #THREAD_CARD_TABLE_OFFSET]
+ lsr r3, r0, #CARD_TABLE_CARD_SHIFT
+ strb r1, [r1, r3]
+3:
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+4:
+ bl art_quick_read_barrier_mark_reg00
+ b 2b
+5:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_static_field
+ tst r0, #1
+ beq 1b
+ CLEAR_STATIC_VOLATILE_MARKER r0
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 7f
+6:
+ lsr r2, rINST, #8 // r2 <- A
+ GET_VREG r2, r2
+ dmb ish
+ str r2, [r0, r1]
+ dmb ish
+ b 8b
+7:
+ bl art_quick_read_barrier_mark_reg00
+ b 6b
+
+NterpGetObjectStaticField:
+ // Fast-path which gets the field from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 4f
+1:
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 3f
+ ldr r0, [r0, r1]
+ // No need to check the marking register, we know it's not set here.
+2:
+ lsr r1, rINST, #8 // r1 <- A
+ SET_VREG_OBJECT r0, r1 // fp[A] <- value
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ bl art_quick_read_barrier_mark_reg00
+ ldr r0, [r0, r1]
+ // Here, we know the marking register is set.
+ bl art_quick_read_barrier_mark_reg00
+ b 2b
+4:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ EXPORT_PC
+ bl nterp_get_static_field
+ tst r0, #1
+ beq 1b
+ CLEAR_STATIC_VOLATILE_MARKER r0
+ ldr r1, [r0, #ART_FIELD_OFFSET_OFFSET]
+ ldr r0, [r0, #ART_FIELD_DECLARING_CLASS_OFFSET]
+ cmp rMR, #0
+ bne 7f
+5:
+ dmb ish
+ ldr r0, [r0, r1]
+ dmb ish
+ cmp rMR, #0
+ bne 8f
+ b 2b
+7:
+ bl art_quick_read_barrier_mark_reg00
+ b 5b
+8:
+ bl art_quick_read_barrier_mark_reg00
+ b 2b
+
+NterpGetBooleanStaticField:
+ OP_SGET load="ldrb", wide=0
+
+NterpGetByteStaticField:
+ OP_SGET load="ldrsb", wide=0
+
+NterpGetCharStaticField:
+ OP_SGET load="ldrh", wide=0
+
+NterpGetShortStaticField:
+ OP_SGET load="ldrsh", wide=0
+
+NterpGetWideStaticField:
+ OP_SGET load="ldr", wide=1
+
+NterpGetIntStaticField:
+ OP_SGET load="ldr", wide=0
+
+NterpPutStaticField:
+ OP_SPUT store="str", wide=0
+
+NterpPutBooleanStaticField:
+NterpPutByteStaticField:
+ OP_SPUT store="strb", wide=0
+
+NterpPutCharStaticField:
+NterpPutShortStaticField:
+ OP_SPUT store="strh", wide=0
+
+NterpPutWideStaticField:
+ OP_SPUT store="str", wide=1
+
+NterpPutInstanceField:
+ OP_IPUT store="str", wide=0
+
+NterpPutBooleanInstanceField:
+NterpPutByteInstanceField:
+ OP_IPUT store="strb", wide=0
+
+NterpPutCharInstanceField:
+NterpPutShortInstanceField:
+ OP_IPUT store="strh", wide=0
+
+NterpPutWideInstanceField:
+ OP_IPUT store="str", wide=1
+
+NterpGetBooleanInstanceField:
+ OP_IGET load="ldrb", wide=0
+
+NterpGetByteInstanceField:
+ OP_IGET load="ldrsb", wide=0
+
+NterpGetCharInstanceField:
+ OP_IGET load="ldrh", wide=0
+
+NterpGetShortInstanceField:
+ OP_IGET load="ldrsh", wide=0
+
+NterpGetWideInstanceField:
+ OP_IGET load="ldr", wide=1
+
+NterpGetInstanceField:
+ OP_IGET load="ldr", wide=0
+
+NterpInstanceOf:
+ /* instance-of vA, vB, class@CCCC */
+ // Fast-path which gets the class from thread-local cache.
+ EXPORT_PC
+ FETCH_FROM_THREAD_CACHE r1, 3f
+ cmp rMR, #0
+ bne 4f
+1:
+ lsr r2, rINST, #12 // r2<- B
+ GET_VREG r0, r2 // r0<- vB (object)
+ cmp r0, #0
+ beq 2f
+ bl artInstanceOfFromCode
+2:
+ ubfx r1, rINST, #8, #4 // r1<- A
+ SET_VREG r0, r1
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_class_or_allocate_object
+ mov r1, r0
+ b 1b
+4:
+ bl art_quick_read_barrier_mark_reg01
+ b 1b
+
+NterpCheckCast:
+ // Fast-path which gets the class from thread-local cache.
+ EXPORT_PC
+ FETCH_FROM_THREAD_CACHE r1, 3f
+ cmp rMR, #0
+ bne 4f
+1:
+ lsr r2, rINST, #8 // r2<- A
+ GET_VREG r0, r2 // r0<- vA (object)
+ cmp r0, #0
+ beq 2f
+ bl art_quick_check_instance_of
+2:
+ FETCH_ADVANCE_INST 2
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+3:
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl nterp_get_class_or_allocate_object
+ mov r1, r0
+ b 1b
+4:
+ bl art_quick_read_barrier_mark_reg01
+ b 1b
+
+NterpHandleInvokeInterfaceOnObjectMethodRange:
+ // First argument is the 'this' pointer.
+ FETCH r1, 2
+ GET_VREG r1, r1
+ // Note: if r1 is null, this will be handled by our SIGSEGV handler.
+ ldr r2, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
+ add r2, r2, #MIRROR_CLASS_VTABLE_OFFSET_32
+ ldr r0, [r2, r0, lsl #2]
+ b NterpCommonInvokeInstanceRange
+
+NterpHandleInvokeInterfaceOnObjectMethod:
+ // First argument is the 'this' pointer.
+ FETCH r1, 2
+ and r1, r1, #0xf
+ GET_VREG r1, r1
+ // Note: if r1 is null, this will be handled by our SIGSEGV handler.
+ ldr r2, [r1, #MIRROR_OBJECT_CLASS_OFFSET]
+ add r2, r2, #MIRROR_CLASS_VTABLE_OFFSET_32
+ ldr r0, [r2, r0, lsl #2]
+ b NterpCommonInvokeInstance
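
These two handlers deal with invoke-interface calls that actually resolve to a java.lang.Object method: the value in r0 is the vtable index, and the target ArtMethod* is read straight out of the receiver's class. Roughly, in C++ (the offset parameters stand in for MIRROR_OBJECT_CLASS_OFFSET and MIRROR_CLASS_VTABLE_OFFSET_32; a null receiver faults and is turned into a NullPointerException by the SIGSEGV handler, as the comments note):

#include <cstddef>
#include <cstdint>

// Sketch of the dispatch above: load the receiver's class, then index the
// embedded 32-bit vtable with the method index, i.e. ldr r0, [r2, r0, lsl #2].
inline uint32_t LookupObjectMethod(const uint8_t* receiver, uint32_t vtable_index,
                                   size_t class_offset, size_t vtable_offset) {
  const uint8_t* klass =
      *reinterpret_cast<const uint8_t* const*>(receiver + class_offset);
  const uint32_t* vtable = reinterpret_cast<const uint32_t*>(klass + vtable_offset);
  return vtable[vtable_index];  // ArtMethod* as a 32-bit value on this target
}
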
+
+NterpHandleHotnessOverflow:
+ add r1, rPC, rINST, lsl #1
+ mov r2, rFP
+ bl nterp_hot_method
+ cmp r0, #0
+ bne 1f
+ add r2, rINST, rINST // r2<- byte offset
+ FETCH_ADVANCE_INST_RB r2 // update rPC, load rINST
+ GET_INST_OPCODE ip // extract opcode from rINST
+ GOTO_OPCODE ip // jump to next instruction
+1:
+ // Drop the current frame.
+ ldr ip, [rREFS, #-4]
+ mov sp, ip
+ .cfi_def_cfa sp, CALLEE_SAVES_SIZE
+
+ // The transition frame of type SaveAllCalleeSaves saves r4, r8, and r9,
+ // but not the managed-ABI callee saves. So we need to restore the callee
+ // saves of the nterp frame, and save the managed-ABI callee saves, which
+ // will be restored by the callee upon return.
+
+ RESTORE_ALL_CALLEE_SAVES
+ push {r5-r7, r10-r11, lr}
+ .cfi_adjust_cfa_offset 24
+ .cfi_rel_offset r5, 0
+ .cfi_rel_offset r6, 4
+ .cfi_rel_offset r7, 8
+ .cfi_rel_offset r10, 12
+ .cfi_rel_offset r11, 16
+ .cfi_rel_offset lr, 20
+ vpush {s16-s31}
+ .cfi_adjust_cfa_offset 64
+
+ // Setup the new frame
+ ldr r1, [r0, #OSR_DATA_FRAME_SIZE]
+ // Given stack size contains all callee saved registers, remove them.
+ sub r1, r1, #(CALLEE_SAVES_SIZE - 12)
+
+ // We know r1 cannot be 0, as it at least contains the ArtMethod.
+
+ // Remember CFA in a callee-save register.
+ mov rINST, sp
+ .cfi_def_cfa_register rINST
+
+ sub sp, sp, r1
+
+ add r2, r0, #OSR_DATA_MEMORY
+2:
+ sub r1, r1, #4
+ ldr ip, [r2, r1]
+ str ip, [sp, r1]
+ cmp r1, #0
+ bne 2b
+
+ // Fetch the native PC to jump to and save it in a callee-save register.
+ ldr rFP, [r0, #OSR_DATA_NATIVE_PC]
+
+ // Free the memory holding OSR Data.
+ bl free
+
+ // Jump to the compiled code.
+ bx rFP
+// This is the logical end of ExecuteNterpImpl, where the frame info applies.
+// EndExecuteNterpImpl includes the methods below as we want the runtime to
+// see them as part of the Nterp PCs.
+.cfi_endproc
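
To summarize the hotness/OSR path above: nterp_hot_method either returns null (keep interpreting) or a buffer describing an already-built compiled-code frame; the handler then drops the nterp frame, copies that buffer onto the stack, frees it, and branches to the compiled native PC. A hedged outline of the buffer layout implied by the OSR_DATA_* offsets (the concrete struct lives in the JIT and may differ):

#include <cstdint>
#include <cstdlib>
#include <cstring>

// Hypothetical shape matching the OSR_DATA_FRAME_SIZE / OSR_DATA_NATIVE_PC /
// OSR_DATA_MEMORY offsets used by NterpHandleHotnessOverflow.
struct OsrDataSketch {
  uint32_t frame_size;    // bytes of frame to materialize (callee saves included)
  const void* native_pc;  // compiled-code address to resume at
  // Frame bytes follow at OSR_DATA_MEMORY; the assembly copies them word by word.
};

// What the tail of the handler does once the new stack pointer is set up;
// assumes the frame bytes start right after the header, and returns the PC
// instead of branching to it.
inline const void* ConsumeOsrData(OsrDataSketch* osr, uint8_t* new_sp) {
  const uint8_t* frame = reinterpret_cast<const uint8_t*>(osr + 1);
  std::memcpy(new_sp, frame, osr->frame_size);  // the asm excludes the shared callee saves
  const void* pc = osr->native_pc;
  std::free(osr);  // matches the bl free above
  return pc;       // the asm then does bx to this address
}
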
+
+nterp_to_nterp_static_non_range:
+ .cfi_startproc
+ SETUP_STACK_FOR_INVOKE
+ SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=1, is_string_init=0
+ .cfi_endproc
+
+nterp_to_nterp_string_init_non_range:
+ .cfi_startproc
+ SETUP_STACK_FOR_INVOKE
+ SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=1
+ .cfi_endproc
+
+nterp_to_nterp_instance_non_range:
+ .cfi_startproc
+ SETUP_STACK_FOR_INVOKE
+ SETUP_NON_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0
+ .cfi_endproc
+
+nterp_to_nterp_static_range:
+ .cfi_startproc
+ SETUP_STACK_FOR_INVOKE
+ SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=1, is_string_init=0
+ .cfi_endproc
+
+nterp_to_nterp_string_init_range:
+ .cfi_startproc
+ SETUP_STACK_FOR_INVOKE
+ SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=1
+ .cfi_endproc
+
+nterp_to_nterp_instance_range:
+ .cfi_startproc
+ SETUP_STACK_FOR_INVOKE
+ SETUP_RANGE_ARGUMENTS_AND_EXECUTE is_static=0, is_string_init=0
+ .cfi_endproc
+
+// This is the end of PCs contained by the OatQuickMethodHeader created for the interpreter
+// entry point.
+ .type EndExecuteNterpImpl, #function
+ .hidden EndExecuteNterpImpl
+ .global EndExecuteNterpImpl
+EndExecuteNterpImpl:
+
+/*
+ * Convert the double in r0/r1 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification. The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer. The EABI conversion function doesn't do this for us.
+ */
+nterp_d2l_doconv:
+ ubfx r2, r1, #20, #11 @ grab the exponent
+ movw r3, #0x43e
+ cmp r2, r3 @ |x| >= 2^63 (out of long range)?
+ bhs d2l_special_cases
+ b __aeabi_d2lz @ tail call to convert double to long
+d2l_special_cases:
+ movw r3, #0x7ff
+ cmp r2, r3
+ beq d2l_maybeNaN @ NaN or infinity?
+d2l_notNaN:
+ adds r1, r1, r1 @ sign bit to carry
+ mov r0, #0xffffffff @ assume maxlong for lsw
+ mov r1, #0x7fffffff @ assume maxlong for msw
+ adc r0, r0, #0
+ adc r1, r1, #0 @ convert maxlong to minlong if exp negative
+ bx lr @ return
+d2l_maybeNaN:
+ orrs r3, r0, r1, lsl #12
+ beq d2l_notNaN @ zero fraction: infinity, not NaN
+ mov r0, #0
+ mov r1, #0
+ bx lr @ return 0 for NaN
+
+/*
+ * Convert the float in r0 to a long in r0/r1.
+ *
+ * We have to clip values to long min/max per the specification. The
+ * expected common case is a "reasonable" value that converts directly
+ * to a modest integer. The EABI conversion function doesn't do this for us.
+ */
+nterp_f2l_doconv:
+ ubfx r2, r0, #23, #8 @ grab the exponent
+ cmp r2, #0xbe @ |x| >= 2^63 (out of long range)?
+ bhs f2l_special_cases
+ b __aeabi_f2lz @ tail call to convert float to long
+f2l_special_cases:
+ cmp r2, #0xff @ NaN or infinity?
+ beq f2l_maybeNaN
+f2l_notNaN:
+ adds r0, r0, r0 @ sign bit to carry
+ mov r0, #0xffffffff @ assume maxlong for lsw
+ mov r1, #0x7fffffff @ assume maxlong for msw
+ adc r0, r0, #0
+ adc r1, r1, #0 @ convert maxlong to minlong if exp negative
+ bx lr @ return
+f2l_maybeNaN:
+ lsls r3, r0, #9
+ beq f2l_notNaN @ zero fraction: infinity, not NaN
+ mov r0, #0
+ mov r1, #0
+ bx lr @ return 0 for NaN
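
Both conversion helpers implement the Java semantics that the EABI soft-float routines do not provide: NaN converts to 0, and out-of-range values saturate to Long.MIN_VALUE / Long.MAX_VALUE. The exponent checks (0x43e for double, 0xbe for float) are just a fast way to detect magnitudes that cannot fit in 63 bits. The same behavior in plain C++, as a reference sketch:

#include <cmath>
#include <cstdint>
#include <limits>

// Reference semantics for nterp_d2l_doconv / nterp_f2l_doconv:
// NaN -> 0, overflow saturates, everything else truncates toward zero.
template <typename FP>
int64_t FpToLong(FP value) {
  if (std::isnan(value)) {
    return 0;
  }
  constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
  constexpr int64_t kMin = std::numeric_limits<int64_t>::min();
  if (value >= static_cast<FP>(kMax)) {
    return kMax;  // saturate high (2^63 and above)
  }
  if (value <= static_cast<FP>(kMin)) {
    return kMin;  // saturate low
  }
  return static_cast<int64_t>(value);  // in range: plain truncation
}
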
+
+// Entrypoints into runtime.
+NTERP_TRAMPOLINE nterp_get_static_field, NterpGetStaticField
+NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
+NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
+NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
+NTERP_TRAMPOLINE nterp_get_class_or_allocate_object, NterpGetClassOrAllocateObject
+NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
+NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
+NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject
+
+// gen_mterp.py will inline the following definitions
+// within [ExecuteNterpImpl, EndExecuteNterpImpl).
+%def instruction_end():
+
+ .type artNterpAsmInstructionEnd, #object
+ .hidden artNterpAsmInstructionEnd
+ .global artNterpAsmInstructionEnd
+artNterpAsmInstructionEnd:
+ // artNterpAsmInstructionEnd is used as a landing pad for exception handling.
+ FETCH_INST
+ GET_INST_OPCODE ip
+ GOTO_OPCODE ip
+
+%def instruction_start():
+
+ .type artNterpAsmInstructionStart, #object
+ .hidden artNterpAsmInstructionStart
+ .global artNterpAsmInstructionStart
+artNterpAsmInstructionStart = .L_op_nop
+ .text
+
+%def opcode_start():
+ NAME_START nterp_${opcode}
+%def opcode_end():
+ NAME_END nterp_${opcode}
+%def helper_start(name):
+ NAME_START ${name}
+%def helper_end(name):
+ NAME_END ${name}
diff --git a/runtime/interpreter/mterp/armng/object.S b/runtime/interpreter/mterp/armng/object.S
new file mode 100644
index 0000000..0b1589f
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/object.S
@@ -0,0 +1,201 @@
+%def op_check_cast():
+ b NterpCheckCast
+
+%def op_instance_of():
+ b NterpInstanceOf
+
+%def op_iget_boolean():
+ b NterpGetBooleanInstanceField
+
+%def op_iget_boolean_quick():
+% op_iget_quick(load="ldrb")
+
+%def op_iget_byte():
+ b NterpGetByteInstanceField
+
+%def op_iget_byte_quick():
+% op_iget_quick(load="ldrsb")
+
+%def op_iget_char():
+ b NterpGetCharInstanceField
+
+%def op_iget_char_quick():
+% op_iget_quick(load="ldrh")
+
+%def op_iget_object():
+ b NterpGetObjectInstanceField
+
+%def op_iget_object_quick():
+ /* For: iget-object-quick */
+ /* op vA, vB, offset@CCCC */
+ mov r2, rINST, lsr #12 @ r2<- B
+ FETCH r1, 1 @ r1<- field byte offset
+ EXPORT_PC
+ GET_VREG r0, r2 @ r0<- object we're operating on
+ cmp r0, #0
+ beq common_errNullObject
+ ldr r0, [r0, r1]
+ cmp rMR, #0
+ bne 2f
+1:
+ ubfx r2, rINST, #8, #4 @ r2<- A
+ FETCH_ADVANCE_INST 2
+ SET_VREG_OBJECT r0, r2 @ fp[A]<- r0
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+2:
+ bl art_quick_read_barrier_mark_reg00
+ b 1b
+
+%def op_iget():
+ b NterpGetInstanceField
+
+%def op_iget_quick(load="ldr", wide="0"):
+ /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick, iget-wide-quick*/
+ /* op vA, vB, offset@CCCC */
+ mov r2, rINST, lsr #12 @ r2<- B
+ FETCH r1, 1 @ r1<- field byte offset
+ GET_VREG r3, r2 @ r3<- object we're operating on
+ ubfx r2, rINST, #8, #4 @ r2<- A
+ cmp r3, #0 @ check object for null
+ beq common_errNullObject @ object was null
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ .if $wide
+ ldrd r0, [r3, r1] @ r0<- obj.field (64 bits, aligned)
+ VREG_INDEX_TO_ADDR r3, r2 @ r3<- &fp[A]
+ CLEAR_SHADOW_PAIR r2, ip, lr @ Zero out the shadow regs
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r3 @ fp[A]<- r0/r1
+ .else
+ $load r0, [r3, r1] @ r0<- obj.field
+ SET_VREG r0, r2 @ fp[A]<- r0
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ .endif
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_iget_short():
+ b NterpGetShortInstanceField
+
+%def op_iget_short_quick():
+% op_iget_quick(load="ldrsh")
+
+%def op_iget_wide():
+ b NterpGetWideInstanceField
+
+%def op_iget_wide_quick():
+% op_iget_quick(load="ldr", wide="1")
+
+%def op_iput_boolean():
+ b NterpPutBooleanInstanceField
+
+%def op_iput_boolean_quick():
+% op_iput_quick(store="strb")
+
+%def op_iput_byte():
+ b NterpPutByteInstanceField
+
+%def op_iput_byte_quick():
+% op_iput_quick(store="strb")
+
+%def op_iput_char():
+ b NterpPutCharInstanceField
+
+%def op_iput_char_quick():
+% op_iput_quick(store="strh")
+
+%def op_iput_object():
+ b NterpPutObjectInstanceField
+
+%def op_iput_object_quick():
+% op_iput_quick(store="str", wide="0", is_object="1")
+
+%def op_iput():
+ b NterpPutInstanceField
+
+%def op_iput_quick(store="str", wide="0", is_object="0"):
+ /* For: iput-quick, iput-object-quick */
+ /* op vA, vB, offset@CCCC */
+ mov r2, rINST, lsr #12 @ r2<- B
+ FETCH ip, 1 @ ip<- field byte offset
+ GET_VREG r3, r2 @ r3<- fp[B], the object pointer
+ ubfx r2, rINST, #8, #4 @ r2<- A
+ cmp r3, #0 @ check object for null
+ beq common_errNullObject @ object was null
+ .if $wide
+ VREG_INDEX_TO_ADDR r0, r2 @ r0<- &fp[A]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r0 @ r0/r1<- fp[A]/fp[A+1]
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ strd r0, [r3, ip] @ obj.field<- r0/r1
+ .else
+ GET_VREG r0, r2 @ r0<- fp[A]
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ $store r0, [r3, ip] @ obj.field<- r0
+ .endif
+ .if $is_object
+ cmp r0, #0
+ beq 1f
+ ldr r1, [rSELF, #THREAD_CARD_TABLE_OFFSET]
+ lsr r0, r3, #CARD_TABLE_CARD_SHIFT
+ strb r1, [r1, r0]
+1:
+ .endif
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_iput_short():
+ b NterpPutShortInstanceField
+
+%def op_iput_short_quick():
+% op_iput_quick(store="strh")
+
+%def op_iput_wide():
+ b NterpPutWideInstanceField
+
+%def op_iput_wide_quick():
+% op_iput_quick(store="str", wide="1", is_object="0")
+
+%def op_sget_boolean():
+ b NterpGetBooleanStaticField
+
+%def op_sget_byte():
+ b NterpGetByteStaticField
+
+%def op_sget_char():
+ b NterpGetCharStaticField
+
+%def op_sget_object():
+ b NterpGetObjectStaticField
+
+%def op_sget():
+ b NterpGetIntStaticField
+
+%def op_sget_short():
+ b NterpGetShortStaticField
+
+%def op_sget_wide():
+ b NterpGetWideStaticField
+
+%def op_sput_boolean():
+ b NterpPutBooleanStaticField
+
+%def op_sput_byte():
+ b NterpPutByteStaticField
+
+%def op_sput_char():
+ b NterpPutCharStaticField
+
+%def op_sput_object():
+ b NterpPutObjectStaticField
+
+%def op_sput():
+ b NterpPutStaticField
+
+%def op_sput_short():
+ b NterpPutShortStaticField
+
+%def op_sput_wide():
+ b NterpPutWideStaticField
+
+%def op_new_instance():
+ // The routine is too big to fit in a handler, so jump to it.
+ b NterpNewInstance
diff --git a/runtime/interpreter/mterp/armng/other.S b/runtime/interpreter/mterp/armng/other.S
new file mode 100644
index 0000000..9100ed7
--- /dev/null
+++ b/runtime/interpreter/mterp/armng/other.S
@@ -0,0 +1,361 @@
+%def unused():
+ bkpt
+
+%def op_const():
+ /* const vAA, #+BBBBbbbb */
+ mov r3, rINST, lsr #8 @ r3<- AA
+ FETCH r0, 1 @ r0<- bbbb (low)
+ FETCH r1, 2 @ r1<- BBBB (high)
+ FETCH_ADVANCE_INST 3 @ advance rPC, load rINST
+ orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG r0, r3 @ vAA<- r0
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_const_16():
+ /* const/16 vAA, #+BBBB */
+ FETCH_S r0, 1 @ r0<- ssssBBBB (sign-extended)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ SET_VREG r0, r3 @ vAA<- r0
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_const_4():
+ /* const/4 vA, #+B */
+ sbfx r1, rINST, #12, #4 @ r1<- sssssssB (sign-extended)
+ ubfx r0, rINST, #8, #4 @ r0<- A
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ GET_INST_OPCODE ip @ ip<- opcode from rINST
+ SET_VREG r1, r0 @ fp[A]<- r1
+ GOTO_OPCODE ip @ execute next instruction
+
+%def op_const_high16():
+ /* const/high16 vAA, #+BBBB0000 */
+ FETCH r0, 1 @ r0<- 0000BBBB (zero-extended)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ mov r0, r0, lsl #16 @ r0<- BBBB0000
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ SET_VREG r0, r3 @ vAA<- r0
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_const_object(jumbo="0", helper="nterp_load_object"):
+ // Fast-path which gets the object from thread-local cache.
+ FETCH_FROM_THREAD_CACHE r0, 2f
+ cmp rMR, #0
+ bne 3f
+1:
+ mov r1, rINST, lsr #8 @ r1<- AA
+ .if $jumbo
+ FETCH_ADVANCE_INST 3 // advance rPC, load rINST
+ .else
+ FETCH_ADVANCE_INST 2 // advance rPC, load rINST
+ .endif
+ GET_INST_OPCODE ip // extract opcode from rINST
+ SET_VREG_OBJECT r0, r1 // vAA <- value
+ GOTO_OPCODE ip // jump to next instruction
+2:
+ EXPORT_PC
+ mov r0, rSELF
+ ldr r1, [sp]
+ mov r2, rPC
+ bl $helper
+ b 1b
+3:
+ bl art_quick_read_barrier_mark_reg00
+ b 1b
+
+%def op_const_class():
+% op_const_object(jumbo="0", helper="nterp_get_class_or_allocate_object")
+
+%def op_const_method_handle():
+% op_const_object(jumbo="0")
+
+%def op_const_method_type():
+% op_const_object(jumbo="0")
+
+%def op_const_string():
+ /* const/string vAA, String@BBBB */
+% op_const_object(jumbo="0")
+
+%def op_const_string_jumbo():
+ /* const/string vAA, String@BBBBBBBB */
+% op_const_object(jumbo="1")
+
+%def op_const_wide():
+ /* const-wide vAA, #+HHHHhhhhBBBBbbbb */
+ FETCH r0, 1 @ r0<- bbbb (low)
+ FETCH r1, 2 @ r1<- BBBB (low middle)
+ FETCH r2, 3 @ r2<- hhhh (high middle)
+ orr r0, r0, r1, lsl #16 @ r0<- BBBBbbbb (low word)
+ FETCH r3, 4 @ r3<- HHHH (high)
+ mov r4, rINST, lsr #8 @ r4<- AA
+ orr r1, r2, r3, lsl #16 @ r1<- HHHHhhhh (high word)
+ CLEAR_SHADOW_PAIR r4, r2, r3 @ Zero out the shadow regs
+ FETCH_ADVANCE_INST 5 @ advance rPC, load rINST
+ VREG_INDEX_TO_ADDR r4, r4 @ r4<- &fp[AA]
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r4 @ vAA<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_const_wide_16():
+ /* const-wide/16 vAA, #+BBBB */
+ FETCH_S r0, 1 @ r0<- ssssBBBB (sign-extended)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ mov r1, r0, asr #31 @ r1<- ssssssss
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ CLEAR_SHADOW_PAIR r3, r2, lr @ Zero out the shadow regs
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[AA]
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r3 @ vAA<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_const_wide_32():
+ /* const-wide/32 vAA, #+BBBBbbbb */
+ FETCH r0, 1 @ r0<- 0000bbbb (low)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ FETCH_S r2, 2 @ r2<- ssssBBBB (high)
+ FETCH_ADVANCE_INST 3 @ advance rPC, load rINST
+ orr r0, r0, r2, lsl #16 @ r0<- BBBBbbbb
+ CLEAR_SHADOW_PAIR r3, r2, lr @ Zero out the shadow regs
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[AA]
+ mov r1, r0, asr #31 @ r1<- ssssssss
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r3 @ vAA<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_const_wide_high16():
+ /* const-wide/high16 vAA, #+BBBB000000000000 */
+ FETCH r1, 1 @ r1<- 0000BBBB (zero-extended)
+ mov r3, rINST, lsr #8 @ r3<- AA
+ mov r0, #0 @ r0<- 00000000
+ mov r1, r1, lsl #16 @ r1<- BBBB0000
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ CLEAR_SHADOW_PAIR r3, r0, r2 @ Zero shadow regs
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[AA]
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r3 @ vAA<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_monitor_enter():
+ /*
+ * Synchronize on an object.
+ */
+ /* monitor-enter vAA */
+ EXPORT_PC
+ mov r2, rINST, lsr #8 @ r2<- AA
+ GET_VREG r0, r2 @ r0<- vAA (object)
+ bl art_quick_lock_object
+ FETCH_ADVANCE_INST 1
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_monitor_exit():
+ /*
+ * Unlock an object.
+ *
+ * Exceptions that occur when unlocking a monitor need to appear as
+ * if they happened at the following instruction. See the Dalvik
+ * instruction spec.
+ */
+ /* monitor-exit vAA */
+ EXPORT_PC
+ mov r2, rINST, lsr #8 @ r2<- AA
+ GET_VREG r0, r2 @ r0<- vAA (object)
+ bl art_quick_unlock_object
+ FETCH_ADVANCE_INST 1 @ before throw: advance rPC, load rINST
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move(is_object="0"):
+ /* for move, move-object, long-to-int */
+ /* op vA, vB */
+ mov r1, rINST, lsr #12 @ r1<- B from 15:12
+ ubfx r0, rINST, #8, #4 @ r0<- A from 11:8
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ GET_VREG r2, r1 @ r2<- fp[B]
+ GET_INST_OPCODE ip @ ip<- opcode from rINST
+ .if $is_object
+ SET_VREG_OBJECT r2, r0 @ fp[A]<- r2
+ .else
+ SET_VREG r2, r0 @ fp[A]<- r2
+ .endif
+ GOTO_OPCODE ip @ execute next instruction
+
+%def op_move_16(is_object="0"):
+ /* for: move/16, move-object/16 */
+ /* op vAAAA, vBBBB */
+ FETCH r1, 2 @ r1<- BBBB
+ FETCH r0, 1 @ r0<- AAAA
+ FETCH_ADVANCE_INST 3 @ advance rPC, load rINST
+ GET_VREG r2, r1 @ r2<- fp[BBBB]
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ .if $is_object
+ SET_VREG_OBJECT r2, r0 @ fp[AAAA]<- r2
+ .else
+ SET_VREG r2, r0 @ fp[AAAA]<- r2
+ .endif
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_exception():
+ /* move-exception vAA */
+ mov r2, rINST, lsr #8 @ r2<- AA
+ ldr r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
+ mov r1, #0 @ r1<- 0
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ SET_VREG_OBJECT r3, r2 @ fp[AA]<- exception obj
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ str r1, [rSELF, #THREAD_EXCEPTION_OFFSET] @ clear exception
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_from16(is_object="0"):
+ /* for: move/from16, move-object/from16 */
+ /* op vAA, vBBBB */
+ FETCH r1, 1 @ r1<- BBBB
+ mov r0, rINST, lsr #8 @ r0<- AA
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ GET_VREG r2, r1 @ r2<- fp[BBBB]
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ .if $is_object
+ SET_VREG_OBJECT r2, r0 @ fp[AA]<- r2
+ .else
+ SET_VREG r2, r0 @ fp[AA]<- r2
+ .endif
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_object():
+% op_move(is_object="1")
+
+%def op_move_object_16():
+% op_move_16(is_object="1")
+
+%def op_move_object_from16():
+% op_move_from16(is_object="1")
+
+%def op_move_result(is_object="0"):
+ /* for: move-result, move-result-object */
+ /* op vAA */
+ mov r2, rINST, lsr #8 @ r2<- AA
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ .if $is_object
+ SET_VREG_OBJECT r0, r2 @ fp[AA]<- r0
+ .else
+ SET_VREG r0, r2 @ fp[AA]<- r0
+ .endif
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_result_object():
+% op_move_result(is_object="1")
+
+%def op_move_result_wide():
+ /* move-result-wide vAA */
+ mov rINST, rINST, lsr #8 @ rINST<- AA
+ VREG_INDEX_TO_ADDR r2, rINST @ r2<- &fp[AA]
+ CLEAR_SHADOW_PAIR rINST, ip, lr @ Zero out the shadow regs
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 @ fp[AA]<- r0/r1
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_wide():
+ /* move-wide vA, vB */
+ /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+ mov r3, rINST, lsr #12 @ r3<- B
+ ubfx rINST, rINST, #8, #4 @ rINST<- A
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[B]
+ VREG_INDEX_TO_ADDR r2, rINST @ r2<- &fp[A]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- fp[B]
+ CLEAR_SHADOW_PAIR rINST, ip, lr @ Zero out the shadow regs
+ FETCH_ADVANCE_INST 1 @ advance rPC, load rINST
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 @ fp[A]<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_wide_16():
+ /* move-wide/16 vAAAA, vBBBB */
+ /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+ FETCH r3, 2 @ r3<- BBBB
+ FETCH r2, 1 @ r2<- AAAA
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[BBBB]
+ VREG_INDEX_TO_ADDR lr, r2 @ r2<- &fp[AAAA]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- fp[BBBB]
+ FETCH_ADVANCE_INST 3 @ advance rPC, load rINST
+ CLEAR_SHADOW_PAIR r2, r3, ip @ Zero out the shadow regs
+ SET_VREG_WIDE_BY_ADDR r0, r1, lr @ fp[AAAA]<- r0/r1
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_move_wide_from16():
+ /* move-wide/from16 vAA, vBBBB */
+ /* NOTE: regs can overlap, e.g. "move v6,v7" or "move v7,v6" */
+ FETCH r3, 1 @ r3<- BBBB
+ mov rINST, rINST, lsr #8 @ rINST<- AA
+ VREG_INDEX_TO_ADDR r3, r3 @ r3<- &fp[BBBB]
+ VREG_INDEX_TO_ADDR r2, rINST @ r2<- &fp[AA]
+ GET_VREG_WIDE_BY_ADDR r0, r1, r3 @ r0/r1<- fp[BBBB]
+ CLEAR_SHADOW_PAIR rINST, ip, lr @ Zero out the shadow regs
+ FETCH_ADVANCE_INST 2 @ advance rPC, load rINST
+ GET_INST_OPCODE ip @ extract opcode from rINST
+ SET_VREG_WIDE_BY_ADDR r0, r1, r2 @ fp[AA]<- r0/r1
+ GOTO_OPCODE ip @ jump to next instruction
+
+%def op_nop():
+ FETCH_ADVANCE_INST 1 @ advance to next instr, load rINST
+ GET_INST_OPCODE ip @ ip<- opcode from rINST
+ GOTO_OPCODE ip @ execute it
+
+%def op_unused_3e():
+% unused()
+
+%def op_unused_3f():
+% unused()
+
+%def op_unused_40():
+% unused()
+
+%def op_unused_41():
+% unused()
+
+%def op_unused_42():
+% unused()
+
+%def op_unused_43():
+% unused()
+
+%def op_unused_73():
+% unused()
+
+%def op_unused_79():
+% unused()
+
+%def op_unused_7a():
+% unused()
+
+%def op_unused_f3():
+% unused()
+
+%def op_unused_f4():
+% unused()
+
+%def op_unused_f5():
+% unused()
+
+%def op_unused_f6():
+% unused()
+
+%def op_unused_f7():
+% unused()
+
+%def op_unused_f8():
+% unused()
+
+%def op_unused_f9():
+% unused()
+
+%def op_unused_fc():
+% unused()
+
+%def op_unused_fd():
+% unused()
diff --git a/runtime/interpreter/mterp/nterp.cc b/runtime/interpreter/mterp/nterp.cc
index 74e49e7..3d92473 100644
--- a/runtime/interpreter/mterp/nterp.cc
+++ b/runtime/interpreter/mterp/nterp.cc
@@ -118,6 +118,111 @@
UpdateCache(self, dex_pc_ptr, reinterpret_cast<size_t>(value));
}
+#ifdef __arm__
+
+extern "C" void NterpStoreArm32Fprs(const char* shorty,
+ uint32_t* registers,
+ uint32_t* stack_args,
+ const uint32_t* fprs) {
+ // Note `shorty` has already the returned type removed.
+ ScopedAssertNoThreadSuspension sants("In nterp");
+ uint32_t arg_index = 0;
+ uint32_t fpr_double_index = 0;
+ uint32_t fpr_index = 0;
+ for (uint32_t shorty_index = 0; shorty[shorty_index] != '\0'; ++shorty_index) {
+ char arg_type = shorty[shorty_index];
+ switch (arg_type) {
+ case 'D': {
+ // Double should not overlap with float.
+ fpr_double_index = std::max(fpr_double_index, RoundUp(fpr_index, 2));
+ if (fpr_double_index < 16) {
+ registers[arg_index] = fprs[fpr_double_index++];
+ registers[arg_index + 1] = fprs[fpr_double_index++];
+ } else {
+ registers[arg_index] = stack_args[arg_index];
+ registers[arg_index + 1] = stack_args[arg_index + 1];
+ }
+ arg_index += 2;
+ break;
+ }
+ case 'F': {
+ if (fpr_index % 2 == 0) {
+ fpr_index = std::max(fpr_double_index, fpr_index);
+ }
+ if (fpr_index < 16) {
+ registers[arg_index] = fprs[fpr_index++];
+ } else {
+ registers[arg_index] = stack_args[arg_index];
+ }
+ arg_index++;
+ break;
+ }
+ case 'J': {
+ arg_index += 2;
+ break;
+ }
+ default: {
+ arg_index++;
+ break;
+ }
+ }
+ }
+}
+
+extern "C" void NterpSetupArm32Fprs(const char* shorty,
+ uint32_t dex_register,
+ uint32_t stack_index,
+ uint32_t* fprs,
+ uint32_t* registers,
+ uint32_t* stack_args) {
+ // Note `shorty` has already the returned type removed.
+ ScopedAssertNoThreadSuspension sants("In nterp");
+ uint32_t fpr_double_index = 0;
+ uint32_t fpr_index = 0;
+ for (uint32_t shorty_index = 0; shorty[shorty_index] != '\0'; ++shorty_index) {
+ char arg_type = shorty[shorty_index];
+ switch (arg_type) {
+ case 'D': {
+ // Double should not overlap with float.
+ fpr_double_index = std::max(fpr_double_index, RoundUp(fpr_index, 2));
+ if (fpr_double_index < 16) {
+ fprs[fpr_double_index++] = registers[dex_register++];
+ fprs[fpr_double_index++] = registers[dex_register++];
+ stack_index += 2;
+ } else {
+ stack_args[stack_index++] = registers[dex_register++];
+ stack_args[stack_index++] = registers[dex_register++];
+ }
+ break;
+ }
+ case 'F': {
+ if (fpr_index % 2 == 0) {
+ fpr_index = std::max(fpr_double_index, fpr_index);
+ }
+ if (fpr_index < 16) {
+ fprs[fpr_index++] = registers[dex_register++];
+ stack_index++;
+ } else {
+ stack_args[stack_index++] = registers[dex_register++];
+ }
+ break;
+ }
+ case 'J': {
+ stack_index += 2;
+ dex_register += 2;
+ break;
+ }
+ default: {
+ stack_index++;
+ dex_register++;
+ break;
+ }
+ }
+ }
+}
+
+#endif
+
extern "C" const dex::CodeItem* NterpGetCodeItem(ArtMethod* method)
REQUIRES_SHARED(Locks::mutator_lock_) {
ScopedAssertNoThreadSuspension sants("In nterp");
@@ -294,6 +399,7 @@
} else if (resolved_method->GetDeclaringClass()->IsStringClass()
&& !resolved_method->IsStatic()
&& resolved_method->IsConstructor()) {
+ CHECK_NE(invoke_type, kSuper);
resolved_method = WellKnownClasses::StringInitToStringFactory(resolved_method);
// Or the result with 1 to notify to nterp this is a string init method. We
// also don't cache the result as we don't want nterp to have its fast path always
diff --git a/test/813-fp-args/expected-stderr.txt b/test/813-fp-args/expected-stderr.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/813-fp-args/expected-stderr.txt
diff --git a/test/813-fp-args/expected-stdout.txt b/test/813-fp-args/expected-stdout.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/813-fp-args/expected-stdout.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/813-fp-args/info.txt b/test/813-fp-args/info.txt
new file mode 100644
index 0000000..5c204cb
--- /dev/null
+++ b/test/813-fp-args/info.txt
@@ -0,0 +1 @@
+Regression test for the floating-point native ABI, targeting ARM.
diff --git a/test/813-fp-args/src/Main.java b/test/813-fp-args/src/Main.java
new file mode 100644
index 0000000..fa8ac07
--- /dev/null
+++ b/test/813-fp-args/src/Main.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+ public static void main(String[] args) {
+ System.loadLibrary(args[0]);
+ // Compile it to ensure we're calling compiled code.
+ ensureJitCompiled(Main.class, "myMethod");
+ myMethod(1, 2, 3, 4);
+ }
+
+ public static void assertEquals(float expected, float actual) {
+ if (expected != actual) {
+ throw new Error("Expected " + expected + " got " + actual);
+ }
+ }
+
+ public static void assertEquals(double expected, double actual) {
+ if (expected != actual) {
+ throw new Error("Expected " + expected + " got " + actual);
+ }
+ }
+
+ public static void myMethod(float a, double b, float c, float d) {
+ assertEquals(1, a);
+ assertEquals(2, b);
+ assertEquals(3, c);
+ assertEquals(4, d);
+ }
+
+ public static native void ensureJitCompiled(Class<?> cls, String name);
+}
diff --git a/tools/cpp-define-generator/globals.def b/tools/cpp-define-generator/globals.def
index 50ca3d6..50f9b33 100644
--- a/tools/cpp-define-generator/globals.def
+++ b/tools/cpp-define-generator/globals.def
@@ -83,3 +83,5 @@
GetStackOverflowReservedBytes(art::kRuntimeISA))
ASM_DEFINE(NTERP_HOTNESS_MASK,
art::interpreter::kNterpHotnessMask)
+ASM_DEFINE(NTERP_HOTNESS_BITS,
+ art::POPCOUNT(art::interpreter::kNterpHotnessMask))