MIPS32: Pass more arguments in registers.
Specifically, use A0-A3 and T0-T1 for non-floating-point arguments and F8-F19 for floating-point arguments in the managed calling convention; the JNI calling convention is unchanged.
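
The managed convention now passes up to five core arguments in A1-A3, T0 and T1
(A0 still holds the ArtMethod*) and up to six float/double arguments in F8-F19
(singles in F8, F10, ..., F18; doubles in the even/odd pairs F8/F9 ... F18/F19).
64-bit integer arguments still occupy an aligned register pair, so A1 or A3 is
skipped where needed and A2_A3 or T0_T1 is used instead. The JNI convention
keeps A0-A3 with F12/F14 (D6/D7).

To support this:
- the SaveRefsAndArgs frame grows from 80 to 112 bytes to spill the additional
  argument registers, and the oat version is bumped to 093;
- the hidden argument of art_quick_imt_conflict_trampoline moves from T0 to T7
  and FTMP moves from F8 to F6, since T0/T1 and F8-F19 now carry arguments;
- art_quick_invoke_stub and art_quick_invoke_static_stub load arguments through
  16-byte-aligned code tables indexed by scaled gpr/fp indices instead of
  compare-and-branch chains;
- artQuickGenericJniTrampoline sets bit 0 of the returned native code address
  when the first two native arguments are both single-precision floats, so
  art_quick_generic_jni_trampoline can load F12 and F14 correctly.

As a rough illustration (assuming a static method with arguments assigned left
to right and the FP and core indices advancing independently), a
(int, long, float, double, int) signature is now passed as

  int    -> A1
  long   -> A2_A3
  float  -> F8
  double -> F10/F11
  int    -> T0

where the old convention would have used F12 and F14/F15 for the float and
double and pushed the final int onto the stack.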
Test: booted MIPS32R2 in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R2) on CI20
Test: test-art-target-gtest (MIPS32R2) on CI20
Test: booted MIPS64 (with 2nd arch MIPS32R6) in QEMU
Test: test-art-target-run-test-optimizing (MIPS32R6) in QEMU
Test: test-art-target-gtest (MIPS32R6) in QEMU
Test: test-art-host-gtest
Change-Id: Ib8b0310a109d9f3d70119c1e605e54b013e60728
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc
index a205800..2710ae9 100644
--- a/compiler/jni/jni_cfi_test_expected.inc
+++ b/compiler/jni/jni_cfi_test_expected.inc
@@ -327,7 +327,7 @@
0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xBE, 0xAF,
0x34, 0x00, 0xB7, 0xAF, 0x30, 0x00, 0xB6, 0xAF, 0x2C, 0x00, 0xB5, 0xAF,
0x28, 0x00, 0xB4, 0xAF, 0x24, 0x00, 0xB3, 0xAF, 0x20, 0x00, 0xB2, 0xAF,
- 0x00, 0x00, 0xA4, 0xAF, 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xAC, 0xE7,
+ 0x00, 0x00, 0xA4, 0xAF, 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA8, 0xE7,
0x4C, 0x00, 0xA6, 0xAF, 0x50, 0x00, 0xA7, 0xAF, 0xE0, 0xFF, 0xBD, 0x27,
0x20, 0x00, 0xBD, 0x27, 0x20, 0x00, 0xB2, 0x8F, 0x24, 0x00, 0xB3, 0x8F,
0x28, 0x00, 0xB4, 0x8F, 0x2C, 0x00, 0xB5, 0x8F, 0x30, 0x00, 0xB6, 0x8F,
@@ -361,7 +361,7 @@
// 0x00000024: .cfi_offset: r18 at cfa-32
// 0x00000024: sw r4, +0(r29)
// 0x00000028: sw r5, +68(r29)
-// 0x0000002c: swc1 f12, +72(r29)
+// 0x0000002c: swc1 f8, +72(r29)
// 0x00000030: sw r6, +76(r29)
// 0x00000034: sw r7, +80(r29)
// 0x00000038: addiu r29, r29, -32
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index e6948ec..0e0716e 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -23,6 +23,10 @@
namespace art {
namespace mips {
+//
+// JNI calling convention constants.
+//
+
// Up to how many float-like (float, double) args can be enregistered in floating-point registers.
// The rest of the args must go in integer registers or on the stack.
constexpr size_t kMaxFloatOrDoubleRegisterArguments = 2u;
@@ -30,9 +34,17 @@
// enregistered. The rest of the args must go on the stack.
constexpr size_t kMaxIntLikeRegisterArguments = 4u;
-static const Register kCoreArgumentRegisters[] = { A0, A1, A2, A3 };
-static const FRegister kFArgumentRegisters[] = { F12, F14 };
-static const DRegister kDArgumentRegisters[] = { D6, D7 };
+static const Register kJniCoreArgumentRegisters[] = { A0, A1, A2, A3 };
+static const FRegister kJniFArgumentRegisters[] = { F12, F14 };
+static const DRegister kJniDArgumentRegisters[] = { D6, D7 };
+
+//
+// Managed calling convention constants.
+//
+
+static const Register kManagedCoreArgumentRegisters[] = { A0, A1, A2, A3, T0, T1 };
+static const FRegister kManagedFArgumentRegisters[] = { F8, F10, F12, F14, F16, F18 };
+static const DRegister kManagedDArgumentRegisters[] = { D4, D5, D6, D7, D8, D9 };
static constexpr ManagedRegister kCalleeSaveRegisters[] = {
// Core registers.
@@ -133,30 +145,30 @@
for (ResetIterator(FrameOffset(0)); HasNext(); Next()) {
if (IsCurrentParamAFloatOrDouble()) {
if (IsCurrentParamADouble()) {
- if (fpr_index < arraysize(kDArgumentRegisters)) {
+ if (fpr_index < arraysize(kManagedDArgumentRegisters)) {
entry_spills_.push_back(
- MipsManagedRegister::FromDRegister(kDArgumentRegisters[fpr_index++]));
+ MipsManagedRegister::FromDRegister(kManagedDArgumentRegisters[fpr_index++]));
} else {
entry_spills_.push_back(ManagedRegister::NoRegister(), 8);
}
} else {
- if (fpr_index < arraysize(kFArgumentRegisters)) {
+ if (fpr_index < arraysize(kManagedFArgumentRegisters)) {
entry_spills_.push_back(
- MipsManagedRegister::FromFRegister(kFArgumentRegisters[fpr_index++]));
+ MipsManagedRegister::FromFRegister(kManagedFArgumentRegisters[fpr_index++]));
} else {
entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
}
}
} else {
if (IsCurrentParamALong() && !IsCurrentParamAReference()) {
- if (gpr_index == 1) {
- // Don't use a1-a2 as a register pair, move to a2-a3 instead.
+ if (gpr_index == 1 || gpr_index == 3) {
+ // Don't use A1-A2(A3-T0) as a register pair, move to A2-A3(T0-T1) instead.
gpr_index++;
}
- if (gpr_index < arraysize(kCoreArgumentRegisters) - 1) {
+ if (gpr_index < arraysize(kManagedCoreArgumentRegisters) - 1) {
entry_spills_.push_back(
- MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[gpr_index++]));
- } else if (gpr_index == arraysize(kCoreArgumentRegisters) - 1) {
+ MipsManagedRegister::FromCoreRegister(kManagedCoreArgumentRegisters[gpr_index++]));
+ } else if (gpr_index == arraysize(kManagedCoreArgumentRegisters) - 1) {
gpr_index++;
entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
} else {
@@ -164,9 +176,9 @@
}
}
- if (gpr_index < arraysize(kCoreArgumentRegisters)) {
+ if (gpr_index < arraysize(kManagedCoreArgumentRegisters)) {
entry_spills_.push_back(
- MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[gpr_index++]));
+ MipsManagedRegister::FromCoreRegister(kManagedCoreArgumentRegisters[gpr_index++]));
} else {
entry_spills_.push_back(ManagedRegister::NoRegister(), 4);
}
@@ -175,6 +187,7 @@
}
return entry_spills_;
}
+
// JNI calling convention
MipsJniCallingConvention::MipsJniCallingConvention(bool is_static,
@@ -285,7 +298,7 @@
// | FLOAT | INT | DOUBLE |
// | F12 | A1 | A2 | A3 |
// (c) first two arguments are floating-point (float, double)
- // | FLAOT | (PAD) | DOUBLE | INT |
+ // | FLOAT | (PAD) | DOUBLE | INT |
// | F12 | | F14 | SP+16 |
// (d) first two arguments are floating-point (double, float)
// | DOUBLE | FLOAT | INT |
@@ -404,9 +417,9 @@
if (use_fp_arg_registers_ && (itr_args_ < kMaxFloatOrDoubleRegisterArguments)) {
if (IsCurrentParamAFloatOrDouble()) {
if (IsCurrentParamADouble()) {
- return MipsManagedRegister::FromDRegister(kDArgumentRegisters[itr_args_]);
+ return MipsManagedRegister::FromDRegister(kJniDArgumentRegisters[itr_args_]);
} else {
- return MipsManagedRegister::FromFRegister(kFArgumentRegisters[itr_args_]);
+ return MipsManagedRegister::FromFRegister(kJniFArgumentRegisters[itr_args_]);
}
}
}
@@ -420,7 +433,7 @@
return MipsManagedRegister::FromRegisterPair(A2_A3);
}
} else {
- return MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[itr_slots_]);
+ return MipsManagedRegister::FromCoreRegister(kJniCoreArgumentRegisters[itr_slots_]);
}
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 8f94834..f0d4910 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -99,8 +99,9 @@
uint32_t gp_index = gp_index_;
gp_index_ += 2;
if (gp_index + 1 < calling_convention.GetNumberOfRegisters()) {
- if (calling_convention.GetRegisterAt(gp_index) == A1) {
- gp_index_++; // Skip A1, and use A2_A3 instead.
+ Register reg = calling_convention.GetRegisterAt(gp_index);
+ if (reg == A1 || reg == A3) {
+ gp_index_++; // Skip A1(A3), and use A2_A3(T0_T1) instead.
gp_index++;
}
Register low_even = calling_convention.GetRegisterAt(gp_index);
@@ -5085,9 +5086,9 @@
void LocationsBuilderMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
HandleInvoke(invoke);
- // The register T0 is required to be used for the hidden argument in
+ // The register T7 is required to be used for the hidden argument in
// art_quick_imt_conflict_trampoline, so add the hidden argument.
- invoke->GetLocations()->AddTemp(Location::RegisterLocation(T0));
+ invoke->GetLocations()->AddTemp(Location::RegisterLocation(T7));
}
void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index e225d20..685e4a9 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -31,11 +31,11 @@
// InvokeDexCallingConvention registers
static constexpr Register kParameterCoreRegisters[] =
- { A1, A2, A3 };
+ { A1, A2, A3, T0, T1 };
static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
static constexpr FRegister kParameterFpuRegisters[] =
- { F12, F14 };
+ { F8, F10, F12, F14, F16, F18 };
static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters);
@@ -47,7 +47,7 @@
arraysize(kRuntimeParameterCoreRegisters);
static constexpr FRegister kRuntimeParameterFpuRegisters[] =
- { F12, F14};
+ { F12, F14 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc
index 9dc53e6..0d4e1c5 100644
--- a/compiler/optimizing/emit_swap_mips_test.cc
+++ b/compiler/optimizing/emit_swap_mips_test.cc
@@ -154,54 +154,54 @@
TEST_F(EmitSwapMipsTest, TwoFpuRegistersFloat) {
moves_->AddMove(
Location::FpuRegisterLocation(4),
- Location::FpuRegisterLocation(6),
+ Location::FpuRegisterLocation(2),
Primitive::kPrimFloat,
nullptr);
moves_->AddMove(
- Location::FpuRegisterLocation(6),
+ Location::FpuRegisterLocation(2),
Location::FpuRegisterLocation(4),
Primitive::kPrimFloat,
nullptr);
const char* expected =
- "mov.s $f8, $f6\n"
- "mov.s $f6, $f4\n"
- "mov.s $f4, $f8\n";
+ "mov.s $f6, $f2\n"
+ "mov.s $f2, $f4\n"
+ "mov.s $f4, $f6\n";
DriverWrapper(moves_, expected, "TwoFpuRegistersFloat");
}
TEST_F(EmitSwapMipsTest, TwoFpuRegistersDouble) {
moves_->AddMove(
Location::FpuRegisterLocation(4),
- Location::FpuRegisterLocation(6),
+ Location::FpuRegisterLocation(2),
Primitive::kPrimDouble,
nullptr);
moves_->AddMove(
- Location::FpuRegisterLocation(6),
+ Location::FpuRegisterLocation(2),
Location::FpuRegisterLocation(4),
Primitive::kPrimDouble,
nullptr);
const char* expected =
- "mov.d $f8, $f6\n"
- "mov.d $f6, $f4\n"
- "mov.d $f4, $f8\n";
+ "mov.d $f6, $f2\n"
+ "mov.d $f2, $f4\n"
+ "mov.d $f4, $f6\n";
DriverWrapper(moves_, expected, "TwoFpuRegistersDouble");
}
TEST_F(EmitSwapMipsTest, RegisterAndFpuRegister) {
moves_->AddMove(
Location::RegisterLocation(4),
- Location::FpuRegisterLocation(6),
+ Location::FpuRegisterLocation(2),
Primitive::kPrimFloat,
nullptr);
moves_->AddMove(
- Location::FpuRegisterLocation(6),
+ Location::FpuRegisterLocation(2),
Location::RegisterLocation(4),
Primitive::kPrimFloat,
nullptr);
const char* expected =
"or $t8, $a0, $zero\n"
- "mfc1 $a0, $f6\n"
- "mtc1 $t8, $f6\n";
+ "mfc1 $a0, $f2\n"
+ "mtc1 $t8, $f2\n";
DriverWrapper(moves_, expected, "RegisterAndFpuRegister");
}
@@ -327,9 +327,9 @@
Primitive::kPrimFloat,
nullptr);
const char* expected =
- "mov.s $f8, $f4\n"
+ "mov.s $f6, $f4\n"
"lwc1 $f4, 48($sp)\n"
- "swc1 $f8, 48($sp)\n";
+ "swc1 $f6, 48($sp)\n";
DriverWrapper(moves_, expected, "FpuRegisterAndStackSlot");
}
@@ -345,9 +345,9 @@
Primitive::kPrimDouble,
nullptr);
const char* expected =
- "mov.d $f8, $f4\n"
+ "mov.d $f6, $f4\n"
"ldc1 $f4, 48($sp)\n"
- "sdc1 $f8, 48($sp)\n";
+ "sdc1 $f6, 48($sp)\n";
DriverWrapper(moves_, expected, "FpuRegisterAndDoubleStackSlot");
}
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index b29974c..3dcad6a 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3252,6 +3252,9 @@
CHECK_EQ(kMipsDoublewordSize, size) << dst;
LoadDFromOffset(dst.AsFRegister(), src_register, src_offset);
}
+ } else if (dst.IsDRegister()) {
+ CHECK_EQ(kMipsDoublewordSize, size) << dst;
+ LoadDFromOffset(dst.AsOverlappingDRegisterLow(), src_register, src_offset);
}
}
@@ -3396,6 +3399,9 @@
CHECK_EQ(kMipsDoublewordSize, size);
StoreDToOffset(src.AsFRegister(), SP, dest.Int32Value());
}
+ } else if (src.IsDRegister()) {
+ CHECK_EQ(kMipsDoublewordSize, size);
+ StoreDToOffset(src.AsOverlappingDRegisterLow(), SP, dest.Int32Value());
}
}
diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h
index 135b074..7437774 100644
--- a/runtime/arch/mips/asm_support_mips.h
+++ b/runtime/arch/mips/asm_support_mips.h
@@ -21,7 +21,7 @@
#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96
#define FRAME_SIZE_SAVE_REFS_ONLY 48
-#define FRAME_SIZE_SAVE_REFS_AND_ARGS 80
+#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112
#define FRAME_SIZE_SAVE_EVERYTHING 256
#endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 375a03a..98ed5e6 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -75,11 +75,21 @@
gprs_[A1] = nullptr;
gprs_[A2] = nullptr;
gprs_[A3] = nullptr;
+ gprs_[T0] = nullptr;
+ gprs_[T1] = nullptr;
+ fprs_[F8] = nullptr;
+ fprs_[F9] = nullptr;
+ fprs_[F10] = nullptr;
+ fprs_[F11] = nullptr;
fprs_[F12] = nullptr;
fprs_[F13] = nullptr;
fprs_[F14] = nullptr;
fprs_[F15] = nullptr;
+ fprs_[F16] = nullptr;
+ fprs_[F17] = nullptr;
+ fprs_[F18] = nullptr;
+ fprs_[F19] = nullptr;
}
extern "C" NO_RETURN void art_quick_do_long_jump(uint32_t*, uint32_t*);
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index 34e34b4..3e8cdc9 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -167,50 +167,60 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
- * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
+ * (26 total + 1 word padding + method*)
*/
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
- addiu $sp, $sp, -80
- .cfi_adjust_cfa_offset 80
+ addiu $sp, $sp, -112
+ .cfi_adjust_cfa_offset 112
// Ugly compile-time check, but we only have the preprocessor.
-#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 80)
+#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 112)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected."
#endif
- sw $ra, 76($sp)
- .cfi_rel_offset 31, 76
- sw $s8, 72($sp)
- .cfi_rel_offset 30, 72
- sw $gp, 68($sp)
- .cfi_rel_offset 28, 68
- sw $s7, 64($sp)
- .cfi_rel_offset 23, 64
- sw $s6, 60($sp)
- .cfi_rel_offset 22, 60
- sw $s5, 56($sp)
- .cfi_rel_offset 21, 56
- sw $s4, 52($sp)
- .cfi_rel_offset 20, 52
- sw $s3, 48($sp)
- .cfi_rel_offset 19, 48
- sw $s2, 44($sp)
- .cfi_rel_offset 18, 44
- sw $a3, 40($sp)
- .cfi_rel_offset 7, 40
- sw $a2, 36($sp)
- .cfi_rel_offset 6, 36
- sw $a1, 32($sp)
- .cfi_rel_offset 5, 32
- SDu $f14, $f15, 24, $sp, $t0
- SDu $f12, $f13, 16, $sp, $t0
+ sw $ra, 108($sp)
+ .cfi_rel_offset 31, 108
+ sw $s8, 104($sp)
+ .cfi_rel_offset 30, 104
+ sw $gp, 100($sp)
+ .cfi_rel_offset 28, 100
+ sw $s7, 96($sp)
+ .cfi_rel_offset 23, 96
+ sw $s6, 92($sp)
+ .cfi_rel_offset 22, 92
+ sw $s5, 88($sp)
+ .cfi_rel_offset 21, 88
+ sw $s4, 84($sp)
+ .cfi_rel_offset 20, 84
+ sw $s3, 80($sp)
+ .cfi_rel_offset 19, 80
+ sw $s2, 76($sp)
+ .cfi_rel_offset 18, 76
+ sw $t1, 72($sp)
+ .cfi_rel_offset 9, 72
+ sw $t0, 68($sp)
+ .cfi_rel_offset 8, 68
+ sw $a3, 64($sp)
+ .cfi_rel_offset 7, 64
+ sw $a2, 60($sp)
+ .cfi_rel_offset 6, 60
+ sw $a1, 56($sp)
+ .cfi_rel_offset 5, 56
+ SDu $f18, $f19, 48, $sp, $t8
+ SDu $f16, $f17, 40, $sp, $t8
+ SDu $f14, $f15, 32, $sp, $t8
+ SDu $f12, $f13, 24, $sp, $t8
+ SDu $f10, $f11, 16, $sp, $t8
+ SDu $f8, $f9, 8, $sp, $t8
# bottom will hold Method*
.endm
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
- * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
+ * (26 total + 1 word padding + method*)
* Clobbers $t0 and $sp
* Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
* Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack
@@ -229,7 +239,8 @@
/*
* Macro that sets up the callee save frame to conform with
* Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC.
- * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method*
+ * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19
+ * (26 total + 1 word padding + method*)
* Clobbers $sp
* Use $a0 as the Method* and loads it into bottom of stack.
* Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots.
@@ -246,34 +257,42 @@
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack
.cfi_adjust_cfa_offset -ARG_SLOT_SIZE
- lw $ra, 76($sp)
+ lw $ra, 108($sp)
.cfi_restore 31
- lw $s8, 72($sp)
+ lw $s8, 104($sp)
.cfi_restore 30
- lw $gp, 68($sp)
+ lw $gp, 100($sp)
.cfi_restore 28
- lw $s7, 64($sp)
+ lw $s7, 96($sp)
.cfi_restore 23
- lw $s6, 60($sp)
+ lw $s6, 92($sp)
.cfi_restore 22
- lw $s5, 56($sp)
+ lw $s5, 88($sp)
.cfi_restore 21
- lw $s4, 52($sp)
+ lw $s4, 84($sp)
.cfi_restore 20
- lw $s3, 48($sp)
+ lw $s3, 80($sp)
.cfi_restore 19
- lw $s2, 44($sp)
+ lw $s2, 76($sp)
.cfi_restore 18
- lw $a3, 40($sp)
+ lw $t1, 72($sp)
+ .cfi_restore 9
+ lw $t0, 68($sp)
+ .cfi_restore 8
+ lw $a3, 64($sp)
.cfi_restore 7
- lw $a2, 36($sp)
+ lw $a2, 60($sp)
.cfi_restore 6
- lw $a1, 32($sp)
+ lw $a1, 56($sp)
.cfi_restore 5
- LDu $f14, $f15, 24, $sp, $t1
- LDu $f12, $f13, 16, $sp, $t1
- addiu $sp, $sp, 80 # pop frame
- .cfi_adjust_cfa_offset -80
+ LDu $f18, $f19, 48, $sp, $t8
+ LDu $f16, $f17, 40, $sp, $t8
+ LDu $f14, $f15, 32, $sp, $t8
+ LDu $f12, $f13, 24, $sp, $t8
+ LDu $f10, $f11, 16, $sp, $t8
+ LDu $f8, $f9, 8, $sp, $t8
+ addiu $sp, $sp, 112 # pop frame
+ .cfi_adjust_cfa_offset -112
.endm
/*
@@ -824,30 +843,56 @@
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
-.macro LOAD_WORD_TO_REG reg, next_arg, index, label
+// Each of the following macros expands into four instructions or 16 bytes.
+// They are used to build indexable "tables" of code.
+
+.macro LOAD_WORD_TO_REG reg, next_arg, index_reg, label
lw $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4)
b \label
- addiu $\index, 1
+ addiu $\index_reg, 16
+ .balign 16
.endm
-.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index, label
+.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index_reg, next_index, label
lw $\reg1, -8($\next_arg) # next_arg points to argument after the current one (offset is 8)
lw $\reg2, -4($\next_arg)
b \label
- li $\index, 4 # long can be loaded only to a2_a3 pair so index will be always 4
+ li $\index_reg, \next_index
+ .balign 16
.endm
-.macro LOAD_FLOAT_TO_REG reg, next_arg, index, label
+.macro LOAD_FLOAT_TO_REG reg, next_arg, index_reg, label
lwc1 $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4)
b \label
- addiu $\index, 1
+ addiu $\index_reg, 16
+ .balign 16
.endm
-.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index, tmp, label
+#if defined(__mips_isa_rev) && __mips_isa_rev > 2
+// LDu expands into 3 instructions for 64-bit FPU, so index_reg cannot be updated here.
+.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label
+ .set reorder # force use of the branch delay slot
LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one
# (offset is 8)
b \label
- addiu $\index, 1
+ .set noreorder
+ .balign 16
+.endm
+#else
+// LDu expands into 2 instructions for 32-bit FPU, so index_reg is updated here.
+.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label
+ LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one
+ # (offset is 8)
+ b \label
+ addiu $\index_reg, 16
+ .balign 16
+.endm
+#endif
+
+.macro LOAD_END index_reg, next_index, label
+ b \label
+ li $\index_reg, \next_index
+ .balign 16
.endm
#define SPILL_SIZE 32
@@ -891,61 +936,63 @@
lw $gp, 16($fp) # restore $gp
lw $a0, SPILL_SIZE($fp) # restore ArtMethod*
lw $a1, 4($sp) # a1 = this*
- addiu $t0, $sp, 8 # t0 = pointer to the current argument (skip ArtMethod* and this*)
- li $t3, 2 # t3 = gpr_index = 2 (skip A0 and A1)
- move $t4, $zero # t4 = fp_index = 0
- lw $t1, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
+ addiu $t8, $sp, 8 # t8 = pointer to the current argument (skip ArtMethod* and this*)
+ li $t6, 0 # t6 = gpr_index = 0 (corresponds to A2; A0 and A1 are skipped)
+ li $t7, 0 # t7 = fp_index = 0
+ lw $t9, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
# as the $fp is SPILL_SIZE bytes below the $sp on entry)
- addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type)
+ addiu $t9, 1 # t9 = shorty + 1 (skip 1 for return type)
+
+ // Load the base addresses of tabInt ... tabDouble.
+ // We will use the register indices (gpr_index, fp_index) to branch.
+ // Note that the indices are scaled by 16, so they can be added to the bases directly.
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+ lapc $t2, tabInt
+ lapc $t3, tabLong
+ lapc $t4, tabSingle
+ lapc $t5, tabDouble
+#else
+ bltzal $zero, tabBase # nal
+ addiu $t2, $ra, %lo(tabInt - tabBase)
+tabBase:
+ addiu $t3, $ra, %lo(tabLong - tabBase)
+ addiu $t4, $ra, %lo(tabSingle - tabBase)
+ addiu $t5, $ra, %lo(tabDouble - tabBase)
+#endif
+
loop:
- lbu $t2, 0($t1) # t2 = shorty[i]
- beqz $t2, loopEnd # finish getting args when shorty[i] == '\0'
- addiu $t1, 1
+ lbu $ra, 0($t9) # ra = shorty[i]
+ beqz $ra, loopEnd # finish getting args when shorty[i] == '\0'
+ addiu $t9, 1
- li $t9, 'J' # put char 'J' into t9
- beq $t9, $t2, isLong # branch if result type char == 'J'
- li $t9, 'D' # put char 'D' into t9
- beq $t9, $t2, isDouble # branch if result type char == 'D'
- li $t9, 'F' # put char 'F' into t9
- beq $t9, $t2, isSingle # branch if result type char == 'F'
- addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot,
- # for both, int and single)
+ addiu $ra, -'J'
+ beqz $ra, isLong # branch if result type char == 'J'
+ addiu $ra, 'J' - 'D'
+ beqz $ra, isDouble # branch if result type char == 'D'
+ addiu $ra, 'D' - 'F'
+ beqz $ra, isSingle # branch if result type char == 'F'
- li $t5, 2 # skip a0 and a1 (ArtMethod* and this*)
- bne $t5, $t3, 1f # if (gpr_index == 2)
- addiu $t5, 1
- LOAD_WORD_TO_REG a2, t0, t3, loop # a2 = current argument, gpr_index++
-1: bne $t5, $t3, loop # else if (gpr_index == 3)
- nop
- LOAD_WORD_TO_REG a3, t0, t3, loop # a3 = current argument, gpr_index++
+ addu $ra, $t2, $t6
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
isLong:
- addiu $t0, 8 # next_arg = curr_arg + 8
- slti $t5, $t3, 3
- beqz $t5, 2f # if (gpr_index < 3)
- nop
- LOAD_LONG_TO_REG a2, a3, t0, t3, loop # a2_a3 = curr_arg, gpr_index = 4
-2: b loop # else
- li $t3, 4 # gpr_index = 4
-
-isDouble:
- addiu $t0, 8 # next_arg = curr_arg + 8
- li $t5, 0
- bne $t5, $t4, 3f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loop # f12_f13 = curr_arg, fp_index++
-3: bne $t5, $t4, loop # else if (fp_index == 1)
- nop
- LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loop # f14_f15 = curr_arg, fp_index++
+ addu $ra, $t3, $t6
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
isSingle:
- li $t5, 0
- bne $t5, $t4, 4f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_FLOAT_TO_REG f12, t0, t4, loop # f12 = curr_arg, fp_index++
-4: bne $t5, $t4, loop # else if (fp_index == 1)
- nop
- LOAD_FLOAT_TO_REG f14, t0, t4, loop # f14 = curr_arg, fp_index++
+ addu $ra, $t4, $t7
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
+
+isDouble:
+ addu $ra, $t5, $t7
+#if defined(__mips_isa_rev) && __mips_isa_rev > 2
+ addiu $t7, 16 # fp_index += 16 didn't fit into LOAD_DOUBLE_TO_REG
+#endif
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
loopEnd:
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code
@@ -976,6 +1023,38 @@
SDu $f0, $f1, 0, $t0, $t1 # store floating point result
jalr $zero, $ra
nop
+
+ // Note that gpr_index is kept within the range of tabInt and tabLong
+ // and fp_index is kept within the range of tabSingle and tabDouble.
+ .balign 16
+tabInt:
+ LOAD_WORD_TO_REG a2, t8, t6, loop # a2 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG a3, t8, t6, loop # a3 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t0, t8, t6, loop # t0 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t1, t8, t6, loop # t1 = current argument, gpr_index += 16
+ LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16
+tabLong:
+ LOAD_LONG_TO_REG a2, a3, t8, t6, 2*16, loop # a2_a3 = curr_arg, gpr_index = 2*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop # t0_t1 = curr_arg, gpr_index = 4*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop # t0_t1 = curr_arg, gpr_index = 4*16
+ LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16
+ LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16
+tabSingle:
+ LOAD_FLOAT_TO_REG f8, t8, t7, loop # f8 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f10, t8, t7, loop # f10 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f12, t8, t7, loop # f12 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f14, t8, t7, loop # f14 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f16, t8, t7, loop # f16 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f18, t8, t7, loop # f18 = curr_arg, fp_index += 16
+ LOAD_END t7, 6*16, loop # no more FPR args, fp_index = 6*16
+tabDouble:
+ LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loop # f8_f9 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loop # f10_f11 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loop # f12_f13 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loop # f14_f15 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loop # f16_f17 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loop # f18_f19 = curr_arg; if FPU32, fp_index += 16
+ LOAD_END t7, 6*16, loop # no more FPR args, fp_index = 6*16
END art_quick_invoke_stub
/*
@@ -1016,64 +1095,63 @@
addiu $sp, $sp, 16 # restore stack after memcpy
lw $gp, 16($fp) # restore $gp
lw $a0, SPILL_SIZE($fp) # restore ArtMethod*
- addiu $t0, $sp, 4 # t0 = pointer to the current argument (skip ArtMethod*)
- li $t3, 1 # t3 = gpr_index = 1 (skip A0)
- move $t4, $zero # t4 = fp_index = 0
- lw $t1, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
+ addiu $t8, $sp, 4 # t8 = pointer to the current argument (skip ArtMethod*)
+ li $t6, 0 # t6 = gpr_index = 0 (corresponds to A1; A0 is skipped)
+ li $t7, 0 # t7 = fp_index = 0
+ lw $t9, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE
# as the $fp is SPILL_SIZE bytes below the $sp on entry)
- addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type)
+ addiu $t9, 1 # t9 = shorty + 1 (skip 1 for return type)
+
+ // Load the base addresses of tabIntS ... tabDoubleS.
+ // We will use the register indices (gpr_index, fp_index) to branch.
+ // Note that the indices are scaled by 16, so they can be added to the bases directly.
+#if defined(__mips_isa_rev) && __mips_isa_rev >= 6
+ lapc $t2, tabIntS
+ lapc $t3, tabLongS
+ lapc $t4, tabSingleS
+ lapc $t5, tabDoubleS
+#else
+ bltzal $zero, tabBaseS # nal
+ addiu $t2, $ra, %lo(tabIntS - tabBaseS)
+tabBaseS:
+ addiu $t3, $ra, %lo(tabLongS - tabBaseS)
+ addiu $t4, $ra, %lo(tabSingleS - tabBaseS)
+ addiu $t5, $ra, %lo(tabDoubleS - tabBaseS)
+#endif
+
loopS:
- lbu $t2, 0($t1) # t2 = shorty[i]
- beqz $t2, loopEndS # finish getting args when shorty[i] == '\0'
- addiu $t1, 1
+ lbu $ra, 0($t9) # ra = shorty[i]
+ beqz $ra, loopEndS # finish getting args when shorty[i] == '\0'
+ addiu $t9, 1
- li $t9, 'J' # put char 'J' into t9
- beq $t9, $t2, isLongS # branch if result type char == 'J'
- li $t9, 'D' # put char 'D' into t9
- beq $t9, $t2, isDoubleS # branch if result type char == 'D'
- li $t9, 'F' # put char 'F' into t9
- beq $t9, $t2, isSingleS # branch if result type char == 'F'
- addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot,
- # for both, int and single)
+ addiu $ra, -'J'
+ beqz $ra, isLongS # branch if result type char == 'J'
+ addiu $ra, 'J' - 'D'
+ beqz $ra, isDoubleS # branch if result type char == 'D'
+ addiu $ra, 'D' - 'F'
+ beqz $ra, isSingleS # branch if result type char == 'F'
- li $t5, 1 # skip a0 (ArtMethod*)
- bne $t5, $t3, 1f # if (gpr_index == 1)
- addiu $t5, 1
- LOAD_WORD_TO_REG a1, t0, t3, loopS # a1 = current argument, gpr_index++
-1: bne $t5, $t3, 2f # else if (gpr_index == 2)
- addiu $t5, 1
- LOAD_WORD_TO_REG a2, t0, t3, loopS # a2 = current argument, gpr_index++
-2: bne $t5, $t3, loopS # else if (gpr_index == 3)
- nop
- LOAD_WORD_TO_REG a3, t0, t3, loopS # a3 = current argument, gpr_index++
+ addu $ra, $t2, $t6
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
isLongS:
- addiu $t0, 8 # next_arg = curr_arg + 8
- slti $t5, $t3, 3
- beqz $t5, 3f # if (gpr_index < 3)
- nop
- LOAD_LONG_TO_REG a2, a3, t0, t3, loopS # a2_a3 = curr_arg, gpr_index = 4
-3: b loopS # else
- li $t3, 4 # gpr_index = 4
-
-isDoubleS:
- addiu $t0, 8 # next_arg = curr_arg + 8
- li $t5, 0
- bne $t5, $t4, 4f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loopS # f12_f13 = curr_arg, fp_index++
-4: bne $t5, $t4, loopS # else if (fp_index == 1)
- nop
- LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loopS # f14_f15 = curr_arg, fp_index++
+ addu $ra, $t3, $t6
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
isSingleS:
- li $t5, 0
- bne $t5, $t4, 5f # if (fp_index == 0)
- addiu $t5, 1
- LOAD_FLOAT_TO_REG f12, t0, t4, loopS # f12 = curr_arg, fp_index++
-5: bne $t5, $t4, loopS # else if (fp_index == 1)
- nop
- LOAD_FLOAT_TO_REG f14, t0, t4, loopS # f14 = curr_arg, fp_index++
+ addu $ra, $t4, $t7
+ jalr $zero, $ra
+ addiu $t8, 4 # next_arg = curr_arg + 4
+
+isDoubleS:
+ addu $ra, $t5, $t7
+#if defined(__mips_isa_rev) && __mips_isa_rev > 2
+ addiu $t7, 16 # fp_index += 16 didn't fit into LOAD_DOUBLE_TO_REG
+#endif
+ jalr $zero, $ra
+ addiu $t8, 8 # next_arg = curr_arg + 8
loopEndS:
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code
@@ -1104,6 +1182,40 @@
SDu $f0, $f1, 0, $t0, $t1 # store floating point result
jalr $zero, $ra
nop
+
+ // Note that gpr_index is kept within the range of tabIntS and tabLongS
+ // and fp_index is kept within the range of tabSingleS and tabDoubleS.
+ .balign 16
+tabIntS:
+ LOAD_WORD_TO_REG a1, t8, t6, loopS # a1 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG a2, t8, t6, loopS # a2 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG a3, t8, t6, loopS # a3 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t0, t8, t6, loopS # t0 = current argument, gpr_index += 16
+ LOAD_WORD_TO_REG t1, t8, t6, loopS # t1 = current argument, gpr_index += 16
+ LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16
+tabLongS:
+ LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS # a2_a3 = curr_arg, gpr_index = 3*16
+ LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS # a2_a3 = curr_arg, gpr_index = 3*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS # t0_t1 = curr_arg, gpr_index = 5*16
+ LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS # t0_t1 = curr_arg, gpr_index = 5*16
+ LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16
+ LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16
+tabSingleS:
+ LOAD_FLOAT_TO_REG f8, t8, t7, loopS # f8 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f10, t8, t7, loopS # f10 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f12, t8, t7, loopS # f12 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f14, t8, t7, loopS # f14 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f16, t8, t7, loopS # f16 = curr_arg, fp_index += 16
+ LOAD_FLOAT_TO_REG f18, t8, t7, loopS # f18 = curr_arg, fp_index += 16
+ LOAD_END t7, 6*16, loopS # no more FPR args, fp_index = 6*16
+tabDoubleS:
+ LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loopS # f8_f9 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loopS # f10_f11 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loopS # f12_f13 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loopS # f14_f15 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loopS # f16_f17 = curr_arg; if FPU32, fp_index += 16
+ LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loopS # f18_f19 = curr_arg; if FPU32, fp_index += 16
+ LOAD_END t7, 6*16, loopS # no more FPR args, fp_index = 6*16
END art_quick_invoke_static_stub
#undef SPILL_SIZE
@@ -1886,9 +1998,9 @@
la $t9, artQuickProxyInvokeHandler
jalr $t9 # (Method* proxy method, receiver, Thread*, SP)
addiu $a3, $sp, ARG_SLOT_SIZE # pass $sp (remove arg slots)
- lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
+ lw $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
RESTORE_SAVE_REFS_AND_ARGS_FRAME
- bnez $t0, 1f
+ bnez $t7, 1f
# don't care if $v0 and/or $v1 are modified, when exception branch taken
MTD $v0, $v1, $f0, $f1 # move float value to return value
jalr $zero, $ra
@@ -1900,25 +2012,25 @@
/*
* Called to resolve an imt conflict.
* a0 is the conflict ArtMethod.
- * t0 is a hidden argument that holds the target interface method's dex method index.
+ * t7 is a hidden argument that holds the target interface method's dex method index.
*
- * Note that this stub writes to a0, t0 and t1.
+ * Note that this stub writes to a0, t7 and t8.
*/
ENTRY art_quick_imt_conflict_trampoline
- lw $t1, 0($sp) # Load referrer.
- lw $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array.
- sll $t0, $t0, POINTER_SIZE_SHIFT # Calculate offset.
- addu $t0, $t1, $t0 # Add offset to base.
- lw $t0, 0($t0) # Load interface method.
+ lw $t8, 0($sp) # Load referrer.
+ lw $t8, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t8) # Load dex cache methods array.
+ sll $t7, $t7, POINTER_SIZE_SHIFT # Calculate offset.
+ addu $t7, $t8, $t7 # Add offset to base.
+ lw $t7, 0($t7) # Load interface method.
lw $a0, ART_METHOD_JNI_OFFSET_32($a0) # Load ImtConflictTable.
.Limt_table_iterate:
- lw $t1, 0($a0) # Load next entry in ImtConflictTable.
+ lw $t8, 0($a0) # Load next entry in ImtConflictTable.
# Branch if found.
- beq $t1, $t0, .Limt_table_found
+ beq $t8, $t7, .Limt_table_found
nop
# If the entry is null, the interface method is not in the ImtConflictTable.
- beqz $t1, .Lconflict_trampoline
+ beqz $t8, .Lconflict_trampoline
nop
# Iterate over the entries of the ImtConflictTable.
b .Limt_table_iterate
@@ -1928,7 +2040,7 @@
# We successfully hit an entry in the table. Load the target method and jump to it.
lw $a0, __SIZEOF_POINTER__($a0)
lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0)
- jr $t9
+ jalr $zero, $t9
nop
.Lconflict_trampoline:
@@ -1972,7 +2084,7 @@
# The result of the call is:
# v0: ptr to native code, 0 on error.
# v1: ptr to the bottom of the used area of the alloca, can restore stack till here.
- beq $v0, $zero, 1f # check entry error
+ beq $v0, $zero, 2f # check entry error
move $t9, $v0 # save the code ptr
move $sp, $v1 # release part of the alloca
@@ -1980,10 +2092,22 @@
lw $a0, 0($sp)
lw $a1, 4($sp)
lw $a2, 8($sp)
-
- # Load FPRs the same as GPRs. Look at BuildNativeCallFrameStateMachine.
- jalr $t9 # native call
lw $a3, 12($sp)
+
+ # artQuickGenericJniTrampoline sets bit 0 of the native code address to 1
+ # when the first two arguments are both single precision floats. This lets
+ # us extract them properly from the stack and load into floating point
+ # registers.
+ MTD $a0, $a1, $f12, $f13
+ andi $t0, $t9, 1
+ xor $t9, $t9, $t0
+ bnez $t0, 1f
+ mtc1 $a1, $f14
+ MTD $a2, $a3, $f14, $f15
+
+1:
+ jalr $t9 # native call
+ nop
addiu $sp, $sp, 16 # remove arg slots
move $gp, $s3 # restore $gp from $s3
@@ -1999,18 +2123,18 @@
s.d $f0, 16($sp) # pass result_f
lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
- bne $t0, $zero, 1f # check for pending exceptions
+ bne $t0, $zero, 2f # check for pending exceptions
move $sp, $s8 # tear down the alloca
- # tear dpown the callee-save frame
+ # tear down the callee-save frame
RESTORE_SAVE_REFS_AND_ARGS_FRAME
MTD $v0, $v1, $f0, $f1 # move float value to return value
jalr $zero, $ra
nop
-1:
+2:
lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF)
# This will create a new save-all frame, required by the runtime.
DELIVER_PENDING_EXCEPTION
@@ -2023,9 +2147,9 @@
la $t9, artQuickToInterpreterBridge
jalr $t9 # (Method* method, Thread*, SP)
addiu $a2, $sp, ARG_SLOT_SIZE # pass $sp (remove arg slots)
- lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
+ lw $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_
RESTORE_SAVE_REFS_AND_ARGS_FRAME
- bnez $t0, 1f
+ bnez $t7, 1f
# don't care if $v0 and/or $v1 are modified, when exception branch taken
MTD $v0, $v1, $f0, $f1 # move float value to return value
jalr $zero, $ra
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 90e7b20..6f16352 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -26,12 +26,13 @@
namespace mips {
static constexpr uint32_t kMipsCalleeSaveAlwaysSpills =
- (1 << art::mips::RA);
+ (1u << art::mips::RA);
static constexpr uint32_t kMipsCalleeSaveRefSpills =
(1 << art::mips::S2) | (1 << art::mips::S3) | (1 << art::mips::S4) | (1 << art::mips::S5) |
(1 << art::mips::S6) | (1 << art::mips::S7) | (1 << art::mips::GP) | (1 << art::mips::FP);
static constexpr uint32_t kMipsCalleeSaveArgSpills =
- (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3);
+ (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) | (1 << art::mips::T0) |
+ (1 << art::mips::T1);
static constexpr uint32_t kMipsCalleeSaveAllSpills =
(1 << art::mips::S0) | (1 << art::mips::S1);
static constexpr uint32_t kMipsCalleeSaveEverythingSpills =
@@ -44,11 +45,13 @@
static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0;
static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0;
static constexpr uint32_t kMipsCalleeSaveFpArgSpills =
- (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15);
+ (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) |
+ (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) |
+ (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19);
static constexpr uint32_t kMipsCalleeSaveAllFPSpills =
(1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
(1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
- (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
+ (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1u << art::mips::F31);
static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills =
(1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) |
(1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) |
@@ -57,7 +60,7 @@
(1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) |
(1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) |
(1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
- (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31);
+ (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1u << art::mips::F31);
constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index ae01bd5..555f3f0 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -35,9 +35,9 @@
A1 = 5,
A2 = 6,
A3 = 7,
- T0 = 8, // Temporaries.
+ T0 = 8, // Two extra arguments / temporaries.
T1 = 9,
- T2 = 10,
+ T2 = 10, // Temporaries.
T3 = 11,
T4 = 12,
T5 = 13,
@@ -100,7 +100,7 @@
F29 = 29,
F30 = 30,
F31 = 31,
- FTMP = F8, // scratch register
+ FTMP = F6, // scratch register
kNumberOfFRegisters = 32,
kNoFRegister = -1,
};
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 6665897..9e385f8 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -355,7 +355,7 @@
"lw $a2, 8($sp)\n\t"
"lw $t9, 12($sp)\n\t"
"lw $s1, 16($sp)\n\t"
- "lw $t0, 20($sp)\n\t"
+ "lw $t7, 20($sp)\n\t"
"addiu $sp, $sp, 24\n\t"
"jalr $t9\n\t" // Call the stub.
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index fe82878..bf1d4ea 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -134,13 +134,23 @@
// | Method* | ---
// | RA |
// | ... | callee saves
+ // | T1 | arg5
+ // | T0 | arg4
// | A3 | arg3
// | A2 | arg2
// | A1 | arg1
+ // | F19 |
+ // | F18 | f_arg5
+ // | F17 |
+ // | F16 | f_arg4
// | F15 |
- // | F14 | f_arg1
+ // | F14 | f_arg3
// | F13 |
- // | F12 | f_arg0
+ // | F12 | f_arg2
+ // | F11 |
+ // | F10 | f_arg1
+ // | F9 |
+ // | F8 | f_arg0
// | | padding
// | A0/Method* | <- sp
static constexpr bool kSplitPairAcrossRegisterAndStack = false;
@@ -148,14 +158,14 @@
static constexpr bool kQuickSoftFloatAbi = false;
static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false;
static constexpr bool kQuickSkipOddFpRegisters = true;
- static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs.
- static constexpr size_t kNumQuickFprArgs = 4; // 2 arguments passed in FPRs. Floats can be passed
- // only in even numbered registers and each double
- // occupies two registers.
+ static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs.
+ static constexpr size_t kNumQuickFprArgs = 12; // 6 arguments passed in FPRs. Floats can be
+ // passed only in even numbered registers and each
+ // double occupies two registers.
static constexpr bool kGprFprLockstep = false;
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg.
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 32; // Offset of first GPR arg.
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 76; // Offset of return address.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 8; // Offset of first FPR arg.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 56; // Offset of first GPR arg.
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 108; // Offset of return address.
static size_t GprIndexToGprOffset(uint32_t gpr_index) {
return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
}
@@ -187,7 +197,7 @@
// | F12 | f_arg0
// | | padding
// | A0/Method* | <- sp
- // NOTE: for Mip64, when A0 is skipped, F0 is also skipped.
+ // NOTE: for Mip64, when A0 is skipped, F12 is also skipped.
static constexpr bool kSplitPairAcrossRegisterAndStack = false;
static constexpr bool kAlignPairRegister = false;
static constexpr bool kQuickSoftFloatAbi = false;
@@ -197,7 +207,7 @@
static constexpr size_t kNumQuickFprArgs = 7; // 7 arguments passed in FPRs.
static constexpr bool kGprFprLockstep = true;
- static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 24; // Offset of first FPR arg (F1).
+ static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 24; // Offset of first FPR arg (F13).
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg (A1).
static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 200; // Offset of return address.
static size_t GprIndexToGprOffset(uint32_t gpr_index) {
@@ -501,10 +511,16 @@
case Primitive::kPrimDouble:
case Primitive::kPrimLong:
if (kQuickSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) {
- if (cur_type_ == Primitive::kPrimLong && kAlignPairRegister && gpr_index_ == 0) {
- // Currently, this is only for ARM and MIPS, where the first available parameter
- // register is R1 (on ARM) or A1 (on MIPS). So we skip it, and use R2 (on ARM) or
- // A2 (on MIPS) instead.
+ if (cur_type_ == Primitive::kPrimLong &&
+#if defined(__mips__) && !defined(__LP64__)
+ (gpr_index_ == 0 || gpr_index_ == 2) &&
+#else
+ gpr_index_ == 0 &&
+#endif
+ kAlignPairRegister) {
+ // Currently, this is only for ARM and MIPS, where we align long parameters with
+ // even-numbered registers by skipping R1 (on ARM) or A1(A3) (on MIPS) and using
+ // R2 (on ARM) or A2(T0) (on MIPS) instead.
IncGprIndex();
}
is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) &&
@@ -2086,6 +2102,41 @@
// Note that the native code pointer will be automatically set by artFindNativeMethod().
}
+#if defined(__mips__) && !defined(__LP64__)
+ // On MIPS32 if the first two arguments are floating-point, we need to know their types
+ // so that art_quick_generic_jni_trampoline can correctly extract them from the stack
+ // and load into floating-point registers.
+ // Possible arrangements of first two floating-point arguments on the stack (32-bit FPU
+ // view):
+ // (1)
+ // | DOUBLE | DOUBLE | other args, if any
+ // | F12 | F13 | F14 | F15 |
+ // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16
+ // (2)
+ // | DOUBLE | FLOAT | (PAD) | other args, if any
+ // | F12 | F13 | F14 | |
+ // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16
+ // (3)
+ // | FLOAT | (PAD) | DOUBLE | other args, if any
+ // | F12 | | F14 | F15 |
+ // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16
+ // (4)
+ // | FLOAT | FLOAT | other args, if any
+ // | F12 | F14 |
+ // | SP+0 | SP+4 | SP+8
+ // As you can see, only the last case (4) is special. In all others we can just
+ // load F12/F13 and F14/F15 in the same manner.
+ // Set bit 0 of the native code address to 1 in this case (valid code addresses
+ // are always a multiple of 4 on MIPS32, so we have 2 spare bits available).
+ if (nativeCode != nullptr &&
+ shorty != nullptr &&
+ shorty_len >= 3 &&
+ shorty[1] == 'F' &&
+ shorty[2] == 'F') {
+ nativeCode = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(nativeCode) | 1);
+ }
+#endif
+
// Return native code addr(lo) and bottom of alloca address(hi).
return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(visitor.GetBottomOfUsedArea()),
reinterpret_cast<uintptr_t>(nativeCode));
diff --git a/runtime/oat.h b/runtime/oat.h
index 8c84d42..0f4cbbb 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
class PACKED(4) OatHeader {
public:
static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
- static constexpr uint8_t kOatVersion[] = { '0', '9', '2', '\0' };
+ static constexpr uint8_t kOatVersion[] = { '0', '9', '3', '\0' };
static constexpr const char* kImageLocationKey = "image-location";
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";