diff options
author | 2016-12-07 17:15:08 +0000 | |
---|---|---|
committer | 2016-12-07 17:15:08 +0000 | |
commit | b08265b2d61cd3923dd6fc01d6c82f73d5230e82 (patch) | |
tree | cebb0dd2570ed90265dc376d89c17768700fd90d | |
parent | b4ee681c21564ee9afe0202e1006cfa21019e88b (diff) | |
parent | 1b8464d17c2266763714ae18be7c4dc26e28bf61 (diff) |
Merge "MIPS32: Pass more arguments in registers."
-rw-r--r-- | compiler/jni/jni_cfi_test_expected.inc | 4 | ||||
-rw-r--r-- | compiler/jni/quick/mips/calling_convention_mips.cc | 49 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 9 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_mips.h | 6 | ||||
-rw-r--r-- | compiler/optimizing/emit_swap_mips_test.cc | 36 | ||||
-rw-r--r-- | compiler/utils/mips/assembler_mips.cc | 6 | ||||
-rw-r--r-- | runtime/arch/mips/asm_support_mips.h | 2 | ||||
-rw-r--r-- | runtime/arch/mips/context_mips.cc | 10 | ||||
-rw-r--r-- | runtime/arch/mips/quick_entrypoints_mips.S | 480 | ||||
-rw-r--r-- | runtime/arch/mips/quick_method_frame_info_mips.h | 13 | ||||
-rw-r--r-- | runtime/arch/mips/registers_mips.h | 6 | ||||
-rw-r--r-- | runtime/arch/stub_test.cc | 2 | ||||
-rw-r--r-- | runtime/entrypoints/quick/quick_trampoline_entrypoints.cc | 81 |
13 files changed, 456 insertions, 248 deletions
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index a205800dfa..2710ae9b53 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -327,7 +327,7 @@ static constexpr uint8_t expected_asm_kMips[] = { 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xBE, 0xAF, 0x34, 0x00, 0xB7, 0xAF, 0x30, 0x00, 0xB6, 0xAF, 0x2C, 0x00, 0xB5, 0xAF, 0x28, 0x00, 0xB4, 0xAF, 0x24, 0x00, 0xB3, 0xAF, 0x20, 0x00, 0xB2, 0xAF, - 0x00, 0x00, 0xA4, 0xAF, 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xAC, 0xE7, + 0x00, 0x00, 0xA4, 0xAF, 0x44, 0x00, 0xA5, 0xAF, 0x48, 0x00, 0xA8, 0xE7, 0x4C, 0x00, 0xA6, 0xAF, 0x50, 0x00, 0xA7, 0xAF, 0xE0, 0xFF, 0xBD, 0x27, 0x20, 0x00, 0xBD, 0x27, 0x20, 0x00, 0xB2, 0x8F, 0x24, 0x00, 0xB3, 0x8F, 0x28, 0x00, 0xB4, 0x8F, 0x2C, 0x00, 0xB5, 0x8F, 0x30, 0x00, 0xB6, 0x8F, @@ -361,7 +361,7 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x00000024: .cfi_offset: r18 at cfa-32 // 0x00000024: sw r4, +0(r29) // 0x00000028: sw r5, +68(r29) -// 0x0000002c: swc1 f12, +72(r29) +// 0x0000002c: swc1 f8, +72(r29) // 0x00000030: sw r6, +76(r29) // 0x00000034: sw r7, +80(r29) // 0x00000038: addiu r29, r29, -32 diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc index e6948ec198..0e0716e911 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.cc +++ b/compiler/jni/quick/mips/calling_convention_mips.cc @@ -23,6 +23,10 @@ namespace art { namespace mips { +// +// JNI calling convention constants. +// + // Up to how many float-like (float, double) args can be enregistered in floating-point registers. // The rest of the args must go in integer registers or on the stack. constexpr size_t kMaxFloatOrDoubleRegisterArguments = 2u; @@ -30,9 +34,17 @@ constexpr size_t kMaxFloatOrDoubleRegisterArguments = 2u; // enregistered. The rest of the args must go on the stack. constexpr size_t kMaxIntLikeRegisterArguments = 4u; -static const Register kCoreArgumentRegisters[] = { A0, A1, A2, A3 }; -static const FRegister kFArgumentRegisters[] = { F12, F14 }; -static const DRegister kDArgumentRegisters[] = { D6, D7 }; +static const Register kJniCoreArgumentRegisters[] = { A0, A1, A2, A3 }; +static const FRegister kJniFArgumentRegisters[] = { F12, F14 }; +static const DRegister kJniDArgumentRegisters[] = { D6, D7 }; + +// +// Managed calling convention constants. +// + +static const Register kManagedCoreArgumentRegisters[] = { A0, A1, A2, A3, T0, T1 }; +static const FRegister kManagedFArgumentRegisters[] = { F8, F10, F12, F14, F16, F18 }; +static const DRegister kManagedDArgumentRegisters[] = { D4, D5, D6, D7, D8, D9 }; static constexpr ManagedRegister kCalleeSaveRegisters[] = { // Core registers. @@ -133,30 +145,30 @@ const ManagedRegisterEntrySpills& MipsManagedRuntimeCallingConvention::EntrySpil for (ResetIterator(FrameOffset(0)); HasNext(); Next()) { if (IsCurrentParamAFloatOrDouble()) { if (IsCurrentParamADouble()) { - if (fpr_index < arraysize(kDArgumentRegisters)) { + if (fpr_index < arraysize(kManagedDArgumentRegisters)) { entry_spills_.push_back( - MipsManagedRegister::FromDRegister(kDArgumentRegisters[fpr_index++])); + MipsManagedRegister::FromDRegister(kManagedDArgumentRegisters[fpr_index++])); } else { entry_spills_.push_back(ManagedRegister::NoRegister(), 8); } } else { - if (fpr_index < arraysize(kFArgumentRegisters)) { + if (fpr_index < arraysize(kManagedFArgumentRegisters)) { entry_spills_.push_back( - MipsManagedRegister::FromFRegister(kFArgumentRegisters[fpr_index++])); + MipsManagedRegister::FromFRegister(kManagedFArgumentRegisters[fpr_index++])); } else { entry_spills_.push_back(ManagedRegister::NoRegister(), 4); } } } else { if (IsCurrentParamALong() && !IsCurrentParamAReference()) { - if (gpr_index == 1) { - // Don't use a1-a2 as a register pair, move to a2-a3 instead. + if (gpr_index == 1 || gpr_index == 3) { + // Don't use A1-A2(A3-T0) as a register pair, move to A2-A3(T0-T1) instead. gpr_index++; } - if (gpr_index < arraysize(kCoreArgumentRegisters) - 1) { + if (gpr_index < arraysize(kManagedCoreArgumentRegisters) - 1) { entry_spills_.push_back( - MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[gpr_index++])); - } else if (gpr_index == arraysize(kCoreArgumentRegisters) - 1) { + MipsManagedRegister::FromCoreRegister(kManagedCoreArgumentRegisters[gpr_index++])); + } else if (gpr_index == arraysize(kManagedCoreArgumentRegisters) - 1) { gpr_index++; entry_spills_.push_back(ManagedRegister::NoRegister(), 4); } else { @@ -164,9 +176,9 @@ const ManagedRegisterEntrySpills& MipsManagedRuntimeCallingConvention::EntrySpil } } - if (gpr_index < arraysize(kCoreArgumentRegisters)) { + if (gpr_index < arraysize(kManagedCoreArgumentRegisters)) { entry_spills_.push_back( - MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[gpr_index++])); + MipsManagedRegister::FromCoreRegister(kManagedCoreArgumentRegisters[gpr_index++])); } else { entry_spills_.push_back(ManagedRegister::NoRegister(), 4); } @@ -175,6 +187,7 @@ const ManagedRegisterEntrySpills& MipsManagedRuntimeCallingConvention::EntrySpil } return entry_spills_; } + // JNI calling convention MipsJniCallingConvention::MipsJniCallingConvention(bool is_static, @@ -285,7 +298,7 @@ MipsJniCallingConvention::MipsJniCallingConvention(bool is_static, // | FLOAT | INT | DOUBLE | // | F12 | A1 | A2 | A3 | // (c) first two arguments are floating-point (float, double) - // | FLAOT | (PAD) | DOUBLE | INT | + // | FLOAT | (PAD) | DOUBLE | INT | // | F12 | | F14 | SP+16 | // (d) first two arguments are floating-point (double, float) // | DOUBLE | FLOAT | INT | @@ -404,9 +417,9 @@ ManagedRegister MipsJniCallingConvention::CurrentParamRegister() { if (use_fp_arg_registers_ && (itr_args_ < kMaxFloatOrDoubleRegisterArguments)) { if (IsCurrentParamAFloatOrDouble()) { if (IsCurrentParamADouble()) { - return MipsManagedRegister::FromDRegister(kDArgumentRegisters[itr_args_]); + return MipsManagedRegister::FromDRegister(kJniDArgumentRegisters[itr_args_]); } else { - return MipsManagedRegister::FromFRegister(kFArgumentRegisters[itr_args_]); + return MipsManagedRegister::FromFRegister(kJniFArgumentRegisters[itr_args_]); } } } @@ -420,7 +433,7 @@ ManagedRegister MipsJniCallingConvention::CurrentParamRegister() { return MipsManagedRegister::FromRegisterPair(A2_A3); } } else { - return MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[itr_slots_]); + return MipsManagedRegister::FromCoreRegister(kJniCoreArgumentRegisters[itr_slots_]); } } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 61dabfabaa..ff48f6642d 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -99,8 +99,9 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(Primitive::Type uint32_t gp_index = gp_index_; gp_index_ += 2; if (gp_index + 1 < calling_convention.GetNumberOfRegisters()) { - if (calling_convention.GetRegisterAt(gp_index) == A1) { - gp_index_++; // Skip A1, and use A2_A3 instead. + Register reg = calling_convention.GetRegisterAt(gp_index); + if (reg == A1 || reg == A3) { + gp_index_++; // Skip A1(A3), and use A2_A3(T0_T1) instead. gp_index++; } Register low_even = calling_convention.GetRegisterAt(gp_index); @@ -5095,9 +5096,9 @@ void LocationsBuilderMIPS::HandleInvoke(HInvoke* invoke) { void LocationsBuilderMIPS::VisitInvokeInterface(HInvokeInterface* invoke) { HandleInvoke(invoke); - // The register T0 is required to be used for the hidden argument in + // The register T7 is required to be used for the hidden argument in // art_quick_imt_conflict_trampoline, so add the hidden argument. - invoke->GetLocations()->AddTemp(Location::RegisterLocation(T0)); + invoke->GetLocations()->AddTemp(Location::RegisterLocation(T7)); } void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 2273e52b06..f03f29c5d4 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -32,11 +32,11 @@ namespace mips { // InvokeDexCallingConvention registers static constexpr Register kParameterCoreRegisters[] = - { A1, A2, A3 }; + { A1, A2, A3, T0, T1 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static constexpr FRegister kParameterFpuRegisters[] = - { F12, F14 }; + { F8, F10, F12, F14, F16, F18 }; static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); @@ -48,7 +48,7 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); static constexpr FRegister kRuntimeParameterFpuRegisters[] = - { F12, F14}; + { F12, F14 }; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterFpuRegisters); diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index 9dc53e6811..0d4e1c5c97 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -154,54 +154,54 @@ TEST_F(EmitSwapMipsTest, TwoRegisterPairs) { TEST_F(EmitSwapMipsTest, TwoFpuRegistersFloat) { moves_->AddMove( Location::FpuRegisterLocation(4), - Location::FpuRegisterLocation(6), + Location::FpuRegisterLocation(2), Primitive::kPrimFloat, nullptr); moves_->AddMove( - Location::FpuRegisterLocation(6), + Location::FpuRegisterLocation(2), Location::FpuRegisterLocation(4), Primitive::kPrimFloat, nullptr); const char* expected = - "mov.s $f8, $f6\n" - "mov.s $f6, $f4\n" - "mov.s $f4, $f8\n"; + "mov.s $f6, $f2\n" + "mov.s $f2, $f4\n" + "mov.s $f4, $f6\n"; DriverWrapper(moves_, expected, "TwoFpuRegistersFloat"); } TEST_F(EmitSwapMipsTest, TwoFpuRegistersDouble) { moves_->AddMove( Location::FpuRegisterLocation(4), - Location::FpuRegisterLocation(6), + Location::FpuRegisterLocation(2), Primitive::kPrimDouble, nullptr); moves_->AddMove( - Location::FpuRegisterLocation(6), + Location::FpuRegisterLocation(2), Location::FpuRegisterLocation(4), Primitive::kPrimDouble, nullptr); const char* expected = - "mov.d $f8, $f6\n" - "mov.d $f6, $f4\n" - "mov.d $f4, $f8\n"; + "mov.d $f6, $f2\n" + "mov.d $f2, $f4\n" + "mov.d $f4, $f6\n"; DriverWrapper(moves_, expected, "TwoFpuRegistersDouble"); } TEST_F(EmitSwapMipsTest, RegisterAndFpuRegister) { moves_->AddMove( Location::RegisterLocation(4), - Location::FpuRegisterLocation(6), + Location::FpuRegisterLocation(2), Primitive::kPrimFloat, nullptr); moves_->AddMove( - Location::FpuRegisterLocation(6), + Location::FpuRegisterLocation(2), Location::RegisterLocation(4), Primitive::kPrimFloat, nullptr); const char* expected = "or $t8, $a0, $zero\n" - "mfc1 $a0, $f6\n" - "mtc1 $t8, $f6\n"; + "mfc1 $a0, $f2\n" + "mtc1 $t8, $f2\n"; DriverWrapper(moves_, expected, "RegisterAndFpuRegister"); } @@ -327,9 +327,9 @@ TEST_F(EmitSwapMipsTest, FpuRegisterAndStackSlot) { Primitive::kPrimFloat, nullptr); const char* expected = - "mov.s $f8, $f4\n" + "mov.s $f6, $f4\n" "lwc1 $f4, 48($sp)\n" - "swc1 $f8, 48($sp)\n"; + "swc1 $f6, 48($sp)\n"; DriverWrapper(moves_, expected, "FpuRegisterAndStackSlot"); } @@ -345,9 +345,9 @@ TEST_F(EmitSwapMipsTest, FpuRegisterAndDoubleStackSlot) { Primitive::kPrimDouble, nullptr); const char* expected = - "mov.d $f8, $f4\n" + "mov.d $f6, $f4\n" "ldc1 $f4, 48($sp)\n" - "sdc1 $f8, 48($sp)\n"; + "sdc1 $f6, 48($sp)\n"; DriverWrapper(moves_, expected, "FpuRegisterAndDoubleStackSlot"); } diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index b29974c238..3dcad6a6b9 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -3252,6 +3252,9 @@ void MipsAssembler::EmitLoad(ManagedRegister m_dst, Register src_register, int32 CHECK_EQ(kMipsDoublewordSize, size) << dst; LoadDFromOffset(dst.AsFRegister(), src_register, src_offset); } + } else if (dst.IsDRegister()) { + CHECK_EQ(kMipsDoublewordSize, size) << dst; + LoadDFromOffset(dst.AsOverlappingDRegisterLow(), src_register, src_offset); } } @@ -3396,6 +3399,9 @@ void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) { CHECK_EQ(kMipsDoublewordSize, size); StoreDToOffset(src.AsFRegister(), SP, dest.Int32Value()); } + } else if (src.IsDRegister()) { + CHECK_EQ(kMipsDoublewordSize, size); + StoreDToOffset(src.AsOverlappingDRegisterLow(), SP, dest.Int32Value()); } } diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h index 135b074c99..7437774c13 100644 --- a/runtime/arch/mips/asm_support_mips.h +++ b/runtime/arch/mips/asm_support_mips.h @@ -21,7 +21,7 @@ #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96 #define FRAME_SIZE_SAVE_REFS_ONLY 48 -#define FRAME_SIZE_SAVE_REFS_AND_ARGS 80 +#define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 256 #endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_H_ diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index 375a03acee..98ed5e60e6 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -75,11 +75,21 @@ void MipsContext::SmashCallerSaves() { gprs_[A1] = nullptr; gprs_[A2] = nullptr; gprs_[A3] = nullptr; + gprs_[T0] = nullptr; + gprs_[T1] = nullptr; + fprs_[F8] = nullptr; + fprs_[F9] = nullptr; + fprs_[F10] = nullptr; + fprs_[F11] = nullptr; fprs_[F12] = nullptr; fprs_[F13] = nullptr; fprs_[F14] = nullptr; fprs_[F15] = nullptr; + fprs_[F16] = nullptr; + fprs_[F17] = nullptr; + fprs_[F18] = nullptr; + fprs_[F19] = nullptr; } extern "C" NO_RETURN void art_quick_do_long_jump(uint32_t*, uint32_t*); diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index 34e34b40ff..3e8cdc9374 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -167,50 +167,60 @@ /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). - * callee-save: $a1-$a3, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method* + * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19 + * (26 total + 1 word padding + method*) */ .macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY - addiu $sp, $sp, -80 - .cfi_adjust_cfa_offset 80 + addiu $sp, $sp, -112 + .cfi_adjust_cfa_offset 112 // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 80) +#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 112) #error "FRAME_SIZE_SAVE_REFS_AND_ARGS(MIPS) size not as expected." #endif - sw $ra, 76($sp) - .cfi_rel_offset 31, 76 - sw $s8, 72($sp) - .cfi_rel_offset 30, 72 - sw $gp, 68($sp) - .cfi_rel_offset 28, 68 - sw $s7, 64($sp) - .cfi_rel_offset 23, 64 - sw $s6, 60($sp) - .cfi_rel_offset 22, 60 - sw $s5, 56($sp) - .cfi_rel_offset 21, 56 - sw $s4, 52($sp) - .cfi_rel_offset 20, 52 - sw $s3, 48($sp) - .cfi_rel_offset 19, 48 - sw $s2, 44($sp) - .cfi_rel_offset 18, 44 - sw $a3, 40($sp) - .cfi_rel_offset 7, 40 - sw $a2, 36($sp) - .cfi_rel_offset 6, 36 - sw $a1, 32($sp) - .cfi_rel_offset 5, 32 - SDu $f14, $f15, 24, $sp, $t0 - SDu $f12, $f13, 16, $sp, $t0 + sw $ra, 108($sp) + .cfi_rel_offset 31, 108 + sw $s8, 104($sp) + .cfi_rel_offset 30, 104 + sw $gp, 100($sp) + .cfi_rel_offset 28, 100 + sw $s7, 96($sp) + .cfi_rel_offset 23, 96 + sw $s6, 92($sp) + .cfi_rel_offset 22, 92 + sw $s5, 88($sp) + .cfi_rel_offset 21, 88 + sw $s4, 84($sp) + .cfi_rel_offset 20, 84 + sw $s3, 80($sp) + .cfi_rel_offset 19, 80 + sw $s2, 76($sp) + .cfi_rel_offset 18, 76 + sw $t1, 72($sp) + .cfi_rel_offset 9, 72 + sw $t0, 68($sp) + .cfi_rel_offset 8, 68 + sw $a3, 64($sp) + .cfi_rel_offset 7, 64 + sw $a2, 60($sp) + .cfi_rel_offset 6, 60 + sw $a1, 56($sp) + .cfi_rel_offset 5, 56 + SDu $f18, $f19, 48, $sp, $t8 + SDu $f16, $f17, 40, $sp, $t8 + SDu $f14, $f15, 32, $sp, $t8 + SDu $f12, $f13, 24, $sp, $t8 + SDu $f10, $f11, 16, $sp, $t8 + SDu $f8, $f9, 8, $sp, $t8 # bottom will hold Method* .endm /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC. - * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method* + * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19 + * (26 total + 1 word padding + method*) * Clobbers $t0 and $sp * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots. * Reserves FRAME_SIZE_SAVE_REFS_AND_ARGS + ARG_SLOT_SIZE bytes on the stack @@ -229,7 +239,8 @@ /* * Macro that sets up the callee save frame to conform with * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs). Restoration assumes non-moving GC. - * callee-save: $a1-$a3, $f12-$f15, $s2-$s8 + $gp + $ra, 12 total + 3 words padding + method* + * callee-save: $a1-$a3, $t0-$t1, $s2-$s8, $gp, $ra, $f8-$f19 + * (26 total + 1 word padding + method*) * Clobbers $sp * Use $a0 as the Method* and loads it into bottom of stack. * Allocates ARG_SLOT_SIZE bytes at the bottom of the stack for arg slots. @@ -246,34 +257,42 @@ .macro RESTORE_SAVE_REFS_AND_ARGS_FRAME addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack .cfi_adjust_cfa_offset -ARG_SLOT_SIZE - lw $ra, 76($sp) + lw $ra, 108($sp) .cfi_restore 31 - lw $s8, 72($sp) + lw $s8, 104($sp) .cfi_restore 30 - lw $gp, 68($sp) + lw $gp, 100($sp) .cfi_restore 28 - lw $s7, 64($sp) + lw $s7, 96($sp) .cfi_restore 23 - lw $s6, 60($sp) + lw $s6, 92($sp) .cfi_restore 22 - lw $s5, 56($sp) + lw $s5, 88($sp) .cfi_restore 21 - lw $s4, 52($sp) + lw $s4, 84($sp) .cfi_restore 20 - lw $s3, 48($sp) + lw $s3, 80($sp) .cfi_restore 19 - lw $s2, 44($sp) + lw $s2, 76($sp) .cfi_restore 18 - lw $a3, 40($sp) + lw $t1, 72($sp) + .cfi_restore 9 + lw $t0, 68($sp) + .cfi_restore 8 + lw $a3, 64($sp) .cfi_restore 7 - lw $a2, 36($sp) + lw $a2, 60($sp) .cfi_restore 6 - lw $a1, 32($sp) + lw $a1, 56($sp) .cfi_restore 5 - LDu $f14, $f15, 24, $sp, $t1 - LDu $f12, $f13, 16, $sp, $t1 - addiu $sp, $sp, 80 # pop frame - .cfi_adjust_cfa_offset -80 + LDu $f18, $f19, 48, $sp, $t8 + LDu $f16, $f17, 40, $sp, $t8 + LDu $f14, $f15, 32, $sp, $t8 + LDu $f12, $f13, 24, $sp, $t8 + LDu $f10, $f11, 16, $sp, $t8 + LDu $f8, $f9, 8, $sp, $t8 + addiu $sp, $sp, 112 # pop frame + .cfi_adjust_cfa_offset -112 .endm /* @@ -824,30 +843,56 @@ INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvok INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck -.macro LOAD_WORD_TO_REG reg, next_arg, index, label +// Each of the following macros expands into four instructions or 16 bytes. +// They are used to build indexable "tables" of code. + +.macro LOAD_WORD_TO_REG reg, next_arg, index_reg, label lw $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4) b \label - addiu $\index, 1 + addiu $\index_reg, 16 + .balign 16 .endm -.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index, label +.macro LOAD_LONG_TO_REG reg1, reg2, next_arg, index_reg, next_index, label lw $\reg1, -8($\next_arg) # next_arg points to argument after the current one (offset is 8) lw $\reg2, -4($\next_arg) b \label - li $\index, 4 # long can be loaded only to a2_a3 pair so index will be always 4 + li $\index_reg, \next_index + .balign 16 .endm -.macro LOAD_FLOAT_TO_REG reg, next_arg, index, label +.macro LOAD_FLOAT_TO_REG reg, next_arg, index_reg, label lwc1 $\reg, -4($\next_arg) # next_arg points to argument after the current one (offset is 4) b \label - addiu $\index, 1 + addiu $\index_reg, 16 + .balign 16 .endm -.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index, tmp, label +#if defined(__mips_isa_rev) && __mips_isa_rev > 2 +// LDu expands into 3 instructions for 64-bit FPU, so index_reg cannot be updated here. +.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label + .set reorder # force use of the branch delay slot LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one # (offset is 8) b \label - addiu $\index, 1 + .set noreorder + .balign 16 +.endm +#else +// LDu expands into 2 instructions for 32-bit FPU, so index_reg is updated here. +.macro LOAD_DOUBLE_TO_REG reg1, reg2, next_arg, index_reg, tmp, label + LDu $\reg1, $\reg2, -8, $\next_arg, $\tmp # next_arg points to argument after the current one + # (offset is 8) + b \label + addiu $\index_reg, 16 + .balign 16 +.endm +#endif + +.macro LOAD_END index_reg, next_index, label + b \label + li $\index_reg, \next_index + .balign 16 .endm #define SPILL_SIZE 32 @@ -891,61 +936,63 @@ ENTRY art_quick_invoke_stub lw $gp, 16($fp) # restore $gp lw $a0, SPILL_SIZE($fp) # restore ArtMethod* lw $a1, 4($sp) # a1 = this* - addiu $t0, $sp, 8 # t0 = pointer to the current argument (skip ArtMethod* and this*) - li $t3, 2 # t3 = gpr_index = 2 (skip A0 and A1) - move $t4, $zero # t4 = fp_index = 0 - lw $t1, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE + addiu $t8, $sp, 8 # t8 = pointer to the current argument (skip ArtMethod* and this*) + li $t6, 0 # t6 = gpr_index = 0 (corresponds to A2; A0 and A1 are skipped) + li $t7, 0 # t7 = fp_index = 0 + lw $t9, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE # as the $fp is SPILL_SIZE bytes below the $sp on entry) - addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type) + addiu $t9, 1 # t9 = shorty + 1 (skip 1 for return type) + + // Load the base addresses of tabInt ... tabDouble. + // We will use the register indices (gpr_index, fp_index) to branch. + // Note that the indices are scaled by 16, so they can be added to the bases directly. +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 + lapc $t2, tabInt + lapc $t3, tabLong + lapc $t4, tabSingle + lapc $t5, tabDouble +#else + bltzal $zero, tabBase # nal + addiu $t2, $ra, %lo(tabInt - tabBase) +tabBase: + addiu $t3, $ra, %lo(tabLong - tabBase) + addiu $t4, $ra, %lo(tabSingle - tabBase) + addiu $t5, $ra, %lo(tabDouble - tabBase) +#endif + loop: - lbu $t2, 0($t1) # t2 = shorty[i] - beqz $t2, loopEnd # finish getting args when shorty[i] == '\0' - addiu $t1, 1 - - li $t9, 'J' # put char 'J' into t9 - beq $t9, $t2, isLong # branch if result type char == 'J' - li $t9, 'D' # put char 'D' into t9 - beq $t9, $t2, isDouble # branch if result type char == 'D' - li $t9, 'F' # put char 'F' into t9 - beq $t9, $t2, isSingle # branch if result type char == 'F' - addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot, - # for both, int and single) - - li $t5, 2 # skip a0 and a1 (ArtMethod* and this*) - bne $t5, $t3, 1f # if (gpr_index == 2) - addiu $t5, 1 - LOAD_WORD_TO_REG a2, t0, t3, loop # a2 = current argument, gpr_index++ -1: bne $t5, $t3, loop # else if (gpr_index == 3) - nop - LOAD_WORD_TO_REG a3, t0, t3, loop # a3 = current argument, gpr_index++ + lbu $ra, 0($t9) # ra = shorty[i] + beqz $ra, loopEnd # finish getting args when shorty[i] == '\0' + addiu $t9, 1 + + addiu $ra, -'J' + beqz $ra, isLong # branch if result type char == 'J' + addiu $ra, 'J' - 'D' + beqz $ra, isDouble # branch if result type char == 'D' + addiu $ra, 'D' - 'F' + beqz $ra, isSingle # branch if result type char == 'F' + + addu $ra, $t2, $t6 + jalr $zero, $ra + addiu $t8, 4 # next_arg = curr_arg + 4 isLong: - addiu $t0, 8 # next_arg = curr_arg + 8 - slti $t5, $t3, 3 - beqz $t5, 2f # if (gpr_index < 3) - nop - LOAD_LONG_TO_REG a2, a3, t0, t3, loop # a2_a3 = curr_arg, gpr_index = 4 -2: b loop # else - li $t3, 4 # gpr_index = 4 - -isDouble: - addiu $t0, 8 # next_arg = curr_arg + 8 - li $t5, 0 - bne $t5, $t4, 3f # if (fp_index == 0) - addiu $t5, 1 - LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loop # f12_f13 = curr_arg, fp_index++ -3: bne $t5, $t4, loop # else if (fp_index == 1) - nop - LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loop # f14_f15 = curr_arg, fp_index++ + addu $ra, $t3, $t6 + jalr $zero, $ra + addiu $t8, 8 # next_arg = curr_arg + 8 isSingle: - li $t5, 0 - bne $t5, $t4, 4f # if (fp_index == 0) - addiu $t5, 1 - LOAD_FLOAT_TO_REG f12, t0, t4, loop # f12 = curr_arg, fp_index++ -4: bne $t5, $t4, loop # else if (fp_index == 1) - nop - LOAD_FLOAT_TO_REG f14, t0, t4, loop # f14 = curr_arg, fp_index++ + addu $ra, $t4, $t7 + jalr $zero, $ra + addiu $t8, 4 # next_arg = curr_arg + 4 + +isDouble: + addu $ra, $t5, $t7 +#if defined(__mips_isa_rev) && __mips_isa_rev > 2 + addiu $t7, 16 # fp_index += 16 didn't fit into LOAD_DOUBLE_TO_REG +#endif + jalr $zero, $ra + addiu $t8, 8 # next_arg = curr_arg + 8 loopEnd: lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code @@ -976,6 +1023,38 @@ loopEnd: SDu $f0, $f1, 0, $t0, $t1 # store floating point result jalr $zero, $ra nop + + // Note that gpr_index is kept within the range of tabInt and tabLong + // and fp_index is kept within the range of tabSingle and tabDouble. + .balign 16 +tabInt: + LOAD_WORD_TO_REG a2, t8, t6, loop # a2 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG a3, t8, t6, loop # a3 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG t0, t8, t6, loop # t0 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG t1, t8, t6, loop # t1 = current argument, gpr_index += 16 + LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16 +tabLong: + LOAD_LONG_TO_REG a2, a3, t8, t6, 2*16, loop # a2_a3 = curr_arg, gpr_index = 2*16 + LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop # t0_t1 = curr_arg, gpr_index = 4*16 + LOAD_LONG_TO_REG t0, t1, t8, t6, 4*16, loop # t0_t1 = curr_arg, gpr_index = 4*16 + LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16 + LOAD_END t6, 4*16, loop # no more GPR args, gpr_index = 4*16 +tabSingle: + LOAD_FLOAT_TO_REG f8, t8, t7, loop # f8 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f10, t8, t7, loop # f10 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f12, t8, t7, loop # f12 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f14, t8, t7, loop # f14 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f16, t8, t7, loop # f16 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f18, t8, t7, loop # f18 = curr_arg, fp_index += 16 + LOAD_END t7, 6*16, loop # no more FPR args, fp_index = 6*16 +tabDouble: + LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loop # f8_f9 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loop # f10_f11 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loop # f12_f13 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loop # f14_f15 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loop # f16_f17 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loop # f18_f19 = curr_arg; if FPU32, fp_index += 16 + LOAD_END t7, 6*16, loop # no more FPR args, fp_index = 6*16 END art_quick_invoke_stub /* @@ -1016,64 +1095,63 @@ ENTRY art_quick_invoke_static_stub addiu $sp, $sp, 16 # restore stack after memcpy lw $gp, 16($fp) # restore $gp lw $a0, SPILL_SIZE($fp) # restore ArtMethod* - addiu $t0, $sp, 4 # t0 = pointer to the current argument (skip ArtMethod*) - li $t3, 1 # t3 = gpr_index = 1 (skip A0) - move $t4, $zero # t4 = fp_index = 0 - lw $t1, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE + addiu $t8, $sp, 4 # t8 = pointer to the current argument (skip ArtMethod*) + li $t6, 0 # t6 = gpr_index = 0 (corresponds to A1; A0 is skipped) + li $t7, 0 # t7 = fp_index = 0 + lw $t9, 20 + SPILL_SIZE($fp) # get shorty (20 is offset from the $sp on entry + SPILL_SIZE # as the $fp is SPILL_SIZE bytes below the $sp on entry) - addiu $t1, 1 # t1 = shorty + 1 (skip 1 for return type) + addiu $t9, 1 # t9 = shorty + 1 (skip 1 for return type) + + // Load the base addresses of tabIntS ... tabDoubleS. + // We will use the register indices (gpr_index, fp_index) to branch. + // Note that the indices are scaled by 16, so they can be added to the bases directly. +#if defined(__mips_isa_rev) && __mips_isa_rev >= 6 + lapc $t2, tabIntS + lapc $t3, tabLongS + lapc $t4, tabSingleS + lapc $t5, tabDoubleS +#else + bltzal $zero, tabBaseS # nal + addiu $t2, $ra, %lo(tabIntS - tabBaseS) +tabBaseS: + addiu $t3, $ra, %lo(tabLongS - tabBaseS) + addiu $t4, $ra, %lo(tabSingleS - tabBaseS) + addiu $t5, $ra, %lo(tabDoubleS - tabBaseS) +#endif + loopS: - lbu $t2, 0($t1) # t2 = shorty[i] - beqz $t2, loopEndS # finish getting args when shorty[i] == '\0' - addiu $t1, 1 - - li $t9, 'J' # put char 'J' into t9 - beq $t9, $t2, isLongS # branch if result type char == 'J' - li $t9, 'D' # put char 'D' into t9 - beq $t9, $t2, isDoubleS # branch if result type char == 'D' - li $t9, 'F' # put char 'F' into t9 - beq $t9, $t2, isSingleS # branch if result type char == 'F' - addiu $t0, 4 # next_arg = curr_arg + 4 (in branch delay slot, - # for both, int and single) - - li $t5, 1 # skip a0 (ArtMethod*) - bne $t5, $t3, 1f # if (gpr_index == 1) - addiu $t5, 1 - LOAD_WORD_TO_REG a1, t0, t3, loopS # a1 = current argument, gpr_index++ -1: bne $t5, $t3, 2f # else if (gpr_index == 2) - addiu $t5, 1 - LOAD_WORD_TO_REG a2, t0, t3, loopS # a2 = current argument, gpr_index++ -2: bne $t5, $t3, loopS # else if (gpr_index == 3) - nop - LOAD_WORD_TO_REG a3, t0, t3, loopS # a3 = current argument, gpr_index++ + lbu $ra, 0($t9) # ra = shorty[i] + beqz $ra, loopEndS # finish getting args when shorty[i] == '\0' + addiu $t9, 1 + + addiu $ra, -'J' + beqz $ra, isLongS # branch if result type char == 'J' + addiu $ra, 'J' - 'D' + beqz $ra, isDoubleS # branch if result type char == 'D' + addiu $ra, 'D' - 'F' + beqz $ra, isSingleS # branch if result type char == 'F' + + addu $ra, $t2, $t6 + jalr $zero, $ra + addiu $t8, 4 # next_arg = curr_arg + 4 isLongS: - addiu $t0, 8 # next_arg = curr_arg + 8 - slti $t5, $t3, 3 - beqz $t5, 3f # if (gpr_index < 3) - nop - LOAD_LONG_TO_REG a2, a3, t0, t3, loopS # a2_a3 = curr_arg, gpr_index = 4 -3: b loopS # else - li $t3, 4 # gpr_index = 4 - -isDoubleS: - addiu $t0, 8 # next_arg = curr_arg + 8 - li $t5, 0 - bne $t5, $t4, 4f # if (fp_index == 0) - addiu $t5, 1 - LOAD_DOUBLE_TO_REG f12, f13, t0, t4, t9, loopS # f12_f13 = curr_arg, fp_index++ -4: bne $t5, $t4, loopS # else if (fp_index == 1) - nop - LOAD_DOUBLE_TO_REG f14, f15, t0, t4, t9, loopS # f14_f15 = curr_arg, fp_index++ + addu $ra, $t3, $t6 + jalr $zero, $ra + addiu $t8, 8 # next_arg = curr_arg + 8 isSingleS: - li $t5, 0 - bne $t5, $t4, 5f # if (fp_index == 0) - addiu $t5, 1 - LOAD_FLOAT_TO_REG f12, t0, t4, loopS # f12 = curr_arg, fp_index++ -5: bne $t5, $t4, loopS # else if (fp_index == 1) - nop - LOAD_FLOAT_TO_REG f14, t0, t4, loopS # f14 = curr_arg, fp_index++ + addu $ra, $t4, $t7 + jalr $zero, $ra + addiu $t8, 4 # next_arg = curr_arg + 4 + +isDoubleS: + addu $ra, $t5, $t7 +#if defined(__mips_isa_rev) && __mips_isa_rev > 2 + addiu $t7, 16 # fp_index += 16 didn't fit into LOAD_DOUBLE_TO_REG +#endif + jalr $zero, $ra + addiu $t8, 8 # next_arg = curr_arg + 8 loopEndS: lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) # get pointer to the code @@ -1104,6 +1182,40 @@ loopEndS: SDu $f0, $f1, 0, $t0, $t1 # store floating point result jalr $zero, $ra nop + + // Note that gpr_index is kept within the range of tabIntS and tabLongS + // and fp_index is kept within the range of tabSingleS and tabDoubleS. + .balign 16 +tabIntS: + LOAD_WORD_TO_REG a1, t8, t6, loopS # a1 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG a2, t8, t6, loopS # a2 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG a3, t8, t6, loopS # a3 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG t0, t8, t6, loopS # t0 = current argument, gpr_index += 16 + LOAD_WORD_TO_REG t1, t8, t6, loopS # t1 = current argument, gpr_index += 16 + LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16 +tabLongS: + LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS # a2_a3 = curr_arg, gpr_index = 3*16 + LOAD_LONG_TO_REG a2, a3, t8, t6, 3*16, loopS # a2_a3 = curr_arg, gpr_index = 3*16 + LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS # t0_t1 = curr_arg, gpr_index = 5*16 + LOAD_LONG_TO_REG t0, t1, t8, t6, 5*16, loopS # t0_t1 = curr_arg, gpr_index = 5*16 + LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16 + LOAD_END t6, 5*16, loopS # no more GPR args, gpr_index = 5*16 +tabSingleS: + LOAD_FLOAT_TO_REG f8, t8, t7, loopS # f8 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f10, t8, t7, loopS # f10 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f12, t8, t7, loopS # f12 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f14, t8, t7, loopS # f14 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f16, t8, t7, loopS # f16 = curr_arg, fp_index += 16 + LOAD_FLOAT_TO_REG f18, t8, t7, loopS # f18 = curr_arg, fp_index += 16 + LOAD_END t7, 6*16, loopS # no more FPR args, fp_index = 6*16 +tabDoubleS: + LOAD_DOUBLE_TO_REG f8, f9, t8, t7, ra, loopS # f8_f9 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f10, f11, t8, t7, ra, loopS # f10_f11 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f12, f13, t8, t7, ra, loopS # f12_f13 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f14, f15, t8, t7, ra, loopS # f14_f15 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f16, f17, t8, t7, ra, loopS # f16_f17 = curr_arg; if FPU32, fp_index += 16 + LOAD_DOUBLE_TO_REG f18, f19, t8, t7, ra, loopS # f18_f19 = curr_arg; if FPU32, fp_index += 16 + LOAD_END t7, 6*16, loopS # no more FPR args, fp_index = 6*16 END art_quick_invoke_static_stub #undef SPILL_SIZE @@ -1886,9 +1998,9 @@ ENTRY art_quick_proxy_invoke_handler la $t9, artQuickProxyInvokeHandler jalr $t9 # (Method* proxy method, receiver, Thread*, SP) addiu $a3, $sp, ARG_SLOT_SIZE # pass $sp (remove arg slots) - lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ + lw $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ RESTORE_SAVE_REFS_AND_ARGS_FRAME - bnez $t0, 1f + bnez $t7, 1f # don't care if $v0 and/or $v1 are modified, when exception branch taken MTD $v0, $v1, $f0, $f1 # move float value to return value jalr $zero, $ra @@ -1900,25 +2012,25 @@ END art_quick_proxy_invoke_handler /* * Called to resolve an imt conflict. * a0 is the conflict ArtMethod. - * t0 is a hidden argument that holds the target interface method's dex method index. + * t7 is a hidden argument that holds the target interface method's dex method index. * - * Note that this stub writes to a0, t0 and t1. + * Note that this stub writes to a0, t7 and t8. */ ENTRY art_quick_imt_conflict_trampoline - lw $t1, 0($sp) # Load referrer. - lw $t1, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t1) # Load dex cache methods array. - sll $t0, $t0, POINTER_SIZE_SHIFT # Calculate offset. - addu $t0, $t1, $t0 # Add offset to base. - lw $t0, 0($t0) # Load interface method. + lw $t8, 0($sp) # Load referrer. + lw $t8, ART_METHOD_DEX_CACHE_METHODS_OFFSET_32($t8) # Load dex cache methods array. + sll $t7, $t7, POINTER_SIZE_SHIFT # Calculate offset. + addu $t7, $t8, $t7 # Add offset to base. + lw $t7, 0($t7) # Load interface method. lw $a0, ART_METHOD_JNI_OFFSET_32($a0) # Load ImtConflictTable. .Limt_table_iterate: - lw $t1, 0($a0) # Load next entry in ImtConflictTable. + lw $t8, 0($a0) # Load next entry in ImtConflictTable. # Branch if found. - beq $t1, $t0, .Limt_table_found + beq $t8, $t7, .Limt_table_found nop # If the entry is null, the interface method is not in the ImtConflictTable. - beqz $t1, .Lconflict_trampoline + beqz $t8, .Lconflict_trampoline nop # Iterate over the entries of the ImtConflictTable. b .Limt_table_iterate @@ -1928,7 +2040,7 @@ ENTRY art_quick_imt_conflict_trampoline # We successfully hit an entry in the table. Load the target method and jump to it. lw $a0, __SIZEOF_POINTER__($a0) lw $t9, ART_METHOD_QUICK_CODE_OFFSET_32($a0) - jr $t9 + jalr $zero, $t9 nop .Lconflict_trampoline: @@ -1972,7 +2084,7 @@ ENTRY art_quick_generic_jni_trampoline # The result of the call is: # v0: ptr to native code, 0 on error. # v1: ptr to the bottom of the used area of the alloca, can restore stack till here. - beq $v0, $zero, 1f # check entry error + beq $v0, $zero, 2f # check entry error move $t9, $v0 # save the code ptr move $sp, $v1 # release part of the alloca @@ -1980,10 +2092,22 @@ ENTRY art_quick_generic_jni_trampoline lw $a0, 0($sp) lw $a1, 4($sp) lw $a2, 8($sp) + lw $a3, 12($sp) + + # artQuickGenericJniTrampoline sets bit 0 of the native code address to 1 + # when the first two arguments are both single precision floats. This lets + # us extract them properly from the stack and load into floating point + # registers. + MTD $a0, $a1, $f12, $f13 + andi $t0, $t9, 1 + xor $t9, $t9, $t0 + bnez $t0, 1f + mtc1 $a1, $f14 + MTD $a2, $a3, $f14, $f15 - # Load FPRs the same as GPRs. Look at BuildNativeCallFrameStateMachine. +1: jalr $t9 # native call - lw $a3, 12($sp) + nop addiu $sp, $sp, 16 # remove arg slots move $gp, $s3 # restore $gp from $s3 @@ -1999,18 +2123,18 @@ ENTRY art_quick_generic_jni_trampoline s.d $f0, 16($sp) # pass result_f lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ - bne $t0, $zero, 1f # check for pending exceptions + bne $t0, $zero, 2f # check for pending exceptions move $sp, $s8 # tear down the alloca - # tear dpown the callee-save frame + # tear down the callee-save frame RESTORE_SAVE_REFS_AND_ARGS_FRAME MTD $v0, $v1, $f0, $f1 # move float value to return value jalr $zero, $ra nop -1: +2: lw $sp, THREAD_TOP_QUICK_FRAME_OFFSET(rSELF) # This will create a new save-all frame, required by the runtime. DELIVER_PENDING_EXCEPTION @@ -2023,9 +2147,9 @@ ENTRY art_quick_to_interpreter_bridge la $t9, artQuickToInterpreterBridge jalr $t9 # (Method* method, Thread*, SP) addiu $a2, $sp, ARG_SLOT_SIZE # pass $sp (remove arg slots) - lw $t0, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ + lw $t7, THREAD_EXCEPTION_OFFSET(rSELF) # load Thread::Current()->exception_ RESTORE_SAVE_REFS_AND_ARGS_FRAME - bnez $t0, 1f + bnez $t7, 1f # don't care if $v0 and/or $v1 are modified, when exception branch taken MTD $v0, $v1, $f0, $f1 # move float value to return value jalr $zero, $ra diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h index 90e7b203ac..6f16352d91 100644 --- a/runtime/arch/mips/quick_method_frame_info_mips.h +++ b/runtime/arch/mips/quick_method_frame_info_mips.h @@ -26,12 +26,13 @@ namespace art { namespace mips { static constexpr uint32_t kMipsCalleeSaveAlwaysSpills = - (1 << art::mips::RA); + (1u << art::mips::RA); static constexpr uint32_t kMipsCalleeSaveRefSpills = (1 << art::mips::S2) | (1 << art::mips::S3) | (1 << art::mips::S4) | (1 << art::mips::S5) | (1 << art::mips::S6) | (1 << art::mips::S7) | (1 << art::mips::GP) | (1 << art::mips::FP); static constexpr uint32_t kMipsCalleeSaveArgSpills = - (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3); + (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) | (1 << art::mips::T0) | + (1 << art::mips::T1); static constexpr uint32_t kMipsCalleeSaveAllSpills = (1 << art::mips::S0) | (1 << art::mips::S1); static constexpr uint32_t kMipsCalleeSaveEverythingSpills = @@ -44,11 +45,13 @@ static constexpr uint32_t kMipsCalleeSaveEverythingSpills = static constexpr uint32_t kMipsCalleeSaveFpAlwaysSpills = 0; static constexpr uint32_t kMipsCalleeSaveFpRefSpills = 0; static constexpr uint32_t kMipsCalleeSaveFpArgSpills = - (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15); + (1 << art::mips::F8) | (1 << art::mips::F9) | (1 << art::mips::F10) | (1 << art::mips::F11) | + (1 << art::mips::F12) | (1 << art::mips::F13) | (1 << art::mips::F14) | (1 << art::mips::F15) | + (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19); static constexpr uint32_t kMipsCalleeSaveAllFPSpills = (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) | (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) | - (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31); + (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1u << art::mips::F31); static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills = (1 << art::mips::F0) | (1 << art::mips::F1) | (1 << art::mips::F2) | (1 << art::mips::F3) | (1 << art::mips::F4) | (1 << art::mips::F5) | (1 << art::mips::F6) | (1 << art::mips::F7) | @@ -57,7 +60,7 @@ static constexpr uint32_t kMipsCalleeSaveFpEverythingSpills = (1 << art::mips::F16) | (1 << art::mips::F17) | (1 << art::mips::F18) | (1 << art::mips::F19) | (1 << art::mips::F20) | (1 << art::mips::F21) | (1 << art::mips::F22) | (1 << art::mips::F23) | (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) | - (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1 << art::mips::F31); + (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1u << art::mips::F31); constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) { return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills | diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h index ae01bd5d18..555f3f0473 100644 --- a/runtime/arch/mips/registers_mips.h +++ b/runtime/arch/mips/registers_mips.h @@ -35,9 +35,9 @@ enum Register { A1 = 5, A2 = 6, A3 = 7, - T0 = 8, // Temporaries. + T0 = 8, // Two extra arguments / temporaries. T1 = 9, - T2 = 10, + T2 = 10, // Temporaries. T3 = 11, T4 = 12, T5 = 13, @@ -100,7 +100,7 @@ enum FRegister { F29 = 29, F30 = 30, F31 = 31, - FTMP = F8, // scratch register + FTMP = F6, // scratch register kNumberOfFRegisters = 32, kNoFRegister = -1, }; diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc index 6665897c9d..9e385f839f 100644 --- a/runtime/arch/stub_test.cc +++ b/runtime/arch/stub_test.cc @@ -355,7 +355,7 @@ class StubTest : public CommonRuntimeTest { "lw $a2, 8($sp)\n\t" "lw $t9, 12($sp)\n\t" "lw $s1, 16($sp)\n\t" - "lw $t0, 20($sp)\n\t" + "lw $t7, 20($sp)\n\t" "addiu $sp, $sp, 24\n\t" "jalr $t9\n\t" // Call the stub. diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc index fe82878699..bf1d4ea1a1 100644 --- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc @@ -134,13 +134,23 @@ class QuickArgumentVisitor { // | Method* | --- // | RA | // | ... | callee saves + // | T1 | arg5 + // | T0 | arg4 // | A3 | arg3 // | A2 | arg2 // | A1 | arg1 + // | F19 | + // | F18 | f_arg5 + // | F17 | + // | F16 | f_arg4 // | F15 | - // | F14 | f_arg1 + // | F14 | f_arg3 // | F13 | - // | F12 | f_arg0 + // | F12 | f_arg2 + // | F11 | + // | F10 | f_arg1 + // | F9 | + // | F8 | f_arg0 // | | padding // | A0/Method* | <- sp static constexpr bool kSplitPairAcrossRegisterAndStack = false; @@ -148,14 +158,14 @@ class QuickArgumentVisitor { static constexpr bool kQuickSoftFloatAbi = false; static constexpr bool kQuickDoubleRegAlignedFloatBackFilled = false; static constexpr bool kQuickSkipOddFpRegisters = true; - static constexpr size_t kNumQuickGprArgs = 3; // 3 arguments passed in GPRs. - static constexpr size_t kNumQuickFprArgs = 4; // 2 arguments passed in FPRs. Floats can be passed - // only in even numbered registers and each double - // occupies two registers. + static constexpr size_t kNumQuickGprArgs = 5; // 5 arguments passed in GPRs. + static constexpr size_t kNumQuickFprArgs = 12; // 6 arguments passed in FPRs. Floats can be + // passed only in even numbered registers and each + // double occupies two registers. static constexpr bool kGprFprLockstep = false; - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 16; // Offset of first FPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 32; // Offset of first GPR arg. - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 76; // Offset of return address. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 8; // Offset of first FPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 56; // Offset of first GPR arg. + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 108; // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA); } @@ -187,7 +197,7 @@ class QuickArgumentVisitor { // | F12 | f_arg0 // | | padding // | A0/Method* | <- sp - // NOTE: for Mip64, when A0 is skipped, F0 is also skipped. + // NOTE: for Mip64, when A0 is skipped, F12 is also skipped. static constexpr bool kSplitPairAcrossRegisterAndStack = false; static constexpr bool kAlignPairRegister = false; static constexpr bool kQuickSoftFloatAbi = false; @@ -197,7 +207,7 @@ class QuickArgumentVisitor { static constexpr size_t kNumQuickFprArgs = 7; // 7 arguments passed in FPRs. static constexpr bool kGprFprLockstep = true; - static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 24; // Offset of first FPR arg (F1). + static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset = 24; // Offset of first FPR arg (F13). static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset = 80; // Offset of first GPR arg (A1). static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset = 200; // Offset of return address. static size_t GprIndexToGprOffset(uint32_t gpr_index) { @@ -501,10 +511,16 @@ class QuickArgumentVisitor { case Primitive::kPrimDouble: case Primitive::kPrimLong: if (kQuickSoftFloatAbi || (cur_type_ == Primitive::kPrimLong)) { - if (cur_type_ == Primitive::kPrimLong && kAlignPairRegister && gpr_index_ == 0) { - // Currently, this is only for ARM and MIPS, where the first available parameter - // register is R1 (on ARM) or A1 (on MIPS). So we skip it, and use R2 (on ARM) or - // A2 (on MIPS) instead. + if (cur_type_ == Primitive::kPrimLong && +#if defined(__mips__) && !defined(__LP64__) + (gpr_index_ == 0 || gpr_index_ == 2) && +#else + gpr_index_ == 0 && +#endif + kAlignPairRegister) { + // Currently, this is only for ARM and MIPS, where we align long parameters with + // even-numbered registers by skipping R1 (on ARM) or A1(A3) (on MIPS) and using + // R2 (on ARM) or A2(T0) (on MIPS) instead. IncGprIndex(); } is_split_long_or_double_ = (GetBytesPerGprSpillLocation(kRuntimeISA) == 4) && @@ -2086,6 +2102,41 @@ extern "C" TwoWordReturn artQuickGenericJniTrampoline(Thread* self, ArtMethod** // Note that the native code pointer will be automatically set by artFindNativeMethod(). } +#if defined(__mips__) && !defined(__LP64__) + // On MIPS32 if the first two arguments are floating-point, we need to know their types + // so that art_quick_generic_jni_trampoline can correctly extract them from the stack + // and load into floating-point registers. + // Possible arrangements of first two floating-point arguments on the stack (32-bit FPU + // view): + // (1) + // | DOUBLE | DOUBLE | other args, if any + // | F12 | F13 | F14 | F15 | + // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16 + // (2) + // | DOUBLE | FLOAT | (PAD) | other args, if any + // | F12 | F13 | F14 | | + // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16 + // (3) + // | FLOAT | (PAD) | DOUBLE | other args, if any + // | F12 | | F14 | F15 | + // | SP+0 | SP+4 | SP+8 | SP+12 | SP+16 + // (4) + // | FLOAT | FLOAT | other args, if any + // | F12 | F14 | + // | SP+0 | SP+4 | SP+8 + // As you can see, only the last case (4) is special. In all others we can just + // load F12/F13 and F14/F15 in the same manner. + // Set bit 0 of the native code address to 1 in this case (valid code addresses + // are always a multiple of 4 on MIPS32, so we have 2 spare bits available). + if (nativeCode != nullptr && + shorty != nullptr && + shorty_len >= 3 && + shorty[1] == 'F' && + shorty[2] == 'F') { + nativeCode = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(nativeCode) | 1); + } +#endif + // Return native code addr(lo) and bottom of alloca address(hi). return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(visitor.GetBottomOfUsedArea()), reinterpret_cast<uintptr_t>(nativeCode)); |