author     2017-10-23 11:00:32 -0700
committer  2017-10-23 11:00:32 -0700
commit     715f43e1553330bc804cea2951be195473dc343d
tree       55e143005efe10e8448c91eff6b88a635af2a3f6
parent     9e842d3e7d6102d964178e36e5d596ca91895147
MIPS32: Improve stack alignment, use sdc1/ldc1, where possible.
- Ensure that SP is a multiple of 16 at all times, and
- Use ldc1/sdc1 to load/store FPU registers from/to 8-byte-aligned
locations wherever possible.
Use `export ART_MIPS32_CHECK_ALIGNMENT=true` when building Android
to enable the new runtime alignment checks.
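
The runtime check itself is just a power-of-two alignment test: the macro ANDs the low bits of the base address and traps if any are set. A minimal C++ illustration of that predicate (not code from this patch) is:

```
#include <cstdint>

// Illustration only: the CHECK_ALIGNMENT macro's `andi \tmp, \ba, \alignment-1`
// computes exactly this condition before deciding whether to execute `break`.
constexpr bool IsAligned(uintptr_t addr, uintptr_t alignment) {
  // Assumes `alignment` is a power of two, as the macro itself requires.
  return (addr & (alignment - 1)) == 0;
}

static_assert(IsAligned(0x7fff0010u, 16), "an SP ending in 0x0 is 16-byte aligned");
static_assert(!IsAligned(0x7fff0014u, 16), "an SP ending in 0x4 is only 4-byte aligned");
```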
Test: Boot & run tests on 32-bit version of QEMU, and CI-20.
Test: test/testrunner/testrunner.py --target --optimizing --32
Test: test-art-host-gtest
Test: test-art-target-gtest
Change-Id: Ia667004573f419fd006098fcfadf5834239cb485
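
For context, here is a minimal sketch of the frame-size arithmetic behind the FRAME_SIZE_SAVE_ALL_CALLEE_SAVES bump from 96 to 112 bytes. The helper and constant below are illustrative, not ART's actual utilities, and assume the register layout shown in the quick_entrypoints_mips.S hunk.

```
#include <cstdint>

// Hypothetical helper: pad a raw frame payload up to the 16-byte stack
// alignment this change enforces.
constexpr uint32_t kStackAlignment = 16;

constexpr uint32_t AlignFrameSize(uint32_t bytes) {
  return (bytes + kStackAlignment - 1) & ~(kStackAlignment - 1u);
}

// SaveAllCalleeSaves now stores 11 core registers (44 bytes), a 4-byte $zero
// placeholder, 6 double FPU registers (48 bytes) and the ArtMethod* word:
// 100 bytes of payload, padded to the new frame size of 112.
static_assert(AlignFrameSize(44 + 4 + 48 + 4) == 112, "matches FRAME_SIZE_SAVE_ALL_CALLEE_SAVES");
```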
-rw-r--r--  build/art.go                                         |   5
-rw-r--r--  compiler/optimizing/code_generator_mips.cc           |   2
-rw-r--r--  compiler/optimizing/emit_swap_mips_test.cc           |  32
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc |  12
-rw-r--r--  compiler/utils/mips/assembler_mips.cc                |  10
-rw-r--r--  compiler/utils/mips/assembler_mips_test.cc           |   8
-rw-r--r--  runtime/arch/mips/asm_support_mips.S                 |  26
-rw-r--r--  runtime/arch/mips/asm_support_mips.h                 |   2
-rw-r--r--  runtime/arch/mips/context_mips.cc                    |  15
-rw-r--r--  runtime/arch/mips/jni_entrypoints_mips.S             |  10
-rw-r--r--  runtime/arch/mips/quick_entrypoints_mips.S           | 214
-rw-r--r--  runtime/arch/mips/quick_method_frame_info_mips.h     |  18
12 files changed, 214 insertions, 140 deletions
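
One detail from context_mips.cc worth highlighting before the per-file diffs: the fprs[] buffer that art_quick_do_long_jump() reads must itself be 8-byte aligned for ldc1 to be usable. A simplified C++ sketch of that requirement (illustrative type and names, not the patch's code):

```
#include <cstdint>

// Illustrative only: the context_mips.cc change gives the local fprs[] array
// __attribute__((aligned(8))) so that art_quick_do_long_jump() can load each
// even/odd FPU register pair with a single ldc1. alignas(8) states the same
// requirement in standard C++.
struct LongJumpFprs {
  alignas(8) uint32_t regs[32];  // assumption: 32 single-precision registers, $f0..$f31
};

static_assert(alignof(LongJumpFprs) == 8, "ldc1 needs an 8-byte-aligned base address");
```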
diff --git a/build/art.go b/build/art.go index 1bcaf51a1d..452b3485a3 100644 --- a/build/art.go +++ b/build/art.go @@ -97,6 +97,11 @@ func globalFlags(ctx android.BaseContext) ([]string, []string) { asflags = append(asflags, "-DART_ENABLE_ADDRESS_SANITIZER=1") } + if envTrue(ctx, "ART_MIPS32_CHECK_ALIGNMENT") { + // Enable the use of MIPS32 CHECK_ALIGNMENT macro for debugging purposes + asflags = append(asflags, "-DART_MIPS32_CHECK_ALIGNMENT") + } + return cflags, asflags } diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 3ba107a283..2f65e8c958 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -1300,7 +1300,7 @@ void ParallelMoveResolverMIPS::Exchange(int index1, int index2, bool double_slot // automatically unspilled when the scratch scope object is destroyed). ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters()); // If V0 spills onto the stack, SP-relative offsets need to be adjusted. - int stack_offset = ensure_scratch.IsSpilled() ? kMipsWordSize : 0; + int stack_offset = ensure_scratch.IsSpilled() ? kStackAlignment : 0; for (int i = 0; i <= (double_slot ? 1 : 0); i++, stack_offset += kMipsWordSize) { __ LoadFromOffset(kLoadWord, Register(ensure_scratch.GetRegister()), diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index 36e932c67a..b63914faf7 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -238,14 +238,14 @@ TEST_F(EmitSwapMipsTest, TwoStackSlots) { DataType::Type::kInt32, nullptr); const char* expected = - "addiu $sp, $sp, -4\n" + "addiu $sp, $sp, -16\n" "sw $v0, 0($sp)\n" - "lw $v0, 56($sp)\n" - "lw $t8, 52($sp)\n" - "sw $v0, 52($sp)\n" - "sw $t8, 56($sp)\n" + "lw $v0, 68($sp)\n" + "lw $t8, 64($sp)\n" + "sw $v0, 64($sp)\n" + "sw $t8, 68($sp)\n" "lw $v0, 0($sp)\n" - "addiu $sp, $sp, 4\n"; + "addiu $sp, $sp, 16\n"; DriverWrapper(moves_, expected, "TwoStackSlots"); } @@ -261,18 +261,18 @@ TEST_F(EmitSwapMipsTest, TwoDoubleStackSlots) { DataType::Type::kInt64, nullptr); const char* expected = - "addiu $sp, $sp, -4\n" + "addiu $sp, $sp, -16\n" "sw $v0, 0($sp)\n" - "lw $v0, 60($sp)\n" - "lw $t8, 52($sp)\n" - "sw $v0, 52($sp)\n" - "sw $t8, 60($sp)\n" - "lw $v0, 64($sp)\n" - "lw $t8, 56($sp)\n" - "sw $v0, 56($sp)\n" - "sw $t8, 64($sp)\n" + "lw $v0, 72($sp)\n" + "lw $t8, 64($sp)\n" + "sw $v0, 64($sp)\n" + "sw $t8, 72($sp)\n" + "lw $v0, 76($sp)\n" + "lw $t8, 68($sp)\n" + "sw $v0, 68($sp)\n" + "sw $t8, 76($sp)\n" "lw $v0, 0($sp)\n" - "addiu $sp, $sp, 4\n"; + "addiu $sp, $sp, 16\n"; DriverWrapper(moves_, expected, "TwoDoubleStackSlots"); } diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index fde55cb92f..1e82c4b0f7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -330,10 +330,10 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { static constexpr uint8_t expected_asm_kMips_adjust_head[] = { 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x08, 0x00, 0x80, 0x14, 0xFC, 0xFF, 0xBD, 0x27, + 0x08, 0x00, 0x80, 0x14, 0xF0, 0xFF, 0xBD, 0x27, 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, - 
0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27, + 0x09, 0x00, 0x20, 0x00, 0x10, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, @@ -342,7 +342,7 @@ static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { }; static constexpr uint8_t expected_cfi_kMips_adjust[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, + 0x50, 0x0E, 0x50, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; // 0x00000000: addiu sp, sp, -64 @@ -356,8 +356,8 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x00000010: sdc1 f22, +40(sp) // 0x00000014: sdc1 f20, +32(sp) // 0x00000018: bnez a0, 0x0000003c ; +36 -// 0x0000001c: addiu sp, sp, -4 -// 0x00000020: .cfi_def_cfa_offset: 68 +// 0x0000001c: addiu sp, sp, -16 +// 0x00000020: .cfi_def_cfa_offset: 80 // 0x00000020: sw ra, +0(sp) // 0x00000024: nal // 0x00000028: lui at, 2 @@ -365,7 +365,7 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x00000030: addu at, at, ra // 0x00000034: lw ra, +0(sp) // 0x00000038: jr at -// 0x0000003c: addiu sp, sp, 4 +// 0x0000003c: addiu sp, sp, 16 // 0x00000040: .cfi_def_cfa_offset: 64 // 0x00000040: nop // ... diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index cbb2c0ea47..9545ca6869 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -1863,20 +1863,20 @@ void MipsAssembler::Not(Register rd, Register rs) { } void MipsAssembler::Push(Register rs) { - IncreaseFrameSize(kMipsWordSize); + IncreaseFrameSize(kStackAlignment); Sw(rs, SP, 0); } void MipsAssembler::Pop(Register rd) { Lw(rd, SP, 0); - DecreaseFrameSize(kMipsWordSize); + DecreaseFrameSize(kStackAlignment); } void MipsAssembler::PopAndReturn(Register rd, Register rt) { bool reordering = SetReorder(false); Lw(rd, SP, 0); Jr(rt); - DecreaseFrameSize(kMipsWordSize); // Single instruction in delay slot. + DecreaseFrameSize(kStackAlignment); // Single instruction in delay slot. SetReorder(reordering); } @@ -4588,7 +4588,7 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) { Addu(AT, AT, RA); Lw(RA, SP, 0); Jr(AT); - DecreaseFrameSize(kMipsWordSize); + DecreaseFrameSize(kStackAlignment); break; case Branch::kLongCondBranch: // The comment on case 'Branch::kLongUncondBranch' applies here as well. 
@@ -4608,7 +4608,7 @@ void MipsAssembler::EmitBranch(uint32_t branch_id) { Addu(AT, AT, RA); Lw(RA, SP, 0); Jr(AT); - DecreaseFrameSize(kMipsWordSize); + DecreaseFrameSize(kStackAlignment); break; case Branch::kLongCall: DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 9397be4c09..b027d3a549 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2803,7 +2803,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { oss << ".set noreorder\n" "addiu $t0, $t1, 0x5678\n" - "addiu $sp, $sp, -4\n" + "addiu $sp, $sp, -16\n" "sw $ra, 0($sp)\n" "bltzal $zero, .+4\n" "lui $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" @@ -2811,11 +2811,11 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { "addu $at, $at, $ra\n" "lw $ra, 0($sp)\n" "jalr $zero, $at\n" - "addiu $sp, $sp, 4\n" << + "addiu $sp, $sp, 16\n" << RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << "addiu $t0, $t1, 0x5678\n" - "addiu $sp, $sp, -4\n" + "addiu $sp, $sp, -16\n" "sw $ra, 0($sp)\n" "bltzal $zero, .+4\n" "lui $at, 0x" << std::hex << High16Bits(offset_back) << "\n" @@ -2823,7 +2823,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) { "addu $at, $at, $ra\n" "lw $ra, 0($sp)\n" "jalr $zero, $at\n" - "addiu $sp, $sp, 4\n"; + "addiu $sp, $sp, 16\n"; std::string expected = oss.str(); DriverStr(expected, "LongBranchReorder"); EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u); diff --git a/runtime/arch/mips/asm_support_mips.S b/runtime/arch/mips/asm_support_mips.S index 50095ae77e..fa51059d3a 100644 --- a/runtime/arch/mips/asm_support_mips.S +++ b/runtime/arch/mips/asm_support_mips.S @@ -173,4 +173,30 @@ .set pop .endm +// This utility macro is used to check whether the address contained in +// a register is suitably aligned. Default usage is confirm that the +// address stored in $sp is a multiple of 16. It can be used for other +// alignments, and for other base address registers, if needed. +// +// Enable this macro by running the shell command: +// +// export ART_MIPS32_CHECK_ALIGNMENT=true +// +// NOTE: The value of alignment must be a power of 2, and must fit in an +// unsigned 15-bit integer. The macro won't behave as expected if these +// conditions aren't met. 
+// +.macro CHECK_ALIGNMENT ba=$sp, tmp=$at, alignment=16 +#ifdef ART_MIPS32_CHECK_ALIGNMENT + .set push + .set noat + .set noreorder + andi \tmp, \ba, \alignment-1 + beqz \tmp, .+12 # Skip break instruction if base address register (ba) is aligned + nop + break + .set pop +#endif +.endm + #endif // ART_RUNTIME_ARCH_MIPS_ASM_SUPPORT_MIPS_S_ diff --git a/runtime/arch/mips/asm_support_mips.h b/runtime/arch/mips/asm_support_mips.h index 2edd63f58a..bec52384ac 100644 --- a/runtime/arch/mips/asm_support_mips.h +++ b/runtime/arch/mips/asm_support_mips.h @@ -19,7 +19,7 @@ #include "asm_support.h" -#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 96 +#define FRAME_SIZE_SAVE_ALL_CALLEE_SAVES 112 #define FRAME_SIZE_SAVE_REFS_ONLY 48 #define FRAME_SIZE_SAVE_REFS_AND_ARGS 112 #define FRAME_SIZE_SAVE_EVERYTHING 256 diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc index ca1de0ae2a..3f362de7ce 100644 --- a/runtime/arch/mips/context_mips.cc +++ b/runtime/arch/mips/context_mips.cc @@ -42,7 +42,16 @@ void MipsContext::FillCalleeSaves(uint8_t* frame, const QuickMethodFrameInfo& fr // Core registers come first, from the highest down to the lowest. for (uint32_t core_reg : HighToLowBits(frame_info.CoreSpillMask())) { - gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes()); + // If the $ZERO register shows up in the list of registers to + // be saved this was only done to properly align the floating + // point register save locations to addresses which are + // multiples of 8. We only store the address of a register in + // gprs_ if the register is not the $ZERO register. The $ZERO + // register is read-only so there's never a reason to save it + // on the stack. + if (core_reg != 0u) { + gprs_[core_reg] = CalleeSaveAddress(frame, spill_pos, frame_info.FrameSizeInBytes()); + } ++spill_pos; } DCHECK_EQ(spill_pos, POPCOUNT(frame_info.CoreSpillMask())); @@ -97,7 +106,9 @@ extern "C" NO_RETURN void art_quick_do_long_jump(uint32_t*, uint32_t*); void MipsContext::DoLongJump() { uintptr_t gprs[kNumberOfCoreRegisters]; - uint32_t fprs[kNumberOfFRegisters]; + // Align fprs[] so that art_quick_do_long_jump() can load FPU + // registers from it using the ldc1 instruction. + uint32_t fprs[kNumberOfFRegisters] __attribute__((aligned(8))); for (size_t i = 0; i < kNumberOfCoreRegisters; ++i) { gprs[i] = gprs_[i] != nullptr ? 
*gprs_[i] : MipsContext::kBadGprBase + i; } diff --git a/runtime/arch/mips/jni_entrypoints_mips.S b/runtime/arch/mips/jni_entrypoints_mips.S index 5c950717c4..2c0e75090d 100644 --- a/runtime/arch/mips/jni_entrypoints_mips.S +++ b/runtime/arch/mips/jni_entrypoints_mips.S @@ -28,8 +28,9 @@ ENTRY art_jni_dlsym_lookup_stub .cfi_adjust_cfa_offset 48 sw $ra, 32($sp) .cfi_rel_offset 31, 32 - SDu $f14, $f15, 24, $sp, $t0 - SDu $f12, $f13, 16, $sp, $t0 + CHECK_ALIGNMENT $sp, $t0 + sdc1 $f14, 24($sp) + sdc1 $f12, 16($sp) sw $a3, 12($sp) .cfi_rel_offset 7, 12 sw $a2, 8($sp) @@ -45,8 +46,9 @@ ENTRY art_jni_dlsym_lookup_stub lw $a1, 4($sp) lw $a2, 8($sp) lw $a3, 12($sp) - LDu $f12, $f13, 16, $sp, $t0 - LDu $f14, $f15, 24, $sp, $t0 + CHECK_ALIGNMENT $sp, $t0 + ldc1 $f12, 16($sp) + ldc1 $f14, 24($sp) lw $ra, 32($sp) beq $v0, $zero, .Lno_native_code_found addiu $sp, $sp, 48 # restore the stack diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S index f6204bd8b6..ee3f17d06a 100644 --- a/runtime/arch/mips/quick_entrypoints_mips.S +++ b/runtime/arch/mips/quick_entrypoints_mips.S @@ -37,45 +37,49 @@ * Reserves FRAME_SIZE_SAVE_ALL_CALLEE_SAVES + ARG_SLOT_SIZE bytes on the stack */ .macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME - addiu $sp, $sp, -96 - .cfi_adjust_cfa_offset 96 + addiu $sp, $sp, -112 + .cfi_adjust_cfa_offset 112 // Ugly compile-time check, but we only have the preprocessor. -#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 96) +#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 112) #error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(MIPS) size not as expected." #endif - sw $ra, 92($sp) - .cfi_rel_offset 31, 92 - sw $s8, 88($sp) - .cfi_rel_offset 30, 88 - sw $gp, 84($sp) - .cfi_rel_offset 28, 84 - sw $s7, 80($sp) - .cfi_rel_offset 23, 80 - sw $s6, 76($sp) - .cfi_rel_offset 22, 76 - sw $s5, 72($sp) - .cfi_rel_offset 21, 72 - sw $s4, 68($sp) - .cfi_rel_offset 20, 68 - sw $s3, 64($sp) - .cfi_rel_offset 19, 64 - sw $s2, 60($sp) - .cfi_rel_offset 18, 60 - sw $s1, 56($sp) - .cfi_rel_offset 17, 56 - sw $s0, 52($sp) - .cfi_rel_offset 16, 52 - - SDu $f30, $f31, 44, $sp, $t1 - SDu $f28, $f29, 36, $sp, $t1 - SDu $f26, $f27, 28, $sp, $t1 - SDu $f24, $f25, 20, $sp, $t1 - SDu $f22, $f23, 12, $sp, $t1 - SDu $f20, $f21, 4, $sp, $t1 - - # 1 word for holding Method* + sw $ra, 108($sp) + .cfi_rel_offset 31, 108 + sw $s8, 104($sp) + .cfi_rel_offset 30, 104 + sw $gp, 100($sp) + .cfi_rel_offset 28, 100 + sw $s7, 96($sp) + .cfi_rel_offset 23, 96 + sw $s6, 92($sp) + .cfi_rel_offset 22, 92 + sw $s5, 88($sp) + .cfi_rel_offset 21, 88 + sw $s4, 84($sp) + .cfi_rel_offset 20, 84 + sw $s3, 80($sp) + .cfi_rel_offset 19, 80 + sw $s2, 76($sp) + .cfi_rel_offset 18, 76 + sw $s1, 72($sp) + .cfi_rel_offset 17, 72 + sw $s0, 68($sp) + .cfi_rel_offset 16, 68 + // 4-byte placeholder for register $zero, serving for alignment + // of the following double precision floating point registers. + + CHECK_ALIGNMENT $sp, $t1 + sdc1 $f30, 56($sp) + sdc1 $f28, 48($sp) + sdc1 $f26, 40($sp) + sdc1 $f24, 32($sp) + sdc1 $f22, 24($sp) + sdc1 $f20, 16($sp) + + # 1 word for holding Method* plus 12 bytes padding to keep contents of SP + # a multiple of 16. 
lw $t0, %got(_ZN3art7Runtime9instance_E)($gp) lw $t0, 0($t0) @@ -216,12 +220,13 @@ .cfi_rel_offset 6, 60 sw $a1, 56($sp) .cfi_rel_offset 5, 56 - SDu $f18, $f19, 48, $sp, $t8 - SDu $f16, $f17, 40, $sp, $t8 - SDu $f14, $f15, 32, $sp, $t8 - SDu $f12, $f13, 24, $sp, $t8 - SDu $f10, $f11, 16, $sp, $t8 - SDu $f8, $f9, 8, $sp, $t8 + CHECK_ALIGNMENT $sp, $t8 + sdc1 $f18, 48($sp) + sdc1 $f16, 40($sp) + sdc1 $f14, 32($sp) + sdc1 $f12, 24($sp) + sdc1 $f10, 16($sp) + sdc1 $f8, 8($sp) # bottom will hold Method* .endm @@ -320,12 +325,13 @@ lw $a2, 60($sp) .cfi_restore 6 RESTORE_SAVE_REFS_AND_ARGS_FRAME_A1 - LDu $f18, $f19, 48, $sp, $t8 - LDu $f16, $f17, 40, $sp, $t8 - LDu $f14, $f15, 32, $sp, $t8 - LDu $f12, $f13, 24, $sp, $t8 - LDu $f10, $f11, 16, $sp, $t8 - LDu $f8, $f9, 8, $sp, $t8 + CHECK_ALIGNMENT $sp, $t8 + ldc1 $f18, 48($sp) + ldc1 $f16, 40($sp) + ldc1 $f14, 32($sp) + ldc1 $f12, 24($sp) + ldc1 $f10, 16($sp) + ldc1 $f8, 8($sp) addiu $sp, $sp, 112 # Pop frame. .cfi_adjust_cfa_offset -112 .endm @@ -412,22 +418,23 @@ 1: .cpload $ra - SDu $f30, $f31, 136, $sp, $t1 - SDu $f28, $f29, 128, $sp, $t1 - SDu $f26, $f27, 120, $sp, $t1 - SDu $f24, $f25, 112, $sp, $t1 - SDu $f22, $f23, 104, $sp, $t1 - SDu $f20, $f21, 96, $sp, $t1 - SDu $f18, $f19, 88, $sp, $t1 - SDu $f16, $f17, 80, $sp, $t1 - SDu $f14, $f15, 72, $sp, $t1 - SDu $f12, $f13, 64, $sp, $t1 - SDu $f10, $f11, 56, $sp, $t1 - SDu $f8, $f9, 48, $sp, $t1 - SDu $f6, $f7, 40, $sp, $t1 - SDu $f4, $f5, 32, $sp, $t1 - SDu $f2, $f3, 24, $sp, $t1 - SDu $f0, $f1, 16, $sp, $t1 + CHECK_ALIGNMENT $sp, $t1 + sdc1 $f30, 136($sp) + sdc1 $f28, 128($sp) + sdc1 $f26, 120($sp) + sdc1 $f24, 112($sp) + sdc1 $f22, 104($sp) + sdc1 $f20, 96($sp) + sdc1 $f18, 88($sp) + sdc1 $f16, 80($sp) + sdc1 $f14, 72($sp) + sdc1 $f12, 64($sp) + sdc1 $f10, 56($sp) + sdc1 $f8, 48($sp) + sdc1 $f6, 40($sp) + sdc1 $f4, 32($sp) + sdc1 $f2, 24($sp) + sdc1 $f0, 16($sp) # 3 words padding and 1 word for holding Method* @@ -460,22 +467,23 @@ addiu $sp, $sp, ARG_SLOT_SIZE # remove argument slots on the stack .cfi_adjust_cfa_offset -ARG_SLOT_SIZE - LDu $f30, $f31, 136, $sp, $t1 - LDu $f28, $f29, 128, $sp, $t1 - LDu $f26, $f27, 120, $sp, $t1 - LDu $f24, $f25, 112, $sp, $t1 - LDu $f22, $f23, 104, $sp, $t1 - LDu $f20, $f21, 96, $sp, $t1 - LDu $f18, $f19, 88, $sp, $t1 - LDu $f16, $f17, 80, $sp, $t1 - LDu $f14, $f15, 72, $sp, $t1 - LDu $f12, $f13, 64, $sp, $t1 - LDu $f10, $f11, 56, $sp, $t1 - LDu $f8, $f9, 48, $sp, $t1 - LDu $f6, $f7, 40, $sp, $t1 - LDu $f4, $f5, 32, $sp, $t1 - LDu $f2, $f3, 24, $sp, $t1 - LDu $f0, $f1, 16, $sp, $t1 + CHECK_ALIGNMENT $sp, $t1 + ldc1 $f30, 136($sp) + ldc1 $f28, 128($sp) + ldc1 $f26, 120($sp) + ldc1 $f24, 112($sp) + ldc1 $f22, 104($sp) + ldc1 $f20, 96($sp) + ldc1 $f18, 88($sp) + ldc1 $f16, 80($sp) + ldc1 $f14, 72($sp) + ldc1 $f12, 64($sp) + ldc1 $f10, 56($sp) + ldc1 $f8, 48($sp) + ldc1 $f6, 40($sp) + ldc1 $f4, 32($sp) + ldc1 $f2, 24($sp) + ldc1 $f0, 16($sp) lw $ra, 252($sp) .cfi_restore 31 @@ -665,7 +673,8 @@ ENTRY art_quick_osr_stub b .Losr_exit sw $v1, 4($a2) # store v0/v1 into result .Losr_fp_result: - SDu $f0, $f1, 0, $a2, $t0 # store f0/f1 into result + CHECK_ALIGNMENT $a2, $t0, 8 + sdc1 $f0, 0($a2) # store f0/f1 into result .Losr_exit: lw $ra, 44($sp) .cfi_restore 31 @@ -701,26 +710,28 @@ ENTRY art_quick_osr_stub END art_quick_osr_stub /* - * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_ + * On entry $a0 is uint32_t* gprs_ and $a1 is uint32_t* fprs_. + * Note that fprs_ is expected to be an address that is a multiple of 8. 
* FIXME: just guessing about the shape of the jmpbuf. Where will pc be? */ ENTRY art_quick_do_long_jump - LDu $f0, $f1, 0*8, $a1, $t1 - LDu $f2, $f3, 1*8, $a1, $t1 - LDu $f4, $f5, 2*8, $a1, $t1 - LDu $f6, $f7, 3*8, $a1, $t1 - LDu $f8, $f9, 4*8, $a1, $t1 - LDu $f10, $f11, 5*8, $a1, $t1 - LDu $f12, $f13, 6*8, $a1, $t1 - LDu $f14, $f15, 7*8, $a1, $t1 - LDu $f16, $f17, 8*8, $a1, $t1 - LDu $f18, $f19, 9*8, $a1, $t1 - LDu $f20, $f21, 10*8, $a1, $t1 - LDu $f22, $f23, 11*8, $a1, $t1 - LDu $f24, $f25, 12*8, $a1, $t1 - LDu $f26, $f27, 13*8, $a1, $t1 - LDu $f28, $f29, 14*8, $a1, $t1 - LDu $f30, $f31, 15*8, $a1, $t1 + CHECK_ALIGNMENT $a1, $t1, 8 + ldc1 $f0, 0*8($a1) + ldc1 $f2, 1*8($a1) + ldc1 $f4, 2*8($a1) + ldc1 $f6, 3*8($a1) + ldc1 $f8, 4*8($a1) + ldc1 $f10, 5*8($a1) + ldc1 $f12, 6*8($a1) + ldc1 $f14, 7*8($a1) + ldc1 $f16, 8*8($a1) + ldc1 $f18, 9*8($a1) + ldc1 $f20, 10*8($a1) + ldc1 $f22, 11*8($a1) + ldc1 $f24, 12*8($a1) + ldc1 $f26, 13*8($a1) + ldc1 $f28, 14*8($a1) + ldc1 $f30, 15*8($a1) .set push .set nomacro @@ -1067,7 +1078,8 @@ loopEnd: jalr $zero, $ra sw $v1, 4($t0) # store the other half of the result 5: - SDu $f0, $f1, 0, $t0, $t1 # store floating point result + CHECK_ALIGNMENT $t0, $t1, 8 + sdc1 $f0, 0($t0) # store floating point result jalr $zero, $ra nop @@ -1225,7 +1237,8 @@ loopEndS: jalr $zero, $ra sw $v1, 4($t0) # store the other half of the result 6: - SDu $f0, $f1, 0, $t0, $t1 # store floating point result + CHECK_ALIGNMENT $t0, $t1, 8 + sdc1 $f0, 0($t0) # store floating point result jalr $zero, $ra nop @@ -2252,7 +2265,7 @@ ENTRY art_quick_generic_jni_trampoline move $a0, rSELF # pass Thread::Current move $a2, $v0 # pass result move $a3, $v1 - addiu $sp, $sp, -24 # reserve arg slots + addiu $sp, $sp, -32 # reserve arg slots la $t9, artQuickGenericJniEndTrampoline jalr $t9 s.d $f0, 16($sp) # pass result_f @@ -3243,7 +3256,8 @@ ENTRY art_quick_invoke_polymorphic lhu $v0, 16($sp) # Move char from JValue result to return value register. .Lstore_double_result: .Lstore_float_result: - LDu $f0, $f1, 16, $sp, $t0 # Move double/float from JValue result to return value register. + CHECK_ALIGNMENT $sp, $t0 + ldc1 $f0, 16($sp) # Move double/float from JValue result to return value register. b .Lcleanup_and_return nop .Lstore_long_result: diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h index 45a21ab942..8c86252152 100644 --- a/runtime/arch/mips/quick_method_frame_info_mips.h +++ b/runtime/arch/mips/quick_method_frame_info_mips.h @@ -35,8 +35,24 @@ static constexpr uint32_t kMipsCalleeSaveRefSpills = static constexpr uint32_t kMipsCalleeSaveArgSpills = (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) | (1 << art::mips::T0) | (1 << art::mips::T1); +// We want to save all floating point register pairs at addresses +// which are multiples of 8 so that we can eliminate use of the +// SDu/LDu macros by using sdc1/ldc1 to store/load floating +// register values using a single instruction. Because integer +// registers are stored at the top of the frame, to achieve having +// the floating point register pairs aligned on multiples of 8 the +// number of integer registers saved must be even. Previously, the +// only case in which we saved floating point registers beneath an +// odd number of integer registers was when "type" is +// CalleeSaveType::kSaveAllCalleeSaves. (There are other cases in +// which an odd number of integer registers are saved but those +// cases don't save any floating point registers. 
If no floating +// point registers are saved we don't care if the number of integer +// registers saved is odd or even). To save an even number of +// integer registers in this particular case we add the ZERO +// register to the list of registers which get saved. static constexpr uint32_t kMipsCalleeSaveAllSpills = - (1 << art::mips::S0) | (1 << art::mips::S1); + (1 << art::mips::ZERO) | (1 << art::mips::S0) | (1 << art::mips::S1); static constexpr uint32_t kMipsCalleeSaveEverythingSpills = (1 << art::mips::AT) | (1 << art::mips::V0) | (1 << art::mips::V1) | (1 << art::mips::A0) | (1 << art::mips::A1) | (1 << art::mips::A2) | (1 << art::mips::A3) | |
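
Finally, the quick_method_frame_info_mips.h hunk is where the even/odd argument becomes concrete. A rough sketch with hypothetical spill masks (not ART's real ones) showing why an even count of spilled core words keeps the FPU save area 8-byte aligned:

```
#include <cstdint>

// Sketch only: with the frame size a multiple of 16 and the core registers
// saved at the top of the frame, the FPU save area directly below them starts
// a multiple of 8 below the frame top exactly when an even number of core
// words is spilled, which is what adding $ZERO to kMipsCalleeSaveAllSpills
// guarantees for SaveAllCalleeSaves.
constexpr uint32_t CoreSpillBytes(uint32_t core_spill_mask) {
  return static_cast<uint32_t>(__builtin_popcount(core_spill_mask)) * 4u;
}

constexpr uint32_t kElevenCoreRegs = 0x07FFu << 16;         // 11 bits set: odd count
constexpr uint32_t kWithZeroSlot   = (0x07FFu << 16) | 1u;  // bit 0 = $ZERO: 12 bits, even

static_assert(CoreSpillBytes(kElevenCoreRegs) % 8 != 0, "FPU saves would be misaligned");
static_assert(CoreSpillBytes(kWithZeroSlot) % 8 == 0, "FPU saves can use sdc1/ldc1");
```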