diff options
| -rw-r--r-- | compiler/jni/jni_cfi_test_expected.inc | 15 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_mips.cc | 39 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 10 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_cfi_test_expected.inc | 35 | ||||
| -rw-r--r-- | compiler/trampolines/trampoline_compiler.cc | 2 | ||||
| -rw-r--r-- | compiler/utils/assembler.h | 10 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips.cc | 988 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips.h | 157 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips32r6_test.cc | 138 | ||||
| -rw-r--r-- | compiler/utils/mips/assembler_mips_test.cc | 418 |
11 files changed, 1527 insertions, 287 deletions
diff --git a/compiler/jni/jni_cfi_test_expected.inc b/compiler/jni/jni_cfi_test_expected.inc index da72c756fe..a205800dfa 100644 --- a/compiler/jni/jni_cfi_test_expected.inc +++ b/compiler/jni/jni_cfi_test_expected.inc @@ -332,14 +332,14 @@ static constexpr uint8_t expected_asm_kMips[] = { 0x20, 0x00, 0xBD, 0x27, 0x20, 0x00, 0xB2, 0x8F, 0x24, 0x00, 0xB3, 0x8F, 0x28, 0x00, 0xB4, 0x8F, 0x2C, 0x00, 0xB5, 0x8F, 0x30, 0x00, 0xB6, 0x8F, 0x34, 0x00, 0xB7, 0x8F, 0x38, 0x00, 0xBE, 0x8F, 0x3C, 0x00, 0xBF, 0x8F, - 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_cfi_kMips[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x9E, 0x02, 0x44, 0x97, 0x03, 0x44, 0x96, 0x04, 0x44, 0x95, 0x05, 0x44, 0x94, 0x06, 0x44, 0x93, 0x07, 0x44, 0x92, 0x08, 0x58, 0x0E, 0x60, 0x44, 0x0E, 0x40, 0x0A, 0x44, 0xD2, 0x44, 0xD3, 0x44, 0xD4, 0x44, 0xD5, 0x44, 0xD6, 0x44, 0xD7, 0x44, 0xDE, - 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40, + 0x44, 0xDF, 0x48, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; // 0x00000000: addiu r29, r29, -64 // 0x00000004: .cfi_def_cfa_offset: 64 @@ -385,12 +385,11 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x0000005c: .cfi_restore: r30 // 0x0000005c: lw r31, +60(r29) // 0x00000060: .cfi_restore: r31 -// 0x00000060: addiu r29, r29, 64 -// 0x00000064: .cfi_def_cfa_offset: 0 -// 0x00000064: jr r31 -// 0x00000068: nop -// 0x0000006c: .cfi_restore_state -// 0x0000006c: .cfi_def_cfa_offset: 64 +// 0x00000060: jr r31 +// 0x00000064: addiu r29, r29, 64 +// 0x00000068: .cfi_def_cfa_offset: 0 +// 0x00000068: .cfi_restore_state +// 0x00000068: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64[] = { 0x90, 0xFF, 0xBD, 0x67, 0x68, 0x00, 0xBF, 0xFF, 0x60, 0x00, 0xBE, 0xFF, diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 0d3f849143..b0de9640fb 100644 --- a/compiler/optimizing/code_generator.cc +++ 
b/compiler/optimizing/code_generator.cc @@ -753,7 +753,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } // Collect PC infos for the mapping table. - uint32_t native_pc = GetAssembler()->CodeSize(); + uint32_t native_pc = GetAssembler()->CodePosition(); if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 8a2f90d541..e0de03bf8f 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -792,12 +792,24 @@ void CodeGeneratorMIPS::GenerateFrameExit() { // TODO: __ cfi().Restore(DWARFReg(reg)); } - __ DecreaseFrameSize(GetFrameSize()); + size_t frame_size = GetFrameSize(); + // Adjust the stack pointer in the delay slot if doing so doesn't break CFI. + bool exchange = IsInt<16>(static_cast<int32_t>(frame_size)); + bool reordering = __ SetReorder(false); + if (exchange) { + __ Jr(RA); + __ DecreaseFrameSize(frame_size); // Single instruction in delay slot. + } else { + __ DecreaseFrameSize(frame_size); + __ Jr(RA); + __ Nop(); // In delay slot. + } + __ SetReorder(reordering); + } else { + __ Jr(RA); + __ NopIfNoReordering(); } - __ Jr(RA); - __ Nop(); - __ cfi().RestoreState(); __ cfi().DefCFAOffset(GetFrameSize()); } @@ -1251,6 +1263,7 @@ void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset, uint32_t dex_pc, SlowPathCode* slow_path, bool is_direct_entrypoint) { + bool reordering = __ SetReorder(false); __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); __ Jalr(T9); if (is_direct_entrypoint) { @@ -1262,6 +1275,7 @@ void CodeGeneratorMIPS::InvokeRuntime(int32_t entry_point_offset, } else { __ Nop(); // In delay slot. 
} + __ SetReorder(reordering); RecordPcInfo(instruction, dex_pc, slow_path); } @@ -3953,7 +3967,7 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -4254,7 +4268,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke // T9 prepared above for better instruction scheduling. // T9() __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: // TODO: Implement this type. @@ -4270,7 +4284,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke kMipsPointerSize).Int32Value()); // T9() __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); break; } DCHECK(!IsLeafMethod()); @@ -4312,7 +4326,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); // T9(); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); } void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4421,6 +4435,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { DCHECK(!kEmitCompilerReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + bool reordering = __ SetReorder(false); if (isR6) { __ Bind(&info->high_label); __ Bind(&info->pc_rel_label); @@ -4436,6 +4451,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { // Add a 32-bit offset to PC. 
__ Addu(out, out, base_or_current_method_reg); } + __ SetReorder(reordering); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -4579,6 +4595,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { DCHECK(!kEmitCompilerReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + bool reordering = __ SetReorder(false); if (isR6) { __ Bind(&info->high_label); __ Bind(&info->pc_rel_label); @@ -4594,6 +4611,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { // Add a 32-bit offset to PC. __ Addu(out, out, base_or_current_method_reg); } + __ SetReorder(reordering); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -4851,7 +4869,7 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString)); __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); } else { codegen_->InvokeRuntime( @@ -5751,7 +5769,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra Register reg = base->GetLocations()->Out().AsRegister<Register>(); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - + bool reordering = __ SetReorder(false); if (codegen_->GetInstructionSetFeatures().IsR6()) { __ Bind(&info->high_label); __ Bind(&info->pc_rel_label); @@ -5769,6 +5787,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra __ Addu(reg, reg, RA); // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()? 
} + __ SetReorder(reordering); } void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 6e5eb6622b..862a93f9d6 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1901,7 +1901,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { TR, QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pStringCompareTo).Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); __ Bind(slow_path->GetExitLabel()); } @@ -2060,7 +2060,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, TR, QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pIndexOf).Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); if (slow_path != nullptr) { __ Bind(slow_path->GetExitLabel()); @@ -2146,7 +2146,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) TR, QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromBytes).Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } @@ -2179,7 +2179,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) TR, QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromChars).Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } @@ -2208,7 +2208,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) TR, QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pAllocStringFromString).Int32Value()); __ Jalr(T9); - __ Nop(); + __ NopIfNoReordering(); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); __ Bind(slow_path->GetExitLabel()); } diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 05eb06333e..6c5030c9cb 100644 --- 
a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -144,12 +144,12 @@ static constexpr uint8_t expected_asm_kMips[] = { 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, 0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, - 0x40, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_cfi_kMips[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, - 0x0B, 0x0E, 0x40, + 0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, + 0x0E, 0x40, }; // 0x00000000: addiu r29, r29, -64 // 0x00000004: .cfi_def_cfa_offset: 64 @@ -171,12 +171,11 @@ static constexpr uint8_t expected_cfi_kMips[] = { // 0x00000028: .cfi_restore: r16 // 0x00000028: ldc1 f22, +40(r29) // 0x0000002c: ldc1 f20, +32(r29) -// 0x00000030: addiu r29, r29, 64 -// 0x00000034: .cfi_def_cfa_offset: 0 -// 0x00000034: jr r31 -// 0x00000038: nop -// 0x0000003c: .cfi_restore_state -// 0x0000003c: .cfi_def_cfa_offset: 64 +// 0x00000030: jr r31 +// 0x00000034: addiu r29, r29, 64 +// 0x00000038: .cfi_def_cfa_offset: 0 +// 0x00000038: .cfi_restore_state +// 0x00000038: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64[] = { 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, @@ -348,14 +347,13 @@ static constexpr uint8_t expected_asm_kMips_adjust_head[] = { }; static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, - 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x40, 0x00, 0xBD, 0x27, - 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7, 0x09, 0x00, 0xE0, 0x03, + 0x40, 
0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_cfi_kMips_adjust[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, 0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, - 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x4C, 0x0E, 0x00, 0x48, 0x0B, 0x0E, - 0x40, + 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; // 0x00000000: addiu r29, r29, -64 // 0x00000004: .cfi_def_cfa_offset: 64 @@ -392,12 +390,11 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = { // 0x00020054: .cfi_restore: r16 // 0x00020054: ldc1 f22, +40(r29) // 0x00020058: ldc1 f20, +32(r29) -// 0x0002005c: addiu r29, r29, 64 -// 0x00020060: .cfi_def_cfa_offset: 0 -// 0x00020060: jr r31 -// 0x00020064: nop -// 0x00020068: .cfi_restore_state -// 0x00020068: .cfi_def_cfa_offset: 64 +// 0x0002005c: jr r31 +// 0x00020060: addiu r29, r29, 64 +// 0x00020064: .cfi_def_cfa_offset: 0 +// 0x00020064: .cfi_restore_state +// 0x00020064: .cfi_def_cfa_offset: 64 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = { 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF, diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index 55835e77eb..70f290d60e 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -152,7 +152,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( __ LoadFromOffset(kLoadWord, T9, S1, offset.Int32Value()); } __ Jr(T9); - __ Nop(); + __ NopIfNoReordering(); __ Break(); __ FinalizeCode(); diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 8981776314..b616057e79 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -362,6 +362,16 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } virtual const uint8_t* CodeBufferBaseAddress() 
const { return buffer_.contents(); } + // CodePosition() is a non-const method similar to CodeSize(), which is used to + // record positions within the code buffer for the purpose of signal handling + // (stack overflow checks and implicit null checks may trigger signals and the + // signal handlers expect them right before the recorded positions). + // On most architectures CodePosition() should be equivalent to CodeSize(), but + // the MIPS assembler needs to be aware of this recording, so it doesn't put + // the instructions that can trigger signals into branch delay slots. Handling + // signals from instructions in delay slots is a bit problematic and should be + // avoided. + virtual size_t CodePosition() { return CodeSize(); } // Copy instructions out of assembly buffer into the given region of memory virtual void FinalizeInstructions(const MemoryRegion& region) { diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index bfc63d14da..4b580b620f 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -40,10 +40,195 @@ std::ostream& operator<<(std::ostream& os, const DRegister& rhs) { return os; } +MipsAssembler::DelaySlot::DelaySlot() + : instruction_(0), + gpr_outs_mask_(0), + gpr_ins_mask_(0), + fpr_outs_mask_(0), + fpr_ins_mask_(0), + cc_outs_mask_(0), + cc_ins_mask_(0) {} + +void MipsAssembler::DsFsmInstr(uint32_t instruction, + uint32_t gpr_outs_mask, + uint32_t gpr_ins_mask, + uint32_t fpr_outs_mask, + uint32_t fpr_ins_mask, + uint32_t cc_outs_mask, + uint32_t cc_ins_mask) { + if (!reordering_) { + CHECK_EQ(ds_fsm_state_, kExpectingLabel); + CHECK_EQ(delay_slot_.instruction_, 0u); + return; + } + switch (ds_fsm_state_) { + case kExpectingLabel: + break; + case kExpectingInstruction: + CHECK_EQ(ds_fsm_target_pc_ + sizeof(uint32_t), buffer_.Size()); + // If the last instruction is not suitable for delay slots, drop + // the PC of the label preceding it so that no unconditional 
branch + // uses this instruction to fill its delay slot. + if (instruction == 0) { + DsFsmDropLabel(); // Sets ds_fsm_state_ = kExpectingLabel. + } else { + // Otherwise wait for another instruction or label before we can + // commit the label PC. The label PC will be dropped if instead + // of another instruction or label there's a call from the code + // generator to CodePosition() to record the buffer size. + // Instructions after which the buffer size is recorded cannot + // be moved into delay slots or anywhere else because they may + // trigger signals and the signal handlers expect these signals + // to be coming from the instructions immediately preceding the + // recorded buffer locations. + ds_fsm_state_ = kExpectingCommit; + } + break; + case kExpectingCommit: + CHECK_EQ(ds_fsm_target_pc_ + 2 * sizeof(uint32_t), buffer_.Size()); + DsFsmCommitLabel(); // Sets ds_fsm_state_ = kExpectingLabel. + break; + } + delay_slot_.instruction_ = instruction; + delay_slot_.gpr_outs_mask_ = gpr_outs_mask & ~1u; // Ignore register ZERO. + delay_slot_.gpr_ins_mask_ = gpr_ins_mask & ~1u; // Ignore register ZERO. + delay_slot_.fpr_outs_mask_ = fpr_outs_mask; + delay_slot_.fpr_ins_mask_ = fpr_ins_mask; + delay_slot_.cc_outs_mask_ = cc_outs_mask; + delay_slot_.cc_ins_mask_ = cc_ins_mask; +} + +void MipsAssembler::DsFsmLabel() { + if (!reordering_) { + CHECK_EQ(ds_fsm_state_, kExpectingLabel); + CHECK_EQ(delay_slot_.instruction_, 0u); + return; + } + switch (ds_fsm_state_) { + case kExpectingLabel: + ds_fsm_target_pc_ = buffer_.Size(); + ds_fsm_state_ = kExpectingInstruction; + break; + case kExpectingInstruction: + // Allow consecutive labels. + CHECK_EQ(ds_fsm_target_pc_, buffer_.Size()); + break; + case kExpectingCommit: + CHECK_EQ(ds_fsm_target_pc_ + sizeof(uint32_t), buffer_.Size()); + DsFsmCommitLabel(); + ds_fsm_target_pc_ = buffer_.Size(); + ds_fsm_state_ = kExpectingInstruction; + break; + } + // We cannot move instructions into delay slots across labels. 
+ delay_slot_.instruction_ = 0; +} + +void MipsAssembler::DsFsmCommitLabel() { + if (ds_fsm_state_ == kExpectingCommit) { + ds_fsm_target_pcs_.emplace_back(ds_fsm_target_pc_); + } + ds_fsm_state_ = kExpectingLabel; +} + +void MipsAssembler::DsFsmDropLabel() { + ds_fsm_state_ = kExpectingLabel; +} + +bool MipsAssembler::SetReorder(bool enable) { + bool last_state = reordering_; + if (last_state != enable) { + DsFsmCommitLabel(); + DsFsmInstrNop(0); + } + reordering_ = enable; + return last_state; +} + +size_t MipsAssembler::CodePosition() { + // The last instruction cannot be used in a delay slot, do not commit + // the label before it (if any) and clear the delay slot. + DsFsmDropLabel(); + DsFsmInstrNop(0); + size_t size = buffer_.Size(); + // In theory we can get the following sequence: + // label1: + // instr + // label2: # label1 gets committed when label2 is seen + // CodePosition() call + // and we need to uncommit label1. + if (ds_fsm_target_pcs_.size() != 0 && ds_fsm_target_pcs_.back() + sizeof(uint32_t) == size) { + ds_fsm_target_pcs_.pop_back(); + } + return size; +} + +void MipsAssembler::DsFsmInstrNop(uint32_t instruction ATTRIBUTE_UNUSED) { + DsFsmInstr(0, 0, 0, 0, 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) { + DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction, + Register in1_out, + Register in2, + Register in3) { + DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrFff(uint32_t instruction, + FRegister out, + FRegister in1, + FRegister in2) { + DsFsmInstr(instruction, 0, 0, (1u << out), (1u << in1) | (1u << in2), 0, 0); +} + +void MipsAssembler::DsFsmInstrFfff(uint32_t instruction, + FRegister in1_out, + FRegister in2, + FRegister in3) { + DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << 
in2) | (1u << in3), 0, 0); +} + +void MipsAssembler::DsFsmInstrRf(uint32_t instruction, Register out, FRegister in) { + DsFsmInstr(instruction, (1u << out), 0, 0, (1u << in), 0, 0); +} + +void MipsAssembler::DsFsmInstrFr(uint32_t instruction, FRegister out, Register in) { + DsFsmInstr(instruction, 0, (1u << in), (1u << out), 0, 0, 0); +} + +void MipsAssembler::DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2) { + DsFsmInstr(instruction, 0, (1u << in2), 0, (1u << in1), 0, 0); +} + +void MipsAssembler::DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2) { + DsFsmInstr(instruction, 0, 0, 0, (1u << in1) | (1u << in2), (1 << cc_out), 0); +} + +void MipsAssembler::DsFsmInstrRrrc(uint32_t instruction, + Register in1_out, + Register in2, + int cc_in) { + DsFsmInstr(instruction, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0, 0, (1 << cc_in)); +} + +void MipsAssembler::DsFsmInstrFffc(uint32_t instruction, + FRegister in1_out, + FRegister in2, + int cc_in) { + DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2), 0, (1 << cc_in)); +} + void MipsAssembler::FinalizeCode() { for (auto& exception_block : exception_blocks_) { EmitExceptionPoll(&exception_block); } + // Commit the last branch target label (if any) and disable instruction reordering. + DsFsmCommitLabel(); + SetReorder(false); EmitLiterals(); PromoteBranches(); } @@ -107,6 +292,12 @@ void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) { void MipsAssembler::EmitBranches() { CHECK(!overwriting_); + CHECK(!reordering_); + // Now that everything has its final position in the buffer (the branches have + // been promoted), adjust the target label PCs. + for (size_t cnt = ds_fsm_target_pcs_.size(), i = 0; i < cnt; i++) { + ds_fsm_target_pcs_[i] = GetAdjustedPosition(ds_fsm_target_pcs_[i]); + } // Switch from appending instructions at the end of the buffer to overwriting // existing instructions (branch placeholders) in the buffer. 
overwriting_ = true; @@ -128,7 +319,12 @@ void MipsAssembler::Emit(uint32_t value) { } } -void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct) { +uint32_t MipsAssembler::EmitR(int opcode, + Register rs, + Register rt, + Register rd, + int shamt, + int funct) { CHECK_NE(rs, kNoRegister); CHECK_NE(rt, kNoRegister); CHECK_NE(rd, kNoRegister); @@ -139,9 +335,10 @@ void MipsAssembler::EmitR(int opcode, Register rs, Register rt, Register rd, int shamt << kShamtShift | funct; Emit(encoding); + return encoding; } -void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) { +uint32_t MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) { CHECK_NE(rs, kNoRegister); CHECK_NE(rt, kNoRegister); uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | @@ -149,25 +346,32 @@ void MipsAssembler::EmitI(int opcode, Register rs, Register rt, uint16_t imm) { static_cast<uint32_t>(rt) << kRtShift | imm; Emit(encoding); + return encoding; } -void MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) { +uint32_t MipsAssembler::EmitI21(int opcode, Register rs, uint32_t imm21) { CHECK_NE(rs, kNoRegister); CHECK(IsUint<21>(imm21)) << imm21; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | static_cast<uint32_t>(rs) << kRsShift | imm21; Emit(encoding); + return encoding; } -void MipsAssembler::EmitI26(int opcode, uint32_t imm26) { +uint32_t MipsAssembler::EmitI26(int opcode, uint32_t imm26) { CHECK(IsUint<26>(imm26)) << imm26; uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | imm26; Emit(encoding); + return encoding; } -void MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, - int funct) { +uint32_t MipsAssembler::EmitFR(int opcode, + int fmt, + FRegister ft, + FRegister fs, + FRegister fd, + int funct) { CHECK_NE(ft, kNoFRegister); CHECK_NE(fs, kNoFRegister); CHECK_NE(fd, kNoFRegister); @@ -178,52 +382,54 @@ void 
MipsAssembler::EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FReg static_cast<uint32_t>(fd) << kFdShift | funct; Emit(encoding); + return encoding; } -void MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) { +uint32_t MipsAssembler::EmitFI(int opcode, int fmt, FRegister ft, uint16_t imm) { CHECK_NE(ft, kNoFRegister); uint32_t encoding = static_cast<uint32_t>(opcode) << kOpcodeShift | fmt << kFmtShift | static_cast<uint32_t>(ft) << kFtShift | imm; Emit(encoding); + return encoding; } void MipsAssembler::Addu(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x21); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt); } void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) { - EmitI(0x9, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Subu(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x23); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x23), rd, rs, rt); } void MipsAssembler::MultR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x18), ZERO, rs, rt); } void MipsAssembler::MultuR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x19), ZERO, rs, rt); } void MipsAssembler::DivR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1a), ZERO, rs, rt); } void MipsAssembler::DivuR2(Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b); + DsFsmInstrRrr(EmitR(0, rs, rt, static_cast<Register>(0), 0, 0x1b), ZERO, rs, rt); } void MipsAssembler::MulR2(Register rd, Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0x1c, rs, rt, rd, 0, 2); + DsFsmInstrRrr(EmitR(0x1c, rs, rt, 
rd, 0, 2), rd, rs, rt); } void MipsAssembler::DivR2(Register rd, Register rs, Register rt) { @@ -252,308 +458,307 @@ void MipsAssembler::ModuR2(Register rd, Register rs, Register rt) { void MipsAssembler::MulR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 2, 0x18); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x18), rd, rs, rt); } void MipsAssembler::MuhR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x18); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x18), rd, rs, rt); } void MipsAssembler::MuhuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x19); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x19), rd, rs, rt); } void MipsAssembler::DivR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 2, 0x1a); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1a), rd, rs, rt); } void MipsAssembler::ModR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x1a); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1a), rd, rs, rt); } void MipsAssembler::DivuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 2, 0x1b); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 2, 0x1b), rd, rs, rt); } void MipsAssembler::ModuR6(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 3, 0x1b); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 3, 0x1b), rd, rs, rt); } void MipsAssembler::And(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x24); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x24), rd, rs, rt); } void MipsAssembler::Andi(Register rt, Register rs, uint16_t imm16) { - EmitI(0xc, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xc, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Or(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x25); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x25), rd, rs, rt); } void MipsAssembler::Ori(Register rt, Register rs, uint16_t imm16) { - EmitI(0xd, rs, rt, imm16); 
+ DsFsmInstrRrr(EmitI(0xd, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Xor(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x26); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x26), rd, rs, rt); } void MipsAssembler::Xori(Register rt, Register rs, uint16_t imm16) { - EmitI(0xe, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xe, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Nor(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x27); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x27), rd, rs, rt); } void MipsAssembler::Movz(Register rd, Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, rd, 0, 0x0A); + DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0A), rd, rs, rt); } void MipsAssembler::Movn(Register rd, Register rs, Register rt) { CHECK(!IsR6()); - EmitR(0, rs, rt, rd, 0, 0x0B); + DsFsmInstrRrrr(EmitR(0, rs, rt, rd, 0, 0x0B), rd, rs, rt); } void MipsAssembler::Seleqz(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 0, 0x35); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x35), rd, rs, rt); } void MipsAssembler::Selnez(Register rd, Register rs, Register rt) { CHECK(IsR6()); - EmitR(0, rs, rt, rd, 0, 0x37); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x37), rd, rs, rt); } void MipsAssembler::ClzR6(Register rd, Register rs) { CHECK(IsR6()); - EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10); + DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x10), rd, rs, rs); } void MipsAssembler::ClzR2(Register rd, Register rs) { CHECK(!IsR6()); - EmitR(0x1C, rs, rd, rd, 0, 0x20); + DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 0x20), rd, rs, rs); } void MipsAssembler::CloR6(Register rd, Register rs) { CHECK(IsR6()); - EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11); + DsFsmInstrRrr(EmitR(0, rs, static_cast<Register>(0), rd, 0x01, 0x11), rd, rs, rs); } void MipsAssembler::CloR2(Register rd, Register rs) { CHECK(!IsR6()); - EmitR(0x1C, rs, rd, rd, 0, 0x21); + DsFsmInstrRrr(EmitR(0x1C, rs, rd, rd, 0, 
0x21), rd, rs, rs); } void MipsAssembler::Seb(Register rd, Register rt) { - EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x10, 0x20), rd, rt, rt); } void MipsAssembler::Seh(Register rd, Register rt) { - EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x18, 0x20), rd, rt, rt); } void MipsAssembler::Wsbh(Register rd, Register rt) { - EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 2, 0x20), rd, rt, rt); } void MipsAssembler::Bitswap(Register rd, Register rt) { CHECK(IsR6()); - EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20); + DsFsmInstrRrr(EmitR(0x1f, static_cast<Register>(0), rt, rd, 0x0, 0x20), rd, rt, rt); } void MipsAssembler::Sll(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x00), rd, rt, rt); } void MipsAssembler::Srl(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x02), rd, rt, rt); } void MipsAssembler::Rotr(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(1), rt, rd, shamt, 0x02), rd, rt, rt); } void MipsAssembler::Sra(Register rd, Register rt, int shamt) { CHECK(IsUint<5>(shamt)) << shamt; - EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03); + DsFsmInstrRrr(EmitR(0, static_cast<Register>(0), rt, rd, shamt, 0x03), rd, rt, rt); } void MipsAssembler::Sllv(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 0, 0x04); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x04), rd, rs, rt); } void 
MipsAssembler::Srlv(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 0, 0x06); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x06), rd, rs, rt); } void MipsAssembler::Rotrv(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 1, 0x06); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 1, 0x06), rd, rs, rt); } void MipsAssembler::Srav(Register rd, Register rt, Register rs) { - EmitR(0, rs, rt, rd, 0, 0x07); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x07), rd, rs, rt); } void MipsAssembler::Ext(Register rd, Register rt, int pos, int size) { CHECK(IsUint<5>(pos)) << pos; CHECK(0 < size && size <= 32) << size; CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size; - EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00); + DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(size - 1), pos, 0x00), rd, rt, rt); } void MipsAssembler::Ins(Register rd, Register rt, int pos, int size) { CHECK(IsUint<5>(pos)) << pos; CHECK(0 < size && size <= 32) << size; CHECK(0 < pos + size && pos + size <= 32) << pos << " + " << size; - EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04); + DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt); } void MipsAssembler::Lb(Register rt, Register rs, uint16_t imm16) { - EmitI(0x20, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x20, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lh(Register rt, Register rs, uint16_t imm16) { - EmitI(0x21, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) { - EmitI(0x23, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x22, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x22, rs, rt, imm16), rt, rt, rs); } void MipsAssembler::Lwr(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x26, rs, rt, imm16); + 
DsFsmInstrRrr(EmitI(0x26, rs, rt, imm16), rt, rt, rs); } void MipsAssembler::Lbu(Register rt, Register rs, uint16_t imm16) { - EmitI(0x24, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x24, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lhu(Register rt, Register rs, uint16_t imm16) { - EmitI(0x25, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x25, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Lwpc(Register rs, uint32_t imm19) { CHECK(IsR6()); CHECK(IsUint<19>(imm19)) << imm19; - EmitI21(0x3B, rs, (0x01 << 19) | imm19); + DsFsmInstrNop(EmitI21(0x3B, rs, (0x01 << 19) | imm19)); } void MipsAssembler::Lui(Register rt, uint16_t imm16) { - EmitI(0xf, static_cast<Register>(0), rt, imm16); + DsFsmInstrRrr(EmitI(0xf, static_cast<Register>(0), rt, imm16), rt, ZERO, ZERO); } void MipsAssembler::Aui(Register rt, Register rs, uint16_t imm16) { CHECK(IsR6()); - EmitI(0xf, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xf, rs, rt, imm16), rt, rt, rs); } void MipsAssembler::Sync(uint32_t stype) { - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), - stype & 0x1f, 0xf); + DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, stype & 0x1f, 0xf)); } void MipsAssembler::Mfhi(Register rd) { CHECK(!IsR6()); - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x10); + DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x10), rd, ZERO, ZERO); } void MipsAssembler::Mflo(Register rd) { CHECK(!IsR6()); - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), rd, 0, 0x12); + DsFsmInstrRrr(EmitR(0, ZERO, ZERO, rd, 0, 0x12), rd, ZERO, ZERO); } void MipsAssembler::Sb(Register rt, Register rs, uint16_t imm16) { - EmitI(0x28, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x28, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Sh(Register rt, Register rs, uint16_t imm16) { - EmitI(0x29, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) { - EmitI(0x2b, rs, rt, imm16); + 
DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x2a, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x2a, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::Swr(Register rt, Register rs, uint16_t imm16) { CHECK(!IsR6()); - EmitI(0x2e, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0x2e, rs, rt, imm16), ZERO, rt, rs); } void MipsAssembler::LlR2(Register rt, Register base, int16_t imm16) { CHECK(!IsR6()); - EmitI(0x30, base, rt, imm16); + DsFsmInstrRrr(EmitI(0x30, base, rt, imm16), rt, base, base); } void MipsAssembler::ScR2(Register rt, Register base, int16_t imm16) { CHECK(!IsR6()); - EmitI(0x38, base, rt, imm16); + DsFsmInstrRrr(EmitI(0x38, base, rt, imm16), rt, rt, base); } void MipsAssembler::LlR6(Register rt, Register base, int16_t imm9) { CHECK(IsR6()); CHECK(IsInt<9>(imm9)); - EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36); + DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x36), rt, base, base); } void MipsAssembler::ScR6(Register rt, Register base, int16_t imm9) { CHECK(IsR6()); CHECK(IsInt<9>(imm9)); - EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26); + DsFsmInstrRrr(EmitI(0x1f, base, rt, ((imm9 & 0x1ff) << 7) | 0x26), rt, rt, base); } void MipsAssembler::Slt(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x2a); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2a), rd, rs, rt); } void MipsAssembler::Sltu(Register rd, Register rs, Register rt) { - EmitR(0, rs, rt, rd, 0, 0x2b); + DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x2b), rd, rs, rt); } void MipsAssembler::Slti(Register rt, Register rs, uint16_t imm16) { - EmitI(0xa, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xa, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::Sltiu(Register rt, Register rs, uint16_t imm16) { - EmitI(0xb, rs, rt, imm16); + DsFsmInstrRrr(EmitI(0xb, rs, rt, imm16), rt, rs, rs); } void MipsAssembler::B(uint16_t imm16) { - EmitI(0x4, static_cast<Register>(0), 
static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x4, static_cast<Register>(0), static_cast<Register>(0), imm16)); } void MipsAssembler::Bal(uint16_t imm16) { - EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16); + DsFsmInstrNop(EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x11), imm16)); } void MipsAssembler::Beq(Register rs, Register rt, uint16_t imm16) { - EmitI(0x4, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x4, rs, rt, imm16)); } void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) { - EmitI(0x5, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x5, rs, rt, imm16)); } void MipsAssembler::Beqz(Register rt, uint16_t imm16) { @@ -565,19 +770,19 @@ void MipsAssembler::Bnez(Register rt, uint16_t imm16) { } void MipsAssembler::Bltz(Register rt, uint16_t imm16) { - EmitI(0x1, rt, static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x1, rt, static_cast<Register>(0), imm16)); } void MipsAssembler::Bgez(Register rt, uint16_t imm16) { - EmitI(0x1, rt, static_cast<Register>(0x1), imm16); + DsFsmInstrNop(EmitI(0x1, rt, static_cast<Register>(0x1), imm16)); } void MipsAssembler::Blez(Register rt, uint16_t imm16) { - EmitI(0x6, rt, static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x6, rt, static_cast<Register>(0), imm16)); } void MipsAssembler::Bgtz(Register rt, uint16_t imm16) { - EmitI(0x7, rt, static_cast<Register>(0), imm16); + DsFsmInstrNop(EmitI(0x7, rt, static_cast<Register>(0), imm16)); } void MipsAssembler::Bc1f(uint16_t imm16) { @@ -587,7 +792,7 @@ void MipsAssembler::Bc1f(uint16_t imm16) { void MipsAssembler::Bc1f(int cc, uint16_t imm16) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16); + DsFsmInstrNop(EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>(cc << 2), imm16)); } void MipsAssembler::Bc1t(uint16_t imm16) { @@ -597,19 +802,45 @@ void MipsAssembler::Bc1t(uint16_t imm16) { void MipsAssembler::Bc1t(int cc, uint16_t imm16) 
{ CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitI(0x11, static_cast<Register>(0x8), static_cast<Register>((cc << 2) | 1), imm16); + DsFsmInstrNop(EmitI(0x11, + static_cast<Register>(0x8), + static_cast<Register>((cc << 2) | 1), + imm16)); } void MipsAssembler::J(uint32_t addr26) { - EmitI26(0x2, addr26); + DsFsmInstrNop(EmitI26(0x2, addr26)); } void MipsAssembler::Jal(uint32_t addr26) { - EmitI26(0x3, addr26); + DsFsmInstrNop(EmitI26(0x3, addr26)); } void MipsAssembler::Jalr(Register rd, Register rs) { - EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09); + uint32_t last_instruction = delay_slot_.instruction_; + bool exchange = (last_instruction != 0 && + (delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 && + ((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0); + if (exchange) { + // The last instruction cannot be used in a different delay slot, + // do not commit the label before it (if any). + DsFsmDropLabel(); + } + DsFsmInstrNop(EmitR(0, rs, static_cast<Register>(0), rd, 0, 0x09)); + if (exchange) { + // Exchange the last two instructions in the assembler buffer. 
+ size_t size = buffer_.Size(); + CHECK_GE(size, 2 * sizeof(uint32_t)); + size_t pos1 = size - 2 * sizeof(uint32_t); + size_t pos2 = size - sizeof(uint32_t); + uint32_t instr1 = buffer_.Load<uint32_t>(pos1); + uint32_t instr2 = buffer_.Load<uint32_t>(pos2); + CHECK_EQ(instr1, last_instruction); + buffer_.Store<uint32_t>(pos1, instr2); + buffer_.Store<uint32_t>(pos2, instr1); + } else if (reordering_) { + Nop(); + } } void MipsAssembler::Jalr(Register rs) { @@ -621,38 +852,38 @@ void MipsAssembler::Jr(Register rs) { } void MipsAssembler::Nal() { - EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0); + DsFsmInstrNop(EmitI(0x1, static_cast<Register>(0), static_cast<Register>(0x10), 0)); } void MipsAssembler::Auipc(Register rs, uint16_t imm16) { CHECK(IsR6()); - EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16); + DsFsmInstrNop(EmitI(0x3B, rs, static_cast<Register>(0x1E), imm16)); } void MipsAssembler::Addiupc(Register rs, uint32_t imm19) { CHECK(IsR6()); CHECK(IsUint<19>(imm19)) << imm19; - EmitI21(0x3B, rs, imm19); + DsFsmInstrNop(EmitI21(0x3B, rs, imm19)); } void MipsAssembler::Bc(uint32_t imm26) { CHECK(IsR6()); - EmitI26(0x32, imm26); + DsFsmInstrNop(EmitI26(0x32, imm26)); } void MipsAssembler::Balc(uint32_t imm26) { CHECK(IsR6()); - EmitI26(0x3A, imm26); + DsFsmInstrNop(EmitI26(0x3A, imm26)); } void MipsAssembler::Jic(Register rt, uint16_t imm16) { CHECK(IsR6()); - EmitI(0x36, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x36, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Jialc(Register rt, uint16_t imm16) { CHECK(IsR6()); - EmitI(0x3E, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x3E, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) { @@ -660,19 +891,19 @@ void MipsAssembler::Bltc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x17, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x17, 
rs, rt, imm16)); } void MipsAssembler::Bltzc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x17, rt, rt, imm16); + DsFsmInstrNop(EmitI(0x17, rt, rt, imm16)); } void MipsAssembler::Bgtzc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x17, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x17, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) { @@ -680,19 +911,19 @@ void MipsAssembler::Bgec(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x16, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x16, rs, rt, imm16)); } void MipsAssembler::Bgezc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x16, rt, rt, imm16); + DsFsmInstrNop(EmitI(0x16, rt, rt, imm16)); } void MipsAssembler::Blezc(Register rt, uint16_t imm16) { CHECK(IsR6()); CHECK_NE(rt, ZERO); - EmitI(0x16, static_cast<Register>(0), rt, imm16); + DsFsmInstrNop(EmitI(0x16, static_cast<Register>(0), rt, imm16)); } void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) { @@ -700,7 +931,7 @@ void MipsAssembler::Bltuc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x7, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x7, rs, rt, imm16)); } void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) { @@ -708,7 +939,7 @@ void MipsAssembler::Bgeuc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x6, rs, rt, imm16); + DsFsmInstrNop(EmitI(0x6, rs, rt, imm16)); } void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) { @@ -716,7 +947,7 @@ void MipsAssembler::Beqc(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x8, std::min(rs, rt), std::max(rs, rt), imm16); + DsFsmInstrNop(EmitI(0x8, std::min(rs, rt), std::max(rs, rt), 
imm16)); } void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) { @@ -724,29 +955,29 @@ void MipsAssembler::Bnec(Register rs, Register rt, uint16_t imm16) { CHECK_NE(rs, ZERO); CHECK_NE(rt, ZERO); CHECK_NE(rs, rt); - EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16); + DsFsmInstrNop(EmitI(0x18, std::min(rs, rt), std::max(rs, rt), imm16)); } void MipsAssembler::Beqzc(Register rs, uint32_t imm21) { CHECK(IsR6()); CHECK_NE(rs, ZERO); - EmitI21(0x36, rs, imm21); + DsFsmInstrNop(EmitI21(0x36, rs, imm21)); } void MipsAssembler::Bnezc(Register rs, uint32_t imm21) { CHECK(IsR6()); CHECK_NE(rs, ZERO); - EmitI21(0x3E, rs, imm21); + DsFsmInstrNop(EmitI21(0x3E, rs, imm21)); } void MipsAssembler::Bc1eqz(FRegister ft, uint16_t imm16) { CHECK(IsR6()); - EmitFI(0x11, 0x9, ft, imm16); + DsFsmInstrNop(EmitFI(0x11, 0x9, ft, imm16)); } void MipsAssembler::Bc1nez(FRegister ft, uint16_t imm16) { CHECK(IsR6()); - EmitFI(0x11, 0xD, ft, imm16); + DsFsmInstrNop(EmitFI(0x11, 0xD, ft, imm16)); } void MipsAssembler::EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16) { @@ -868,67 +1099,67 @@ void MipsAssembler::EmitBcondR6(BranchCondition cond, Register rs, Register rt, } void MipsAssembler::AddS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x0); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x0), fd, fs, ft); } void MipsAssembler::SubS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x1); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1), fd, fs, ft); } void MipsAssembler::MulS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x2); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x2), fd, fs, ft); } void MipsAssembler::DivS(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x10, ft, fs, fd, 0x3); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x3), fd, fs, ft); } void MipsAssembler::AddD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, 
ft, fs, fd, 0x0); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x0), fd, fs, ft); } void MipsAssembler::SubD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x1); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1), fd, fs, ft); } void MipsAssembler::MulD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x2); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x2), fd, fs, ft); } void MipsAssembler::DivD(FRegister fd, FRegister fs, FRegister ft) { - EmitFR(0x11, 0x11, ft, fs, fd, 0x3); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x3), fd, fs, ft); } void MipsAssembler::SqrtS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs); } void MipsAssembler::SqrtD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x4), fd, fs, fs); } void MipsAssembler::AbsS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs); } void MipsAssembler::AbsD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x5), fd, fs, fs); } void MipsAssembler::MovS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs); } void MipsAssembler::MovD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x6), fd, fs, fs); } void MipsAssembler::NegS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7); + 
DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs); } void MipsAssembler::NegD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x7), fd, fs, fs); } void MipsAssembler::CunS(FRegister fs, FRegister ft) { @@ -938,7 +1169,7 @@ void MipsAssembler::CunS(FRegister fs, FRegister ft) { void MipsAssembler::CunS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft); } void MipsAssembler::CeqS(FRegister fs, FRegister ft) { @@ -948,7 +1179,7 @@ void MipsAssembler::CeqS(FRegister fs, FRegister ft) { void MipsAssembler::CeqS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft); } void MipsAssembler::CueqS(FRegister fs, FRegister ft) { @@ -958,7 +1189,7 @@ void MipsAssembler::CueqS(FRegister fs, FRegister ft) { void MipsAssembler::CueqS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft); } void MipsAssembler::ColtS(FRegister fs, FRegister ft) { @@ -968,7 +1199,7 @@ void MipsAssembler::ColtS(FRegister fs, FRegister ft) { void MipsAssembler::ColtS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft); } void MipsAssembler::CultS(FRegister fs, FRegister ft) { @@ -978,7 +1209,7 @@ 
void MipsAssembler::CultS(FRegister fs, FRegister ft) { void MipsAssembler::CultS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft); } void MipsAssembler::ColeS(FRegister fs, FRegister ft) { @@ -988,7 +1219,7 @@ void MipsAssembler::ColeS(FRegister fs, FRegister ft) { void MipsAssembler::ColeS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft); } void MipsAssembler::CuleS(FRegister fs, FRegister ft) { @@ -998,7 +1229,7 @@ void MipsAssembler::CuleS(FRegister fs, FRegister ft) { void MipsAssembler::CuleS(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37); + DsFsmInstrCff(EmitFR(0x11, 0x10, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, fs, ft); } void MipsAssembler::CunD(FRegister fs, FRegister ft) { @@ -1008,7 +1239,7 @@ void MipsAssembler::CunD(FRegister fs, FRegister ft) { void MipsAssembler::CunD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x31), cc, fs, ft); } void MipsAssembler::CeqD(FRegister fs, FRegister ft) { @@ -1018,7 +1249,7 @@ void MipsAssembler::CeqD(FRegister fs, FRegister ft) { void MipsAssembler::CeqD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x32), cc, fs, ft); } void MipsAssembler::CueqD(FRegister fs, 
FRegister ft) { @@ -1028,7 +1259,7 @@ void MipsAssembler::CueqD(FRegister fs, FRegister ft) { void MipsAssembler::CueqD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x33), cc, fs, ft); } void MipsAssembler::ColtD(FRegister fs, FRegister ft) { @@ -1038,7 +1269,7 @@ void MipsAssembler::ColtD(FRegister fs, FRegister ft) { void MipsAssembler::ColtD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x34), cc, fs, ft); } void MipsAssembler::CultD(FRegister fs, FRegister ft) { @@ -1048,7 +1279,7 @@ void MipsAssembler::CultD(FRegister fs, FRegister ft) { void MipsAssembler::CultD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x35), cc, fs, ft); } void MipsAssembler::ColeD(FRegister fs, FRegister ft) { @@ -1058,7 +1289,7 @@ void MipsAssembler::ColeD(FRegister fs, FRegister ft) { void MipsAssembler::ColeD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x36), cc, fs, ft); } void MipsAssembler::CuleD(FRegister fs, FRegister ft) { @@ -1068,247 +1299,261 @@ void MipsAssembler::CuleD(FRegister fs, FRegister ft) { void MipsAssembler::CuleD(int cc, FRegister fs, FRegister ft) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37); + DsFsmInstrCff(EmitFR(0x11, 0x11, ft, fs, static_cast<FRegister>(cc << 2), 0x37), cc, 
fs, ft); } void MipsAssembler::CmpUnS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x01); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x01), fd, fs, ft); } void MipsAssembler::CmpEqS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x02); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x02), fd, fs, ft); } void MipsAssembler::CmpUeqS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x03); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x03), fd, fs, ft); } void MipsAssembler::CmpLtS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x04); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x04), fd, fs, ft); } void MipsAssembler::CmpUltS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x05); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x05), fd, fs, ft); } void MipsAssembler::CmpLeS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x06); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x06), fd, fs, ft); } void MipsAssembler::CmpUleS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x07); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x07), fd, fs, ft); } void MipsAssembler::CmpOrS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x11); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x11), fd, fs, ft); } void MipsAssembler::CmpUneS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x12); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x12), fd, fs, ft); } void MipsAssembler::CmpNeS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x14, ft, fs, fd, 0x13); + DsFsmInstrFff(EmitFR(0x11, 0x14, ft, fs, fd, 0x13), fd, fs, ft); } void 
MipsAssembler::CmpUnD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x01); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x01), fd, fs, ft); } void MipsAssembler::CmpEqD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x02); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x02), fd, fs, ft); } void MipsAssembler::CmpUeqD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x03); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x03), fd, fs, ft); } void MipsAssembler::CmpLtD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x04); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x04), fd, fs, ft); } void MipsAssembler::CmpUltD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x05); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x05), fd, fs, ft); } void MipsAssembler::CmpLeD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x06); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x06), fd, fs, ft); } void MipsAssembler::CmpUleD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x07); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x07), fd, fs, ft); } void MipsAssembler::CmpOrD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x11); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x11), fd, fs, ft); } void MipsAssembler::CmpUneD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x12); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x12), fd, fs, ft); } void MipsAssembler::CmpNeD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x15, ft, fs, fd, 0x13); + DsFsmInstrFff(EmitFR(0x11, 0x15, ft, fs, fd, 0x13), fd, fs, ft); } void MipsAssembler::Movf(Register rd, Register rs, 
int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01); + DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>(cc << 2), rd, 0, 0x01), rd, rs, cc); } void MipsAssembler::Movt(Register rd, Register rs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01); + DsFsmInstrRrrc(EmitR(0, rs, static_cast<Register>((cc << 2) | 1), rd, 0, 0x01), rd, rs, cc); } void MipsAssembler::MovfS(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc); } void MipsAssembler::MovfD(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>(cc << 2), fs, fd, 0x11), fd, fs, cc); } void MipsAssembler::MovtS(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x10, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11), + fd, + fs, + cc); } void MipsAssembler::MovtD(FRegister fd, FRegister fs, int cc) { CHECK(!IsR6()); CHECK(IsUint<3>(cc)) << cc; - EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11); + DsFsmInstrFffc(EmitFR(0x11, 0x11, static_cast<FRegister>((cc << 2) | 1), fs, fd, 0x11), + fd, + fs, + cc); } void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x10, ft, fs, fd, 0x10); + DsFsmInstrFfff(EmitFR(0x11, 0x10, ft, fs, fd, 0x10), fd, fs, ft); } void MipsAssembler::SelD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x11, ft, fs, fd, 0x10); + DsFsmInstrFfff(EmitFR(0x11, 0x11, ft, fs, fd, 0x10), fd, fs, 
ft); } void MipsAssembler::ClassS(FRegister fd, FRegister fs) { CHECK(IsR6()); - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs); } void MipsAssembler::ClassD(FRegister fd, FRegister fs) { CHECK(IsR6()); - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs); } void MipsAssembler::MinS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x10, ft, fs, fd, 0x1c); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1c), fd, fs, ft); } void MipsAssembler::MinD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x11, ft, fs, fd, 0x1c); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1c), fd, fs, ft); } void MipsAssembler::MaxS(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x10, ft, fs, fd, 0x1e); + DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x1e), fd, fs, ft); } void MipsAssembler::MaxD(FRegister fd, FRegister fs, FRegister ft) { CHECK(IsR6()); - EmitFR(0x11, 0x11, ft, fs, fd, 0x1e); + DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x1e), fd, fs, ft); } void MipsAssembler::TruncLS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs); } void MipsAssembler::TruncLD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x09), fd, fs, fs); } void MipsAssembler::TruncWS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs); } void MipsAssembler::TruncWD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D); + 
DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x0D), fd, fs, fs); } void MipsAssembler::Cvtsw(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20); + DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs); } void MipsAssembler::Cvtdw(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21); + DsFsmInstrFff(EmitFR(0x11, 0x14, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs); } void MipsAssembler::Cvtsd(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs); } void MipsAssembler::Cvtds(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs); } void MipsAssembler::Cvtsl(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20); + DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x20), fd, fs, fs); } void MipsAssembler::Cvtdl(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21); + DsFsmInstrFff(EmitFR(0x11, 0x15, static_cast<FRegister>(0), fs, fd, 0x21), fd, fs, fs); } void MipsAssembler::FloorWS(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf); + DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs); } void MipsAssembler::FloorWD(FRegister fd, FRegister fs) { - EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf); + DsFsmInstrFff(EmitFR(0x11, 0x11, static_cast<FRegister>(0), fs, fd, 0xf), fd, fs, fs); } void MipsAssembler::Mfc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrRf(EmitFR(0x11, 0x00, static_cast<FRegister>(rt), fs, 
static_cast<FRegister>(0), 0x0), + rt, + fs); } void MipsAssembler::Mtc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrFr(EmitFR(0x11, 0x04, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + fs, + rt); } void MipsAssembler::Mfhc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrRf(EmitFR(0x11, 0x03, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + rt, + fs); } void MipsAssembler::Mthc1(Register rt, FRegister fs) { - EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0); + DsFsmInstrFr(EmitFR(0x11, 0x07, static_cast<FRegister>(rt), fs, static_cast<FRegister>(0), 0x0), + fs, + rt); } void MipsAssembler::MoveFromFpuHigh(Register rt, FRegister fs) { @@ -1330,28 +1575,33 @@ void MipsAssembler::MoveToFpuHigh(Register rt, FRegister fs) { } void MipsAssembler::Lwc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x31, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFr(EmitI(0x31, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Ldc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x35, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFr(EmitI(0x35, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Swc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x39, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFR(EmitI(0x39, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Sdc1(FRegister ft, Register rs, uint16_t imm16) { - EmitI(0x3d, rs, static_cast<Register>(ft), imm16); + DsFsmInstrFR(EmitI(0x3d, rs, static_cast<Register>(ft), imm16), ft, rs); } void MipsAssembler::Break() { - EmitR(0, static_cast<Register>(0), static_cast<Register>(0), - static_cast<Register>(0), 0, 0xD); + DsFsmInstrNop(EmitR(0, ZERO, ZERO, ZERO, 0, 0xD)); } void MipsAssembler::Nop() { - EmitR(0x0, 
static_cast<Register>(0), static_cast<Register>(0), static_cast<Register>(0), 0, 0x0); + DsFsmInstrNop(EmitR(0x0, ZERO, ZERO, ZERO, 0, 0x0)); +} + +void MipsAssembler::NopIfNoReordering() { + if (!reordering_) { + Nop(); + } } void MipsAssembler::Move(Register rd, Register rs) { @@ -1377,9 +1627,11 @@ void MipsAssembler::Pop(Register rd) { } void MipsAssembler::PopAndReturn(Register rd, Register rt) { + bool reordering = SetReorder(false); Lw(rd, SP, 0); Jr(rt); - DecreaseFrameSize(kMipsWordSize); + DecreaseFrameSize(kMipsWordSize); // Single instruction in delay slot. + SetReorder(reordering); } void MipsAssembler::LoadConst32(Register rd, int32_t value) { @@ -1550,7 +1802,8 @@ MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bo target_(target), lhs_reg_(0), rhs_reg_(0), - condition_(kUncond) { + condition_(kUncond), + delayed_instruction_(kUnfilledDelaySlot) { InitializeType(is_call, /* is_literal */ false, is_r6); } @@ -1565,7 +1818,8 @@ MipsAssembler::Branch::Branch(bool is_r6, target_(target), lhs_reg_(lhs_reg), rhs_reg_(rhs_reg), - condition_(condition) { + condition_(condition), + delayed_instruction_(kUnfilledDelaySlot) { CHECK_NE(condition, kUncond); switch (condition) { case kCondLT: @@ -1617,7 +1871,8 @@ MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, Register dest_reg, target_(kUnresolved), lhs_reg_(dest_reg), rhs_reg_(base_reg), - condition_(kUncond) { + condition_(kUncond), + delayed_instruction_(kUnfilledDelaySlot) { CHECK_NE(dest_reg, ZERO); if (is_r6) { CHECK_EQ(base_reg, ZERO); @@ -1696,12 +1951,38 @@ uint32_t MipsAssembler::Branch::GetOldLocation() const { return old_location_; } +uint32_t MipsAssembler::Branch::GetPrecedingInstructionLength(Type type) const { + // Short branches with delay slots always consist of two instructions, the branch + // and the delay slot, irrespective of whether the delay slot is filled with a + // useful instruction or not. 
+ // Long composite branches may have a length longer by one instruction than + // specified in branch_info_[].length. This happens when an instruction is taken + // to fill the short branch delay slot, but the branch eventually becomes long + // and formally has no delay slot to fill. This instruction is placed at the + // beginning of the long composite branch and this needs to be accounted for in + // the branch length and the location of the offset encoded in the branch. + switch (type) { + case kLongUncondBranch: + case kLongCondBranch: + case kLongCall: + case kR6LongCondBranch: + return (delayed_instruction_ != kUnfilledDelaySlot && + delayed_instruction_ != kUnfillableDelaySlot) ? 1 : 0; + default: + return 0; + } +} + +uint32_t MipsAssembler::Branch::GetPrecedingInstructionSize(Type type) const { + return GetPrecedingInstructionLength(type) * sizeof(uint32_t); +} + uint32_t MipsAssembler::Branch::GetLength() const { - return branch_info_[type_].length; + return GetPrecedingInstructionLength(type_) + branch_info_[type_].length; } uint32_t MipsAssembler::Branch::GetOldLength() const { - return branch_info_[old_type_].length; + return GetPrecedingInstructionLength(old_type_) + branch_info_[old_type_].length; } uint32_t MipsAssembler::Branch::GetSize() const { @@ -1883,7 +2164,8 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_ } uint32_t MipsAssembler::Branch::GetOffsetLocation() const { - return location_ + branch_info_[type_].instr_offset * sizeof(uint32_t); + return location_ + GetPrecedingInstructionSize(type_) + + branch_info_[type_].instr_offset * sizeof(uint32_t); } uint32_t MipsAssembler::GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const { @@ -1925,6 +2207,9 @@ void MipsAssembler::Bind(MipsLabel* label) { CHECK(!label->IsBound()); uint32_t bound_pc = buffer_.Size(); + // Make the delay slot FSM aware of the new label. 
+ DsFsmLabel(); + // Walk the list of branches referring to and preceding this label. // Store the previously unknown target addresses in them. while (label->IsLinked()) { @@ -1997,11 +2282,15 @@ uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const { void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { uint32_t length = branches_.back().GetLength(); + // Commit the last branch target label (if any). + DsFsmCommitLabel(); if (!label->IsBound()) { // Branch forward (to a following label), distance is unknown. // The first branch forward will contain 0, serving as the terminator of // the list of forward-reaching branches. Emit(label->position_); + // Nothing for the delay slot (yet). + DsFsmInstrNop(0); length--; // Now make the label object point to this branch // (this forms a linked list of branches preceding this label). @@ -2014,9 +2303,139 @@ void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { } } +bool MipsAssembler::Branch::CanHaveDelayedInstruction(const DelaySlot& delay_slot) const { + if (delay_slot.instruction_ == 0) { + // NOP or no instruction for the delay slot. + return false; + } + switch (type_) { + // R2 unconditional branches. + case kUncondBranch: + case kLongUncondBranch: + // There are no register interdependencies. + return true; + + // R2 calls. + case kCall: + case kLongCall: + // Instructions depending on or modifying RA should not be moved into delay slots + // of branches modifying RA. + return ((delay_slot.gpr_ins_mask_ | delay_slot.gpr_outs_mask_) & (1u << RA)) == 0; + + // R2 conditional branches. + case kCondBranch: + case kLongCondBranch: + switch (condition_) { + // Branches with one GPR source. + case kCondLTZ: + case kCondGEZ: + case kCondLEZ: + case kCondGTZ: + case kCondEQZ: + case kCondNEZ: + return (delay_slot.gpr_outs_mask_ & (1u << lhs_reg_)) == 0; + + // Branches with two GPR sources. 
+ case kCondEQ: + case kCondNE: + return (delay_slot.gpr_outs_mask_ & ((1u << lhs_reg_) | (1u << rhs_reg_))) == 0; + + // Branches with one FPU condition code source. + case kCondF: + case kCondT: + return (delay_slot.cc_outs_mask_ & (1u << lhs_reg_)) == 0; + + default: + // We don't support synthetic R2 branches (preceded with slt[u]) at this level + // (R2 doesn't have branches to compare 2 registers using <, <=, >=, >). + LOG(FATAL) << "Unexpected branch condition " << condition_; + UNREACHABLE(); + } + + // R6 unconditional branches. + case kR6UncondBranch: + case kR6LongUncondBranch: + // R6 calls. + case kR6Call: + case kR6LongCall: + // There are no delay slots. + return false; + + // R6 conditional branches. + case kR6CondBranch: + case kR6LongCondBranch: + switch (condition_) { + // Branches with one FPU register source. + case kCondF: + case kCondT: + return (delay_slot.fpr_outs_mask_ & (1u << lhs_reg_)) == 0; + // Others have a forbidden slot instead of a delay slot. + default: + return false; + } + + // Literals. + default: + LOG(FATAL) << "Unexpected branch type " << type_; + UNREACHABLE(); + } +} + +uint32_t MipsAssembler::Branch::GetDelayedInstruction() const { + return delayed_instruction_; +} + +void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) { + CHECK_NE(instruction, kUnfilledDelaySlot); + CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot); + delayed_instruction_ = instruction; +} + +void MipsAssembler::Branch::DecrementLocations() { + // We first create a branch object, which gets its type and locations initialized, + // and then we check if the branch can actually have the preceding instruction moved + // into its delay slot. If it can, the branch locations need to be decremented. + // + // We could make the check before creating the branch object and avoid the location + // adjustment, but the check is cleaner when performed on an initialized branch + // object. 
+ // + // If the branch is backwards (to a previously bound label), reducing the locations + // cannot cause a short branch to exceed its offset range because the offset reduces. + // And this is not at all a problem for a long branch backwards. + // + // If the branch is forward (not linked to any label yet), reducing the locations + // is harmless. The branch will be promoted to long if needed when the target is known. + CHECK_EQ(location_, old_location_); + CHECK_GE(old_location_, sizeof(uint32_t)); + old_location_ -= sizeof(uint32_t); + location_ = old_location_; +} + +void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) { + if (branch.CanHaveDelayedInstruction(delay_slot_)) { + // The last instruction cannot be used in a different delay slot, + // do not commit the label before it (if any). + DsFsmDropLabel(); + // Remove the last emitted instruction. + size_t size = buffer_.Size(); + CHECK_GE(size, sizeof(uint32_t)); + size -= sizeof(uint32_t); + CHECK_EQ(buffer_.Load<uint32_t>(size), delay_slot_.instruction_); + buffer_.Resize(size); + // Attach it to the branch and adjust the branch locations. + branch.DecrementLocations(); + branch.SetDelayedInstruction(delay_slot_.instruction_); + } else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) { + // If reordering is disabled, prevent absorption of the target instruction. + branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot); + } +} + void MipsAssembler::Buncond(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false); + MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } @@ -2027,12 +2446,14 @@ void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register } uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs); + MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } void MipsAssembler::Call(MipsLabel* label) { uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true); + MoveInstructionToDelaySlot(branches_.back()); FinalizeLabeledBranch(label); } @@ -2104,6 +2525,7 @@ void MipsAssembler::PromoteBranches() { uint32_t end = old_size; for (size_t i = branch_count; i > 0; ) { Branch& branch = branches_[--i]; + CHECK_GE(end, branch.GetOldEndLocation()); uint32_t size = end - branch.GetOldEndLocation(); buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); end = branch.GetOldLocation(); @@ -2148,26 +2570,53 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { BranchCondition condition = branch->GetCondition(); Register lhs = branch->GetLeftRegister(); Register rhs = branch->GetRightRegister(); + uint32_t delayed_instruction = branch->GetDelayedInstruction(); switch (branch->GetType()) { // R2 short branches. case Branch::kUncondBranch: + if (delayed_instruction == Branch::kUnfillableDelaySlot) { + // The branch was created when reordering was disabled, do not absorb the target + // instruction. + delayed_instruction = 0; // NOP. + } else if (delayed_instruction == Branch::kUnfilledDelaySlot) { + // Try to absorb the target instruction into the delay slot. + delayed_instruction = 0; // NOP. + // Incrementing the signed 16-bit offset past the target instruction must not + // cause overflow into the negative subrange, check for the max offset. 
+ if (offset != 0x7FFF) { + uint32_t target = branch->GetTarget(); + if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) { + delayed_instruction = buffer_.Load<uint32_t>(target); + offset++; + } + } + } CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); B(offset); - Nop(); // TODO: improve by filling the delay slot. + Emit(delayed_instruction); break; case Branch::kCondBranch: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction == Branch::kUnfilledDelaySlot) { + delayed_instruction = 0; // NOP. + } CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); EmitBcondR2(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the delay slot. + Emit(delayed_instruction); break; case Branch::kCall: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction == Branch::kUnfilledDelaySlot) { + delayed_instruction = 0; // NOP. + } CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Bal(offset); - Nop(); // TODO: improve by filling the delay slot. + Emit(delayed_instruction); break; // R2 near literal. case Branch::kLiteral: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lw(lhs, rhs, offset); break; @@ -2192,6 +2641,12 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { // For now simply use the stack for RA. This should be OK since for the // vast majority of code a short PC-relative branch is sufficient. // TODO: can this be improved? + // TODO: consider generation of a shorter sequence when we know that RA + // is explicitly preserved by the method entry/exit code. 
+ if (delayed_instruction != Branch::kUnfilledDelaySlot && + delayed_instruction != Branch::kUnfillableDelaySlot) { + Emit(delayed_instruction); + } Push(RA); Nal(); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2204,6 +2659,10 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { break; case Branch::kLongCondBranch: // The comment on case 'Branch::kLongUncondBranch' applies here as well. + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } // Note: the opposite condition branch encodes 8 as the distance, which is equal to the // number of instructions skipped: // (PUSH(IncreaseFrameSize(ADDIU) + SW) + NAL + LUI + ORI + ADDU + LW + JR). @@ -2219,6 +2678,10 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { DecreaseFrameSize(kMipsWordSize); break; case Branch::kLongCall: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } Nal(); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); @@ -2230,6 +2693,7 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { // R2 far literal. case Branch::kFarLiteral: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); offset += (offset & 0x8000) << 1; // Account for sign extension in lw. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lui(AT, High16Bits(offset)); @@ -2239,33 +2703,48 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { // R6 short branches. 
case Branch::kR6UncondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Bc(offset); break; case Branch::kR6CondBranch: CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); EmitBcondR6(condition, lhs, rhs, offset); - Nop(); // TODO: improve by filling the forbidden/delay slot. + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } else { + // TODO: improve by filling the forbidden slot (IFF this is + // a forbidden and not a delay slot). + Nop(); + } break; case Branch::kR6Call: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Balc(offset); break; // R6 near literal. case Branch::kR6Literal: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Lwpc(lhs, offset); break; // R6 long branches. case Branch::kR6LongUncondBranch: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); offset += (offset & 0x8000) << 1; // Account for sign extension in jic. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCondBranch: + DCHECK_NE(delayed_instruction, Branch::kUnfillableDelaySlot); + if (delayed_instruction != Branch::kUnfilledDelaySlot) { + Emit(delayed_instruction); + } EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2); offset += (offset & 0x8000) << 1; // Account for sign extension in jic. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); @@ -2273,6 +2752,7 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { Jic(AT, Low16Bits(offset)); break; case Branch::kR6LongCall: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); offset += (offset & 0x8000) << 1; // Account for sign extension in jialc. 
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); @@ -2281,6 +2761,7 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) { // R6 far literal. case Branch::kR6FarLiteral: + DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot); offset += (offset & 0x8000) << 1; // Account for sign extension in lw. CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); Auipc(AT, High16Bits(offset)); @@ -2331,12 +2812,60 @@ void MipsAssembler::Bgtz(Register rt, MipsLabel* label) { Bcond(label, kCondGTZ, rt); } +bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const { + // If the instruction modifies AT, `rs` or `rt`, it can't be exchanged with the slt[u] + // instruction because either slt[u] depends on `rs` or `rt` or the following + // conditional branch depends on AT set by slt[u]. + // Likewise, if the instruction depends on AT, it can't be exchanged with slt[u] + // because slt[u] changes AT. + return (delay_slot_.instruction_ != 0 && + (delay_slot_.gpr_outs_mask_ & ((1u << AT) | (1u << rs) | (1u << rt))) == 0 && + (delay_slot_.gpr_ins_mask_ & (1u << AT)) == 0); +} + +void MipsAssembler::ExchangeWithSlt(const DelaySlot& forwarded_slot) { + // Exchange the last two instructions in the assembler buffer. + size_t size = buffer_.Size(); + CHECK_GE(size, 2 * sizeof(uint32_t)); + size_t pos1 = size - 2 * sizeof(uint32_t); + size_t pos2 = size - sizeof(uint32_t); + uint32_t instr1 = buffer_.Load<uint32_t>(pos1); + uint32_t instr2 = buffer_.Load<uint32_t>(pos2); + CHECK_EQ(instr1, forwarded_slot.instruction_); + CHECK_EQ(instr2, delay_slot_.instruction_); + buffer_.Store<uint32_t>(pos1, instr2); + buffer_.Store<uint32_t>(pos2, instr1); + // Set the current delay slot information to that of the last instruction + // in the buffer. 
+ delay_slot_ = forwarded_slot; +} + +void MipsAssembler::GenerateSltForCondBranch(bool unsigned_slt, Register rs, Register rt) { + // If possible, exchange the slt[u] instruction with the preceding instruction, + // so it can fill the delay slot. + DelaySlot forwarded_slot = delay_slot_; + bool exchange = CanExchangeWithSlt(rs, rt); + if (exchange) { + // The last instruction cannot be used in a different delay slot, + // do not commit the label before it (if any). + DsFsmDropLabel(); + } + if (unsigned_slt) { + Sltu(AT, rs, rt); + } else { + Slt(AT, rs, rt); + } + if (exchange) { + ExchangeWithSlt(forwarded_slot); + } +} + void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) { if (IsR6()) { Bcond(label, kCondLT, rs, rt); } else if (!Branch::IsNop(kCondLT, rs, rt)) { // Synthesize the instruction (not available on R2). - Slt(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); Bnez(AT, label); } } @@ -2348,7 +2877,7 @@ void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) { B(label); } else { // Synthesize the instruction (not available on R2). - Slt(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt); Beqz(AT, label); } } @@ -2358,7 +2887,7 @@ void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) { Bcond(label, kCondLTU, rs, rt); } else if (!Branch::IsNop(kCondLTU, rs, rt)) { // Synthesize the instruction (not available on R2). - Sltu(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); Bnez(AT, label); } } @@ -2370,7 +2899,7 @@ void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) { B(label); } else { // Synthesize the instruction (not available on R2). - Sltu(AT, rs, rt); + GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt); Beqz(AT, label); } } @@ -2613,12 +3142,22 @@ void MipsAssembler::RemoveFrame(size_t frame_size, LoadFromOffset(kLoadWord, RA, SP, stack_offset); cfi_.Restore(DWARFReg(RA)); - // Decrease frame to required size. 
- DecreaseFrameSize(frame_size); - - // Then jump to the return address. - Jr(RA); - Nop(); + // Adjust the stack pointer in the delay slot if doing so doesn't break CFI. + bool exchange = IsInt<16>(static_cast<int32_t>(frame_size)); + bool reordering = SetReorder(false); + if (exchange) { + // Jump to the return address. + Jr(RA); + // Decrease frame to required size. + DecreaseFrameSize(frame_size); // Single instruction in delay slot. + } else { + // Decrease frame to required size. + DecreaseFrameSize(frame_size); + // Jump to the return address. + Jr(RA); + Nop(); // In delay slot. + } + SetReorder(reordering); // The CFI should be restored for any code that follows the exit block. cfi_.RestoreState(); @@ -2963,7 +3502,7 @@ void MipsAssembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister m LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), base.AsCoreRegister(), offset.Int32Value()); Jalr(scratch.AsCoreRegister()); - Nop(); + NopIfNoReordering(); // TODO: place reference map on call. } @@ -2975,7 +3514,7 @@ void MipsAssembler::Call(FrameOffset base, Offset offset, ManagedRegister mscrat LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), scratch.AsCoreRegister(), offset.Int32Value()); Jalr(scratch.AsCoreRegister()); - Nop(); + NopIfNoReordering(); // TODO: place reference map on call. } @@ -2998,9 +3537,6 @@ void MipsAssembler::ExceptionPoll(ManagedRegister mscratch, size_t stack_adjust) exception_blocks_.emplace_back(scratch, stack_adjust); LoadFromOffset(kLoadWord, scratch.AsCoreRegister(), S1, Thread::ExceptionOffset<kMipsPointerSize>().Int32Value()); - // TODO: on MIPS32R6 prefer Bnezc(scratch.AsCoreRegister(), slow.Entry()); - // as the NAL instruction (occurring in long R2 branches) may become deprecated. - // For now use common for R2 and R6 instructions as this code must execute on both. 
Bnez(scratch.AsCoreRegister(), exception_blocks_.back().Entry()); } @@ -3017,7 +3553,7 @@ void MipsAssembler::EmitExceptionPoll(MipsExceptionSlowPath* exception) { LoadFromOffset(kLoadWord, T9, S1, QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, pDeliverException).Int32Value()); Jr(T9); - Nop(); + NopIfNoReordering(); // Call never returns. Break(); diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 434ca679d5..d50c439418 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -154,6 +154,9 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi : Assembler(arena), overwriting_(false), overwrite_location_(0), + reordering_(true), + ds_fsm_state_(kExpectingLabel), + ds_fsm_target_pc_(0), literals_(arena->Adapter(kArenaAllocAssembler)), last_position_adjustment_(0), last_old_position_(0), @@ -163,6 +166,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi } size_t CodeSize() const OVERRIDE { return Assembler::CodeSize(); } + size_t CodePosition() OVERRIDE; DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } virtual ~MipsAssembler() { @@ -256,6 +260,11 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Slti(Register rt, Register rs, uint16_t imm16); void Sltiu(Register rt, Register rs, uint16_t imm16); + // Branches and jumps to immediate offsets/addresses do not take care of their + // delay/forbidden slots and generally should not be used directly. This applies + // to the following R2 and R6 branch/jump instructions with imm16, imm21, addr26 + // offsets/addresses. + // Use branches/jumps to labels instead. 
void B(uint16_t imm16); void Bal(uint16_t imm16); void Beq(Register rs, Register rt, uint16_t imm16); @@ -272,9 +281,13 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Bc1t(int cc, uint16_t imm16); // R2 void J(uint32_t addr26); void Jal(uint32_t addr26); + // Jalr() and Jr() fill their delay slots when reordering is enabled. + // When reordering is disabled, the delay slots must be filled manually. + // You may use NopIfNoReordering() to fill them when reordering is disabled. void Jalr(Register rd, Register rs); void Jalr(Register rs); void Jr(Register rs); + // Nal() does not fill its delay slot. It must be filled manually. void Nal(); void Auipc(Register rs, uint16_t imm16); // R6 void Addiupc(Register rs, uint32_t imm19); // R6 @@ -403,6 +416,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Break(); void Nop(); + void NopIfNoReordering(); void Move(Register rd, Register rs); void Clear(Register rd); void Not(Register rd, Register rs); @@ -414,7 +428,8 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void LoadSConst32(FRegister r, int32_t value, Register temp); void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT); - // These will generate R2 branches or R6 branches as appropriate. + // These will generate R2 branches or R6 branches as appropriate and take care of + // the delay/forbidden slots. void Bind(MipsLabel* label); void B(MipsLabel* label); void Bal(MipsLabel* label); @@ -868,7 +883,51 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi }; friend std::ostream& operator<<(std::ostream& os, const BranchCondition& rhs); + // Enables or disables instruction reordering (IOW, automatic filling of delay slots) + // similarly to ".set reorder" / ".set noreorder" in traditional MIPS assembly. + // Returns the last state, which may be useful for temporary enabling/disabling of + // reordering. 
+ bool SetReorder(bool enable); + private: + // Description of the last instruction in terms of input and output registers. + // Used to make the decision of moving the instruction into a delay slot. + struct DelaySlot { + DelaySlot(); + // Encoded instruction that may be used to fill the delay slot or 0 + // (0 conveniently represents NOP). + uint32_t instruction_; + // Mask of output GPRs for the instruction. + uint32_t gpr_outs_mask_; + // Mask of input GPRs for the instruction. + uint32_t gpr_ins_mask_; + // Mask of output FPRs for the instruction. + uint32_t fpr_outs_mask_; + // Mask of input FPRs for the instruction. + uint32_t fpr_ins_mask_; + // Mask of output FPU condition code flags for the instruction. + uint32_t cc_outs_mask_; + // Mask of input FPU condition code flags for the instruction. + uint32_t cc_ins_mask_; + // Branches never operate on the LO and HI registers, hence there's + // no mask for LO and HI. + }; + + // Delay slot finite state machine's (DS FSM's) state. The FSM state is updated + // upon every new instruction and label generated. The FSM detects instructions + // suitable for delay slots and immediately preceded with labels. These are target + // instructions for branches. If an unconditional R2 branch does not get its delay + // slot filled with the immediately preceding instruction, it may instead get the + // slot filled with the target instruction (the branch will need its offset + // incremented past the target instruction). We call this "absorption". The FSM + // records PCs of the target instructions suitable for this optimization. 
+ enum DsFsmState { + kExpectingLabel, + kExpectingInstruction, + kExpectingCommit + }; + friend std::ostream& operator<<(std::ostream& os, const DsFsmState& rhs); + class Branch { public: enum Type { @@ -910,6 +969,17 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ static constexpr int32_t kMaxBranchLength = 32; static constexpr int32_t kMaxBranchSize = kMaxBranchLength * sizeof(uint32_t); + // The following two instruction encodings can never legally occur in branch delay + // slots and are used as markers. + // + // kUnfilledDelaySlot means that the branch may use either the preceding or the target + // instruction to fill its delay slot (the latter is only possible with unconditional + // R2 branches and is termed here as "absorption"). + static constexpr uint32_t kUnfilledDelaySlot = 0x10000000; // beq zero, zero, 0. + // kUnfillableDelaySlot means that the branch cannot use an instruction (other than NOP) + // to fill its delay slot. This is only used for unconditional R2 branches to prevent + // absorption of the target instruction when reordering is disabled. + static constexpr uint32_t kUnfillableDelaySlot = 0x13FF0000; // beq ra, ra, 0. struct BranchInfo { // Branch length as a number of 4-byte-long instructions. @@ -958,6 +1028,8 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi uint32_t GetTarget() const; uint32_t GetLocation() const; uint32_t GetOldLocation() const; + uint32_t GetPrecedingInstructionLength(Type type) const; + uint32_t GetPrecedingInstructionSize(Type type) const; uint32_t GetLength() const; uint32_t GetOldLength() const; uint32_t GetSize() const; @@ -967,6 +1039,12 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi bool IsLong() const; bool IsResolved() const; + // Various helpers for branch delay slot management. 
+ bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const; + void SetDelayedInstruction(uint32_t instruction); + uint32_t GetDelayedInstruction() const; + void DecrementLocations(); + // Returns the bit size of the signed offset that the branch instruction can handle. OffsetBits GetOffsetSize() const; @@ -1031,27 +1109,34 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // Helper for the above. void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type); - uint32_t old_location_; // Offset into assembler buffer in bytes. - uint32_t location_; // Offset into assembler buffer in bytes. - uint32_t target_; // Offset into assembler buffer in bytes. + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + uint32_t lhs_reg_; // Left-hand side register in conditional branches or + // FPU condition code. Destination register in literals. + uint32_t rhs_reg_; // Right-hand side register in conditional branches. + // Base register in literals (ZERO on R6). + BranchCondition condition_; // Condition for conditional branches. - uint32_t lhs_reg_; // Left-hand side register in conditional branches or - // indirect call register. - uint32_t rhs_reg_; // Right-hand side register in conditional branches. - BranchCondition condition_; // Condition for conditional branches. + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. - Type type_; // Current type of the branch. - Type old_type_; // Initial type of the branch. + uint32_t delayed_instruction_; // Encoded instruction for the delay slot or + // kUnfilledDelaySlot if none but fillable or + // kUnfillableDelaySlot if none and unfillable + // (the latter is only used for unconditional R2 + // branches). 
}; friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs); friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs); - void EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct); - void EmitI(int opcode, Register rs, Register rt, uint16_t imm); - void EmitI21(int opcode, Register rs, uint32_t imm21); - void EmitI26(int opcode, uint32_t imm26); - void EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct); - void EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm); + uint32_t EmitR(int opcode, Register rs, Register rt, Register rd, int shamt, int funct); + uint32_t EmitI(int opcode, Register rs, Register rt, uint16_t imm); + uint32_t EmitI21(int opcode, Register rs, uint32_t imm21); + uint32_t EmitI26(int opcode, uint32_t imm26); + uint32_t EmitFR(int opcode, int fmt, FRegister ft, FRegister fs, FRegister fd, int funct); + uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm); void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16); void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21); @@ -1060,6 +1145,33 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi void Call(MipsLabel* label); void FinalizeLabeledBranch(MipsLabel* label); + // Various helpers for branch delay slot management. 
+ void DsFsmInstr(uint32_t instruction, + uint32_t gpr_outs_mask, + uint32_t gpr_ins_mask, + uint32_t fpr_outs_mask, + uint32_t fpr_ins_mask, + uint32_t cc_outs_mask, + uint32_t cc_ins_mask); + void DsFsmInstrNop(uint32_t instruction); + void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2); + void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3); + void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2); + void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3); + void DsFsmInstrRf(uint32_t instruction, Register out, FRegister in); + void DsFsmInstrFr(uint32_t instruction, FRegister out, Register in); + void DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2); + void DsFsmInstrCff(uint32_t instruction, int cc_out, FRegister in1, FRegister in2); + void DsFsmInstrRrrc(uint32_t instruction, Register in1_out, Register in2, int cc_in); + void DsFsmInstrFffc(uint32_t instruction, FRegister in1_out, FRegister in2, int cc_in); + void DsFsmLabel(); + void DsFsmCommitLabel(); + void DsFsmDropLabel(); + void MoveInstructionToDelaySlot(Branch& branch); + bool CanExchangeWithSlt(Register rs, Register rt) const; + void ExchangeWithSlt(const DelaySlot& forwarded_slot); + void GenerateSltForCondBranch(bool unsigned_slt, Register rs, Register rt); + Branch* GetBranch(uint32_t branch_id); const Branch* GetBranch(uint32_t branch_id) const; uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const; @@ -1100,6 +1212,17 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // The current overwrite location. uint32_t overwrite_location_; + // Whether instruction reordering (IOW, automatic filling of delay slots) is enabled. + bool reordering_; + // Information about the last instruction that may be used to fill a branch delay slot. + DelaySlot delay_slot_; + // Delay slot FSM state. 
+ DsFsmState ds_fsm_state_; + // PC of the current labeled target instruction. + uint32_t ds_fsm_target_pc_; + // PCs of labeled target instructions. + std::vector<uint32_t> ds_fsm_target_pcs_; + // Use std::deque<> for literal labels to allow insertions at the end // without invalidating pointers and references to existing elements. ArenaDeque<Literal> literals_; @@ -1109,7 +1232,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi // that PC (from NAL) points to. MipsLabel pc_rel_base_label_; - // Data for AdjustedPosition(), see the description there. + // Data for GetAdjustedPosition(), see the description there. uint32_t last_position_adjustment_; uint32_t last_old_position_; uint32_t last_branch_id_; diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc index 49ef272fb0..fabb0962fb 100644 --- a/compiler/utils/mips/assembler_mips32r6_test.cc +++ b/compiler/utils/mips/assembler_mips32r6_test.cc @@ -673,6 +673,144 @@ TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) { // BRANCHES // ////////////// +TEST_F(AssemblerMIPS32r6Test, ImpossibleReordering) { + mips::MipsLabel label; + __ SetReorder(true); + __ Bind(&label); + + __ CmpLtD(mips::F0, mips::F2, mips::F4); + __ Bc1nez(mips::F0, &label); // F0 dependency. + + __ MulD(mips::F10, mips::F2, mips::F4); + __ Bc1eqz(mips::F10, &label); // F10 dependency. 
+ + std::string expected = + ".set noreorder\n" + "1:\n" + + "cmp.lt.d $f0, $f2, $f4\n" + "bc1nez $f0, 1b\n" + "nop\n" + + "mul.d $f10, $f2, $f4\n" + "bc1eqz $f10, 1b\n" + "nop\n"; + DriverStr(expected, "ImpossibleReordering"); +} + +TEST_F(AssemblerMIPS32r6Test, Reordering) { + mips::MipsLabel label; + __ SetReorder(true); + __ Bind(&label); + + __ CmpLtD(mips::F0, mips::F2, mips::F4); + __ Bc1nez(mips::F2, &label); + + __ MulD(mips::F0, mips::F2, mips::F4); + __ Bc1eqz(mips::F4, &label); + + std::string expected = + ".set noreorder\n" + "1:\n" + + "bc1nez $f2, 1b\n" + "cmp.lt.d $f0, $f2, $f4\n" + + "bc1eqz $f4, 1b\n" + "mul.d $f0, $f2, $f4\n"; + DriverStr(expected, "Reordering"); +} + +TEST_F(AssemblerMIPS32r6Test, SetReorder) { + mips::MipsLabel label1, label2, label3, label4; + + __ SetReorder(true); + __ Bind(&label1); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1nez(mips::F0, &label1); + + __ SetReorder(false); + __ Bind(&label2); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1nez(mips::F0, &label2); + + __ SetReorder(true); + __ Bind(&label3); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1eqz(mips::F0, &label3); + + __ SetReorder(false); + __ Bind(&label4); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bc1eqz(mips::F0, &label4); + + std::string expected = + ".set noreorder\n" + "1:\n" + "bc1nez $f0, 1b\n" + "addu $t0, $t1, $t2\n" + + "2:\n" + "addu $t0, $t1, $t2\n" + "bc1nez $f0, 2b\n" + "nop\n" + + "3:\n" + "bc1eqz $f0, 3b\n" + "addu $t0, $t1, $t2\n" + + "4:\n" + "addu $t0, $t1, $t2\n" + "bc1eqz $f0, 4b\n" + "nop\n"; + DriverStr(expected, "SetReorder"); +} + +TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) { + mips::MipsLabel label; + __ SetReorder(true); + __ Subu(mips::T0, mips::T1, mips::T2); + __ Bc1nez(mips::F0, &label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for (uint32_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for 
(uint32_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Subu(mips::T0, mips::T1, mips::T2); + __ Bc1eqz(mips::F0, &label); + + uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic. + offset_forward <<= 2; + offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic. + + uint32_t offset_back = -(kAdduCount2 + 2); // 2: account for subu and bc1nez. + offset_back <<= 2; + offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic. + + std::ostringstream oss; + oss << + ".set noreorder\n" + "subu $t0, $t1, $t2\n" + "bc1eqz $f0, 1f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "1:\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + "2:\n" << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "subu $t0, $t1, $t2\n" + "bc1nez $f0, 3f\n" + "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "3:\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBeqc"); +} + // TODO: MipsAssembler::Addiupc // MipsAssembler::Bc // MipsAssembler::Jic diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc index 50a8dc202a..708bc3d50d 100644 --- a/compiler/utils/mips/assembler_mips_test.cc +++ b/compiler/utils/mips/assembler_mips_test.cc @@ -2009,14 +2009,17 @@ TEST_F(AssemblerMIPSTest, B) { } TEST_F(AssemblerMIPSTest, Beq) { + __ SetReorder(false); BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq"); } TEST_F(AssemblerMIPSTest, Bne) { + __ SetReorder(false); BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne"); } TEST_F(AssemblerMIPSTest, Beqz) { + __ SetReorder(false); mips::MipsLabel label; __ Beqz(mips::A0, &label); constexpr size_t kAdduCount1 = 63; @@ -2043,6 +2046,7 @@ TEST_F(AssemblerMIPSTest, Beqz) { } 
TEST_F(AssemblerMIPSTest, Bnez) { + __ SetReorder(false); mips::MipsLabel label; __ Bnez(mips::A0, &label); constexpr size_t kAdduCount1 = 63; @@ -2069,22 +2073,27 @@ TEST_F(AssemblerMIPSTest, Bnez) { } TEST_F(AssemblerMIPSTest, Bltz) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz"); } TEST_F(AssemblerMIPSTest, Bgez) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez"); } TEST_F(AssemblerMIPSTest, Blez) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez"); } TEST_F(AssemblerMIPSTest, Bgtz) { + __ SetReorder(false); BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz"); } TEST_F(AssemblerMIPSTest, Blt) { + __ SetReorder(false); mips::MipsLabel label; __ Blt(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -2113,6 +2122,7 @@ TEST_F(AssemblerMIPSTest, Blt) { } TEST_F(AssemblerMIPSTest, Bge) { + __ SetReorder(false); mips::MipsLabel label; __ Bge(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -2141,6 +2151,7 @@ TEST_F(AssemblerMIPSTest, Bge) { } TEST_F(AssemblerMIPSTest, Bltu) { + __ SetReorder(false); mips::MipsLabel label; __ Bltu(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -2169,6 +2180,7 @@ TEST_F(AssemblerMIPSTest, Bltu) { } TEST_F(AssemblerMIPSTest, Bgeu) { + __ SetReorder(false); mips::MipsLabel label; __ Bgeu(mips::A0, mips::A1, &label); constexpr size_t kAdduCount1 = 63; @@ -2197,6 +2209,7 @@ TEST_F(AssemblerMIPSTest, Bgeu) { } TEST_F(AssemblerMIPSTest, Bc1f) { + __ SetReorder(false); mips::MipsLabel label; __ Bc1f(0, &label); constexpr size_t kAdduCount1 = 63; @@ -2223,6 +2236,7 @@ TEST_F(AssemblerMIPSTest, Bc1f) { } TEST_F(AssemblerMIPSTest, Bc1t) { + __ SetReorder(false); mips::MipsLabel label; __ Bc1t(0, &label); constexpr size_t kAdduCount1 = 63; @@ -2331,6 +2345,410 @@ TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) { DriverStr(expected, "LoadNearestFarLiteral"); } 
+TEST_F(AssemblerMIPSTest, ImpossibleReordering) { + mips::MipsLabel label1, label2; + __ SetReorder(true); + + __ B(&label1); // No preceding or target instruction for the delay slot. + + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bind(&label1); + __ B(&label1); // The preceding label prevents moving Addu into the delay slot. + __ B(&label1); // No preceding or target instruction for the delay slot. + + __ Addu(mips::T0, mips::T1, mips::T2); + __ Beqz(mips::T0, &label1); // T0 dependency. + + __ Or(mips::T1, mips::T2, mips::T3); + __ Bne(mips::T2, mips::T1, &label1); // T1 dependency. + + __ And(mips::T0, mips::T1, mips::T2); + __ Blt(mips::T1, mips::T0, &label1); // T0 dependency. + + __ Xor(mips::AT, mips::T0, mips::T1); + __ Bge(mips::T1, mips::T0, &label1); // AT dependency. + + __ Subu(mips::T0, mips::T1, mips::AT); + __ Bltu(mips::T1, mips::T0, &label1); // AT dependency. + + __ ColtS(1, mips::F2, mips::F4); + __ Bc1t(1, &label1); // cc1 dependency. + + __ Move(mips::T0, mips::RA); + __ Bal(&label1); // RA dependency. + + __ Lw(mips::RA, mips::T0, 0); + __ Bal(&label1); // RA dependency. + + __ LlR2(mips::T9, mips::T0, 0); + __ Jalr(mips::T9); // T9 dependency. + + __ Sw(mips::RA, mips::T0, 0); + __ Jalr(mips::T9); // RA dependency. + + __ Lw(mips::T1, mips::T0, 0); + __ Jalr(mips::T1, mips::T9); // T1 dependency. + + __ ScR2(mips::T9, mips::T0, 0); + __ Jr(mips::T9); // T9 dependency. + + __ Bind(&label2); + + __ Bnez(mips::T0, &label2); // No preceding instruction for the delay slot. + + __ Bgeu(mips::T1, mips::T0, &label2); // No preceding instruction for the delay slot. + + __ Bc1f(2, &label2); // No preceding instruction for the delay slot. + + __ Bal(&label2); // No preceding instruction for the delay slot. + + __ Jalr(mips::T9); // No preceding instruction for the delay slot. + + __ Addu(mips::T0, mips::T1, mips::T2); + __ CodePosition(); // Drops the delay slot candidate (the last instruction). 
+ __ Beq(mips::T1, mips::T2, &label2); // No preceding or target instruction for the delay slot. + + std::string expected = + ".set noreorder\n" + "b 1f\n" + "nop\n" + + "addu $t0, $t1, $t2\n" + "1:\n" + "b 1b\n" + "nop\n" + "b 1b\n" + "nop\n" + + "addu $t0, $t1, $t2\n" + "beq $zero, $t0, 1b\n" + "nop\n" + + "or $t1, $t2, $t3\n" + "bne $t2, $t1, 1b\n" + "nop\n" + + "and $t0, $t1, $t2\n" + "slt $at, $t1, $t0\n" + "bne $zero, $at, 1b\n" + "nop\n" + + "xor $at, $t0, $t1\n" + "slt $at, $t1, $t0\n" + "beq $zero, $at, 1b\n" + "nop\n" + + "subu $t0, $t1, $at\n" + "sltu $at, $t1, $t0\n" + "bne $zero, $at, 1b\n" + "nop\n" + + "c.olt.s $fcc1, $f2, $f4\n" + "bc1t $fcc1, 1b\n" + "nop\n" + + "or $t0, $ra, $zero\n" + "bal 1b\n" + "nop\n" + + "lw $ra, 0($t0)\n" + "bal 1b\n" + "nop\n" + + "ll $t9, 0($t0)\n" + "jalr $t9\n" + "nop\n" + + "sw $ra, 0($t0)\n" + "jalr $t9\n" + "nop\n" + + "lw $t1, 0($t0)\n" + "jalr $t1, $t9\n" + "nop\n" + + "sc $t9, 0($t0)\n" + "jalr $zero, $t9\n" + "nop\n" + + "2:\n" + + "bne $zero, $t0, 2b\n" + "nop\n" + + "sltu $at, $t1, $t0\n" + "beq $zero, $at, 2b\n" + "nop\n" + + "bc1f $fcc2, 2b\n" + "nop\n" + + "bal 2b\n" + "nop\n" + + "jalr $t9\n" + "nop\n" + + "addu $t0, $t1, $t2\n" + "beq $t1, $t2, 2b\n" + "nop\n"; + DriverStr(expected, "ImpossibleReordering"); +} + +TEST_F(AssemblerMIPSTest, Reordering) { + mips::MipsLabel label1, label2; + __ SetReorder(true); + + __ Bind(&label1); + __ Bind(&label2); + + __ Addu(mips::T0, mips::T1, mips::T2); + __ Beqz(mips::T1, &label1); + + __ Or(mips::T1, mips::T2, mips::T3); + __ Bne(mips::T2, mips::T3, &label1); + + __ And(mips::T0, mips::T1, mips::T2); + __ Blt(mips::T1, mips::T2, &label1); + + __ Xor(mips::T2, mips::T0, mips::T1); + __ Bge(mips::T1, mips::T0, &label1); + + __ Subu(mips::T2, mips::T1, mips::T0); + __ Bltu(mips::T1, mips::T0, &label1); + + __ ColtS(0, mips::F2, mips::F4); + __ Bc1t(1, &label1); + + __ Move(mips::T0, mips::T1); + __ Bal(&label1); + + __ LlR2(mips::T1, mips::T0, 0); + __ Jalr(mips::T9); 
+ + __ ScR2(mips::T1, mips::T0, 0); + __ Jr(mips::T9); + + std::string expected = + ".set noreorder\n" + "1:\n" + + "beq $zero, $t1, 1b\n" + "addu $t0, $t1, $t2\n" + + "bne $t2, $t3, 1b\n" + "or $t1, $t2, $t3\n" + + "slt $at, $t1, $t2\n" + "bne $zero, $at, 1b\n" + "and $t0, $t1, $t2\n" + + "slt $at, $t1, $t0\n" + "beq $zero, $at, 1b\n" + "xor $t2, $t0, $t1\n" + + "sltu $at, $t1, $t0\n" + "bne $zero, $at, 1b\n" + "subu $t2, $t1, $t0\n" + + "bc1t $fcc1, 1b\n" + "c.olt.s $fcc0, $f2, $f4\n" + + "bal 1b\n" + "or $t0, $t1, $zero\n" + + "jalr $t9\n" + "ll $t1, 0($t0)\n" + + "jalr $zero, $t9\n" + "sc $t1, 0($t0)\n"; + DriverStr(expected, "Reordering"); +} + +TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) { + mips::MipsLabel label1, label2, label3, label4, label5, label6; + __ SetReorder(true); + + __ B(&label1); + __ Bind(&label1); + __ Addu(mips::T0, mips::T1, mips::T2); + + __ Bind(&label2); + __ Xor(mips::T0, mips::T1, mips::T2); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bind(&label3); // Prevents reordering ADDU above with B below. + __ B(&label2); + + __ B(&label4); + __ Bind(&label4); + __ Addu(mips::T0, mips::T1, mips::T2); + __ CodePosition(); // Prevents absorbing ADDU above. + + __ B(&label5); + __ Bind(&label5); + __ Addu(mips::T0, mips::T1, mips::T2); + __ Bind(&label6); + __ CodePosition(); // Even across Bind(), CodePosition() prevents absorbing the ADDU above. 
+ + std::string expected = + ".set noreorder\n" + "b 1f\n" + "addu $t0, $t1, $t2\n" + "addu $t0, $t1, $t2\n" + "1:\n" + + "xor $t0, $t1, $t2\n" + "2:\n" + "addu $t0, $t1, $t2\n" + "b 2b\n" + "xor $t0, $t1, $t2\n" + + "b 4f\n" + "nop\n" + "4:\n" + "addu $t0, $t1, $t2\n" + + "b 5f\n" + "nop\n" + "5:\n" + "addu $t0, $t1, $t2\n"; + DriverStr(expected, "AbsorbTargetInstruction"); +} + +TEST_F(AssemblerMIPSTest, SetReorder) { + mips::MipsLabel label1, label2, label3, label4, label5, label6; + + __ SetReorder(true); + __ Bind(&label1); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label1); + __ B(&label5); + __ B(&label6); + + __ SetReorder(false); + __ Bind(&label2); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label2); + __ B(&label5); + __ B(&label6); + + __ SetReorder(true); + __ Bind(&label3); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label3); + __ B(&label5); + __ B(&label6); + + __ SetReorder(false); + __ Bind(&label4); + __ Addu(mips::T0, mips::T1, mips::T2); + __ B(&label4); + __ B(&label5); + __ B(&label6); + + __ SetReorder(true); + __ Bind(&label5); + __ Subu(mips::T0, mips::T1, mips::T2); + + __ SetReorder(false); + __ Bind(&label6); + __ Xor(mips::T0, mips::T1, mips::T2); + + std::string expected = + ".set noreorder\n" + "1:\n" + "b 1b\n" + "addu $t0, $t1, $t2\n" + "b 55f\n" + "subu $t0, $t1, $t2\n" + "b 6f\n" + "nop\n" + + "2:\n" + "addu $t0, $t1, $t2\n" + "b 2b\n" + "nop\n" + "b 5f\n" + "nop\n" + "b 6f\n" + "nop\n" + + "3:\n" + "b 3b\n" + "addu $t0, $t1, $t2\n" + "b 55f\n" + "subu $t0, $t1, $t2\n" + "b 6f\n" + "nop\n" + + "4:\n" + "addu $t0, $t1, $t2\n" + "b 4b\n" + "nop\n" + "b 5f\n" + "nop\n" + "b 6f\n" + "nop\n" + + "5:\n" + "subu $t0, $t1, $t2\n" + "55:\n" + "6:\n" + "xor $t0, $t1, $t2\n"; + DriverStr(expected, "SetReorder"); +} + +TEST_F(AssemblerMIPSTest, LongBranchReorder) { + mips::MipsLabel label; + __ SetReorder(true); + __ Subu(mips::T0, mips::T1, mips::T2); + __ B(&label); + constexpr uint32_t kAdduCount1 = (1u << 15) + 1; + for 
(size_t i = 0; i != kAdduCount1; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Bind(&label); + constexpr uint32_t kAdduCount2 = (1u << 15) + 1; + for (size_t i = 0; i != kAdduCount2; ++i) { + __ Addu(mips::ZERO, mips::ZERO, mips::ZERO); + } + __ Subu(mips::T0, mips::T1, mips::T2); + __ B(&label); + + // Account for 5 extra instructions: ori, addu, lw, jalr, addiu. + uint32_t offset_forward = (kAdduCount1 + 5) * sizeof(uint32_t); + // Account for 5 extra instructions: subu, addiu, sw, nal, lui. + uint32_t offset_back = -(kAdduCount1 + 5) * sizeof(uint32_t); + + std::ostringstream oss; + oss << + ".set noreorder\n" + "subu $t0, $t1, $t2\n" + "addiu $sp, $sp, -4\n" + "sw $ra, 0($sp)\n" + "bltzal $zero, .+4\n" + "lui $at, 0x" << std::hex << High16Bits(offset_forward) << "\n" + "ori $at, $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n" + "addu $at, $at, $ra\n" + "lw $ra, 0($sp)\n" + "jalr $zero, $at\n" + "addiu $sp, $sp, 4\n" << + RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") << + RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") << + "subu $t0, $t1, $t2\n" + "addiu $sp, $sp, -4\n" + "sw $ra, 0($sp)\n" + "bltzal $zero, .+4\n" + "lui $at, 0x" << std::hex << High16Bits(offset_back) << "\n" + "ori $at, $at, 0x" << std::hex << Low16Bits(offset_back) << "\n" + "addu $at, $at, $ra\n" + "lw $ra, 0($sp)\n" + "jalr $zero, $at\n" + "addiu $sp, $sp, 4\n"; + std::string expected = oss.str(); + DriverStr(expected, "LongBranchReorder"); +} + #undef __ } // namespace art |